7 months ago · 47014b71f9
--- a/src/detection/detection_lidar_transfusion/.gitignore
+++ b/src/detection/detection_lidar_transfusion/.gitignore
@@ -0,0 +1,73 @@
 
															+# This file is used to ignore files which are generated
														
 
															+# ----------------------------------------------------------------------------
														
 
															+
														
 
															+*~
														
 
															+*.autosave
														
 
															+*.a
														
 
															+*.core
														
 
															+*.moc
														
 
															+*.o
														
 
															+*.obj
														
 
															+*.orig
														
 
															+*.rej
														
 
															+*.so
														
 
															+*.so.*
														
 
															+*_pch.h.cpp
														
 
															+*_resource.rc
														
 
															+*.qm
														
 
															+.#*
														
 
															+*.*#
														
 
															+core
														
 
															+!core/
														
 
															+tags
														
 
															+.DS_Store
														
 
															+.directory
														
 
															+*.debug
														
 
															+Makefile*
														
 
															+*.prl
														
 
															+*.app
														
 
															+moc_*.cpp
														
 
															+ui_*.h
														
 
															+qrc_*.cpp
														
 
															+Thumbs.db
														
 
															+*.res
														
 
															+*.rc
														
 
															+/.qmake.cache
														
 
															+/.qmake.stash
														
 
															+
														
 
															+# qtcreator generated files
														
 
															+*.pro.user*
														
 
															+
														
 
															+# xemacs temporary files
														
 
															+*.flc
														
 
															+
														
 
															+# Vim temporary files
														
 
															+.*.swp
														
 
															+
														
 
															+# Visual Studio generated files
														
 
															+*.ib_pdb_index
														
 
															+*.idb
														
 
															+*.ilk
														
 
															+*.pdb
														
 
															+*.sln
														
 
															+*.suo
														
 
															+*.vcproj
														
 
															+*vcproj.*.*.user
														
 
															+*.ncb
														
 
															+*.sdf
														
 
															+*.opensdf
														
 
															+*.vcxproj
														
 
															+*vcxproj.*
														
 
															+
														
 
															+# MinGW generated files
														
 
															+*.Debug
														
 
															+*.Release
														
 
															+
														
 
															+# Python byte code
														
 
															+*.pyc
														
 
															+
														
 
															+# Binaries
														
 
															+# --------
														
 
															+*.dll
														
 
															+*.exe
														
 
															+
														
--- a/src/detection/detection_lidar_transfusion/cuda_utils.hpp
+++ b/src/detection/detection_lidar_transfusion/cuda_utils.hpp
@@ -0,0 +1,126 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+/*
														
 
															+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
														
 
															+ *
														
 
															+ * Permission is hereby granted, free of charge, to any person obtaining a
														
 
															+ * copy of this software and associated documentation files (the "Software"),
														
 
															+ * to deal in the Software without restriction, including without limitation
														
 
															+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
														
 
															+ * and/or sell copies of the Software, and to permit persons to whom the
														
 
															+ * Software is furnished to do so, subject to the following conditions:
														
 
															+ *
														
 
															+ * The above copyright notice and this permission notice shall be included in
														
 
															+ * all copies or substantial portions of the Software.
														
 
															+ *
														
 
															+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
														
 
															+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
														
 
															+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
														
 
															+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
														
 
															+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
														
 
															+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
														
 
															+ * DEALINGS IN THE SOFTWARE.
														
 
															+ */
														
 
															+
														
 
															+/*
														
 
															+ * This code is licensed under CC0 1.0 Universal (Public Domain).
														
 
															+ * You can use this without any limitation.
														
 
															+ * https://creativecommons.org/publicdomain/zero/1.0/deed.en
														
 
															+ */
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__CUDA_UTILS_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__CUDA_UTILS_HPP_
														
 
															+
														
 
															#include <cuda_runtime_api.h>

															#include <cstddef>
															#include <cstdint>
															#include <cstdio>
															#include <memory>
															#include <sstream>
															#include <stdexcept>
															#include <type_traits>
														
 
															+
														
 
															+#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__))
														
 
															+
														
 
															+namespace cuda
														
 
															+{
														
 
															+inline void check_error(const ::cudaError_t e, const char * f, int n)
														
 
															+{
														
 
															+  if (e != ::cudaSuccess) {
														
 
															+    ::std::stringstream s;
														
 
															+    s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": "
														
 
															+      << ::cudaGetErrorString(e);
														
 
															+    throw ::std::runtime_error{s.str()};
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+struct deleter
														
 
															+{
														
 
															+  void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); }
														
 
															+};
														
 
															+
														
 
															+template <typename T>
														
 
															+using unique_ptr = ::std::unique_ptr<T, deleter>;
														
 
															+
														
 
															+template <typename T>
														
 
															+typename ::std::enable_if<::std::is_array<T>::value, cuda::unique_ptr<T>>::type make_unique(
														
 
															+  const ::std::size_t n)
														
 
															+{
														
 
															+  using U = typename ::std::remove_extent<T>::type;
														
 
															+  U * p;
														
 
															+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(U) * n));
														
 
															+  return cuda::unique_ptr<T>{p};
														
 
															+}
														
 
															+
														
 
															+template <typename T>
														
 
															+cuda::unique_ptr<T> make_unique()
														
 
															+{
														
 
															+  T * p;
														
 
															+  CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast<void **>(&p), sizeof(T)));
														
 
															+  return cuda::unique_ptr<T>{p};
														
 
															+}
														
 
															+
														
 
															/// Granularity, in bytes, to which every chunk size handed out by get_next_ptr is rounded up.
															constexpr size_t CUDA_ALIGN = 256;

															/// Size in bytes of num_elem objects of type T, rounded up to the next multiple of CUDA_ALIGN.
															/// @param num_elem number of elements
															/// @return num_elem * sizeof(T) if that is already a multiple of CUDA_ALIGN, otherwise the next
															///         larger multiple
															template <typename T>
															inline size_t get_size_aligned(size_t num_elem)
															{
															  const size_t size = num_elem * sizeof(T);
															  const size_t remainder = size % CUDA_ALIGN;
															  return remainder == 0 ? size : size + (CUDA_ALIGN - remainder);
															}

															/// Hand out the next chunk of a workspace buffer for num_elem objects of type T.
															/// @param num_elem number of elements requested
															/// @param workspace in/out: start of the unused part of the buffer; advanced past the chunk
															/// @param workspace_size in/out: unused bytes left; reduced by the rounded-up chunk size
															/// @return the value workspace had on entry, typed as T *
															/// @throws std::runtime_error when the rounded-up chunk size exceeds workspace_size; workspace
															///         and workspace_size are left unchanged in that case
															template <typename T>
															inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size)
															{
															  const size_t size = get_size_aligned<T>(num_elem);
															  if (size > workspace_size) {
															    throw ::std::runtime_error("Workspace is too small!");
															  }

															  T * const ptr = static_cast<T *>(workspace);
															  // Advance byte-wise through a char pointer; no integer round trip (and no <cstdint>) needed.
															  workspace = static_cast<char *>(workspace) + size;
															  workspace_size -= size;
															  return ptr;
															}
														
 
															+
														
 
															+template <typename T>
														
 
															+void clear_async(T * ptr, size_t num_elem, cudaStream_t stream)
														
 
															+{
														
 
															+  CHECK_CUDA_ERROR(::cudaMemsetAsync(ptr, 0, sizeof(T) * num_elem, stream));
														
 
															+}
														
 
															+
														
 
															+}  // namespace cuda
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__CUDA_UTILS_HPP_
														
--- a/src/detection/detection_lidar_transfusion/detection_lidar_transfusion.pro
+++ b/src/detection/detection_lidar_transfusion/detection_lidar_transfusion.pro
@@ -0,0 +1,137 @@
 
															+QT -= gui
														
 
															+
														
 
															+CONFIG += c++1z console
														
 
															+CONFIG -= app_bundle
														
 
															+
														
 
															+# You can make your code fail to compile if it uses deprecated APIs.
														
 
															+# In order to do so, uncomment the following line.
														
 
															+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
														
 
															+
														
 
															+SOURCES += \
														
 
															+        main.cpp \
														
 
															+        network/network_trt.cpp \
														
 
															+        postprocess/non_maximum_suppression.cpp \
														
 
															+        simple_profiler.cpp \
														
 
															+        tensorrt_common.cpp
														
 
															+
														
 
															+# Default rules for deployment.
														
 
															+qnx: target.path = /tmp/$${TARGET}/bin
														
 
															+else: unix:!android: target.path = /opt/$${TARGET}/bin
														
 
															+!isEmpty(target.path): INSTALLS += target
														
 
															+
														
 
															+DISTFILES += \
														
 
															+    postprocess/circle_nms_kernel.cu \
														
 
															+    postprocess/postprocess_kernel.cu
														
 
															+
														
 
															+HEADERS += \
														
 
															+    cuda_utils.hpp \
														
 
															+    include/network/network_trt.hpp \
														
 
															+    include/postprocess/circle_nms_kernel.hpp \
														
 
															+    include/postprocess/non_maximum_suppression.hpp \
														
 
															+    include/postprocess/postprocess_kernel.hpp \
														
 
															+    include/preprocess/pointcloud_densification.hpp \
														
 
															+    include/preprocess/preprocess_kernel.hpp \
														
 
															+    include/preprocess/voxel_generator.hpp \
														
 
															+    include/tensorrt_common/logger.hpp \
														
 
															+    include/tensorrt_common/simple_profiler.hpp \
														
 
															+    include/tensorrt_common/tensorrt_common.hpp \
														
 
															+    utils.hpp
														
 
															+
														
 
															+INCLUDEPATH += /usr/include/eigen3
														
 
															+
														
 
															+INCLUDEPATH += $$PWD/include
														
 
															+
														
 
															+INCLUDEPATH += $$CUDA_DIR/include
														
 
															+
														
 
															+LIBS += -L"/usr/local/lib" \
														
 
															+        -L"/usr/local/cuda/lib64" \
														
 
															+        -lcudart \
														
 
															+        -lcufft
														
 
															+
														
 
															+CUDA_SOURCES += \
														
 
															+    postprocess/circle_nms_kernel.cu \
														
 
															+    postprocess/postprocess_kernel.cu
														
 
															+
														
 
															+CUDA_SDK = "/usr/local/cuda/"   # CUDA SDK path
														
 
															+
														
 
															+CUDA_DIR = "/usr/local/cuda/"            # CUDA toolkit path
														
 
															+
														
 
															+SYSTEM_NAME = linux         # your own system environment: 'Win32', 'x64', or 'Win64'
														
 
															+
														
 
															+SYSTEM_TYPE = 64           # operating system bitness: '32' or '64'
														
 
															+
														
 
															+CUDA_ARCH = sm_72 # xavier sm_72          # CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'
														
 
															+
														
 
															+NVCC_OPTIONS = --use_fast_math --compiler-options "-fPIC"
														
 
															+
														
 
															+
														
 
															+# include paths
														
 
															+
														
 
															+INCLUDEPATH += $$CUDA_DIR/include
														
 
															+INCLUDEPATH += $$PWD
														
 
															+#INCLUDEPATH += /usr/local/cuda-10.0/targets/aarch64-linux/include/crt
														
 
															+
														
 
															+# library directories
														
 
															+
														
 
															+QMAKE_LIBDIR += $$CUDA_DIR/lib/
														
 
															+
														
 
															+CUDA_OBJECTS_DIR = ./
														
 
															+
														
 
															+# The following library conflicts with something in Cuda
														
 
															+
														
 
															+#QMAKE_LFLAGS_RELEASE = /NODEFAULTLIB:msvcrt.lib
														
 
															+
														
 
															+#QMAKE_LFLAGS_DEBUG   = /NODEFAULTLIB:msvcrtd.lib
														
 
															+
														
 
															+# Add the necessary libraries
														
 
															+
														
 
															+CUDA_LIBS =  cudart cufft
														
 
															+
														
 
															+# The following makes sure all path names (which often include spaces) are put between quotation marks
														
 
															+
														
 
															+CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
														
 
															+
														
 
															+NVCC_LIBS = $$join(CUDA_LIBS,' -l','-l', '')
														
 
															+
														
 
															+#LIBS += $$join(CUDA_LIBS,'.so ', '', '.so')
														
 
															+
														
 
															+# Configuration of the Cuda compiler
														
 
															+
														
 
															+CONFIG(debug, debug|release) {
														
 
															+
														
 
															+    # Debug mode
														
 
															+
														
 
															+    cuda_d.input = CUDA_SOURCES
														
 
															+
														
 
															+    cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
														
 
															+
														
 
															+    cuda_d.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
														
 
															+
														
 
															+    cuda_d.dependency_type = TYPE_C
														
 
															+
														
 
															+    QMAKE_EXTRA_COMPILERS += cuda_d
														
 
															+
														
 
															+}
														
 
															+
														
 
															+else {
														
 
															+
														
 
															+    # Release mode
														
 
															+
														
 
															+    cuda.input = CUDA_SOURCES
														
 
															+
														
 
															+    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}_cuda.o
														
 
															+
														
 
															+    cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -O3 -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
														
 
															+
														
 
															+    cuda.dependency_type = TYPE_C
														
 
															+
														
 
															+    QMAKE_EXTRA_COMPILERS += cuda
														
 
															+
														
 
															+}
														
 
															+
														
 
															+
														
 
															+# libraries to link
														
 
															+
														
 
															+
														
 
															+
														
 
															+LIBS += -lrt -ldl -lnvinfer -lcudnn  -lcudart -lnvparsers -lnvonnxparser -lnvinfer_plugin -lstdc++fs
														
--- a/src/detection/detection_lidar_transfusion/include/network/network_trt.hpp
+++ b/src/detection/detection_lidar_transfusion/include/network/network_trt.hpp
@@ -0,0 +1,87 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__NETWORK__NETWORK_TRT_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__NETWORK__NETWORK_TRT_HPP_
														
 
															+
														
 
															+#include "transfusion_config.hpp"
														
 
															+#include "utils.hpp"
														
 
															+
														
 
															+#include <tensorrt_common/tensorrt_common.hpp>
														
 
															+
														
 
															+#include <NvInfer.h>
														
 
															+
														
 
															+#include <array>
														
 
															+#include <iostream>
														
 
															+#include <memory>
														
 
															+#include <string>
														
 
															+#include <unordered_map>
														
 
															+#include <vector>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+struct ProfileDimension
														
 
															+{
														
 
															+  nvinfer1::Dims min;
														
 
															+  nvinfer1::Dims opt;
														
 
															+  nvinfer1::Dims max;
														
 
															+
														
 
															+  bool operator!=(const ProfileDimension & rhs) const
														
 
															+  {
														
 
															+    return min.nbDims != rhs.min.nbDims || opt.nbDims != rhs.opt.nbDims ||
														
 
															+           max.nbDims != rhs.max.nbDims || !std::equal(min.d, min.d + min.nbDims, rhs.min.d) ||
														
 
															+           !std::equal(opt.d, opt.d + opt.nbDims, rhs.opt.d) ||
														
 
															+           !std::equal(max.d, max.d + max.nbDims, rhs.max.d);
														
 
															+  }
														
 
															+};
														
 
															+
														
 
															+class NetworkTRT
														
 
															+{
														
 
															+public:
														
 
															+  explicit NetworkTRT(const TransfusionConfig & config);
														
 
															+  ~NetworkTRT();
														
 
															+
														
 
															+  bool init(
														
 
															+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision);
														
 
															+  const char * getTensorName(NetworkIO name);
														
 
															+
														
 
															+  tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
														
 
															+  tensorrt_common::TrtUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
														
 
															+
														
 
															+private:
														
 
															+  bool parseONNX(
														
 
															+    const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
														
 
															+    size_t workspace_size = (1ULL << 30));
														
 
															+  bool saveEngine(const std::string & engine_path);
														
 
															+  bool loadEngine(const std::string & engine_path);
														
 
															+  bool createContext();
														
 
															+  bool setProfile(
														
 
															+    nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
														
 
															+    nvinfer1::IBuilderConfig & config);
														
 
															+  bool validateNetworkIO();
														
 
															+  nvinfer1::Dims validateTensorShape(NetworkIO name, const std::vector<int> shape);
														
 
															+
														
 
															+  tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime> runtime_{nullptr};
														
 
															+  tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory> plan_{nullptr};
														
 
															+  tensorrt_common::Logger logger_;
														
 
															+  TransfusionConfig config_;
														
 
															+  std::vector<const char *> tensors_names_;
														
 
															+
														
 
															+  std::array<ProfileDimension, 3> in_profile_dims_;
														
 
															+};
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__NETWORK__NETWORK_TRT_HPP_
														
--- a/src/detection/detection_lidar_transfusion/include/postprocess/circle_nms_kernel.hpp
+++ b/src/detection/detection_lidar_transfusion/include/postprocess/circle_nms_kernel.hpp
@@ -0,0 +1,32 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
														
 
															+
														
 
															+#include "utils.hpp"
														
 
															+
														
 
															+#include <thrust/device_vector.h>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+// Non-maximum suppression (NMS) uses the distance on the xy plane instead of
														
 
															+// intersection over union (IoU) to suppress overlapped objects.
														
 
															+std::size_t circleNMS(
														
 
															+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
														
 
															+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream);
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__POSTPROCESS__CIRCLE_NMS_KERNEL_HPP_
														
--- a/src/detection/detection_lidar_transfusion/include/postprocess/non_maximum_suppression.hpp
+++ b/src/detection/detection_lidar_transfusion/include/postprocess/non_maximum_suppression.hpp
@@ -0,0 +1,82 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
														
 
															+
														
 
															+//#include "autoware/lidar_transfusion/ros_utils.hpp"
														
 
															+
														
 
															+#include <Eigen/Eigen>
														
 
															+
														
 
															+//#include <autoware_perception_msgs/msg/detected_object.hpp>
														
 
															+
														
 
															+#include <string>
														
 
															+#include <vector>
														
 
															+
														
 
															+uint8_t getSemanticType(const std::string & class_name);
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+//using autoware_perception_msgs::msg::DetectedObject;
														
 
															+
														
 
															/// Selectable suppression criteria. Only IoU_BEV is enabled; the other candidates are kept
															/// below as comments.
															enum class NMS_TYPE {
															  IoU_BEV
															  // IoU_3D
															  // Distance_2D
															  // Distance_3D
															};

															/// Parameter set handed to NonMaximumSuppression::setParameters.
															/// A default-constructed value has IoU_BEV, no class names and both thresholds at 0.0.
															struct NMSParams
															{
															  NMS_TYPE nms_type_ = NMS_TYPE::IoU_BEV;
															  std::vector<std::string> target_class_names_;
															  double search_distance_2d_ = 0.0;
															  double iou_threshold_ = 0.0;
															  // double distance_threshold_{};
															};
														
 
															+
														
 
															+std::vector<bool> classNamesToBooleanMask(const std::vector<std::string> & class_names)
														
 
															+{
														
 
															+  std::vector<bool> mask;
														
 
															+  constexpr std::size_t num_object_classification = 8;
														
 
															+  mask.resize(num_object_classification);
														
 
															+  for (const auto & class_name : class_names) {
														
 
															+    const auto semantic_type = getSemanticType(class_name);
														
 
															+    mask.at(semantic_type) = true;
														
 
															+  }
														
 
															+
														
 
															+  return mask;
														
 
															+}
														
 
															+
														
 
															+class NonMaximumSuppression
														
 
															+{
														
 
															+public:
														
 
															+  void setParameters(const NMSParams &);
														
 
															+
														
 
															+//  std::vector<DetectedObject> apply(const std::vector<DetectedObject> &);
														
 
															+
														
 
															+private:
														
 
															+  bool isTargetLabel(const std::uint8_t);
														
 
															+
														
 
															+//  bool isTargetPairObject(const DetectedObject &, const DetectedObject &);
														
 
															+
														
 
															+//  Eigen::MatrixXd generateIoUMatrix(const std::vector<DetectedObject> &);
														
 
															+
														
 
															+  NMSParams params_{};
														
 
															+  std::vector<bool> target_class_mask_{};
														
 
															+};
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__POSTPROCESS__NON_MAXIMUM_SUPPRESSION_HPP_
														
--- a/src/detection/detection_lidar_transfusion/include/postprocess/postprocess_kernel.hpp
+++ b/src/detection/detection_lidar_transfusion/include/postprocess/postprocess_kernel.hpp
@@ -0,0 +1,47 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
														
 
															+
														
 
															+#include "transfusion_config.hpp"
														
 
															+#include "utils.hpp"
														
 
															+
														
 
															+#include <cuda.h>
														
 
															+#include <cuda_runtime_api.h>
														
 
															+
														
 
															+#include <vector>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+class PostprocessCuda  // post-processing interface; members are defined outside this header
														
 
															+{
														
 
															+public:
														
 
															+  explicit PostprocessCuda(const TransfusionConfig & config, cudaStream_t & stream);
														
 
															+
														
 
															+  cudaError_t generateDetectedBoxes3D_launch(  // returns a CUDA status code
														
 
															+    const float * cls_output, const float * box_output, const float * dir_cls_output,
														
 
															+    std::vector<Box3D> & det_boxes3d, cudaStream_t stream);  // det_boxes3d: presumably output
														
 
															+
														
 
															+private:
														
 
															+  TransfusionConfig config_;  // held by value
														
 
															+  cudaStream_t stream_;  // stored as a value although the constructor takes a reference
														
 
															+  cudaStream_t stream_event_;  // NOTE(review): no initializer here; confirm the ctor sets it
														
 
															+  cudaEvent_t start_, stop_;  // NOTE(review): no initializers here either; confirm before use
														
 
															+};
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__POSTPROCESS__POSTPROCESS_KERNEL_HPP_
														
--- a/src/detection/detection_lidar_transfusion/include/preprocess/pointcloud_densification.hpp
+++ b/src/detection/detection_lidar_transfusion/include/preprocess/pointcloud_densification.hpp
@@ -0,0 +1,101 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
														
 
															+
														
 
															+#include "autoware/lidar_transfusion/cuda_utils.hpp"
														
 
															+
														
 
															+#include <tf2_ros/buffer.h>
														
 
															+#include <tf2_ros/transform_listener.h>
														
 
															+#ifdef ROS_DISTRO_GALACTIC
														
 
															+#include <tf2_sensor_msgs/tf2_sensor_msgs.h>
														
 
															+#else
														
 
															+#include <tf2_sensor_msgs/tf2_sensor_msgs.hpp>
														
 
															+#endif
														
 
															+
														
 
															+#include <list>
														
 
															+#include <string>
														
 
															+#include <utility>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+class DensificationParam  // world frame id plus the number of clouds kept in the cache
														
 
															+{
														
 
															+public:
														
 
															+  DensificationParam(const std::string & world_frame_id, const unsigned int num_past_frames)
														
 
															+  : world_frame_id_(world_frame_id),  // plain copy: std::move on a const reference never moves
														
 
															+    pointcloud_cache_size_(num_past_frames + /*current frame*/ 1)
														
 
															+  {
														
 
															+  }
														
 
															+
														
 
															+  std::string world_frame_id() const { return world_frame_id_; }  // returns a copy
														
 
															+  unsigned int pointcloud_cache_size() const { return pointcloud_cache_size_; }
														
 
															+
														
 
															+private:
														
 
															+  std::string world_frame_id_;
														
 
															+  unsigned int pointcloud_cache_size_{1};  // the constructor sets it to num_past_frames + 1
														
 
															+};
														
 
															+
														
 
															+struct PointCloudWithTransform  // one cache entry: raw bytes, header, point count, transform
														
 
															+{
														
 
															+  cuda::unique_ptr<uint8_t[]> data_d{nullptr};  // null by default; presumably device memory
														
 
															+  std_msgs::msg::Header header;
														
 
															+  size_t num_points{0};
														
 
															+  Eigen::Affine3f affine_past2world;  // NOTE(review): no initializer; name suggests past->world
														
 
															+};
														
 
															+
														
 
															+class PointCloudDensification  // keeps point clouds and their transforms in a std::list cache
														
 
															+{
														
 
															+public:
														
 
															+  explicit PointCloudDensification(const DensificationParam & param, cudaStream_t & stream);
														
 
															+
														
 
															+  bool enqueuePointCloud(  // declared only; defined outside this header
														
 
															+    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
														
 
															+
														
 
															+  double getCurrentTimestamp() const { return current_timestamp_; }
														
 
															+  Eigen::Affine3f getAffineWorldToCurrent() const { return affine_world2current_; }
														
 
															+  std::list<PointCloudWithTransform>::iterator getPointCloudCacheIter()
														
 
															+  {
														
 
															+    return pointcloud_cache_.begin();  // iterator to the first cached entry
														
 
															+  }
														
 
															+  bool isCacheEnd(std::list<PointCloudWithTransform>::iterator iter)
														
 
															+  {
														
 
															+    return iter == pointcloud_cache_.end();
														
 
															+  }
														
 
															+  size_t getIdx(std::list<PointCloudWithTransform>::iterator iter)
														
 
															+  {
														
 
															+    return std::distance(pointcloud_cache_.begin(), iter);  // position counted from the front
														
 
															+  }
														
 
															+  size_t getCacheSize() const  // number of entries currently cached
														
 
															+  {
														
 
															+    return pointcloud_cache_.size();  // O(1) for std::list; no need to walk begin()..end()
														
 
															+  }
														
 
															+  unsigned int pointcloud_cache_size() const { return param_.pointcloud_cache_size(); }
														
 
															+
														
 
															+private:
														
 
															+  void enqueue(const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine);
														
 
															+  void dequeue();
														
 
															+
														
 
															+  DensificationParam param_;
														
 
															+  double current_timestamp_{0.0};
														
 
															+  Eigen::Affine3f affine_world2current_;  // NOTE(review): no initializer in this header
														
 
															+  std::list<PointCloudWithTransform> pointcloud_cache_;
														
 
															+  cudaStream_t stream_;
														
 
															+};
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__POINTCLOUD_DENSIFICATION_HPP_
														
--- a/src/detection/detection_lidar_transfusion/include/preprocess/preprocess_kernel.hpp
+++ b/src/detection/detection_lidar_transfusion/include/preprocess/preprocess_kernel.hpp
@@ -0,0 +1,73 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+/*
														
 
															+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
														
 
															+ * All rights reserved. SPDX-License-Identifier: Apache-2.0
														
 
															+ *
														
 
															+ * Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+ * you may not use this file except in compliance with the License.
														
 
															+ * You may obtain a copy of the License at
														
 
															+ *
														
 
															+ * http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
														
 
															+
														
 
															+#include "autoware/lidar_transfusion/cuda_utils.hpp"
														
 
															+#include "autoware/lidar_transfusion/transfusion_config.hpp"
														
 
															+#include "autoware/lidar_transfusion/utils.hpp"
														
 
															+
														
 
															+#include <cuda_runtime_api.h>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+class PreprocessCuda  // pre-processing interface; members are defined outside this header
														
 
															+{
														
 
															+public:
														
 
															+  PreprocessCuda(const TransfusionConfig & config, cudaStream_t & stream);
														
 
															+
														
 
															+  void generateVoxels(  // returns void, unlike the *_launch members below
														
 
															+    float * points, unsigned int points_size, unsigned int * pillar_num, float * voxel_features,
														
 
															+    unsigned int * voxel_num, unsigned int * voxel_idxs);
														
 
															+
														
 
															+  cudaError_t generateVoxels_random_launch(  // returns a CUDA status code
														
 
															+    float * points, unsigned int points_size, unsigned int * mask, float * voxels);
														
 
															+
														
 
															+  cudaError_t generateBaseFeatures_launch(  // returns a CUDA status code
														
 
															+    unsigned int * mask, float * voxels, unsigned int * pillar_num, float * voxel_features,
														
 
															+    unsigned int * voxel_num, unsigned int * voxel_idxs);
														
 
															+
														
 
															+  cudaError_t generateSweepPoints_launch(  // only input_data and transform point to const
														
 
															+    const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
														
 
															+    const float * transform, float * output_points);
														
 
															+
														
 
															+private:
														
 
															+  TransfusionConfig config_;
														
 
															+  cudaStream_t stream_;
														
 
															+  cuda::unique_ptr<unsigned int[]> mask_{nullptr};  // null by default
														
 
															+  cuda::unique_ptr<float[]> voxels_{nullptr};  // null by default
														
 
															+  unsigned int mask_size_;  // NOTE(review): no initializer here; confirm the ctor sets it
														
 
															+  unsigned int voxels_size_;  // NOTE(review): no initializer here; confirm the ctor sets it
														
 
															+};
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__PREPROCESS_KERNEL_HPP_
														
--- a/src/detection/detection_lidar_transfusion/include/preprocess/voxel_generator.hpp
+++ b/src/detection/detection_lidar_transfusion/include/preprocess/voxel_generator.hpp
@@ -0,0 +1,72 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
														
 
															+
														
 
															+#include "autoware/lidar_transfusion/cuda_utils.hpp"
														
 
															+#include "autoware/lidar_transfusion/preprocess/pointcloud_densification.hpp"
														
 
															+#include "autoware/lidar_transfusion/preprocess/preprocess_kernel.hpp"
														
 
															+#include "autoware/lidar_transfusion/ros_utils.hpp"
														
 
															+#include "autoware/lidar_transfusion/transfusion_config.hpp"
														
 
															+
														
 
															+#ifdef ROS_DISTRO_GALACTIC
														
 
															+#include <tf2_eigen/tf2_eigen.h>
														
 
															+#else
														
 
															+#include <tf2_eigen/tf2_eigen.hpp>
														
 
															+#endif
														
 
															+
														
 
															+#include <autoware_point_types/types.hpp>
														
 
															+
														
 
															+#include <sensor_msgs/msg/point_cloud2.hpp>
														
 
															+
														
 
															+#include <memory>
														
 
															+#include <string>
														
 
															+#include <tuple>
														
 
															+#include <vector>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+constexpr size_t AFF_MAT_SIZE = 16;  // element count of a 4x4 matrix
														
 
															+constexpr size_t MAX_CLOUD_STEP_SIZE = sizeof(autoware_point_types::PointXYZIRCAEDT);  // bytes
														
 
															+
														
 
															+class VoxelGenerator  // owns a PointCloudDensification and a PreprocessCuda via unique_ptr
														
 
															+{
														
 
															+public:
														
 
															+  explicit VoxelGenerator(
														
 
															+    const DensificationParam & densification_param, const TransfusionConfig & config,
														
 
															+    cudaStream_t & stream);
														
 
															+  std::size_t generateSweepPoints(  // presumably returns the number of points (verify)
														
 
															+    const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d);
														
 
															+  bool enqueuePointCloud(
														
 
															+    const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer);
														
 
															+  void initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg);
														
 
															+  std::tuple<const uint32_t, const uint8_t, const uint8_t> getFieldInfo(  // meanings not shown
														
 
															+    const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name);
														
 
															+
														
 
															+private:
														
 
															+  std::unique_ptr<PointCloudDensification> pd_ptr_{nullptr};  // null by default
														
 
															+  std::unique_ptr<PreprocessCuda> pre_ptr_{nullptr};  // null by default
														
 
															+  TransfusionConfig config_;
														
 
															+  cuda::unique_ptr<unsigned char[]> cloud_data_d_{nullptr};
														
 
															+  cuda::unique_ptr<float[]> affine_past2current_d_{nullptr};
														
 
															+  std::vector<float> points_;
														
 
															+  cudaStream_t stream_;
														
 
															+  CloudInfo cloud_info_;
														
 
															+  bool is_initialized_{false};  // presumably set once initCloudInfo() has run (verify)
														
 
															+};
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__PREPROCESS__VOXEL_GENERATOR_HPP_
														
--- a/src/detection/detection_lidar_transfusion/include/tensorrt_common/logger.hpp
+++ b/src/detection/detection_lidar_transfusion/include/tensorrt_common/logger.hpp
@@ -0,0 +1,552 @@
 
															+/*
														
 
															+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
														
 
															+ *
														
 
															+ * Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+ * you may not use this file except in compliance with the License.
														
 
															+ * You may obtain a copy of the License at
														
 
															+ *
														
 
															+ *     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+
														
 
															+#ifndef TENSORRT_COMMON__LOGGER_HPP_
														
 
															+#define TENSORRT_COMMON__LOGGER_HPP_
														
 
															+
														
 
															+#include "NvInferRuntimeCommon.h"
														
 
															+
														
 
															+#include <atomic>
														
 
															+#include <cassert>
														
 
															+#include <ctime>
														
 
															+#include <iomanip>
														
 
															+#include <iostream>
														
 
															+#include <ostream>
														
 
															+#include <sstream>
														
 
															+#include <string>
														
 
															+#include <thread>
														
 
															+
														
 
															+namespace tensorrt_common
														
 
															+{
														
 
															+using Severity = nvinfer1::ILogger::Severity;
														
 
															+
														
 
															+class LogStreamConsumerBuffer : public std::stringbuf  // writes buffered text to mOutput
														
 
															+{
														
 
															+public:
														
 
															+  LogStreamConsumerBuffer(std::ostream & stream, const std::string & prefix, bool shouldLog)
														
 
															+  : mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog)
														
 
															+  {
														
 
															+  }
														
 
															+
														
 
															+  // Carries over the prefix and log flag too; the destructor and putOutput() read them.
														
 
															+  LogStreamConsumerBuffer(LogStreamConsumerBuffer && other)
														
 
															+  : mOutput(other.mOutput), mPrefix(other.mPrefix), mShouldLog(other.mShouldLog)
														
 
															+  {  // the stringbuf base is default-constructed: text buffered in other is not transferred
														
 
															+  }
														
 
															+
														
 
															+  ~LogStreamConsumerBuffer()
														
 
															+  {  // pbase() != pptr() means characters are still buffered; emit them before destruction
														
 
															+    if (pbase() != pptr()) {
														
 
															+      putOutput();
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  // synchronizes the stream buffer and returns 0 on success
														
 
															+  // synchronizing the stream buffer consists of inserting the buffer contents into the stream,
														
 
															+  // resetting the buffer and flushing the stream
														
 
															+  int sync() override
														
 
															+  {
														
 
															+    putOutput();
														
 
															+    return 0;
														
 
															+  }
														
 
															+
														
 
															+  void putOutput()
														
 
															+  {
														
 
															+    if (mShouldLog) {
														
 
															+      // prepend timestamp
														
 
															+      // std::time_t timestamp = std::time(nullptr);
														
 
															+      // tm* tm_local = std::localtime(&timestamp);
														
 
															+      mOutput << mPrefix << str();
														
 
															+      // set the buffer to empty
														
 
															+      str("");
														
 
															+      // flush the stream
														
 
															+      mOutput.flush();
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; }
														
 
															+
														
 
															+private:
														
 
															+  std::ostream & mOutput;
														
 
															+  std::string mPrefix;
														
 
															+  bool mShouldLog;
														
 
															+};
														
 
															+
														
 
															+//!
														
 
															+//! \class LogStreamConsumerBase
														
 
															+//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in
														
 
															+//! LogStreamConsumer
														
 
															+//!
														
 
															+class LogStreamConsumerBase  // exists so mBuffer is built before a derived class's ostream base
														
 
															+{
														
 
															+public:
														
 
															+  LogStreamConsumerBase(std::ostream & stream, const std::string & prefix, bool shouldLog)
														
 
															+  : mBuffer(stream, prefix, shouldLog)
														
 
															+  {
														
 
															+  }
														
 
															+
														
 
															+protected:
														
 
															+  LogStreamConsumerBuffer mBuffer;  // protected: LogStreamConsumer passes its address to ostream
														
 
															+};
														
 
															+
														
 
															+//!
														
 
															+//! \class LogStreamConsumer
														
 
															+//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
														
 
															+//!  Order of base classes is LogStreamConsumerBase and then std::ostream.
														
 
															+//!  This is because the LogStreamConsumerBase class is used to initialize the
														
 
															+//!  LogStreamConsumerBuffer member field in LogStreamConsumer and then the address of the buffer is
														
 
															+//!  passed to std::ostream. This is necessary to prevent the address of an uninitialized buffer
														
 
															+//!  from being passed to std::ostream. Please do not change the order of the parent classes.
														
 
															+//!
														
 
															+class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
														
 
															+{
														
 
															+public:
														
 
															+  //! \brief Creates a LogStreamConsumer which logs messages with level severity.
														
 
															+  //!  Reportable severity determines if the messages are severe enough to be logged.
														
 
															+  LogStreamConsumer(Severity reportableSeverity, Severity severity)
														
 
															+  : LogStreamConsumerBase(
														
 
															+      severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity),
														
 
															+    std::ostream(&mBuffer)  // links the stream buffer with the stream
														
 
															+    ,
														
 
															+    mShouldLog(severity <= reportableSeverity),
														
 
															+    mSeverity(severity)
														
 
															+  {
														
 
															+  }
														
 
															+
														
 
															+  LogStreamConsumer(LogStreamConsumer && other)  // builds a fresh buffer; buffered text stays
														
 
															+  : LogStreamConsumerBase(
														
 
															+      severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog),
														
 
															+    std::ostream(&mBuffer)  // links the stream buffer with the stream
														
 
															+    ,
														
 
															+    mShouldLog(other.mShouldLog),
														
 
															+    mSeverity(other.mSeverity)
														
 
															+  {
														
 
															+  }
														
 
															+
														
 
															+  void setReportableSeverity(Severity reportableSeverity)
														
 
															+  {
														
 
															+    mShouldLog = mSeverity <= reportableSeverity;  // log only at or below the threshold value
														
 
															+    mBuffer.setShouldLog(mShouldLog);  // keep the buffer's flag in step with ours
														
 
															+  }
														
 
															+
														
 
															+private:
														
 
															+  static std::ostream & severityOstream(Severity severity)
														
 
															+  {
														
 
															+    return severity >= Severity::kINFO ? std::cout : std::cerr;  // below kINFO goes to cerr
														
 
															+  }
														
 
															+
														
 
															+  static std::string severityPrefix(Severity severity)
														
 
															+  {
														
 
															+    switch (severity) {
														
 
															+      case Severity::kINTERNAL_ERROR:
														
 
															+        return "[F] ";
														
 
															+      case Severity::kERROR:
														
 
															+        return "[E] ";
														
 
															+      case Severity::kWARNING:
														
 
															+        return "[W] ";
														
 
															+      case Severity::kINFO:
														
 
															+        return "[I] ";
														
 
															+      case Severity::kVERBOSE:
														
 
															+        return "[V] ";
														
 
															+      default:
														
 
															+        assert(0);  // unexpected value; with NDEBUG defined the empty prefix below is returned
														
 
															+        return "";
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  bool mShouldLog;
														
 
															+  Severity mSeverity;
														
 
															+};
														
 
															+
														
 
															+//! \class Logger
														
 
															+//!
														
 
															+//! \brief Class which manages logging of TensorRT tools and samples
														
 
															+//!
														
 
															+//! \details This class provides a common interface for TensorRT tools and samples to log
														
 
															+//! information to the console, and supports logging two types of messages:
														
 
															+//!
														
 
															+//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
														
 
															+//! - Test pass/fail messages
														
 
															+//!
														
 
															+//! The advantage of having all samples use this class for logging as opposed to emitting directly
														
 
															+//! to stdout/stderr is that the logic for controlling the verbosity and formatting of sample output
														
 
															+//! is centralized in one location.
														
 
															+//!
														
 
															+//! In the future, this class could be extended to support dumping test results to a file in some
														
 
															+//! standard format (for example, JUnit XML), and providing additional metadata (e.g. timing the
														
 
															+//! duration of a test run).
														
 
															+//!
														
 
															+//! TODO: For backwards compatibility with existing samples, this class inherits directly from the
														
 
															+//! nvinfer1::ILogger interface, which is problematic since there isn't a clean separation between
														
 
															+//! messages coming from the TensorRT library and messages coming from the sample.
														
 
															+//!
														
 
															+//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger)
														
 
															+//! we can refactor the class to eliminate the inheritance and instead make the nvinfer1::ILogger
														
 
															+//! implementation a member of the Logger object.
														
 
															+
														
 
															+class Logger : public nvinfer1::ILogger  // NOLINT
														
 
															+{
														
 
															+public:
														
 
															+  //  Logger(Severity severity = Severity::kWARNING)
														
 
															+  //  Logger(Severity severity = Severity::kVERBOSE)
														
 
															+  //! Creates a logger with verbose output enabled; \p severity is stored as the threshold.
														
 
															+  explicit Logger(Severity severity = Severity::kINFO)
														
 
															+  : mReportableSeverity{severity}, mVerbose{true}, mThrottleStopFlag{false}
														
 
															+  {
														
 
															+  }
														
 
															+
														
 
															+  //! Creates a logger whose log() output is switched on or off by \p verbose; \p severity is
														
 
															+  //! stored as the threshold.
														
 
															+  explicit Logger(const bool verbose, Severity severity = Severity::kINFO)
														
 
															+  : mReportableSeverity{severity}, mVerbose{verbose}, mThrottleStopFlag{false}
														
 
															+  {
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \enum TestResult
														
 
															+  //! \brief Represents the state of a given test
														
 
															+  //!
														
 
															+  enum class TestResult {
														
 
															+    kRUNNING,  //!< The test is running (reported by reportTestStart())
														
 
															+    kPASSED,   //!< The test passed (reported by reportPass())
														
 
															+    kFAILED,   //!< The test failed (reported by reportFail())
														
 
															+    kWAIVED    //!< The test was waived (reported by reportWaive())
														
 
															+  };
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Forward-compatible method for retrieving the nvinfer1::ILogger associated with this
														
 
															+  //! Logger
														
 
															+  //! \return The nvinfer1::ILogger associated with this Logger
														
 
															+  //!
														
 
															+  //! TODO Once all samples are updated to use this method to register the logger with TensorRT,
														
 
															+  //! we can eliminate the inheritance of Logger from ILogger
														
 
															+  //!
														
 
															+  nvinfer1::ILogger & getTRTLogger()
														
 
															+  {
														
 
															+    // This accessor must stay silent: it used to print a stray "verbose" line to stdout on
														
 
															+    // every call, bypassing both mVerbose and the severity threshold.
														
 
															+    return *this;
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Implementation of the nvinfer1::ILogger::log() virtual method
														
 
															+  //!
														
 
															+  //! Note samples should not be calling this function directly; it will eventually go away once we
														
 
															+  //! eliminate the inheritance from nvinfer1::ILogger
														
 
															+  //!
														
 
															+  void log(Severity severity, const char * msg) noexcept override
														
 
															+  {
														
 
															+    // Nothing is emitted while verbose output is switched off.
														
 
															+    if (!mVerbose) {
														
 
															+      return;
														
 
															+    }
														
 
															+    // The consumer receives the stored threshold together with this message's severity.
														
 
															+    LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+   * @brief Start a background thread that logs one message repeatedly until stopped.
														
 
															+   *
														
 
															+   * The worker calls log() and then sleeps for @p duration seconds, looping until
														
 
															+   * stop_throttle() raises the stop flag. The message text is copied, so the caller's buffer
														
 
															+   * does not have to outlive this call; a null @p msg is logged as an empty message.
														
 
															+   * The Logger itself must outlive the returned thread. All throttles started on one Logger
														
 
															+   * share a single stop flag.
														
 
															+   *
														
 
															+   * @example
														
 
															+   * Logger logger;
														
 
															+   * auto log_thread = logger.log_throttle(nvinfer1::ILogger::Severity::kINFO, "SOME MSG", 1);
														
 
															+   * // some operation
														
 
															+   * logger.stop_throttle(log_thread);
														
 
															+   *
														
 
															+   * @param severity Severity passed to log() on every repetition
														
 
															+   * @param msg Message text to repeat
														
 
															+   * @param duration Pause between two repetitions, in seconds
														
 
															+   * @return std::thread The running worker; hand it to stop_throttle() to end and join it
														
 
															+   *
														
 
															+   */
														
 
															+  std::thread log_throttle(Severity severity, const char * msg, const int duration) noexcept
														
 
															+  {
														
 
															+    mThrottleStopFlag.store(false);
														
 
															+    // Own a copy of the text: the worker keeps running after this call returns, so a raw
														
 
															+    // pointer to the caller's buffer could dangle.
														
 
															+    const std::string text(msg != nullptr ? msg : "");
														
 
															+    auto log_func = [this, severity, duration, text]() {
														
 
															+      while (!mThrottleStopFlag.load()) {
														
 
															+        this->log(severity, text.c_str());
														
 
															+        std::this_thread::sleep_for(std::chrono::seconds(duration));
														
 
															+      }
														
 
															+    };
														
 
															+
														
 
															+    return std::thread(log_func);
														
 
															+  }
														
 
															+
														
 
															+  //! Raises the stop flag read by the throttle worker and waits for \p log_thread to finish.
														
 
															+  void stop_throttle(std::thread & log_thread) noexcept
														
 
															+  {
														
 
															+    mThrottleStopFlag.store(true);
														
 
															+    // join() throws on a thread that is not joinable (default-constructed, already joined
														
 
															+    // or moved from); inside this noexcept function that would terminate the process.
														
 
															+    if (log_thread.joinable()) {
														
 
															+      log_thread.join();
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Method for controlling the verbosity of logging output
														
 
															+  //!
														
 
															+  //! \param severity The logger will only emit messages that have severity of this level or higher.
														
 
															+  //!
														
 
															+  void setReportableSeverity(Severity severity)
														
 
															+  {
														
 
															+    // Stored value is what log() and the LOG_* helpers hand to LogStreamConsumer.
														
 
															+    mReportableSeverity = severity;
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Opaque handle that holds logging information for a particular test
														
 
															+  //!
														
 
															+  //! This object is an opaque handle to information used by the Logger to print test results.
														
 
															+  //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
														
 
															+  //! with Logger::reportTest{Start,End}().
														
 
															+  //!
														
 
															+  class TestAtom
														
 
															+  {
														
 
															+  public:
														
 
															+    // Only the move constructor is declared publicly.
														
 
															+    TestAtom(TestAtom &&) = default;
														
 
															+
														
 
															+  private:
														
 
															+    // Logger needs access to the private constructor and fields below.
														
 
															+    friend class Logger;
														
 
															+
														
 
															+    // Private constructor; Logger::defineTest() calls it with started == false.
														
 
															+    TestAtom(bool started, const std::string & name, const std::string & cmdline)
														
 
															+    : mStarted(started), mName(name), mCmdline(cmdline)
														
 
															+    {
														
 
															+    }
														
 
															+
														
 
															+    bool mStarted;         // set to true by Logger::reportTestStart()
														
 
															+    std::string mName;     // test name printed in the result line
														
 
															+    std::string mCmdline;  // command line printed after " # " in the result line
														
 
															+  };
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Define a test for logging
														
 
															+  //!
														
 
															+  //! \param[in] name The name of the test.  This should be a string starting with
														
 
															+  //!                  "TensorRT" and containing dot-separated strings containing
														
 
															+  //!                  the characters [A-Za-z0-9_].
														
 
															+  //!                  For example, "TensorRT.sample_googlenet"
														
 
															+  //! \param[in] cmdline The command line used to reproduce the test
														
 
															+  //!
														
 
															+  //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
														
 
															+  //!
														
 
															+  static TestAtom defineTest(const std::string & name, const std::string & cmdline)
														
 
															+  {
														
 
															+    // A freshly defined test starts out in the "not started" state.
														
 
															+    TestAtom atom(false, name, cmdline);
														
 
															+    return atom;
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line
														
 
															+  //! arguments
														
 
															+  //!        as input
														
 
															+  //!
														
 
															+  //! \param[in] name The name of the test
														
 
															+  //! \param[in] argc The number of command-line arguments
														
 
															+  //! \param[in] argv The array of command-line arguments (given as C strings)
														
 
															+  //!
														
 
															+  //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
														
 
															+  static TestAtom defineTest(const std::string & name, int argc, char const * const * argv)
														
 
															+  {
														
 
															+    // Flatten argv into one string and delegate to the string overload.
														
 
															+    return defineTest(name, genCmdlineString(argc, argv));
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Report that a test has started.
														
 
															+  //!
														
 
															+  //! \pre reportTestStart() has not been called yet for the given testAtom
														
 
															+  //!
														
 
															+  //! \param[in] testAtom The handle to the test that has started
														
 
															+  //!
														
 
															+  static void reportTestStart(TestAtom & testAtom)
														
 
															+  {
														
 
															+    // Check the documented precondition first, so a test started twice is caught before a
														
 
															+    // second RUNNING line is printed.
														
 
															+    assert(!testAtom.mStarted);
														
 
															+    reportTestResult(testAtom, TestResult::kRUNNING);
														
 
															+    testAtom.mStarted = true;
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Report that a test has ended.
														
 
															+  //!
														
 
															+  //! \pre reportTestStart() has been called for the given testAtom
														
 
															+  //!
														
 
															+  //! \param[in] testAtom The handle to the test that has ended
														
 
															+  //! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
														
 
															+  //!                   TestResult::kFAILED, TestResult::kWAIVED
														
 
															+  //!
														
 
															+  static void reportTestEnd(const TestAtom & testAtom, TestResult result)
														
 
															+  {
														
 
															+    // kRUNNING is not a final state; it is reported by reportTestStart() only.
														
 
															+    assert(result != TestResult::kRUNNING);
														
 
															+    // The atom must have gone through reportTestStart() before it can end.
														
 
															+    assert(testAtom.mStarted);
														
 
															+    reportTestResult(testAtom, result);
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Report the given test as passed.
														
 
															+  //! \return EXIT_SUCCESS
														
 
															+  //!
														
 
															+  static int reportPass(const TestAtom & testAtom)
														
 
															+  {
														
 
															+    reportTestEnd(testAtom, TestResult::kPASSED);
														
 
															+    return EXIT_SUCCESS;
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Report the given test as failed.
														
 
															+  //! \return EXIT_FAILURE
														
 
															+  //!
														
 
															+  static int reportFail(const TestAtom & testAtom)
														
 
															+  {
														
 
															+    reportTestEnd(testAtom, TestResult::kFAILED);
														
 
															+    return EXIT_FAILURE;
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief Report the given test as waived.
														
 
															+  //! \return EXIT_SUCCESS (a waived test yields the same exit code as a passed one)
														
 
															+  //!
														
 
															+  static int reportWaive(const TestAtom & testAtom)
														
 
															+  {
														
 
															+    reportTestEnd(testAtom, TestResult::kWAIVED);
														
 
															+    return EXIT_SUCCESS;
														
 
															+  }
														
 
															+
														
 
															+  static int reportTest(const TestAtom & testAtom, bool pass)
														
 
															+  {
														
 
															+    // Dispatch on the outcome and forward the exit code of the matching reporter.
														
 
															+    if (pass) {
														
 
															+      return reportPass(testAtom);
														
 
															+    }
														
 
															+    return reportFail(testAtom);
														
 
															+  }
														
 
															+
														
 
															+  //! \brief Returns the severity threshold currently stored in this logger
														
 
															+  Severity getReportableSeverity() const { return mReportableSeverity; }
														
 
															+
														
 
															+private:
														
 
															+  //!
														
 
															+  //! \brief returns an appropriate string for prefixing a log message with the given severity
														
 
															+  //!
														
 
															+  static const char * severityPrefix(Severity severity)
														
 
															+  {
														
 
															+    // One bracketed letter per known severity.
														
 
															+    if (severity == Severity::kINTERNAL_ERROR) return "[F] ";
														
 
															+    if (severity == Severity::kERROR) return "[E] ";
														
 
															+    if (severity == Severity::kWARNING) return "[W] ";
														
 
															+    if (severity == Severity::kINFO) return "[I] ";
														
 
															+    if (severity == Severity::kVERBOSE) return "[V] ";
														
 
															+
														
 
															+    // Unknown value: programming error; with assertions compiled out no prefix is used.
														
 
															+    assert(0);
														
 
															+    return "";
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief returns an appropriate string for prefixing a test result message with the given result
														
 
															+  //!
														
 
															+  static const char * testResultString(TestResult result)
														
 
															+  {
														
 
															+    // Upper-case word printed right after the "&&&& " marker of a result line.
														
 
															+    if (result == TestResult::kRUNNING) return "RUNNING";
														
 
															+    if (result == TestResult::kPASSED) return "PASSED";
														
 
															+    if (result == TestResult::kFAILED) return "FAILED";
														
 
															+    if (result == TestResult::kWAIVED) return "WAIVED";
														
 
															+
														
 
															+    // Unknown value: programming error; with assertions compiled out an empty word is used.
														
 
															+    assert(0);
														
 
															+    return "";
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
														
 
															+  //!
														
 
															+  static std::ostream & severityOstream(Severity severity)
														
 
															+  {
														
 
															+    // Severities comparing >= kINFO go to stdout, all others to stderr.
														
 
															+    if (severity >= Severity::kINFO) {
														
 
															+      return std::cout;
														
 
															+    }
														
 
															+    return std::cerr;
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief method that implements logging test results
														
 
															+  //!
														
 
															+  static void reportTestResult(const TestAtom & testAtom, TestResult result)
														
 
															+  {
														
 
															+    // Result lines always use the stream selected for info-level output.
														
 
															+    std::ostream & out = severityOstream(Severity::kINFO);
														
 
															+    out << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
														
 
															+        << testAtom.mCmdline << std::endl;
														
 
															+  }
														
 
															+
														
 
															+  //!
														
 
															+  //! \brief generate a command line string from the given (argc, argv) values
														
 
															+  //!
														
 
															+  static std::string genCmdlineString(int argc, char const * const * argv)
														
 
															+  {
														
 
															+    // Join the arguments with single spaces; the separator is empty before the first one.
														
 
															+    std::stringstream joined;
														
 
															+    const char * separator = "";
														
 
															+    for (int idx = 0; idx < argc; ++idx) {
														
 
															+      joined << separator << argv[idx];
														
 
															+      separator = " ";
														
 
															+    }
														
 
															+    return joined.str();
														
 
															+  }
														
 
															+
														
 
															+  Severity mReportableSeverity;
														
 
															+  bool mVerbose;
														
 
															+  std::atomic<bool> mThrottleStopFlag;
														
 
															+};
														
 
															+
														
 
															+namespace
														
 
															+{
														
 
															+
														
 
															+//!
														
 
															+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
														
 
															+//!
														
 
															+//! Example usage:
														
 
															+//!
														
 
															+//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
														
 
															+//!
														
 
															+inline LogStreamConsumer LOG_VERBOSE(const Logger & logger)
														
 
															+{
														
 
															+  // Pair the logger's current threshold with this helper's fixed severity.
														
 
															+  const Severity threshold = logger.getReportableSeverity();
														
 
															+  return LogStreamConsumer(threshold, Severity::kVERBOSE);
														
 
															+}
														
 
															+
														
 
															+//!
														
 
															+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
														
 
															+//!
														
 
															+//! Example usage:
														
 
															+//!
														
 
															+//!     LOG_INFO(logger) << "hello world" << std::endl;
														
 
															+//!
														
 
															+inline LogStreamConsumer LOG_INFO(const Logger & logger)
														
 
															+{
														
 
															+  // Pair the logger's current threshold with this helper's fixed severity.
														
 
															+  const Severity threshold = logger.getReportableSeverity();
														
 
															+  return LogStreamConsumer(threshold, Severity::kINFO);
														
 
															+}
														
 
															+
														
 
															+//!
														
 
															+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
														
 
															+//!
														
 
															+//! Example usage:
														
 
															+//!
														
 
															+//!     LOG_WARN(logger) << "hello world" << std::endl;
														
 
															+//!
														
 
															+inline LogStreamConsumer LOG_WARN(const Logger & logger)
														
 
															+{
														
 
															+  // Pair the logger's current threshold with this helper's fixed severity.
														
 
															+  const Severity threshold = logger.getReportableSeverity();
														
 
															+  return LogStreamConsumer(threshold, Severity::kWARNING);
														
 
															+}
														
 
															+
														
 
															+//!
														
 
															+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
														
 
															+//!
														
 
															+//! Example usage:
														
 
															+//!
														
 
															+//!     LOG_ERROR(logger) << "hello world" << std::endl;
														
 
															+//!
														
 
															+inline LogStreamConsumer LOG_ERROR(const Logger & logger)
														
 
															+{
														
 
															+  // Pair the logger's current threshold with this helper's fixed severity.
														
 
															+  const Severity threshold = logger.getReportableSeverity();
														
 
															+  return LogStreamConsumer(threshold, Severity::kERROR);
														
 
															+}
														
 
															+
														
 
															+//!
														
 
															+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity
														
 
															+//! kINTERNAL_ERROR
														
 
															+//!         ("fatal" severity)
														
 
															+//!
														
 
															+//! Example usage:
														
 
															+//!
														
 
															+//!     LOG_FATAL(logger) << "hello world" << std::endl;
														
 
															+//!
														
 
															+inline LogStreamConsumer LOG_FATAL(const Logger & logger)
														
 
															+{
														
 
															+  // Pair the logger's current threshold with this helper's fixed severity.
														
 
															+  const Severity threshold = logger.getReportableSeverity();
														
 
															+  return LogStreamConsumer(threshold, Severity::kINTERNAL_ERROR);
														
 
															+}
														
 
															+
														
 
															+}  // anonymous namespace
														
 
															+}  // namespace tensorrt_common
														
 
															+
														
 
															+#endif  // TENSORRT_COMMON__LOGGER_HPP_
														
--- a/src/detection/detection_lidar_transfusion/include/tensorrt_common/simple_profiler.hpp
+++ b/src/detection/detection_lidar_transfusion/include/tensorrt_common/simple_profiler.hpp
@@ -0,0 +1,70 @@
 
															+// Copyright 2023 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef TENSORRT_COMMON__SIMPLE_PROFILER_HPP_
														
 
															+#define TENSORRT_COMMON__SIMPLE_PROFILER_HPP_
														
 
															+
														
 
															+#include <NvInfer.h>
														
 
															+
														
 
															+#include <iostream>
														
 
															+#include <map>
														
 
															+#include <string>
														
 
															+#include <vector>
														
 
															+
														
 
															+namespace tensorrt_common
														
 
															+{
														
 
															+// Plain record of per-layer parameters; used as the mapped type of
														
 
															+// SimpleProfiler::m_layer_dict. Members have no default initializers.
														
 
															+// NOTE(review): the field meanings below are inferred from the names only - confirm
														
 
															+// against the code that fills this struct.
														
 
															+struct LayerInfo
														
 
															+{
														
 
															+  int in_c;    // presumably the number of input channels
														
 
															+  int out_c;   // presumably the number of output channels
														
 
															+  int w;       // presumably a width
														
 
															+  int h;       // presumably a height
														
 
															+  int k;       // presumably a kernel size
														
 
															+  int stride;  // presumably a stride
														
 
															+  int groups;  // presumably a group count
														
 
															+  nvinfer1::LayerType type;  // TensorRT layer type
														
 
															+};
														
 
															+
														
 
															+/**
														
 
															+ * @class SimpleProfiler
														
 
															+ * @brief Collect per-layer profile information, assuming times are reported in the same order
														
 
															+ */
														
 
															+class SimpleProfiler : public nvinfer1::IProfiler
														
 
															+{
														
 
															+public:
														
 
															+  // Per-entry statistics kept in m_profile.
														
 
															+  struct Record
														
 
															+  {
														
 
															+    float time{0};
														
 
															+    int count{0};
														
 
															+    float min_time{-1.0};
														
 
															+    // Initialised like its siblings so that a default-constructed Record never carries an
														
 
															+    // indeterminate value.
														
 
															+    int index{0};
														
 
															+  };
														
 
															+  // NOTE(review): not explicit, so a std::string converts implicitly to a SimpleProfiler;
														
 
															+  // left unchanged to keep existing callers compiling.
														
 
															+  SimpleProfiler(
														
 
															+    std::string name,
														
 
															+    const std::vector<SimpleProfiler> & src_profilers = std::vector<SimpleProfiler>());
														
 
															+
														
 
															+  // Override of the nvinfer1::IProfiler callback; receives a layer name and a time (`ms`).
														
 
															+  void reportLayerTime(const char * layerName, float ms) noexcept override;
														
 
															+
														
 
															+  // presumably stores a LayerInfo for `layer` in m_layer_dict - TODO confirm in the .cpp
														
 
															+  void setProfDict(nvinfer1::ILayer * layer) noexcept;
														
 
															+
														
 
															+  // Stream insertion for a profiler; the output format lives with the definition.
														
 
															+  friend std::ostream & operator<<(std::ostream & out, const SimpleProfiler & value);
														
 
															+
														
 
															+private:
														
 
															+  std::string m_name;
														
 
															+  std::map<std::string, Record> m_profile;
														
 
															+  // Default only applies if the constructor does not initialise the member itself.
														
 
															+  int m_index{0};
														
 
															+  std::map<std::string, LayerInfo> m_layer_dict;
														
 
															+};
														
 
															+}  // namespace tensorrt_common
														
 
															+#endif  // TENSORRT_COMMON__SIMPLE_PROFILER_HPP_
														
--- a/src/detection/detection_lidar_transfusion/include/tensorrt_common/tensorrt_common.hpp
+++ b/src/detection/detection_lidar_transfusion/include/tensorrt_common/tensorrt_common.hpp
@@ -0,0 +1,232 @@
 
															+// Copyright 2023 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef TENSORRT_COMMON__TENSORRT_COMMON_HPP_
														
 
															+#define TENSORRT_COMMON__TENSORRT_COMMON_HPP_
														
 
															+
														
 
															+//#ifndef YOLOX_STANDALONE
														
 
															+//#include <rclcpp/rclcpp.hpp>
														
 
															+//#endif
														
 
															+
														
 
															+#include <NvInfer.h>
														
 
															+#include <NvOnnxParser.h>
														
 
															+
														
 
															+#if (defined(_MSC_VER) or (defined(__GNUC__) and (7 <= __GNUC_MAJOR__)))
														
 
															+#include <filesystem>
														
 
															+namespace fs = ::std::filesystem;
														
 
															+#else
														
 
															+#include <experimental/filesystem>
														
 
															+namespace fs = ::std::experimental::filesystem;
														
 
															+#endif
														
 
															+
														
 
															+#include <tensorrt_common/logger.hpp>
														
 
															+#include <tensorrt_common/simple_profiler.hpp>
														
 
															+
														
 
															+#include <memory>
														
 
															+#include <sstream>
														
 
															+#include <string>
														
 
															+#include <vector>
														
 
															+#include <algorithm>
														
 
															+
														
 
															+namespace tensorrt_common
														
 
															+{
														
 
															+/**
														
 
															+ * @struct BuildConfig
														
 
															+ * @brief Configuration to provide fine control regarding TensorRT builder
														
 
															+ */
														
 
															+struct BuildConfig
															+{
															+  // Name of the calibration type (see valid_calib_type for the recognized names)
															+  std::string calib_type_str{"MinMax"};
															+
															+  // ID of the DLA core used by the process
															+  int dla_core_id{-1};
															+
															+  // Partial quantization switch for the first layer
															+  bool quantize_first_layer{false};
															+
															+  // Partial quantization switch for the last layer
															+  bool quantize_last_layer{false};
															+
															+  // Enables the per-layer profiler based on IProfiler
															+  bool profile_per_layer{false};
															+
															+  // Clip value used for implicit quantization
															+  double clip_value{0.0};
															+
															+  // Calibration type names that are accepted without a warning
															+  const std::array<std::string, 4> valid_calib_type = {"Entropy", "Legacy", "Percentile", "MinMax"};
															+
															+  // Default configuration: every member keeps its in-class default shown above.
															+  BuildConfig() {}
															+
															+  // Fully specified configuration. Unless YOLOX_STANDALONE is defined, an unknown
															+  // calibration type only triggers a warning on std::cerr; the value is stored as given.
															+  explicit BuildConfig(
															+    const std::string & calib_type_str, const int dla_core_id = -1,
															+    const bool quantize_first_layer = false, const bool quantize_last_layer = false,
															+    const bool profile_per_layer = false, const double clip_value = 0.0)
															+  : calib_type_str(calib_type_str),
															+    dla_core_id(dla_core_id),
															+    quantize_first_layer(quantize_first_layer),
															+    quantize_last_layer(quantize_last_layer),
															+    profile_per_layer(profile_per_layer),
															+    clip_value(clip_value)
															+  {
															+#ifndef YOLOX_STANDALONE
															+    const bool is_known_type = std::any_of(
															+      valid_calib_type.begin(), valid_calib_type.end(),
															+      [&calib_type_str](const std::string & candidate) { return candidate == calib_type_str; });
															+    if (!is_known_type) {
															+      std::stringstream message;
															+      message << "Invalid calibration type was specified: " << calib_type_str << std::endl;
															+      message << "Valid value is one of: [Entropy, (Legacy | Percentile), MinMax]" << std::endl;
															+      message << "Default calibration type will be used: MinMax" << std::endl;
															+      std::cerr << message.str();
															+    }
															+#endif
															+  }
															+};
														
 
															+
														
 
															+nvinfer1::Dims get_input_dims(const std::string & onnx_file_path);  // declaration only: takes an ONNX file path, returns nvinfer1::Dims (NOTE(review): which input is reported - confirm in the definition)
														
 
															+// Precision strings listed as valid: "fp32", "fp16" and "int8".
														
 
															+const std::array<std::string, 3> valid_precisions = {"fp32", "fp16", "int8"};
														
 
															+bool is_valid_precision_string(const std::string & precision);  // NOTE(review): presumably tests membership in valid_precisions - confirm in the definition
														
 
															+
														
 
															+/**
															+ * @brief Deleter used with std::unique_ptr for TensorRT interface objects.
															+ *
															+ * Objects are released with `delete` when the TensorRT major version is 8 or newer and with
															+ * the legacy `destroy()` member otherwise. The version is taken from TENSORRT_VERSION_MAJOR
															+ * (expected from the build system) or, when that is absent, from NV_TENSORRT_MAJOR which the
															+ * TensorRT headers provide; an undefined macro evaluates to 0 inside `#if`.
															+ */
															+template <typename T>
															+struct InferDeleter  // NOLINT
															+{
															+  void operator()(T * obj) const
															+  {
															+    if (obj == nullptr) {
															+      return;
															+    }
															+#if TENSORRT_VERSION_MAJOR >= 8 || NV_TENSORRT_MAJOR >= 8
															+    delete obj;
															+#else
															+    obj->destroy();
															+#endif
															+  }
															+};
														
 
															+
														
 
															+template <typename T>
														
 
															+using TrtUniquePtr = std::unique_ptr<T, InferDeleter<T>>;  // std::unique_ptr that releases T through InferDeleter<T>
														
 
															+// Three int32_t values configuring batched execution; NOTE(review): presumably {min, opt, max} batch sizes - confirm against the users of BatchConfig.
														
 
															+using BatchConfig = std::array<int32_t, 3>;
														
 
															+
														
 
															+/**
														
 
															+ * @class TrtCommon
														
 
															+ * @brief TensorRT common library
														
 
															+ */
														
 
															+class TrtCommon  // NOLINT
															+{
															+public:
															+  /**
															+   * @brief Construct TrtCommon.
															+   * @param[in] model_path ONNX model_path
															+   * @param[in] precision precision for inference
															+   * @param[in] calibrator pointer for any type of INT8 calibrator
															+   * @param[in] batch_config configuration for batched execution
															+   * @param[in] max_workspace_size maximum workspace for building TensorRT engine
															+   * @param[in] buildConfig configuration including precision, calibration method, dla, remaining
															+   * fp16 for first layer,  remaining fp16 for last layer and profiler for builder
															+   * @param[in] plugin_paths path for custom plugin
															+   */
															+  TrtCommon(
															+    const std::string & model_path, const std::string & precision,
															+    std::unique_ptr<nvinfer1::IInt8Calibrator> calibrator = nullptr,
															+    const BatchConfig & batch_config = {1, 1, 1}, const size_t max_workspace_size = (16 << 20),
															+    const BuildConfig & buildConfig = BuildConfig(),
															+    const std::vector<std::string> & plugin_paths = {});
															+
															+  /**
															+   * @brief Destruct TrtCommon
															+   */
															+  ~TrtCommon();
															+
															+  /**
															+   * @brief Load TensorRT engine
															+   * @param[in] engine_file_path path for an engine file
															+   * @return flag for whether loading succeeded or failed
															+   */
															+  bool loadEngine(const std::string & engine_file_path);
															+
															+  /**
															+   * @brief Output layer information including GFLOPs and parameters
															+   * @param[in] onnx_file_path path for an onnx file
															+   * @warning This function is based on darknet log.
															+   */
															+  void printNetworkInfo(const std::string & onnx_file_path);
															+
															+  /**
															+   * @brief build TensorRT engine from ONNX
															+   * @param[in] onnx_file_path path for an onnx file
															+   * @param[in] output_engine_file_path path for an engine file
															+   * @return NOTE(review): presumably true on success - confirm in the definition
															+   */
															+  bool buildEngineFromOnnx(
															+    const std::string & onnx_file_path, const std::string & output_engine_file_path);
															+
															+  /**
															+   * @brief setup for TensorRT execution including building and loading engine
															+   */
															+  void setup();
															+
															+  /**
															+   * @brief NOTE(review): presumably reports is_initialized_ - confirm in the definition
															+   */
															+  bool isInitialized();
															+
															+  // The four members below mirror TensorRT binding/enqueue calls by name;
															+  // NOTE(review): presumably thin forwards to engine_/context_ - confirm in the definitions.
															+  nvinfer1::Dims getBindingDimensions(const int32_t index) const;
															+  int32_t getNbBindings();
															+  bool setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const;
															+  bool enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed);
															+
															+  /**
															+   * @brief output per-layer information
															+   */
															+  void printProfiling(void);
															+
															+  // The patch component is spelled NV_TENSORRT_PATCH in the TensorRT version header.
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8200
															+  /**
															+   * @brief get per-layer information for trt-engine-profiler
															+   */
															+  std::string getLayerInformation(nvinfer1::LayerInformationFormat format);
															+#endif
															+
															+private:
															+  Logger logger_;
															+  fs::path model_file_path_;
															+  TrtUniquePtr<nvinfer1::IRuntime> runtime_;
															+  TrtUniquePtr<nvinfer1::ICudaEngine> engine_;
															+  TrtUniquePtr<nvinfer1::IExecutionContext> context_;
															+  std::unique_ptr<nvinfer1::IInt8Calibrator> calibrator_;
															+
															+  nvinfer1::Dims input_dims_;
															+  nvinfer1::Dims output_dims_;
															+  std::string precision_;
															+  BatchConfig batch_config_;
															+  size_t max_workspace_size_;
															+  bool is_initialized_{false};
															+
															+  // profiler for per-layer
															+  SimpleProfiler model_profiler_;
															+  // profiler for whole model
															+  SimpleProfiler host_profiler_;
															+
															+  std::unique_ptr<const BuildConfig> build_config_;
															+};
														
 
															+
														
 
															+}  // namespace tensorrt_common
														
 
															+
														
 
															+#endif  // TENSORRT_COMMON__TENSORRT_COMMON_HPP_
														
--- a/src/detection/detection_lidar_transfusion/main.cpp
+++ b/src/detection/detection_lidar_transfusion/main.cpp
@@ -0,0 +1,8 @@
 
															+#include <QCoreApplication>
														
 
															+
														
 
															+int main(int argc, char *argv[])
															+{
															+    // Create the Qt core application object from the command line arguments,
															+    // then hand control to its event loop and return the loop's exit code.
															+    QCoreApplication app(argc, argv);
															+    const int exit_code = app.exec();
															+    return exit_code;
															+}
														
--- a/src/detection/detection_lidar_transfusion/network/network_trt.cpp
+++ b/src/detection/detection_lidar_transfusion/network/network_trt.cpp
@@ -0,0 +1,333 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#include "include/network/network_trt.hpp"
														
 
															+
														
 
															+#include <NvOnnxParser.h>
														
 
															+
														
 
															+#include <fstream>
														
 
															+#include <memory>
														
 
															+#include <string>
														
 
															+#include <bits/stdc++.h>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+/// Prints a profile as "min->[a, b], opt->[a, b], max->[a, b]".
															+std::ostream & operator<<(std::ostream & os, const ProfileDimension & profile)
															+{
															+  // Emits the first nbDims extents of one dimension set, separated by ", ".
															+  const auto write_extents = [&os](const auto & dims) {
															+    for (int axis = 0; axis < dims.nbDims; ++axis) {
															+      if (axis != 0) {
															+        os << ", ";
															+      }
															+      os << dims.d[axis];
															+    }
															+  };
															+
															+  os << "min->[";
															+  write_extents(profile.min);
															+  os << "], opt->[";
															+  write_extents(profile.opt);
															+  os << "], max->[";
															+  write_extents(profile.max);
															+  os << "]";
															+  return os;
															+}
														
 
															+
														
 
															+/// Stores the configuration and derives the min/opt/max input profile of the three
															+/// network inputs (voxels, num_points, coors) from it.
															+NetworkTRT::NetworkTRT(const TransfusionConfig & config) : config_(config)
															+{
															+  // Wraps a single extent into a one-dimensional nvinfer1::Dims.
															+  const auto as_dims1 = [](const auto extent) {
															+    return nvinfer1::Dims{1, {static_cast<int32_t>(extent)}};
															+  };
															+
															+  const nvinfer1::Dims3 voxels_min(
															+    config_.min_voxel_size_, config_.min_point_in_voxel_size_, config_.min_network_feature_size_);
															+  const nvinfer1::Dims3 voxels_opt(
															+    config_.opt_voxel_size_, config_.opt_point_in_voxel_size_, config_.opt_network_feature_size_);
															+  const nvinfer1::Dims3 voxels_max(
															+    config_.max_voxel_size_, config_.max_point_in_voxel_size_, config_.max_network_feature_size_);
															+  const ProfileDimension voxels_profile = {voxels_min, voxels_opt, voxels_max};
															+
															+  const ProfileDimension num_points_profile = {
															+    as_dims1(config_.min_points_size_), as_dims1(config_.opt_points_size_),
															+    as_dims1(config_.max_points_size_)};
															+
															+  const ProfileDimension coors_profile = {
															+    nvinfer1::Dims2(config_.min_coors_size_, config_.min_coors_dim_size_),
															+    nvinfer1::Dims2(config_.opt_coors_size_, config_.opt_coors_dim_size_),
															+    nvinfer1::Dims2(config_.max_coors_size_, config_.max_coors_dim_size_)};
															+
															+  in_profile_dims_ = {voxels_profile, num_points_profile, coors_profile};
															+}
														
 
															+
														
 
															+/// Releases the TensorRT objects in dependency order.
															+NetworkTRT::~NetworkTRT()
															+{
															+  // A factory object has to outlive what it created: the execution context comes from
															+  // the engine and the engine is deserialized by the runtime, so the runtime goes last.
															+  context.reset();
															+  engine.reset();
															+  plan_.reset();
															+  runtime_.reset();
															+}
														
 
															+
														
 
															+/**
															+ * @brief Create the runtime, obtain an engine and create the execution context.
															+ *
															+ * If @p engine_path can be opened the engine is loaded from it; otherwise it is built from
															+ * @p onnx_path (and written to @p engine_path) while a throttled progress message is logged.
															+ *
															+ * @param[in] onnx_path path of the ONNX model used when no engine file exists
															+ * @param[in] engine_path path of the serialized engine file
															+ * @param[in] precision precision string forwarded to parseONNX
															+ * @return true only if the engine was obtained and the context was created
															+ */
															+bool NetworkTRT::init(
															+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision)
															+{
															+  runtime_ =
															+    tensorrt_common::TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
															+  if (!runtime_) {
															+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create runtime" << std::endl;
															+    return false;
															+  }
															+
															+  bool success = false;
															+  const bool engine_file_exists = std::ifstream(engine_path).is_open();
															+  if (engine_file_exists) {
															+    success = loadEngine(engine_path);
															+  } else {
															+    auto log_thread = logger_.log_throttle(
															+      nvinfer1::ILogger::Severity::kINFO,
															+      "Applying optimizations and building TRT CUDA engine. Please wait a minutes...", 5);
															+    try {
															+      success = parseONNX(onnx_path, engine_path, precision);
															+    } catch (...) {
															+      // The build path can throw (network IO validation does); make sure the
															+      // throttled logger is stopped before the exception leaves this function.
															+      logger_.stop_throttle(log_thread);
															+      throw;
															+    }
															+    logger_.stop_throttle(log_thread);
															+  }
															+  success &= createContext();
															+
															+  return success;
															+}
														
 
															+
														
 
															+/**
															+ * @brief Register the min/opt/max shapes of the three network inputs as an optimization profile.
															+ * @param[in] builder builder used to create the optimization profile
															+ * @param[in] network parsed network whose inputs are looked up by NetworkIO index
															+ * @param[in,out] config builder configuration that receives the profile
															+ * @return false if the profile or an input is missing, a shape is rejected, or the profile
															+ * cannot be added to @p config; true otherwise
															+ */
															+bool NetworkTRT::setProfile(
															+  nvinfer1::IBuilder & builder, nvinfer1::INetworkDefinition & network,
															+  nvinfer1::IBuilderConfig & config)
															+{
															+  auto * profile = builder.createOptimizationProfile();
															+  if (profile == nullptr) {
															+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create optimization profile" << std::endl;
															+    return false;
															+  }
															+
															+  for (const auto io : {NetworkIO::voxels, NetworkIO::num_points, NetworkIO::coors}) {
															+    // getInput() yields nullptr when the network has fewer inputs than expected,
															+    // e.g. after an ONNX file that failed to parse.
															+    const auto * input = network.getInput(io);
															+    if (input == nullptr) {
															+      tensorrt_common::LOG_ERROR(logger_)
															+        << "Network input " << static_cast<int>(io) << " does not exist" << std::endl;
															+      return false;
															+    }
															+    const char * input_name = input->getName();
															+    const auto & dims = in_profile_dims_[io];
															+
															+    const bool accepted =
															+      profile->setDimensions(input_name, nvinfer1::OptProfileSelector::kMIN, dims.min) &&
															+      profile->setDimensions(input_name, nvinfer1::OptProfileSelector::kOPT, dims.opt) &&
															+      profile->setDimensions(input_name, nvinfer1::OptProfileSelector::kMAX, dims.max);
															+    if (!accepted) {
															+      tensorrt_common::LOG_ERROR(logger_)
															+        << "Failed to set profile dimensions for input " << input_name << std::endl;
															+      return false;
															+    }
															+  }
															+
															+  // addOptimizationProfile() returns the profile index, or -1 when the profile is invalid.
															+  return config.addOptimizationProfile(profile) >= 0;
															+}
														
 
															+
														
 
															+/// Creates the execution context from the current engine.
															+/// Returns false (after logging) when there is no engine or the context cannot be created.
															+bool NetworkTRT::createContext()
															+{
															+  if (engine == nullptr) {
															+    tensorrt_common::LOG_ERROR(logger_)
															+      << "Failed to create context: Engine was not created" << std::endl;
															+    return false;
															+  }
															+
															+  context.reset(engine->createExecutionContext());
															+  if (context != nullptr) {
															+    return true;
															+  }
															+
															+  tensorrt_common::LOG_ERROR(logger_) << "Failed to create context" << std::endl;
															+  return false;
															+}
														
 
															+
														
 
															+/**
															+ * @brief Build a TensorRT engine from an ONNX file and write it to disk.
															+ * @param[in] onnx_path path of the ONNX model to parse
															+ * @param[in] engine_path destination of the serialized engine
															+ * @param[in] precision "fp16" requests FP16 when the platform supports it; other values
															+ * leave the builder flags untouched
															+ * @param[in] workspace_size workspace limit handed to the builder configuration
															+ * @return false as soon as any build step fails, otherwise the result of saveEngine()
															+ */
															+bool NetworkTRT::parseONNX(
															+  const std::string & onnx_path, const std::string & engine_path, const std::string & precision,
															+  const size_t workspace_size)
															+{
															+  auto builder =
															+    tensorrt_common::TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
															+  if (!builder) {
															+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create builder" << std::endl;
															+    return false;
															+  }
															+
															+  auto config =
															+    tensorrt_common::TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
															+  if (!config) {
															+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create config" << std::endl;
															+    return false;
															+  }
															+  // The patch component is spelled NV_TENSORRT_PATCH in the TensorRT version header.
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8400
															+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, workspace_size);
															+#else
															+  config->setMaxWorkspaceSize(workspace_size);
															+#endif
															+  if (precision == "fp16") {
															+    if (builder->platformHasFastFp16()) {
															+      tensorrt_common::LOG_INFO(logger_) << "Using TensorRT FP16 Inference" << std::endl;
															+      config->setFlag(nvinfer1::BuilderFlag::kFP16);
															+    } else {
															+      tensorrt_common::LOG_INFO(logger_)
															+        << "TensorRT FP16 Inference isn't supported in this environment" << std::endl;
															+    }
															+  }
															+
															+  const auto flag =
															+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
															+  auto network =
															+    tensorrt_common::TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(flag));
															+  if (!network) {
															+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create network" << std::endl;
															+    return false;
															+  }
															+
															+  auto parser = tensorrt_common::TrtUniquePtr<nvonnxparser::IParser>(
															+    nvonnxparser::createParser(*network, logger_));
															+  if (!parser) {
															+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create parser" << std::endl;
															+    return false;
															+  }
															+  // A failed parse leaves the network without the expected inputs, so stop here
															+  // instead of continuing with an incomplete network definition.
															+  if (!parser->parseFromFile(
															+        onnx_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR))) {
															+    tensorrt_common::LOG_ERROR(logger_)
															+      << "Failed to parse ONNX file: " << onnx_path << std::endl;
															+    return false;
															+  }
															+
															+  if (!setProfile(*builder, *network, *config)) {
															+    tensorrt_common::LOG_ERROR(logger_) << "Failed to set profile" << std::endl;
															+    return false;
															+  }
															+
															+  plan_ = tensorrt_common::TrtUniquePtr<nvinfer1::IHostMemory>(
															+    builder->buildSerializedNetwork(*network, *config));
															+  if (!plan_) {
															+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create serialized network" << std::endl;
															+    return false;
															+  }
															+  engine = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(
															+    runtime_->deserializeCudaEngine(plan_->data(), plan_->size()));
															+  if (!engine) {
															+    tensorrt_common::LOG_ERROR(logger_) << "Failed to create engine" << std::endl;
															+    return false;
															+  }
															+
															+  return saveEngine(engine_path);
															+}
														
 
															+
														
 
															+/**
															+ * @brief Write the serialized engine plan to @p engine_path and validate the network IO.
															+ *
															+ * Writing the file stays best effort: a failure is logged but does not change the result,
															+ * because the engine already lives in memory.
															+ *
															+ * @param[in] engine_path destination file for the serialized plan
															+ * @return false when no serialized plan exists, otherwise the result of validateNetworkIO()
															+ */
															+bool NetworkTRT::saveEngine(const std::string & engine_path)
															+{
															+  if (!plan_) {
															+    tensorrt_common::LOG_ERROR(logger_) << "No serialized network to write" << std::endl;
															+    return false;
															+  }
															+
															+  tensorrt_common::LOG_INFO(logger_) << "Writing to " << engine_path << std::endl;
															+  std::ofstream file(engine_path, std::ios::out | std::ios::binary);
															+  if (file) {
															+    file.write(
															+      reinterpret_cast<const char *>(plan_->data()),
															+      static_cast<std::streamsize>(plan_->size()));
															+  }
															+  if (!file) {
															+    // Covers both a failed open and a failed write.
															+    tensorrt_common::LOG_ERROR(logger_)
															+      << "Failed to write engine file: " << engine_path << std::endl;
															+  }
															+  return validateNetworkIO();
															+}
														
 
															+
														
 
															+/**
															+ * @brief Deserialize an engine from @p engine_path and validate the network IO.
															+ * @param[in] engine_path path of the serialized engine file
															+ * @return false when the file cannot be opened or deserialized, otherwise the result of
															+ * validateNetworkIO()
															+ */
															+bool NetworkTRT::loadEngine(const std::string & engine_path)
															+{
															+  // The serialized engine is raw bytes, so read it in binary mode.
															+  std::ifstream engine_file(engine_path, std::ios::in | std::ios::binary);
															+  if (!engine_file) {
															+    tensorrt_common::LOG_ERROR(logger_)
															+      << "Failed to open engine file: " << engine_path << std::endl;
															+    return false;
															+  }
															+
															+  std::stringstream engine_buffer;
															+  engine_buffer << engine_file.rdbuf();
															+  const std::string engine_str = engine_buffer.str();
															+
															+  engine = tensorrt_common::TrtUniquePtr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(
															+    reinterpret_cast<const void *>(engine_str.data()), engine_str.size()));
															+  if (!engine) {
															+    // validateNetworkIO() dereferences the engine, so it must not run without one.
															+    tensorrt_common::LOG_ERROR(logger_)
															+      << "Failed to load engine from " << engine_path << std::endl;
															+    return false;
															+  }
															+
															+  tensorrt_common::LOG_INFO(logger_) << "Loaded engine from " << engine_path << std::endl;
															+  return validateNetworkIO();
															+}
														
 
															+
														
 
															+// Verifies that the engine held in `engine` matches what this class expects and appends the
															+// names of its IO tensors to tensors_names_.
															+//
															+// Three things are checked, in this order: the number of IO tensors, the optimization-profile
															+// dimensions (profile 0) of the tensors up to and including NetworkIO::coors, and the shape of
															+// every IO tensor. A mismatch is logged and raised as std::runtime_error; the function
															+// returns true when every check passes.
															+bool NetworkTRT::validateNetworkIO()
															+{
															+  // The engine has to expose exactly one tensor per NetworkIO entry
															+  const auto num_io_tensors = engine->getNbIOTensors();
															+  if (num_io_tensors != NetworkIO::ENUM_SIZE) {
															+    tensorrt_common::LOG_ERROR(logger_)
															+      << "Invalid network IO. Expected size: " << NetworkIO::ENUM_SIZE
															+      << ". Actual size: " << num_io_tensors << "." << std::endl;
															+    throw std::runtime_error("Failed to initialize TRT network.");
															+  }
															+  for (int io_index = 0; io_index < NetworkIO::ENUM_SIZE; ++io_index) {
															+    tensors_names_.push_back(engine->getIOTensorName(io_index));
															+  }
															+
															+  // Log the names as one space-terminated list
															+  std::string io_names;
															+  for (const auto & io_name : tensors_names_) {
															+    io_names += io_name;
															+    io_names += " ";
															+  }
															+  tensorrt_common::LOG_INFO(logger_) << "Network IO: " << io_names << std::endl;
															+
															+  // The input profile baked into the engine has to equal the configured input profile
															+  for (int input_index = 0; input_index <= NetworkIO::coors; ++input_index) {
															+    const auto & input_name = tensors_names_[input_index];
															+    ProfileDimension engine_dims{
															+      engine->getProfileShape(input_name, 0, nvinfer1::OptProfileSelector::kMIN),
															+      engine->getProfileShape(input_name, 0, nvinfer1::OptProfileSelector::kOPT),
															+      engine->getProfileShape(input_name, 0, nvinfer1::OptProfileSelector::kMAX)};
															+
															+    tensorrt_common::LOG_INFO(logger_)
															+      << "Profile for " << input_name << ": " << engine_dims << std::endl;
															+
															+    if (engine_dims != in_profile_dims_[input_index]) {
															+      tensorrt_common::LOG_ERROR(logger_)
															+        << "Invalid network input dimension. Config: " << in_profile_dims_[input_index]
															+        << ". Please change the input profile or delete the engine file and build engine again."
															+        << std::endl;
															+      throw std::runtime_error("Failed to initialize TRT network.");
															+    }
															+  }
															+
															+  // Tensor shapes have to match the network config; -1 marks a dynamic dimension
															+  const int batch_size = static_cast<int>(config_.batch_size_);
															+  const int num_proposals = static_cast<int>(config_.num_proposals_);
															+
															+  validateTensorShape(
															+    NetworkIO::voxels, {-1, static_cast<int>(config_.points_per_voxel_),
															+                        static_cast<int>(config_.num_point_feature_size_)});
															+  validateTensorShape(NetworkIO::num_points, {-1});
															+  validateTensorShape(NetworkIO::coors, {-1, static_cast<int>(config_.num_point_values_)});
															+
															+  const auto cls_score = validateTensorShape(
															+    NetworkIO::cls_score,
															+    {batch_size, static_cast<int>(config_.num_classes_), num_proposals});
															+  tensorrt_common::LOG_INFO(logger_) << "Network num classes: " << cls_score.d[1] << std::endl;
															+
															+  validateTensorShape(NetworkIO::dir_pred, {batch_size, 2, num_proposals});  // x, y
															+  validateTensorShape(
															+    NetworkIO::bbox_pred,
															+    {batch_size, static_cast<int>(config_.num_box_values_), num_proposals});
															+
															+  return true;
															+}
														
 
															+
														
 
															+// Returns the tensor name stored in tensors_names_ for the given NetworkIO entry.
															+// The lookup goes through the bounds-checked at() accessor.
															+const char * NetworkTRT::getTensorName(NetworkIO name)
															+{
															+  const auto & tensor_name = tensors_names_.at(name);
															+  return tensor_name;
															+}
														
 
															+
														
 
															+// Compares the engine's shape for the tensor `name` against the expected `shape`.
															+//
															+// The number of dimensions is checked first, then every dimension value in order. The first
															+// difference is logged and raised as std::runtime_error. On success the shape reported by
															+// the engine is returned.
															+nvinfer1::Dims NetworkTRT::validateTensorShape(NetworkIO name, const std::vector<int> shape)
															+{
															+  const auto & tensor_name = tensors_names_[name];
															+  auto tensor_shape = engine->getTensorShape(tensor_name);
															+
															+  const int expected_rank = static_cast<int>(shape.size());
															+  if (tensor_shape.nbDims != expected_rank) {
															+    tensorrt_common::LOG_ERROR(logger_)
															+      << "Invalid tensor shape for " << tensor_name << ". Expected size: " << shape.size()
															+      << ". Actual size: " << tensor_shape.nbDims << "." << std::endl;
															+    throw std::runtime_error("Failed to initialize TRT network.");
															+  }
															+
															+  for (int dim = 0; dim < tensor_shape.nbDims; ++dim) {
															+    if (tensor_shape.d[dim] == shape[dim]) {
															+      continue;
															+    }
															+    tensorrt_common::LOG_ERROR(logger_)
															+      << "Invalid tensor shape for " << tensor_name << ". Expected: " << shape[dim]
															+      << ". Actual: " << tensor_shape.d[dim] << "." << std::endl;
															+    throw std::runtime_error("Failed to initialize TRT network.");
															+  }
															+
															+  return tensor_shape;
															+}
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
--- a/src/detection/detection_lidar_transfusion/postprocess/circle_nms_kernel.cu
+++ b/src/detection/detection_lidar_transfusion/postprocess/circle_nms_kernel.cu
@@ -0,0 +1,144 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+// Modified from
														
 
															+// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu
														
 
															+
														
 
															+/*
														
 
															+3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
														
 
															+Written by Shaoshuai Shi
														
 
															+All Rights Reserved 2019-2020.
														
 
															+*/
														
 
															+
														
 
															+#include "cuda_utils.hpp"
														
 
															+#include "postprocess/circle_nms_kernel.hpp"
														
 
															+#include "utils.hpp"
														
 
															+
														
 
															+#include <thrust/host_vector.h>
														
 
															+
														
 
															+namespace
															+{
															+// Thread count of one circle-NMS block, and therefore the number of boxes in one chunk
															+const std::size_t THREADS_PER_BLOCK_NMS = 16;
															+
															+// The kernel packs one suppression bit per chunk entry into a std::uint64_t word, so a chunk
															+// can never hold more than 64 entries.
															+static_assert(
															+  THREADS_PER_BLOCK_NMS > 0 && THREADS_PER_BLOCK_NMS <= 64,
															+  "THREADS_PER_BLOCK_NMS must fit into the bits of a 64-bit mask word");
															+}  // namespace
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+// Squared distance between the (x, y) positions of two boxes.
															+__device__ inline float dist2dPow(const Box3D * a, const Box3D * b)
															+{
															+  // Squaring is a single multiplication; a general power function is not needed for it
															+  const float dx = a->x - b->x;
															+  const float dy = a->y - b->y;
															+  return dx * dx + dy * dy;
															+}
														
 
															+
														
 
															+// cspell: ignore divup
														
 
															+// Builds the pairwise suppression mask used by circleNMS().
															+//
															+// The boxes are split into chunks of THREADS_PER_BLOCK_NMS entries. The block at grid position
															+// (col_start, row_start) compares every box of the row chunk against the boxes of the column
															+// chunk. For the row box `r`, bit `i` of mask[r * col_blocks + col_start] is set when box `i`
															+// of the column chunk lies closer than the threshold. Only blocks with
															+// row_start <= col_start do any work, and on the diagonal a box is compared only against the
															+// boxes that follow it in the chunk.
															+__global__ void circleNMS_Kernel(
															+  const Box3D * boxes, const std::size_t num_boxes3d, const std::size_t col_blocks,
															+  const float dist2d_pow_threshold, std::uint64_t * mask)
															+{
															+  // params: boxes (N,)
															+  // params: mask (N, divup(N/THREADS_PER_BLOCK_NMS))
															+
															+  const auto row_start = blockIdx.y;
															+  const auto col_start = blockIdx.x;
															+
															+  // Uniform for the whole block, so no thread is left waiting at the barrier below
															+  if (row_start > col_start) return;
															+
															+  // Number of valid boxes in each chunk; the last chunk may be partially filled.
															+  // Integer minimum: the operands are sizes, there is no reason to go through float.
															+  const std::size_t row_remaining = num_boxes3d - row_start * THREADS_PER_BLOCK_NMS;
															+  const std::size_t col_remaining = num_boxes3d - col_start * THREADS_PER_BLOCK_NMS;
															+  const std::size_t row_size =
															+    row_remaining < THREADS_PER_BLOCK_NMS ? row_remaining : THREADS_PER_BLOCK_NMS;
															+  const std::size_t col_size =
															+    col_remaining < THREADS_PER_BLOCK_NMS ? col_remaining : THREADS_PER_BLOCK_NMS;
															+
															+  // Stage the column chunk in shared memory; every thread of the block reads all of it
															+  __shared__ Box3D block_boxes[THREADS_PER_BLOCK_NMS];
															+
															+  if (threadIdx.x < col_size) {
															+    block_boxes[threadIdx.x] = boxes[THREADS_PER_BLOCK_NMS * col_start + threadIdx.x];
															+  }
															+  __syncthreads();
															+
															+  if (threadIdx.x < row_size) {
															+    const std::size_t cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
															+    const Box3D * cur_box = boxes + cur_box_idx;
															+
															+    std::uint64_t t = 0;
															+    // On the diagonal, skip the box itself and the boxes before it
															+    const std::size_t start = (row_start == col_start) ? threadIdx.x + 1 : 0;
															+    for (std::size_t i = start; i < col_size; i++) {
															+      if (dist2dPow(cur_box, block_boxes + i) < dist2d_pow_threshold) {
															+        t |= 1ULL << i;
															+      }
															+    }
															+    mask[cur_box_idx * col_blocks + col_start] = t;
															+  }
															+}
														
 
															+
														
 
															+// Launches circleNMS_Kernel on `stream` over a col_blocks x col_blocks grid of
															+// THREADS_PER_BLOCK_NMS-thread blocks, writing the suppression bits into `mask`.
															+//
															+// distance_threshold is squared before it is handed to the kernel.
															+// Returns the value of cudaGetLastError() after the launch. With col_blocks == 0 nothing is
															+// launched.
															+cudaError_t circleNMS_launch(
															+  const thrust::device_vector<Box3D> & boxes3d, const std::size_t num_boxes3d,
															+  std::size_t col_blocks, const float distance_threshold,
															+  thrust::device_vector<std::uint64_t> & mask, cudaStream_t stream)
															+{
															+  // A grid with a zero dimension is not a valid launch configuration. Without chunks there
															+  // is nothing to compare, so only report an error that is already pending.
															+  if (col_blocks == 0) {
															+    return cudaGetLastError();
															+  }
															+
															+  // The kernel compares squared distances
															+  const float dist2d_pow_thres = distance_threshold * distance_threshold;
															+
															+  dim3 blocks(col_blocks, col_blocks);
															+  dim3 threads(THREADS_PER_BLOCK_NMS);
															+  circleNMS_Kernel<<<blocks, threads, 0, stream>>>(
															+    thrust::raw_pointer_cast(boxes3d.data()), num_boxes3d, col_blocks, dist2d_pow_thres,
															+    thrust::raw_pointer_cast(mask.data()));
															+
															+  return cudaGetLastError();
															+}
														
 
															+
														
 
															+// Distance-based ("circle") non-maximum suppression.
															+//
															+// Boxes are visited in index order. A box that has not been suppressed yet is kept and
															+// suppresses every later box that the kernel flagged as closer than distance_threshold.
															+//
															+// boxes3d            boxes to filter
															+// distance_threshold suppression radius; squared before it reaches the kernel
															+// keep_mask          overwritten with one flag per box (true = kept); it is enlarged when it
															+//                    holds fewer entries than there are boxes
															+// stream             stream the kernel is launched on
															+//
															+// Returns the number of kept boxes.
															+std::size_t circleNMS(
															+  thrust::device_vector<Box3D> & boxes3d, const float distance_threshold,
															+  thrust::device_vector<bool> & keep_mask, cudaStream_t stream)
															+{
															+  const auto num_boxes3d = boxes3d.size();
															+  const auto col_blocks = divup(num_boxes3d, THREADS_PER_BLOCK_NMS);
															+  thrust::device_vector<std::uint64_t> mask_d(num_boxes3d * col_blocks);
															+
															+  // Nothing to launch for an empty input
															+  if (num_boxes3d > 0) {
															+    CHECK_CUDA_ERROR(
															+      circleNMS_launch(boxes3d, num_boxes3d, col_blocks, distance_threshold, mask_d, stream));
															+    // Wait for the kernel before its output is read: the copy below is not issued on `stream`
															+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream));
															+  }
															+
															+  // memcpy device to host
															+  thrust::host_vector<std::uint64_t> mask_h(mask_d.size());
															+  thrust::copy(mask_d.begin(), mask_d.end(), mask_h.begin());
															+
															+  // generate keep_mask
															+  // remv_h accumulates, one bit per box, the boxes suppressed so far
															+  std::vector<std::uint64_t> remv_h(col_blocks);
															+  // The loop below writes one entry per box, so the buffer must hold at least that many
															+  const std::size_t keep_size =
															+    keep_mask.size() < num_boxes3d ? num_boxes3d : keep_mask.size();
															+  thrust::host_vector<bool> keep_mask_h(keep_size);
															+  std::size_t num_to_keep = 0;
															+  for (std::size_t i = 0; i < num_boxes3d; i++) {
															+    const auto nblock = i / THREADS_PER_BLOCK_NMS;
															+    const auto inblock = i % THREADS_PER_BLOCK_NMS;
															+
															+    if (!(remv_h[nblock] & (1ULL << inblock))) {
															+      keep_mask_h[i] = true;
															+      num_to_keep++;
															+      // Merge the row of box i: every box it flags is suppressed from now on
															+      std::uint64_t * p = &mask_h[0] + i * col_blocks;
															+      for (std::size_t j = nblock; j < col_blocks; j++) {
															+        remv_h[j] |= p[j];
															+      }
															+    } else {
															+      keep_mask_h[i] = false;
															+    }
															+  }
															+
															+  // memcpy host to device
															+  keep_mask = keep_mask_h;
															+
															+  return num_to_keep;
															+}
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
--- a/src/detection/detection_lidar_transfusion/postprocess/non_maximum_suppression.cpp
+++ b/src/detection/detection_lidar_transfusion/postprocess/non_maximum_suppression.cpp
@@ -0,0 +1,147 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#include "postprocess/non_maximum_suppression.hpp"
														
 
															+
														
 
															+//#include <autoware/universe_utils/geometry/geometry.hpp>
														
 
															+//#include <object_recognition_utils/geometry.hpp>
														
 
															+//#include <object_recognition_utils/object_recognition_utils.hpp>
														
 
															+
														
 
															+// constant declarations
														
 
															+// Numeric IDs of the object classes used in this file.
															+// NOTE(review): presumably mirrors the label values of the perception message definition
															+// that the commented-out includes above used to provide - confirm.
															+namespace Label
															+{
															+static constexpr uint8_t UNKNOWN = 0u;
															+static constexpr uint8_t CAR = 1u;
															+static constexpr uint8_t TRUCK = 2u;
															+static constexpr uint8_t BUS = 3u;
															+static constexpr uint8_t TRAILER = 4u;
															+static constexpr uint8_t MOTORCYCLE = 5u;
															+static constexpr uint8_t BICYCLE = 6u;
															+static constexpr uint8_t PEDESTRIAN = 7u;
															+}  // namespace Label
														
 
															+
														
 
															+
														
 
															+// Maps an upper-case class name to its Label ID.
															+// The comparison is exact and case-sensitive; every name that is not listed below maps to
															+// Label::UNKNOWN.
															+uint8_t getSemanticType(const std::string & class_name)
															+{
															+  struct NamedLabel
															+  {
															+    const char * name;
															+    uint8_t label;
															+  };
															+  static constexpr NamedLabel known_labels[] = {
															+    {"CAR", Label::CAR},
															+    {"TRUCK", Label::TRUCK},
															+    {"BUS", Label::BUS},
															+    {"TRAILER", Label::TRAILER},
															+    {"MOTORCYCLE", Label::MOTORCYCLE},
															+    {"BICYCLE", Label::BICYCLE},
															+    {"PEDESTRIAN", Label::PEDESTRIAN},
															+  };
															+
															+  for (const auto & entry : known_labels) {
															+    if (class_name == entry.name) {
															+      return entry.label;
															+    }
															+  }
															+  // CONSTRUCTION_VEHICLE, BARRIER, TRAFFIC_CONE
															+  return Label::UNKNOWN;
															+}
														
 
															+
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+// Stores the NMS parameters and rebuilds the target-class mask from the given class names.
															+//
															+// The value ranges are checked with assert() only, so the checks are compiled out when
															+// NDEBUG is defined.
															+void NonMaximumSuppression::setParameters(const NMSParams & params)
															+{
															+  // The search distance must not be negative
															+  assert(params.search_distance_2d_ >= 0.0);
															+  // The IoU threshold must lie in [0, 1]
															+  assert(params.iou_threshold_ >= 0.0 && params.iou_threshold_ <= 1.0);
															+
															+  params_ = params;
															+  // target_class_mask_ is what isTargetLabel() indexes by label value
															+  target_class_mask_ = classNamesToBooleanMask(params.target_class_names_);
															+}
														
 
															+
														
 
															+// Tells whether `label` is flagged in target_class_mask_.
															+// A label value beyond the end of the mask is never a target.
															+bool NonMaximumSuppression::isTargetLabel(const uint8_t label)
															+{
															+  const bool is_within_mask = label < target_class_mask_.size();
															+  return is_within_mask && target_class_mask_.at(label);
															+}
														
 
															+
														
 
															+//bool NonMaximumSuppression::isTargetPairObject(
														
 
															+//  const DetectedObject & object1, const DetectedObject & object2)
														
 
															+//{
														
 
															+//  const auto label1 = object_recognition_utils::getHighestProbLabel(object1.classification);
														
 
															+//  const auto label2 = object_recognition_utils::getHighestProbLabel(object2.classification);
														
 
															+
														
 
															+//  if (isTargetLabel(label1) && isTargetLabel(label2)) {
														
 
															+//    return true;
														
 
															+//  }
														
 
															+
														
 
															+//  const auto search_sqr_dist_2d = params_.search_distance_2d_ * params_.search_distance_2d_;
														
 
															+//  const auto sqr_dist_2d = autoware::universe_utils::calcSquaredDistance2d(
														
 
															+//    object_recognition_utils::getPose(object1), object_recognition_utils::getPose(object2));
														
 
															+//  return sqr_dist_2d <= search_sqr_dist_2d;
														
 
															+//}
														
 
															+
														
 
															+//Eigen::MatrixXd NonMaximumSuppression::generateIoUMatrix(
														
 
															+//  const std::vector<DetectedObject> & input_objects)
														
 
															+//{
														
 
															+//  // NOTE: row = target objects to be suppressed, col = source objects to be compared
														
 
															+//  Eigen::MatrixXd triangular_matrix =
														
 
															+//    Eigen::MatrixXd::Zero(input_objects.size(), input_objects.size());
														
 
															+//  for (std::size_t target_i = 0; target_i < input_objects.size(); ++target_i) {
														
 
															+//    for (std::size_t source_i = 0; source_i < target_i; ++source_i) {
														
 
															+//      const auto & target_obj = input_objects.at(target_i);
														
 
															+//      const auto & source_obj = input_objects.at(source_i);
														
 
															+//      if (!isTargetPairObject(target_obj, source_obj)) {
														
 
															+//        continue;
														
 
															+//      }
														
 
															+
														
 
															+//      if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
														
 
															+//        const double iou = object_recognition_utils::get2dIoU(target_obj, source_obj);
														
 
															+//        triangular_matrix(target_i, source_i) = iou;
														
 
															+//        // NOTE: If the target object has any objects with iou > iou_threshold, it
														
 
															+//        // will be suppressed regardless of later results.
														
 
															+//        if (iou > params_.iou_threshold_) {
														
 
															+//          break;
														
 
															+//        }
														
 
															+//      }
														
 
															+//    }
														
 
															+//  }
														
 
															+
														
 
															+//  return triangular_matrix;
														
 
															+//}
														
 
															+
														
 
															+//std::vector<DetectedObject> NonMaximumSuppression::apply(
														
 
															+//  const std::vector<DetectedObject> & input_objects)
														
 
															+//{
														
 
															+//  Eigen::MatrixXd iou_matrix = generateIoUMatrix(input_objects);
														
 
															+
														
 
															+//  std::vector<DetectedObject> output_objects;
														
 
															+//  output_objects.reserve(input_objects.size());
														
 
															+//  for (std::size_t i = 0; i < input_objects.size(); ++i) {
														
 
															+//    const auto value = iou_matrix.row(i).maxCoeff();
														
 
															+//    if (params_.nms_type_ == NMS_TYPE::IoU_BEV) {
														
 
															+//      if (value <= params_.iou_threshold_) {
														
 
															+//        output_objects.emplace_back(input_objects.at(i));
														
 
															+//      }
														
 
															+//    }
														
 
															+//  }
														
 
															+
														
 
															+//  return output_objects;
														
 
															+//}
														
 
															+}  // namespace autoware::lidar_transfusion
														
--- a/src/detection/detection_lidar_transfusion/postprocess/postprocess_kernel.cu
+++ b/src/detection/detection_lidar_transfusion/postprocess/postprocess_kernel.cu
@@ -0,0 +1,145 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#include "postprocess/circle_nms_kernel.hpp"
														
 
															+#include "postprocess/postprocess_kernel.hpp"
														
 
															+
														
 
															+#include <thrust/count.h>
														
 
															+#include <thrust/device_vector.h>
														
 
															+#include <thrust/sort.h>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+// Threads per block used for the launch of generateBoxes3D_kernel
															+const size_t THREADS_PER_BLOCK = 256;
														
 
															+
														
 
															+// Unary predicate: true for a box whose score is strictly greater than the threshold given
															+// at construction.
															+struct is_score_greater
															+{
															+  // Left implicit on purpose so that existing call sites keep compiling unchanged
															+  is_score_greater(float t) : t_(t) {}
															+
															+  // const-qualified: evaluating the predicate never modifies it
															+  __device__ bool operator()(const Box3D & b) const { return b.score > t_; }
															+
															+private:
															+  float t_{0.0f};
															+};
														
 
															+
														
 
															+// Unary predicate that returns the keep flag it is given unchanged.
															+struct is_kept
															+{
															+  // const-qualified: the predicate has no state to modify
															+  __device__ bool operator()(const bool keep) const { return keep; }
															+};
														
 
															+
														
 
															+// Binary comparison: true when the left box has a strictly higher score than the right one,
															+// i.e. it orders boxes by descending score.
															+struct score_greater
															+{
															+  // const-qualified: the comparison has no state to modify
															+  __device__ bool operator()(const Box3D & lb, const Box3D & rb) const
															+  {
															+    return lb.score > rb.score;
															+  }
															+};
														
 
															+
														
 
															+// Logistic function 1 / (1 + e^-x), evaluated in single precision.
															+__device__ inline float sigmoid(float x)
															+{
															+  const float exp_neg_x = expf(-x);
															+  return 1.0f / (1.0f + exp_neg_x);
															+}
														
 
															+
														
 
															+// Converts the raw network outputs into one Box3D per proposal (one thread per proposal).
															+//
															+// All three output buffers are indexed as [row * num_proposals + proposal]:
															+//   cls_output      one row per class
															+//   box_output      rows 0..5 are read as x, y, z, width, length, height
															+//   dir_cls_output  rows 0 and 1 are read as the sine and cosine of the yaw
															+//
															+// The label is the class with the highest score (the lowest index wins a tie). The score is
															+// replaced by 0 when the norm of the (sin, cos) pair is below the class threshold in
															+// yaw_norm_thresholds. x and y are scaled by num_point_values and the voxel size and then
															+// offset by the range minimum; the three size values go through expf().
															+__global__ void generateBoxes3D_kernel(
															+  const float * __restrict__ cls_output, const float * __restrict__ box_output,
															+  const float * __restrict__ dir_cls_output, const float voxel_size_x, const float voxel_size_y,
															+  const float min_x_range, const float min_y_range, const int num_proposals, const int num_classes,
															+  const int num_point_values, const float * __restrict__ yaw_norm_thresholds,
															+  Box3D * __restrict__ det_boxes3d)
															+{
															+  const int proposal_idx = blockIdx.x * blockDim.x + threadIdx.x;
															+  if (proposal_idx >= num_proposals) {
															+    return;
															+  }
															+
															+  // Arg-max over the class scores of this proposal
															+  int best_class = 0;
															+  float best_score = cls_output[proposal_idx];
															+
															+#pragma unroll
															+  for (int class_idx = 1; class_idx < num_classes; class_idx++) {
															+    const float candidate = cls_output[class_idx * num_proposals + proposal_idx];
															+    if (candidate > best_score) {
															+      best_score = candidate;
															+      best_class = class_idx;
															+    }
															+  }
															+
															+  // yaw validation
															+  const float yaw_sin = dir_cls_output[proposal_idx];
															+  const float yaw_cos = dir_cls_output[proposal_idx + num_proposals];
															+  const float yaw_norm = sqrtf(yaw_sin * yaw_sin + yaw_cos * yaw_cos);
															+
															+  Box3D & box = det_boxes3d[proposal_idx];
															+  box.label = best_class;
															+  box.score = yaw_norm >= yaw_norm_thresholds[best_class] ? best_score : 0.f;
															+  box.x = box_output[proposal_idx] * num_point_values * voxel_size_x + min_x_range;
															+  box.y = box_output[proposal_idx + num_proposals] * num_point_values * voxel_size_y + min_y_range;
															+  box.z = box_output[proposal_idx + 2 * num_proposals];
															+  box.width = expf(box_output[proposal_idx + 3 * num_proposals]);
															+  box.length = expf(box_output[proposal_idx + 4 * num_proposals]);
															+  box.height = expf(box_output[proposal_idx + 5 * num_proposals]);
															+  box.yaw = atan2f(yaw_sin, yaw_cos);
															+}
														
 
															+
														
 
															+/// Initializes config_ and stream_ from the given arguments; no other work is performed.
															+PostprocessCuda::PostprocessCuda(const TransfusionConfig & config, cudaStream_t & stream)
															+: config_(config), stream_(stream)
															+{
															+  // Intentionally empty: all state is set up in the member initializer list.
															+}
														
 
															+
														
 
															+// cspell: ignore divup
														
 
															+/**
															+ * @brief Decode the raw network outputs into boxes, then filter them by score and circle NMS.
															+ *
															+ * Pipeline: launch generateBoxes3D_kernel on `stream` for config_.num_proposals_ proposals,
															+ * keep the boxes accepted by is_score_greater(config_.score_threshold_), sort them with
															+ * score_greater, run circleNMS, and copy the surviving boxes back to the host.
															+ *
															+ * @param cls_output      class score buffer forwarded to the kernel
															+ * @param box_output      box regression buffer forwarded to the kernel
															+ * @param dir_cls_output  yaw sin/cos buffer forwarded to the kernel
															+ * @param det_boxes3d     output boxes; always overwritten, empty when nothing passes the filters
															+ * @param stream          CUDA stream used for the kernel launch and passed to circleNMS
															+ * @return the value of cudaGetLastError() after the work has been issued
															+ */
															+cudaError_t PostprocessCuda::generateDetectedBoxes3D_launch(
															+  const float * cls_output, const float * box_output, const float * dir_cls_output,
															+  std::vector<Box3D> & det_boxes3d, cudaStream_t stream)
															+{
															+  // Start from an empty result so that the early return below can never leave boxes from a
															+  // previous call in the caller's vector.
															+  det_boxes3d.clear();
															+
															+  dim3 threads = {THREADS_PER_BLOCK};
															+  dim3 blocks = {divup(config_.num_proposals_, threads.x)};
															+
															+  auto boxes3d_d = thrust::device_vector<Box3D>(config_.num_proposals_);
															+  auto yaw_norm_thresholds_d = thrust::device_vector<float>(
															+    config_.yaw_norm_thresholds_.begin(), config_.yaw_norm_thresholds_.end());
															+
															+  generateBoxes3D_kernel<<<blocks, threads, 0, stream>>>(
															+    cls_output, box_output, dir_cls_output, config_.voxel_x_size_, config_.voxel_y_size_,
															+    config_.min_x_range_, config_.min_y_range_, config_.num_proposals_, config_.num_classes_,
															+    config_.num_point_values_, thrust::raw_pointer_cast(yaw_norm_thresholds_d.data()),
															+    thrust::raw_pointer_cast(boxes3d_d.data()));
															+
															+  // NOTE(review): the thrust algorithms below run with thrust::device (or no explicit policy)
															+  // rather than on `stream`; confirm the ordering with the kernel above is guaranteed for the
															+  // way `stream` is created.
															+
															+  // suppress by score
															+  const auto num_det_boxes3d = thrust::count_if(
															+    thrust::device, boxes3d_d.begin(), boxes3d_d.end(), is_score_greater(config_.score_threshold_));
															+  if (num_det_boxes3d == 0) {
															+    return cudaGetLastError();
															+  }
															+  thrust::device_vector<Box3D> det_boxes3d_d(num_det_boxes3d);
															+  thrust::copy_if(
															+    thrust::device, boxes3d_d.begin(), boxes3d_d.end(), det_boxes3d_d.begin(),
															+    is_score_greater(config_.score_threshold_));
															+
															+  // sort by score
															+  thrust::sort(det_boxes3d_d.begin(), det_boxes3d_d.end(), score_greater());
															+
															+  // suppress by NMS
															+  thrust::device_vector<bool> final_keep_mask_d(num_det_boxes3d);
															+  const auto num_final_det_boxes3d =
															+    circleNMS(det_boxes3d_d, config_.circle_nms_dist_threshold_, final_keep_mask_d, stream);
															+  thrust::device_vector<Box3D> final_det_boxes3d_d(num_final_det_boxes3d);
															+  thrust::copy_if(
															+    thrust::device, det_boxes3d_d.begin(), det_boxes3d_d.end(), final_keep_mask_d.begin(),
															+    final_det_boxes3d_d.begin(), is_kept());
															+
															+  // memcpy device to host
															+  det_boxes3d.resize(num_final_det_boxes3d);
															+  thrust::copy(final_det_boxes3d_d.begin(), final_det_boxes3d_d.end(), det_boxes3d.begin());
															+  return cudaGetLastError();
															+}
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
--- a/src/detection/detection_lidar_transfusion/preprocess/pointcloud_densification.cpp
+++ b/src/detection/detection_lidar_transfusion/preprocess/pointcloud_densification.cpp
@@ -0,0 +1,116 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#include "autoware/lidar_transfusion/preprocess/pointcloud_densification.hpp"
														
 
															+
														
 
															+#include <pcl_ros/transforms.hpp>
														
 
															+
														
 
															+#include <pcl_conversions/pcl_conversions.h>
														
 
															+#ifdef ROS_DISTRO_GALACTIC
														
 
															+#include <tf2_eigen/tf2_eigen.h>
														
 
															+#else
														
 
															+#include <tf2_eigen/tf2_eigen.hpp>
														
 
															+#endif
														
 
															+
														
 
															+#include <boost/optional.hpp>
														
 
															+
														
 
															+#include <string>
														
 
															+#include <utility>
														
 
															+
														
 
															+namespace
														
 
															+{
														
 
															+
														
 
															+/// Looks up the transform from source_frame_id to target_frame_id at the given time, waiting
															+/// up to 0.5 s. On a tf2::TransformException the message is logged as a warning under the
															+/// "lidar_transfusion" logger and boost::none is returned.
															+boost::optional<geometry_msgs::msg::Transform> getTransform(
															+  const tf2_ros::Buffer & tf_buffer, const std::string & target_frame_id,
															+  const std::string & source_frame_id, const rclcpp::Time & time)
															+{
															+  try {
															+    const auto lookup_timeout = rclcpp::Duration::from_seconds(0.5);
															+    const geometry_msgs::msg::TransformStamped stamped =
															+      tf_buffer.lookupTransform(target_frame_id, source_frame_id, time, lookup_timeout);
															+    return stamped.transform;
															+  } catch (const tf2::TransformException & ex) {
															+    RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_transfusion"), ex.what());
															+  }
															+  return boost::none;
															+}
														
 
															+
														
 
															+/// Converts a ROS transform message into a single-precision Eigen affine transform by casting
															+/// the matrix produced by tf2::transformToEigen to float.
															+Eigen::Affine3f transformToEigen(const geometry_msgs::msg::Transform & t)
															+{
															+  const auto transform_d = tf2::transformToEigen(t);
															+
															+  Eigen::Affine3f transform_f;
															+  transform_f.matrix() = transform_d.matrix().cast<float>();
															+  return transform_f;
															+}
														
 
															+
														
 
															+}  // namespace
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+/// Initializes param_ and stream_ from the given arguments; the point cloud cache starts out
															+/// untouched by this constructor.
															+PointCloudDensification::PointCloudDensification(
															+  const DensificationParam & param, cudaStream_t & stream)
															+: param_(param), stream_(stream)
															+{
															+  // Intentionally empty: all state is set up in the member initializer list.
															+}
														
 
															+
														
 
															+/**
															+ * @brief Push a point cloud into the cache and drop the oldest entry if the cache is too large.
															+ *
															+ * When the configured cache size is greater than 1, the transform between the world frame and
															+ * the cloud's frame is looked up at the message stamp and stored with the cloud. Otherwise the
															+ * identity transform is used and no TF lookup is performed.
															+ *
															+ * @param pointcloud_msg  incoming point cloud
															+ * @param tf_buffer       TF buffer used for the lookup
															+ * @return false when the transform lookup fails (nothing is enqueued), true otherwise
															+ */
															+bool PointCloudDensification::enqueuePointCloud(
															+  const sensor_msgs::msg::PointCloud2 & pointcloud_msg, const tf2_ros::Buffer & tf_buffer)
															+{
															+  // Bind by reference: copying the header would duplicate its frame_id string on every call.
															+  const auto & header = pointcloud_msg.header;
															+
															+  if (param_.pointcloud_cache_size() > 1) {
															+    const auto transform_world2current =
															+      getTransform(tf_buffer, header.frame_id, param_.world_frame_id(), header.stamp);
															+    if (!transform_world2current) {
															+      return false;
															+    }
															+    const auto affine_world2current = transformToEigen(transform_world2current.get());
															+
															+    enqueue(pointcloud_msg, affine_world2current);
															+  } else {
															+    enqueue(pointcloud_msg, Eigen::Affine3f::Identity());
															+  }
															+
															+  dequeue();
															+
															+  return true;
															+}
														
 
															+
														
 
															+/// Records the latest transform and timestamp, uploads the message payload to a freshly
															+/// allocated device buffer on stream_, and pushes the cloud to the front of the cache together
															+/// with the inverse of affine_world2current.
															+void PointCloudDensification::enqueue(
															+  const sensor_msgs::msg::PointCloud2 & msg, const Eigen::Affine3f & affine_world2current)
															+{
															+  affine_world2current_ = affine_world2current;
															+  current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
															+
															+  // Payload size in bytes: one point_step per point, width * height points.
															+  const auto num_bytes = sizeof(uint8_t) * msg.width * msg.height * msg.point_step;
															+
															+  auto device_buffer = cuda::make_unique<uint8_t[]>(num_bytes);
															+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
															+    device_buffer.get(), msg.data.data(), num_bytes, cudaMemcpyHostToDevice, stream_));
															+
															+  PointCloudWithTransform cached_cloud = {
															+    std::move(device_buffer), msg.header, msg.width * msg.height,
															+    affine_world2current.inverse()};
															+  pointcloud_cache_.push_front(std::move(cached_cloud));
															+}
														
 
															+
														
 
															+/// Removes the entry at the back of the cache when the cache holds more clouds than
															+/// param_.pointcloud_cache_size(); at most one entry is removed per call.
															+void PointCloudDensification::dequeue()
															+{
															+  const bool over_capacity = pointcloud_cache_.size() > param_.pointcloud_cache_size();
															+  if (!over_capacity) {
															+    return;
															+  }
															+  pointcloud_cache_.pop_back();
															+}
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
--- a/src/detection/detection_lidar_transfusion/preprocess/preprocess_kernel.cu
+++ b/src/detection/detection_lidar_transfusion/preprocess/preprocess_kernel.cu
@@ -0,0 +1,221 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+/*
														
 
															+ * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
														
 
															+ * All rights reserved. SPDX-License-Identifier: Apache-2.0
														
 
															+ *
														
 
															+ * Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+ * you may not use this file except in compliance with the License.
														
 
															+ * You may obtain a copy of the License at
														
 
															+ *
														
 
															+ * http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+
														
 
															+#include "autoware/lidar_transfusion/cuda_utils.hpp"
														
 
															+#include "autoware/lidar_transfusion/preprocess/preprocess_kernel.hpp"
														
 
															+
														
 
															+#include <cstdint>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+/// Stores the stream and config, then allocates the two scratch buffers used by voxelization:
															+/// mask_ with one counter per grid cell, and voxels_ with room for
															+/// max_num_points_per_pillar_ * num_point_feature_size_ floats per grid cell plus one extra.
															+PreprocessCuda::PreprocessCuda(const TransfusionConfig & config, cudaStream_t & stream)
															+: stream_(stream), config_(config)
															+{
															+  const auto num_grid_cells = config_.grid_z_size_ * config_.grid_y_size_ * config_.grid_x_size_;
															+
															+  mask_size_ = num_grid_cells;
															+  voxels_size_ =
															+    num_grid_cells * config_.max_num_points_per_pillar_ * config_.num_point_feature_size_ + 1;
															+
															+  mask_ = cuda::make_unique<unsigned int[]>(mask_size_);
															+  voxels_ = cuda::make_unique<float[]>(voxels_size_);
															+}
														
 
															+
														
 
															+/// Runs voxelization in three steps on stream_, synchronizing after each one: clear the scratch
															+/// buffers, scatter the points into per-cell slots, then gather the occupied cells into the
															+/// output buffers (pillar_num, voxel_features, voxel_num, voxel_idxs).
															+void PreprocessCuda::generateVoxels(
															+  float * points, unsigned int points_size, unsigned int * pillar_num, float * voxel_features,
															+  unsigned int * voxel_num, unsigned int * voxel_idxs)
															+{
															+  auto * const mask_d = mask_.get();
															+  auto * const voxels_d = voxels_.get();
															+
															+  // Step 1: reset per-cell counters and point slots.
															+  cuda::clear_async(mask_d, mask_size_, stream_);
															+  cuda::clear_async(voxels_d, voxels_size_, stream_);
															+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
															+
															+  // Step 2: scatter points into their grid cells.
															+  CHECK_CUDA_ERROR(generateVoxels_random_launch(points, points_size, mask_d, voxels_d));
															+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
															+
															+  // Step 3: gather occupied cells into the output buffers.
															+  CHECK_CUDA_ERROR(generateBaseFeatures_launch(
															+    mask_d, voxels_d, pillar_num, voxel_features, voxel_num, voxel_idxs));
															+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
															+}
														
 
															+
														
 
															+/**
															+ * @brief Scatter each in-range point into a free slot of its x/y grid cell.
															+ *
															+ * One thread handles one point (five consecutive floats: x, y, z, i, t). The per-cell counter in
															+ * `mask` is incremented atomically for every in-range point, even when the cell is already
															+ * full; only the first `points_per_voxel` arrivals are stored in `voxels`.
															+ *
															+ * @param points            input points, 5 floats per point
															+ * @param points_size       number of points
															+ * @param min_*_range, max_*_range  open interval a point must lie in to be kept
															+ * @param pillar_x_size, pillar_y_size  cell size along x and y
															+ * @param pillar_z_size     unused by this kernel
															+ * @param grid_y_size, grid_x_size  grid dimensions, used to validate and linearize the cell index
															+ * @param points_per_voxel  slot capacity of a cell
															+ * @param mask              per-cell point counters
															+ * @param voxels            per-cell point slots, 5 floats per slot
															+ */
															+__global__ void generateVoxels_random_kernel(
															+  float * points, unsigned int points_size, float min_x_range, float max_x_range, float min_y_range,
															+  float max_y_range, float min_z_range, float max_z_range, float pillar_x_size, float pillar_y_size,
															+  float pillar_z_size, int grid_y_size, int grid_x_size, int points_per_voxel, unsigned int * mask,
															+  float * voxels)
															+{
															+  // Number of floats per point in both `points` and `voxels`.
															+  constexpr unsigned int kPointStride = 5;
															+
															+  const unsigned int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
															+  if (point_idx >= points_size) return;
															+
															+  const float * point = points + point_idx * kPointStride;
															+  const float x = point[0];
															+  const float y = point[1];
															+  const float z = point[2];
															+  const float i = point[3];
															+  const float t = point[4];
															+
															+  // Expressed as a positive "inside" test so that NaN coordinates fail it and are dropped;
															+  // the equivalent chain of "outside" comparisons is false for NaN and would let them through.
															+  const bool inside_range = x > min_x_range && x < max_x_range && y > min_y_range &&
															+                            y < max_y_range && z > min_z_range && z < max_z_range;
															+  if (!inside_range) return;
															+
															+  const int voxel_idx = static_cast<int>(floorf((x - min_x_range) / pillar_x_size));
															+  const int voxel_idy = static_cast<int>(floorf((y - min_y_range) / pillar_y_size));
															+
															+  // Float rounding can map a point just inside the upper bound onto the cell one past the
															+  // grid; reject it instead of touching a wrong or out-of-bounds cell.
															+  if (voxel_idx < 0 || voxel_idx >= grid_x_size || voxel_idy < 0 || voxel_idy >= grid_y_size) {
															+    return;
															+  }
															+
															+  const unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
															+
															+  // Reserve a slot; the counter keeps growing past the capacity so later stages must clamp it.
															+  const unsigned int point_id = atomicAdd(&(mask[voxel_index]), 1u);
															+  if (point_id >= static_cast<unsigned int>(points_per_voxel)) return;
															+
															+  float * address = voxels + (voxel_index * points_per_voxel + point_id) * kPointStride;
															+  atomicExch(address + 0, x);
															+  atomicExch(address + 1, y);
															+  atomicExch(address + 2, z);
															+  atomicExch(address + 3, i);
															+  atomicExch(address + 4, t);
															+}
														
 
															+
														
 
															+/// Launches generateVoxels_random_kernel on stream_ with one thread per point and
															+/// config_.threads_for_voxel_ threads per block. Nothing is launched for an empty input.
															+/// Returns cudaGetLastError().
															+cudaError_t PreprocessCuda::generateVoxels_random_launch(
															+  float * points, unsigned int points_size, unsigned int * mask, float * voxels)
															+{
															+  const bool has_points = points_size != 0;
															+  if (has_points) {
															+    const dim3 grid_dim(divup(points_size, config_.threads_for_voxel_));
															+    const dim3 block_dim(config_.threads_for_voxel_);
															+
															+    generateVoxels_random_kernel<<<grid_dim, block_dim, 0, stream_>>>(
															+      points, points_size, config_.min_x_range_, config_.max_x_range_, config_.min_y_range_,
															+      config_.max_y_range_, config_.min_z_range_, config_.max_z_range_, config_.voxel_x_size_,
															+      config_.voxel_y_size_, config_.voxel_z_size_, config_.grid_y_size_, config_.grid_x_size_,
															+      config_.points_per_voxel_, mask, voxels);
															+  }
															+  return cudaGetLastError();
															+}
														
 
															+
														
 
															+/**
															+ * @brief Gather every occupied grid cell into a densely packed pillar entry.
															+ *
															+ * One thread handles one (x, y) cell. An occupied cell atomically claims the next pillar id
															+ * from `pillar_num`, writes its clamped point count and its (0, 0, y, x) index, copies its
															+ * points (5 floats each) into `voxel_features`, and finally resets its counter in `mask`.
															+ * Cells that claim an id at or beyond `max_voxels` write nothing, but `pillar_num` has already
															+ * been incremented for them, so it can end up larger than `max_voxels`.
															+ *
															+ * @param mask              per-cell point counters
															+ * @param voxels            per-cell point slots, 5 floats per slot
															+ * @param grid_y_size, grid_x_size  grid dimensions
															+ * @param points_per_voxel  slot capacity of a cell (passed as float, used as an integer count)
															+ * @param max_voxels        pillar capacity of the outputs (passed as float, used as a count)
															+ * @param pillar_num        running pillar counter
															+ * @param voxel_features    output points, points_per_voxel * 5 floats per pillar
															+ * @param voxel_num         output point count per pillar
															+ * @param voxel_idxs        output cell index per pillar, written as one uint4 per pillar
															+ */
															+__global__ void generateBaseFeatures_kernel(
															+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, float points_per_voxel,
															+  float max_voxels, unsigned int * pillar_num, float * voxel_features, unsigned int * voxel_num,
															+  unsigned int * voxel_idxs)
															+{
															+  // Number of floats per point in both `voxels` and `voxel_features`.
															+  constexpr unsigned int kPointStride = 5;
															+
															+  const unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
															+  const unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
															+
															+  if (
															+    voxel_idx >= static_cast<unsigned int>(grid_x_size) ||
															+    voxel_idy >= static_cast<unsigned int>(grid_y_size)) {
															+    return;
															+  }
															+
															+  // Convert the float-typed counts once so that all index arithmetic below is done in
															+  // integers; float arithmetic is no longer exact once an offset exceeds 2^24.
															+  const unsigned int slot_capacity = static_cast<unsigned int>(points_per_voxel);
															+  const unsigned int pillar_capacity = static_cast<unsigned int>(max_voxels);
															+
															+  const unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
															+  unsigned int count = mask[voxel_index];
															+  if (count == 0) return;
															+  // The counter may exceed the capacity; only `slot_capacity` points were actually stored.
															+  count = count < slot_capacity ? count : slot_capacity;
															+
															+  const unsigned int current_pillarId = atomicAdd(pillar_num, 1u);
															+  if (current_pillarId >= pillar_capacity) return;
															+
															+  voxel_num[current_pillarId] = count;
															+
															+  // NOTE(review): writing through uint4 assumes voxel_idxs is 16-byte aligned - confirm at
															+  // the allocation site.
															+  const uint4 idx = {0, 0, voxel_idy, voxel_idx};
															+  reinterpret_cast<uint4 *>(voxel_idxs)[current_pillarId] = idx;
															+
															+  const float * src = voxels + voxel_index * slot_capacity * kPointStride;
															+  float * dst = voxel_features + current_pillarId * slot_capacity * kPointStride;
															+  for (unsigned int i = 0; i < count * kPointStride; i++) {
															+    dst[i] = src[i];
															+  }
															+
															+  // clear buffer for next infer
															+  atomicExch(mask + voxel_index, 0u);
															+}
														
 
															+
														
 
															+// gather occupied voxels into dense pillar features (5 values are copied per point)
														
 
															+/// Launches generateBaseFeatures_kernel on stream_ over the x/y grid using 32x32 thread blocks,
															+/// one thread per grid cell. Returns cudaGetLastError().
															+cudaError_t PreprocessCuda::generateBaseFeatures_launch(
															+  unsigned int * mask, float * voxels, unsigned int * pillar_num, float * voxel_features,
															+  unsigned int * voxel_num, unsigned int * voxel_idxs)
															+{
															+  const dim3 block_dim(32, 32);
															+  const dim3 grid_dim(
															+    divup(config_.grid_x_size_, block_dim.x), divup(config_.grid_y_size_, block_dim.y));
															+
															+  generateBaseFeatures_kernel<<<grid_dim, block_dim, 0, stream_>>>(
															+    mask, voxels, config_.grid_y_size_, config_.grid_x_size_, config_.points_per_voxel_,
															+    config_.max_voxels_, pillar_num, voxel_features, voxel_num, voxel_idxs);
															+
															+  return cudaGetLastError();
															+}
														
 
															+
														
 
															+/**
															+ * @brief Convert raw point records into transformed (x, y, z, intensity, time_lag) floats.
															+ *
															+ * One thread handles one point. x, y and z are assembled least-significant byte first from
															+ * record bytes 0-3, 4-7 and 8-11; the intensity is the single byte at offset 12 converted to
															+ * float. The position is multiplied by `transform_array`, read as a 4x4 matrix whose element
															+ * (row r, column c) is at index c * 4 + r; only the first three rows are used.
															+ *
															+ * @param input_data        raw point records
															+ * @param points_size       number of points
															+ * @param input_point_step  size of one record in bytes
															+ * @param time_lag          value written to output slot 4 of every point
															+ * @param transform_array   16-float transform matrix
															+ * @param num_features      number of floats per output point; slots 0-4 are written
															+ * @param output_points     output buffer
															+ */
															+__global__ void generateSweepPoints_kernel(
															+  const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
															+  const float * transform_array, int num_features, float * output_points)
															+{
															+  // Index in size_t so the byte offset below cannot overflow a 32-bit int.
															+  const size_t point_idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
															+  if (point_idx >= points_size) return;
															+
															+  const uint8_t * record = input_data + point_idx * input_point_step;
															+
															+  union {
															+    uint32_t raw{0};
															+    float value;
															+  } input_x, input_y, input_z;
															+
															+#pragma unroll
															+  for (int i = 0; i < 4; i++) {  // 4 bytes for float32
															+    // Widen to uint32_t before shifting: a uint8_t promotes to signed int, and shifting a
															+    // byte >= 0x80 left by 24 would overflow it.
															+    const int shift = i * 8;
															+    input_x.raw |= static_cast<uint32_t>(record[i]) << shift;
															+    input_y.raw |= static_cast<uint32_t>(record[i + 4]) << shift;
															+    input_z.raw |= static_cast<uint32_t>(record[i + 8]) << shift;
															+  }
															+
															+  const float input_intensity = static_cast<float>(record[12]);
															+
															+  float * out = output_points + point_idx * num_features;
															+  out[0] = transform_array[0] * input_x.value + transform_array[4] * input_y.value +
															+           transform_array[8] * input_z.value + transform_array[12];
															+  out[1] = transform_array[1] * input_x.value + transform_array[5] * input_y.value +
															+           transform_array[9] * input_z.value + transform_array[13];
															+  out[2] = transform_array[2] * input_x.value + transform_array[6] * input_y.value +
															+           transform_array[10] * input_z.value + transform_array[14];
															+  out[3] = input_intensity;
															+  out[4] = time_lag;
															+}
														
 
															+
														
 
															+/**
															+ * @brief Launch generateSweepPoints_kernel on stream_ with one thread per point.
															+ *
															+ * @param input_data        raw point records
															+ * @param points_size       number of points; nothing is launched when it is zero
															+ * @param input_point_step  size of one record in bytes
															+ * @param time_lag          value written as the last feature of every point
															+ * @param transform_array   16-float transform matrix forwarded to the kernel
															+ * @param output_points     output buffer, config_.num_point_feature_size_ floats per point
															+ * @return cudaGetLastError()
															+ */
															+cudaError_t PreprocessCuda::generateSweepPoints_launch(
															+  const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
															+  const float * transform_array, float * output_points)
															+{
															+  // An empty cloud would yield a grid of zero blocks, which is an invalid launch
															+  // configuration; skip the launch as generateVoxels_random_launch does.
															+  if (points_size == 0) {
															+    return cudaGetLastError();
															+  }
															+
															+  dim3 blocks(divup(points_size, config_.threads_for_voxel_));
															+  dim3 threads(config_.threads_for_voxel_);
															+
															+  generateSweepPoints_kernel<<<blocks, threads, 0, stream_>>>(
															+    input_data, points_size, input_point_step, time_lag, transform_array,
															+    config_.num_point_feature_size_, output_points);
															+
															+  return cudaGetLastError();
															+}
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
--- a/src/detection/detection_lidar_transfusion/preprocess/voxel_generator.cpp
+++ b/src/detection/detection_lidar_transfusion/preprocess/voxel_generator.cpp
@@ -0,0 +1,123 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#include "autoware/lidar_transfusion/preprocess/voxel_generator.hpp"
														
 
															+
														
 
															+#include "autoware/lidar_transfusion/preprocess/preprocess_kernel.hpp"
														
 
															+
														
 
															+#include <sensor_msgs/point_cloud2_iterator.hpp>
														
 
															+
														
 
															+#include <type_traits>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+// Stores the configuration and stream, then creates the densification and preprocessing
															+// helpers and the two buffers obtained through cuda::make_unique.
															+VoxelGenerator::VoxelGenerator(
															+  const DensificationParam & densification_param, const TransfusionConfig & config,
															+  cudaStream_t & stream)
															+: config_(config), stream_(stream)
															+{
															+  // Helpers share the stream held by this object.
															+  pd_ptr_ = std::make_unique<PointCloudDensification>(densification_param, stream_);
															+  pre_ptr_ = std::make_unique<PreprocessCuda>(config_, stream_);
															+
															+  // One MAX_CLOUD_STEP_SIZE-sized slot for each of the cloud_capacity_ points.
															+  const auto cloud_buffer_size = config_.cloud_capacity_ * MAX_CLOUD_STEP_SIZE;
															+  cloud_data_d_ = cuda::make_unique<unsigned char[]>(cloud_buffer_size);
															+
															+  // Holds the AFF_MAT_SIZE floats of the transform uploaded for each sweep.
															+  affine_past2current_d_ = cuda::make_unique<float[]>(AFF_MAT_SIZE);
															+}
														
 
															+
														
 
															+// Forwards the message and TF buffer to the PointCloudDensification helper and returns
															+// its result unchanged.
															+bool VoxelGenerator::enqueuePointCloud(
															+  const sensor_msgs::msg::PointCloud2 & msg, const tf2_ros::Buffer & tf_buffer)
															+{
															+  const bool enqueued = pd_ptr_->enqueuePointCloud(msg, tf_buffer);
															+  return enqueued;
															+}
														
 
															+
														
 
															+// Writes the points of the cached sweeps into points_d, one sweep after another.
															+//
															+// On the first call the field layout of msg is read with initCloudInfo() and compared
															+// with a default-constructed CloudInfo; a mismatch is logged and raised as
															+// std::runtime_error. For every cached sweep the past-to-current transform is then
															+// uploaded and generateSweepPoints_launch() writes the sweep behind the points that
															+// were already written. Iteration stops with a warning as soon as the next sweep would
															+// reach config_.cloud_capacity_.
															+//
															+// msg      point cloud whose field layout is validated on the first call
															+// points_d output buffer; each point occupies config_.num_point_feature_size_ floats
															+//
															+// Returns the number of points written. CUDA failures are reported through
															+// CHECK_CUDA_ERROR.
															+std::size_t VoxelGenerator::generateSweepPoints(
															+  const sensor_msgs::msg::PointCloud2 & msg, cuda::unique_ptr<float[]> & points_d)
															+{
															+  if (!is_initialized_) {
															+    initCloudInfo(msg);
															+    std::stringstream ss;
															+    ss << "Input point cloud information: " << std::endl << cloud_info_;
															+    RCLCPP_DEBUG_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
															+
															+    CloudInfo default_cloud_info;
															+    if (cloud_info_ != default_cloud_info) {
															+      // ss still contains the input layout, so the error shows both layouts.
															+      ss << "Expected point cloud information: " << std::endl << default_cloud_info;
															+      RCLCPP_ERROR_STREAM(rclcpp::get_logger("lidar_transfusion"), ss.str());
															+      throw std::runtime_error("Input point cloud has unsupported format");
															+    }
															+    is_initialized_ = true;
															+  }
															+
															+  size_t point_counter{0};
															+  CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
															+
															+  for (auto pc_cache_iter = pd_ptr_->getPointCloudCacheIter(); !pd_ptr_->isCacheEnd(pc_cache_iter);
															+       ++pc_cache_iter) {
															+    const auto sweep_num_points = pc_cache_iter->num_points;
															+    // NOTE(review): `>=` also rejects a sweep that would exactly fill the capacity;
															+    // confirm whether that margin is intended.
															+    if (point_counter + sweep_num_points >= config_.cloud_capacity_) {
															+      RCLCPP_WARN_STREAM(
															+        rclcpp::get_logger("lidar_transfusion"), "Exceeding cloud capacity. Used "
															+                                                   << pd_ptr_->getIdx(pc_cache_iter) << " out of "
															+                                                   << pd_ptr_->getCacheSize() << " sweep(s)");
															+      break;
															+    }
															+    // An empty sweep contributes nothing; skipping it also avoids launching a kernel
															+    // over zero points, which would be reported as a launch error below.
															+    if (sweep_num_points == 0) {
															+      continue;
															+    }
															+    // Offset, in floats, of the first free slot in points_d.
															+    const auto shift = point_counter * config_.num_point_feature_size_;
															+
															+    auto affine_past2current =
															+      pd_ptr_->getAffineWorldToCurrent() * pc_cache_iter->affine_past2world;
															+    static_assert(std::is_same<decltype(affine_past2current.matrix()), Eigen::Matrix4f &>::value);
															+    static_assert(!Eigen::Matrix4f::IsRowMajor, "matrices should be col-major.");
															+
															+    float time_lag = static_cast<float>(
															+      pd_ptr_->getCurrentTimestamp() - rclcpp::Time(pc_cache_iter->header.stamp).seconds());
															+
															+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
															+      affine_past2current_d_.get(), affine_past2current.data(), AFF_MAT_SIZE * sizeof(float),
															+      cudaMemcpyHostToDevice, stream_));
															+    // The copy source is a loop-local object; presumably the synchronisation keeps
															+    // it alive until the asynchronous copy has completed.
															+    CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_));
															+
															+    // Surface launch failures instead of discarding the returned status.
															+    CHECK_CUDA_ERROR(pre_ptr_->generateSweepPoints_launch(
															+      pc_cache_iter->data_d.get(), sweep_num_points, cloud_info_.point_step, time_lag,
															+      affine_past2current_d_.get(), points_d.get() + shift));
															+    point_counter += sweep_num_points;
															+  }
															+
															+  return point_counter;
															+}
														
 
															+
														
 
															+// Fills cloud_info_ from msg: offset, datatype and count of the "x", "y", "z" and
															+// "intensity" fields, plus the message's point_step and is_bigendian values.
															+// getFieldInfo() throws if one of the fields is absent.
															+void VoxelGenerator::initCloudInfo(const sensor_msgs::msg::PointCloud2 & msg)
															+{
															+  {
															+    const auto [offset, datatype, count] = getFieldInfo(msg, "x");
															+    cloud_info_.x_offset = offset;
															+    cloud_info_.x_datatype = datatype;
															+    cloud_info_.x_count = count;
															+  }
															+  {
															+    const auto [offset, datatype, count] = getFieldInfo(msg, "y");
															+    cloud_info_.y_offset = offset;
															+    cloud_info_.y_datatype = datatype;
															+    cloud_info_.y_count = count;
															+  }
															+  {
															+    const auto [offset, datatype, count] = getFieldInfo(msg, "z");
															+    cloud_info_.z_offset = offset;
															+    cloud_info_.z_datatype = datatype;
															+    cloud_info_.z_count = count;
															+  }
															+  {
															+    const auto [offset, datatype, count] = getFieldInfo(msg, "intensity");
															+    cloud_info_.intensity_offset = offset;
															+    cloud_info_.intensity_datatype = datatype;
															+    cloud_info_.intensity_count = count;
															+  }
															+
															+  // Message-level layout values are copied as they are.
															+  cloud_info_.point_step = msg.point_step;
															+  cloud_info_.is_bigendian = msg.is_bigendian;
															+}
														
 
															+
														
 
															+// Looks up the field called field_name in msg.fields and returns its
															+// (offset, datatype, count). Throws std::runtime_error("Missing field: <name>") when no
															+// field has that name.
															+std::tuple<const uint32_t, const uint8_t, const uint8_t> VoxelGenerator::getFieldInfo(
															+  const sensor_msgs::msg::PointCloud2 & msg, const std::string & field_name)
															+{
															+  for (auto it = msg.fields.begin(); it != msg.fields.end(); ++it) {
															+    if (it->name != field_name) {
															+      continue;
															+    }
															+    // The first field with a matching name wins.
															+    return std::make_tuple(it->offset, it->datatype, it->count);
															+  }
															+  throw std::runtime_error("Missing field: " + field_name);
															+}
														
 
															+}  // namespace autoware::lidar_transfusion
														
--- a/src/detection/detection_lidar_transfusion/simple_profiler.cpp
+++ b/src/detection/detection_lidar_transfusion/simple_profiler.cpp
@@ -0,0 +1,132 @@
 
															+// Copyright 2023 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#include <tensorrt_common/simple_profiler.hpp>
														
 
															+
														
 
															+#include <iomanip>
														
 
															+
														
 
															+namespace tensorrt_common
														
 
															+{
														
 
															+
														
 
															+// Builds a profiler called `name` whose records are the merge of src_profilers: a layer
															+// seen for the first time is copied as is, a layer seen again has its time and count
															+// added to the existing record. m_index starts at 0.
															+SimpleProfiler::SimpleProfiler(std::string name, const std::vector<SimpleProfiler> & src_profilers)
															+: m_name(name)
															+{
															+  m_index = 0;
															+  for (const auto & source : src_profilers) {
															+    for (const auto & entry : source.m_profile) {
															+      // insert() leaves an existing record untouched and reports that through .second.
															+      const auto result = m_profile.insert(entry);
															+      if (result.second) {
															+        continue;
															+      }
															+      auto & merged = result.first->second;
															+      merged.time += entry.second.time;
															+      merged.count += entry.second.count;
															+    }
															+  }
															+}
														
 
															+
														
 
															+// Accumulates one timing sample for layerName: increments the invocation count, adds ms
															+// to the total time and keeps the smallest sample in min_time. A min_time of -1.0 marks
															+// a record that has no sample yet; such a record also receives the next m_index value.
															+void SimpleProfiler::reportLayerTime(const char * layerName, float ms) noexcept
															+{
															+  // operator[] creates the record on first use; one lookup serves all updates below.
															+  auto & record = m_profile[layerName];
															+  record.count++;
															+  record.time += ms;
															+
															+  if (record.min_time == -1.0) {
															+    record.min_time = ms;
															+    record.index = m_index;
															+    m_index++;
															+    return;
															+  }
															+  if (record.min_time > ms) {
															+    record.min_time = ms;
															+  }
															+}
														
 
															+
														
 
															+// Records the layer type of `layer` in m_layer_dict under the layer's name. For
															+// convolution layers it additionally stores channel counts, input width/height, kernel
															+// size, stride and group count taken from the layer and its first input/output tensor.
															+void SimpleProfiler::setProfDict(nvinfer1::ILayer * layer) noexcept
															+{
															+  const std::string name = layer->getName();
															+  auto & entry = m_layer_dict[name];
															+  entry.type = layer->getType();
															+  if (layer->getType() != nvinfer1::LayerType::kCONVOLUTION) {
															+    return;
															+  }
															+
															+  auto * conv = static_cast<nvinfer1::IConvolutionLayer *>(layer);
															+  const nvinfer1::Dims dim_in = layer->getInput(0)->getDimensions();
															+  const nvinfer1::Dims dim_out = layer->getOutput(0)->getDimensions();
															+  const nvinfer1::Dims k_dims = conv->getKernelSizeNd();
															+  const nvinfer1::Dims s_dims = conv->getStrideNd();
															+  const int groups = conv->getNbGroups();
															+
															+  // Indices 1/2/3 are read as channel/height/width (presumably an NCHW layout).
															+  entry.in_c = dim_in.d[1];
															+  entry.out_c = dim_out.d[1];
															+  entry.w = dim_in.d[3];
															+  entry.h = dim_in.d[2];
															+  // Only the first kernel and stride dimension is kept.
															+  entry.k = k_dims.d[0];
															+  entry.stride = s_dims.d[0];
															+  entry.groups = groups;
															+}
														
 
															+
														
 
															+// Prints a report for `value`: a banner, a header row, one row per profiled layer in
															+// ascending record index (share of total time, invocation count, total, average and
															+// minimum runtime) and a footer with the total runtime. The stream's format flags and
															+// precision are restored before the footer is written.
															+std::ostream & operator<<(std::ostream & out, const SimpleProfiler & value)
															+{
															+  out << "========== " << value.m_name << " profile ==========" << std::endl;
															+
															+  const std::string header_name = "Operation";
															+  float total_ms = 0;
															+  // The name column is as wide as the longest layer name, and at least the header.
															+  int name_width = static_cast<int>(header_name.size());
															+  for (const auto & entry : value.m_profile) {
															+    total_ms += entry.second.time;
															+    const int name_length = static_cast<int>(entry.first.size());
															+    if (name_length > name_width) {
															+      name_width = name_length;
															+    }
															+  }
															+
															+  const auto saved_flags = out.flags();
															+  const auto saved_precision = out.precision();
															+
															+  // Header row
															+  out << "index, " << std::setw(12);
															+  out << std::setw(name_width) << header_name << " ";
															+  out << std::setw(12) << "Runtime"
															+      << "%,"
															+      << " ";
															+  out << std::setw(12) << "Invocations"
															+      << " , ";
															+  out << std::setw(12) << "Runtime[ms]"
															+      << " , ";
															+  out << std::setw(12) << "Avg Runtime[ms]"
															+      << " ,";
															+  out << std::setw(12) << "Min Runtime[ms]" << std::endl;
															+
															+  // Rows are emitted by record index, so every index scans the whole profile.
															+  const int row_count = value.m_index;
															+  for (int row = 0; row < row_count; ++row) {
															+    for (const auto & entry : value.m_profile) {
															+      const auto & stats = entry.second;
															+      if (stats.index != row) {
															+        continue;
															+      }
															+      out << row << ",   ";
															+      out << std::setw(name_width) << entry.first << ",";
															+      out << std::setw(12) << std::fixed << std::setprecision(1)
															+          << (stats.time * 100.0F / total_ms) << "%"
															+          << ",";
															+      out << std::setw(12) << stats.count << ",";
															+      out << std::setw(12) << std::fixed << std::setprecision(2) << stats.time << ", ";
															+      out << std::setw(12) << std::fixed << std::setprecision(2) << stats.time / stats.count
															+          << ", ";
															+      out << std::setw(12) << std::fixed << std::setprecision(2) << stats.min_time
															+          << std::endl;
															+    }
															+  }
															+
															+  out.flags(saved_flags);
															+  out.precision(saved_precision);
															+  out << "========== " << value.m_name << " total runtime = " << total_ms
															+      << " ms ==========" << std::endl;
															+  return out;
															+}
														
 
															+}  // namespace tensorrt_common
														
--- a/src/detection/detection_lidar_transfusion/tensorrt_common.cpp
+++ b/src/detection/detection_lidar_transfusion/tensorrt_common.cpp
@@ -0,0 +1,605 @@
 
															+// Copyright 2023 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#include <tensorrt_common/tensorrt_common.hpp>
														
 
															+
														
 
															+#include <NvInferPlugin.h>
														
 
															+#include <dlfcn.h>
														
 
															+
														
 
															+#include <fstream>
														
 
															+#include <functional>
														
 
															+#include <iostream>
														
 
															+#include <memory>
														
 
															+#include <string>
														
 
															+#include <utility>
														
 
															+
														
 
															+namespace
														
 
															+{
														
 
															+// Returns true when `v` (anything std::string::find accepts) occurs in `s`.
															+template <class T>
															+bool contain(const std::string & s, const T & v)
															+{
															+  const auto position = s.find(v);
															+  return std::string::npos != position;
															+}
														
 
															+}  // anonymous namespace
														
 
															+
														
 
															+namespace tensorrt_common
														
 
															+{
														
 
															+// Parses the ONNX file with a temporary builder/network/parser and returns the
															+// dimensions of network input 0.
															+//
															+// Every failure is logged. If the builder, network, parser or input 0 cannot be
															+// obtained, a value-initialised Dims is returned instead of dereferencing a null
															+// pointer. A parse failure is logged and the lookup of input 0 is still attempted.
															+nvinfer1::Dims get_input_dims(const std::string & onnx_file_path)
															+{
															+  Logger logger_;
															+  auto builder = TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
															+  if (!builder) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder");
															+    return nvinfer1::Dims{};
															+  }
															+
															+  const auto explicitBatch =
															+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
															+
															+  auto network =
															+    TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
															+  if (!network) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create network");
															+    return nvinfer1::Dims{};
															+  }
															+
															+  // The config is not used below, so a failure here is only logged.
															+  auto config = TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
															+  if (!config) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder config");
															+  }
															+
															+  auto parser = TrtUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger_));
															+  if (!parser) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create parser");
															+    return nvinfer1::Dims{};
															+  }
															+  if (!parser->parseFromFile(
															+        onnx_file_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR))) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Failed to parse onnx file");
															+  }
															+
															+  // After a failed parse the network may have no input at all.
															+  const auto input = network->getInput(0);
															+  if (!input) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to get network input");
															+    return nvinfer1::Dims{};
															+  }
															+  return input->getDimensions();
															+}
														
 
															+
														
 
															+// Returns true when `precision` equals one of the entries of valid_precisions.
															+// Otherwise writes a message listing the accepted values to std::cerr and returns false.
															+bool is_valid_precision_string(const std::string & precision)
															+{
															+  for (const auto & candidate : valid_precisions) {
															+    if (candidate == precision) {
															+      return true;
															+    }
															+  }
															+
															+  // No match: report the rejected value together with the accepted ones.
															+  std::stringstream message;
															+  message << "Invalid precision was specified: " << precision << std::endl
															+          << "Valid string is one of: [";
															+  for (const auto & candidate : valid_precisions) {
															+    message << candidate << ", ";
															+  }
															+  message << "] (case sensitive)" << std::endl;
															+  std::cerr << message.str();
															+  return false;
															+}
														
 
															+
														
 
															+// Stores the model path, calibrator, precision, batch configuration and workspace size,
															+// then
															+//   1. validates `precision` (on failure the constructor returns early and neither
															+//      build_config_ nor runtime_ is set),
															+//   2. copies build_config,
															+//   3. loads every plugin library in plugin_paths with dlopen(); a failure is logged
															+//      with the library path and the dlerror() text and does not abort construction,
															+//   4. creates the TensorRT runtime, selects the DLA core if one is configured and
															+//      registers the TensorRT plugins.
															+TrtCommon::TrtCommon(
															+  const std::string & model_path, const std::string & precision,
															+  std::unique_ptr<nvinfer1::IInt8Calibrator> calibrator, const BatchConfig & batch_config,
															+  const size_t max_workspace_size, const BuildConfig & build_config,
															+  const std::vector<std::string> & plugin_paths)
															+: model_file_path_(model_path),
															+  calibrator_(std::move(calibrator)),
															+  precision_(precision),
															+  batch_config_(batch_config),
															+  max_workspace_size_(max_workspace_size),
															+  model_profiler_("Model"),
															+  host_profiler_("Host")
															+{
															+  // Check given precision is valid one
															+  if (!is_valid_precision_string(precision)) {
															+    return;
															+  }
															+  build_config_ = std::make_unique<const BuildConfig>(build_config);
															+
															+  for (const auto & plugin_path : plugin_paths) {
															+    int32_t flags{RTLD_LAZY};
															+// cspell: ignore asan
															+#if ENABLE_ASAN
															+    // https://github.com/google/sanitizers/issues/89
															+    // asan doesn't handle module unloading correctly and there are no plans on doing
															+    // so. In order to get proper stack traces, don't delete the shared library on
															+    // close so that asan can resolve the symbols correctly.
															+    flags |= RTLD_NODELETE;
															+#endif  // ENABLE_ASAN
															+    void * handle = dlopen(plugin_path.c_str(), flags);
															+    if (!handle) {
															+      // dlerror() describes the most recent dl* failure; it may return null.
															+      const char * reason = dlerror();
															+      const std::string message = "Could not load plugin library: " + plugin_path + " (" +
															+                                  (reason ? reason : "unknown error") + ")";
															+      logger_.log(nvinfer1::ILogger::Severity::kERROR, message.c_str());
															+    }
															+  }
															+  runtime_ = TrtUniquePtr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger_));
															+  if (!runtime_) {
															+    // Without a runtime neither the DLA core can be selected nor an engine loaded.
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create runtime");
															+    return;
															+  }
															+  if (build_config_->dla_core_id != -1) {
															+    runtime_->setDLACore(build_config_->dla_core_id);
															+  }
															+  initLibNvInferPlugins(&logger_, "");
															+}
														
 
															+
														
 
															+// Nothing to release by hand; the members clean up after themselves.
															+TrtCommon::~TrtCommon() = default;
														
 
															+
														
 
															+// Loads or builds the TensorRT engine for model_file_path_ and creates the execution
															+// context. is_initialized_ is true only if every step succeeded.
															+//
															+//  - ".engine": the file is deserialised directly.
															+//  - ".onnx":   a cache file name is derived from the DLA core, calibration type,
															+//               precision, first/last-layer flags and batch size; the cached engine is
															+//               loaded if it exists, otherwise the engine is built from the ONNX file.
															+//  - any other extension, a missing file, or an object whose constructor stopped early:
															+//               setup fails.
															+void TrtCommon::setup()
															+{
															+  // The constructor returns before setting these when the precision string is invalid.
															+  if (!build_config_ || !runtime_) {
															+    logger_.log(
															+      nvinfer1::ILogger::Severity::kERROR, "TrtCommon was not constructed successfully");
															+    is_initialized_ = false;
															+    return;
															+  }
															+  if (!fs::exists(model_file_path_)) {
															+    is_initialized_ = false;
															+    return;
															+  }
															+  std::string engine_path = model_file_path_;
															+  if (model_file_path_.extension() == ".engine") {
															+    std::cout << "Load ... " << model_file_path_ << std::endl;
															+    loadEngine(model_file_path_);
															+  } else if (model_file_path_.extension() == ".onnx") {
															+    fs::path cache_engine_path{model_file_path_};
															+    std::string calib_name = "";
															+    if (precision_ == "int8") {
															+      if (build_config_->calib_type_str == "Entropy") {
															+        calib_name = "EntropyV2-";
															+      } else if (
															+        build_config_->calib_type_str == "Legacy" ||
															+        build_config_->calib_type_str == "Percentile") {
															+        calib_name = "Legacy-";
															+      } else {
															+        calib_name = "MinMax-";
															+      }
															+    }
															+
															+    // Cache name: [DLA<id>-]<calibration><precision>[-firstFP16][-lastFP16]-batch<N>.engine
															+    std::string ext;
															+    if (build_config_->dla_core_id != -1) {
															+      ext = "DLA" + std::to_string(build_config_->dla_core_id) + "-";
															+    }
															+    ext += calib_name + precision_;
															+    if (build_config_->quantize_first_layer) {
															+      ext += "-firstFP16";
															+    }
															+    if (build_config_->quantize_last_layer) {
															+      ext += "-lastFP16";
															+    }
															+    ext += "-batch" + std::to_string(batch_config_[0]) + ".engine";
															+    cache_engine_path.replace_extension(ext);
															+
															+    // Output Network Information
															+    printNetworkInfo(model_file_path_);
															+
															+    if (fs::exists(cache_engine_path)) {
															+      std::cout << "Loading... " << cache_engine_path << std::endl;
															+      loadEngine(cache_engine_path);
															+    } else {
															+      std::cout << "Building... " << cache_engine_path << std::endl;
															+      logger_.log(nvinfer1::ILogger::Severity::kINFO, "Start build engine");
															+      auto log_thread = logger_.log_throttle(
															+        nvinfer1::ILogger::Severity::kINFO,
															+        "Applying optimizations and building TRT CUDA engine. Please wait for a few minutes...", 5);
															+      buildEngineFromOnnx(model_file_path_, cache_engine_path);
															+      logger_.stop_throttle(log_thread);
															+      logger_.log(nvinfer1::ILogger::Severity::kINFO, "End build engine");
															+    }
															+    engine_path = cache_engine_path;
															+  } else {
															+    is_initialized_ = false;
															+    return;
															+  }
															+
															+  // Loading or building may have failed; do not dereference a missing engine.
															+  if (!engine_) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create engine");
															+    is_initialized_ = false;
															+    return;
															+  }
															+
															+  context_ = TrtUniquePtr<nvinfer1::IExecutionContext>(engine_->createExecutionContext());
															+  if (!context_) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create context");
															+    is_initialized_ = false;
															+    return;
															+  }
															+
															+  if (build_config_->profile_per_layer) {
															+    context_->setProfiler(&model_profiler_);
															+  }
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8200
															+  // Write profiles for trt-engine-explorer
															+  // See: https://github.com/NVIDIA/TensorRT/tree/main/tools/experimental/trt-engine-explorer
															+  std::string j_ext = ".json";
															+  fs::path json_path{engine_path};
															+  json_path.replace_extension(j_ext);
															+  std::string ret = getLayerInformation(nvinfer1::LayerInformationFormat::kJSON);
															+  std::ofstream os(json_path, std::ofstream::trunc);
															+  os << ret << std::flush;
															+#endif
															+
															+  is_initialized_ = true;
															+}
														
 
															+
														
 
															+// Read a serialized TensorRT engine from disk and deserialize it into engine_.
															+//
															+// engine_file_path: path of the serialized engine (plan) file.
															+// Returns true when the file could be opened and deserialization produced an engine,
															+// false otherwise (an error is logged through logger_ in that case).
															+bool TrtCommon::loadEngine(const std::string & engine_file_path)
															+{
															+  // The plan is raw binary data, so the stream must not apply text-mode translation.
															+  std::ifstream engine_file(engine_file_path, std::ios::binary);
															+  if (!engine_file.is_open()) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to open engine file");
															+    return false;
															+  }
															+  std::stringstream engine_buffer;
															+  engine_buffer << engine_file.rdbuf();
															+  const std::string engine_str = engine_buffer.str();
															+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(runtime_->deserializeCudaEngine(
															+    reinterpret_cast<const void *>(engine_str.data()), engine_str.size()));
															+  if (!engine_) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to deserialize engine");
															+    return false;
															+  }
															+  return true;
															+}
														
 
															+
														
 
															+// Parse the ONNX model and print a per-layer summary to stdout.
															+//
															+// Convolution layers are reported with kernel size, groups, stride, input/output sizes,
															+// weight count and an estimated GFLOPs value; pooling layers with type, window and an
															+// estimated GFLOPs value; resize layers by index only. The totals are printed at the end.
															+// When build_config_->profile_per_layer is set, every layer is also passed to
															+// model_profiler_.setProfDict().
															+//
															+// onnx_file_path: ONNX model to inspect.
															+// Returns early, without printing totals, if a TensorRT object cannot be created or the
															+// ONNX file fails to parse.
															+void TrtCommon::printNetworkInfo(const std::string & onnx_file_path)
															+{
															+  auto builder = TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
															+  if (!builder) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder");
															+    return;
															+  }
															+
															+  const auto explicitBatch =
															+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
															+
															+  auto network =
															+    TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
															+  if (!network) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create network");
															+    return;
															+  }
															+
															+  auto config = TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
															+  if (!config) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder config");
															+    return;
															+  }
															+
															+  if (precision_ == "fp16" || precision_ == "int8") {
															+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
															+  }
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8400
															+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);
															+#else
															+  config->setMaxWorkspaceSize(max_workspace_size_);
															+#endif
															+
															+  auto parser = TrtUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger_));
															+  if (!parser) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create parser");
															+    return;
															+  }
															+  if (!parser->parseFromFile(
															+        onnx_file_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR))) {
															+    return;
															+  }
															+  const int num = network->getNbLayers();
															+  float total_gflops = 0.0;
															+  int total_params = 0;
															+  for (int i = 0; i < num; i++) {
															+    nvinfer1::ILayer * layer = network->getLayer(i);
															+    const auto layer_type = layer->getType();
															+    if (build_config_->profile_per_layer) {
															+      model_profiler_.setProfDict(layer);
															+    }
															+    if (layer_type == nvinfer1::LayerType::kCONSTANT) {
															+      continue;
															+    }
															+    nvinfer1::ITensor * in = layer->getInput(0);
															+    nvinfer1::ITensor * out = layer->getOutput(0);
															+    if (!in || !out) {
															+      // Nothing can be reported for a layer without input 0 or output 0; skipping it
															+      // avoids dereferencing a null tensor.
															+      continue;
															+    }
															+    const nvinfer1::Dims dim_in = in->getDimensions();
															+    const nvinfer1::Dims dim_out = out->getDimensions();
															+
															+    if (layer_type == nvinfer1::LayerType::kCONVOLUTION) {
															+      auto * conv = static_cast<nvinfer1::IConvolutionLayer *>(layer);
															+      const nvinfer1::Dims k_dims = conv->getKernelSizeNd();
															+      const nvinfer1::Dims s_dims = conv->getStrideNd();
															+      const int groups = conv->getNbGroups();
															+      const int stride = s_dims.d[0];
															+      // Indices 1, 2, 3 of the tensor dimensions are used as channel, height and width.
															+      int num_weights = (dim_in.d[1] / groups) * dim_out.d[1] * k_dims.d[0] * k_dims.d[1];
															+      float gflops = (2 * num_weights) * (dim_in.d[3] / stride * dim_in.d[2] / stride / 1e9);
															+      total_gflops += gflops;
															+      total_params += num_weights;
															+      std::cout << "L" << i << " [conv " << k_dims.d[0] << "x" << k_dims.d[1] << " (" << groups
															+                << ") "
															+                << "/" << s_dims.d[0] << "] " << dim_in.d[3] << "x" << dim_in.d[2] << "x"
															+                << dim_in.d[1] << " -> " << dim_out.d[3] << "x" << dim_out.d[2] << "x"
															+                << dim_out.d[1];
															+      std::cout << " weights:" << num_weights;
															+      std::cout << " GFLOPs:" << gflops;
															+      std::cout << std::endl;
															+    } else if (layer_type == nvinfer1::LayerType::kPOOLING) {
															+      auto * pool = static_cast<nvinfer1::IPoolingLayer *>(layer);
															+      const auto p_type = pool->getPoolingType();
															+      const nvinfer1::Dims dim_stride = pool->getStrideNd();
															+      const nvinfer1::Dims dim_window = pool->getWindowSizeNd();
															+
															+      std::cout << "L" << i << " [";
															+      if (p_type == nvinfer1::PoolingType::kMAX) {
															+        std::cout << "max ";
															+      } else if (p_type == nvinfer1::PoolingType::kAVERAGE) {
															+        std::cout << "avg ";
															+      } else if (p_type == nvinfer1::PoolingType::kMAX_AVERAGE_BLEND) {
															+        std::cout << "max avg blend ";
															+      }
															+      float gflops = dim_in.d[1] * dim_window.d[0] / dim_stride.d[0] * dim_window.d[1] /
															+                     dim_stride.d[1] * dim_in.d[2] * dim_in.d[3] / 1e9;
															+      total_gflops += gflops;
															+      std::cout << "pool " << dim_window.d[0] << "x" << dim_window.d[1] << "]";
															+      std::cout << " GFLOPs:" << gflops;
															+      std::cout << std::endl;
															+    } else if (layer_type == nvinfer1::LayerType::kRESIZE) {
															+      std::cout << "L" << i << " [resize]" << std::endl;
															+    }
															+  }
															+  std::cout << "Total " << total_gflops << " GFLOPs" << std::endl;
															+  std::cout << "Total " << total_params / 1000.0 / 1000.0 << " M params" << std::endl;
															+  return;
															+}
														
 
															+
														
 
															+// Build a TensorRT engine from an ONNX model, keep it in engine_ and write the serialized
															+// plan to disk.
															+//
															+// onnx_file_path: ONNX model to parse.
															+// output_engine_file_path: destination of the serialized engine.
															+// Returns true only when the engine was built and the plan file was written completely;
															+// false on any failure (builder/network/config/parser creation, ONNX parsing, missing
															+// network input, engine build, or file output).
															+bool TrtCommon::buildEngineFromOnnx(
															+  const std::string & onnx_file_path, const std::string & output_engine_file_path)
															+{
															+  auto builder = TrtUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(logger_));
															+  if (!builder) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder");
															+    return false;
															+  }
															+
															+  const auto explicitBatch =
															+    1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
															+
															+  auto network =
															+    TrtUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
															+  if (!network) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create network");
															+    return false;
															+  }
															+
															+  auto config = TrtUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
															+  if (!config) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder config");
															+    return false;
															+  }
															+
															+  // A dla_core_id other than -1 requests DLA execution with GPU fallback.
															+  int num_available_dla = builder->getNbDLACores();
															+  if (build_config_->dla_core_id != -1) {
															+    if (num_available_dla > 0) {
															+      std::cout << "###" << num_available_dla << " DLAs are supported! ###" << std::endl;
															+    } else {
															+      std::cout << "###Warning : "
															+                << "No DLA is supported! ###" << std::endl;
															+    }
															+    config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
															+    config->setDLACore(build_config_->dla_core_id);
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8200
															+    config->setFlag(nvinfer1::BuilderFlag::kPREFER_PRECISION_CONSTRAINTS);
															+#else
															+    config->setFlag(nvinfer1::BuilderFlag::kSTRICT_TYPES);
															+#endif
															+    config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
															+  }
															+  if (precision_ == "fp16" || precision_ == "int8") {
															+    config->setFlag(nvinfer1::BuilderFlag::kFP16);
															+  }
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8400
															+  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);
															+#else
															+  config->setMaxWorkspaceSize(max_workspace_size_);
															+#endif
															+
															+  auto parser = TrtUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger_));
															+  if (!parser) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create parser");
															+    return false;
															+  }
															+  if (!parser->parseFromFile(
															+        onnx_file_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kERROR))) {
															+    std::cout << "Failed to parse onnx file" << std::endl;
															+    return false;
															+  }
															+
															+  // Everything below reads the first network input, so make sure it exists.
															+  nvinfer1::ITensor * const input = network->getInput(0);
															+  if (!input) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to get network input");
															+    return false;
															+  }
															+
															+  const int num = network->getNbLayers();
															+  bool first = build_config_->quantize_first_layer;
															+  bool last = build_config_->quantize_last_layer;
															+  // Partial Quantization
															+  if (precision_ == "int8") {
															+    input->setDynamicRange(0, 255.0);
															+    for (int i = 0; i < num; i++) {
															+      nvinfer1::ILayer * layer = network->getLayer(i);
															+      auto layer_type = layer->getType();
															+      std::string name = layer->getName();
															+      nvinfer1::ITensor * out = layer->getOutput(0);
															+      if (build_config_->clip_value > 0.0) {
															+        std::cout << "Set max value for outputs : " << build_config_->clip_value << "  " << name
															+                  << std::endl;
															+        out->setDynamicRange(0.0, build_config_->clip_value);
															+      }
															+
															+      if (layer_type == nvinfer1::LayerType::kCONVOLUTION) {
															+        if (first) {
															+          // Only the first convolution encountered is switched to half precision.
															+          layer->setPrecision(nvinfer1::DataType::kHALF);
															+          std::cout << "Set kHALF in " << name << std::endl;
															+          first = false;
															+        }
															+        if (last) {
															+          // cspell: ignore preds
															+          if (
															+            contain(name, "reg_preds") || contain(name, "cls_preds") ||
															+            contain(name, "obj_preds")) {
															+            layer->setPrecision(nvinfer1::DataType::kHALF);
															+            std::cout << "Set kHALF in " << name << std::endl;
															+          }
															+          // Walk backwards to the last convolution or matrix-multiply layer and switch
															+          // it to half precision as well.
															+          for (int j = num - 1; j >= 0; j--) {
															+            nvinfer1::ILayer * inner_layer = network->getLayer(j);
															+            auto inner_layer_type = inner_layer->getType();
															+            std::string inner_name = inner_layer->getName();
															+            if (inner_layer_type == nvinfer1::LayerType::kCONVOLUTION) {
															+              inner_layer->setPrecision(nvinfer1::DataType::kHALF);
															+              std::cout << "Set kHALF in " << inner_name << std::endl;
															+              break;
															+            }
															+            if (inner_layer_type == nvinfer1::LayerType::kMATRIX_MULTIPLY) {
															+              inner_layer->setPrecision(nvinfer1::DataType::kHALF);
															+              std::cout << "Set kHALF in " << inner_name << std::endl;
															+              break;
															+            }
															+          }
															+        }
															+      }
															+    }
															+  }
															+
															+  const auto input_dims = input->getDimensions();
															+  const auto input_channel = input_dims.d[1];
															+  const auto input_height = input_dims.d[2];
															+  const auto input_width = input_dims.d[3];
															+  const auto input_batch = input_dims.d[0];
															+
															+  if (input_batch > 1) {
															+    batch_config_[0] = input_batch;
															+  }
															+
															+  if (batch_config_.at(0) > 1 && (batch_config_.at(0) == batch_config_.at(2))) {
															+    // Attention : below API is deprecated in TRT8.4
															+    builder->setMaxBatchSize(batch_config_.at(2));
															+  } else {
															+    if (build_config_->profile_per_layer) {
															+      // batch_config_ entries 0, 1, 2 are used as the min, opt and max batch sizes.
															+      auto profile = builder->createOptimizationProfile();
															+      profile->setDimensions(
															+        input->getName(), nvinfer1::OptProfileSelector::kMIN,
															+        nvinfer1::Dims4{batch_config_.at(0), input_channel, input_height, input_width});
															+      profile->setDimensions(
															+        input->getName(), nvinfer1::OptProfileSelector::kOPT,
															+        nvinfer1::Dims4{batch_config_.at(1), input_channel, input_height, input_width});
															+      profile->setDimensions(
															+        input->getName(), nvinfer1::OptProfileSelector::kMAX,
															+        nvinfer1::Dims4{batch_config_.at(2), input_channel, input_height, input_width});
															+      config->addOptimizationProfile(profile);
															+    }
															+  }
															+  if (precision_ == "int8" && calibrator_) {
															+    config->setFlag(nvinfer1::BuilderFlag::kINT8);
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8200
															+    config->setFlag(nvinfer1::BuilderFlag::kPREFER_PRECISION_CONSTRAINTS);
															+#else
															+    config->setFlag(nvinfer1::BuilderFlag::kSTRICT_TYPES);
															+#endif
															+    // QAT requires no calibrator.
															+    //    assert((calibrator != nullptr) && "Invalid calibrator for INT8 precision");
															+    config->setInt8Calibrator(calibrator_.get());
															+  }
															+  if (build_config_->profile_per_layer) {
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8200
															+    config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
															+#else
															+    config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kVERBOSE);
															+#endif
															+  }
															+
															+#if TENSORRT_VERSION_MAJOR >= 8
															+  auto plan =
															+    TrtUniquePtr<nvinfer1::IHostMemory>(builder->buildSerializedNetwork(*network, *config));
															+  if (!plan) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create host memory");
															+    return false;
															+  }
															+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(
															+    runtime_->deserializeCudaEngine(plan->data(), plan->size()));
															+#else
															+  engine_ = TrtUniquePtr<nvinfer1::ICudaEngine>(builder->buildEngineWithConfig(*network, *config));
															+#endif
															+
															+  if (!engine_) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create engine");
															+    return false;
															+  }
															+
															+  // save engine
															+#if TENSORRT_VERSION_MAJOR < 8
															+  auto data = TrtUniquePtr<nvinfer1::IHostMemory>(engine_->serialize());
															+  if (!data) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to serialize engine");
															+    return false;
															+  }
															+#endif
															+  std::ofstream file(output_engine_file_path, std::ios::binary | std::ios::out);
															+  if (!file.is_open()) {
															+    return false;
															+  }
															+#if TENSORRT_VERSION_MAJOR < 8
															+  file.write(reinterpret_cast<const char *>(data->data()), data->size());
															+#else
															+  file.write(reinterpret_cast<const char *>(plan->data()), plan->size());
															+#endif
															+
															+  file.close();
															+  // A failed write or close leaves a truncated plan on disk; report it instead of
															+  // claiming success.
															+  if (file.fail()) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to write engine file");
															+    return false;
															+  }
															+
															+  return true;
															+}
														
 
															+
														
 
															+// Report the value of the is_initialized_ flag.
															+bool TrtCommon::isInitialized()
															+{
															+  const bool ready = is_initialized_;
															+  return ready;
															+}
														
 
															+
														
 
															+// Return the dimensions of the binding at `index`.
															+//
															+// On TensorRT 8.5 and later the shape is first queried by tensor name from the execution
															+// context; if that shape contains a runtime (-1) dimension it is returned as is.
															+// In every other case the binding dimensions are queried by index.
															+nvinfer1::Dims TrtCommon::getBindingDimensions(const int32_t index) const
															+{
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8500
															+  const char * const name = engine_->getIOTensorName(index);
															+  const auto dims = context_->getTensorShape(name);
															+  const bool has_runtime_dim =
															+    std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; });
															+
															+  if (has_runtime_dim) {
															+    return dims;
															+  }
															+#endif
															+  return context_->getBindingDimensions(index);
															+}
														
 
															+
														
 
															+// Forward to the engine and return its number of bindings.
															+int32_t TrtCommon::getNbBindings()
															+{
															+  const int32_t binding_count = engine_->getNbBindings();
															+  return binding_count;
															+}
														
 
															+
														
 
															+// Forward the requested dimensions for binding `index` to the execution context and
															+// return the context's result.
															+bool TrtCommon::setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const
															+{
															+  const bool accepted = context_->setBindingDimensions(index, dimensions);
															+  return accepted;
															+}
														
 
															+
														
 
															+// Enqueue inference on the execution context and return the context's result.
															+//
															+// When build_config_->profile_per_layer is set, the host-side wall time spent in the
															+// enqueue call is reported to host_profiler_ under the name "inference" (milliseconds).
															+bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed)
															+{
															+  // Without profiling there is nothing to measure: just forward the call.
															+  if (!build_config_->profile_per_layer) {
															+    return context_->enqueueV2(bindings, stream, input_consumed);
															+  }
															+
															+  using Clock = std::chrono::high_resolution_clock;
															+  const auto t_begin = Clock::now();
															+  const bool enqueued = context_->enqueueV2(bindings, stream, input_consumed);
															+  const auto t_end = Clock::now();
															+
															+  const std::chrono::duration<float, std::milli> elapsed_ms(t_end - t_begin);
															+  host_profiler_.reportLayerTime("inference", elapsed_ms.count());
															+  return enqueued;
															+}
														
 
															+
														
 
															+// Print the host profiler, a line break, and then the model profiler to stdout.
															+void TrtCommon::printProfiling()
															+{
															+  std::ostream & out = std::cout;
															+  out << host_profiler_;
															+  out << std::endl;
															+  out << model_profiler_;
															+}
														
 
															+
														
 
															+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8200
															+// Return the engine's layer information in the requested format, obtained from an
															+// engine inspector. The current execution context, if any, is attached to the inspector
															+// before querying.
															+//
															+// format: output format passed to the inspector.
															+// Returns the information string, or an empty string when no engine is loaded, the
															+// inspector cannot be created, or the inspector yields no data.
															+std::string TrtCommon::getLayerInformation(nvinfer1::LayerInformationFormat format)
															+{
															+  if (!engine_) {
															+    return std::string();
															+  }
															+  auto inspector = std::unique_ptr<nvinfer1::IEngineInspector>(engine_->createEngineInspector());
															+  if (!inspector) {
															+    logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create engine inspector");
															+    return std::string();
															+  }
															+  if (context_ != nullptr) {
															+    inspector->setExecutionContext(&(*context_));
															+  }
															+  // Constructing std::string from a null pointer is undefined, so guard the raw result.
															+  const char * const info = inspector->getEngineInformation(format);
															+  return info != nullptr ? std::string(info) : std::string();
															+}
															+#endif
														
 
															+
														
 
															+}  // namespace tensorrt_common
														
--- a/src/detection/detection_lidar_transfusion/transfusion_config.hpp
+++ b/src/detection/detection_lidar_transfusion/transfusion_config.hpp
@@ -0,0 +1,170 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__TRANSFUSION_CONFIG_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__TRANSFUSION_CONFIG_HPP_
														
 
															+
														
 
															+#include <cstdint>
														
 
															+#include <vector>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+class TransfusionConfig
														
 
															+{
														
 
															+public:
														
 
															+  TransfusionConfig(
														
 
															+    const std::size_t cloud_capacity, const std::vector<int64_t> & voxels_num,
														
 
															+    const std::vector<double> & point_cloud_range, const std::vector<double> & voxel_size,
														
 
															+    const std::size_t num_proposals, const float circle_nms_dist_threshold,
														
 
															+    const std::vector<double> & yaw_norm_thresholds, const float score_threshold)
														
 
															+  {
														
 
															+    cloud_capacity_ = cloud_capacity;
														
 
															+
														
 
															+    if (voxels_num.size() == 3) {
														
 
															+      max_voxels_ = voxels_num[2];
														
 
															+
														
 
															+      voxels_num_[0] = voxels_num[0];
														
 
															+      voxels_num_[1] = voxels_num[1];
														
 
															+      voxels_num_[2] = voxels_num[2];
														
 
															+
														
 
															+      min_voxel_size_ = voxels_num[0];
														
 
															+      opt_voxel_size_ = voxels_num[1];
														
 
															+      max_voxel_size_ = voxels_num[2];
														
 
															+
														
 
															+      min_points_size_ = voxels_num[0];
														
 
															+      opt_points_size_ = voxels_num[1];
														
 
															+      max_points_size_ = voxels_num[2];
														
 
															+
														
 
															+      min_coors_size_ = voxels_num[0];
														
 
															+      opt_coors_size_ = voxels_num[1];
														
 
															+      max_coors_size_ = voxels_num[2];
														
 
															+    }
														
 
															+    if (point_cloud_range.size() == 6) {
														
 
															+      min_x_range_ = static_cast<float>(point_cloud_range[0]);
														
 
															+      min_y_range_ = static_cast<float>(point_cloud_range[1]);
														
 
															+      min_z_range_ = static_cast<float>(point_cloud_range[2]);
														
 
															+      max_x_range_ = static_cast<float>(point_cloud_range[3]);
														
 
															+      max_y_range_ = static_cast<float>(point_cloud_range[4]);
														
 
															+      max_z_range_ = static_cast<float>(point_cloud_range[5]);
														
 
															+    }
														
 
															+    if (voxel_size.size() == 3) {
														
 
															+      voxel_x_size_ = static_cast<float>(voxel_size[0]);
														
 
															+      voxel_y_size_ = static_cast<float>(voxel_size[1]);
														
 
															+      voxel_z_size_ = static_cast<float>(voxel_size[2]);
														
 
															+    }
														
 
															+    if (num_proposals > 0) {
														
 
															+      num_proposals_ = num_proposals;
														
 
															+    }
														
 
															+    if (score_threshold > 0.0) {
														
 
															+      score_threshold_ = score_threshold;
														
 
															+    }
														
 
															+    if (circle_nms_dist_threshold > 0.0) {
														
 
															+      circle_nms_dist_threshold_ = circle_nms_dist_threshold;
														
 
															+    }
														
 
															+    yaw_norm_thresholds_ =
														
 
															+      std::vector<float>(yaw_norm_thresholds.begin(), yaw_norm_thresholds.end());
														
 
															+    for (auto & yaw_norm_threshold : yaw_norm_thresholds_) {
														
 
															+      yaw_norm_threshold =
														
 
															+        (yaw_norm_threshold >= 0.0 && yaw_norm_threshold < 1.0) ? yaw_norm_threshold : 0.0;
														
 
															+    }
														
 
															+    grid_x_size_ = static_cast<std::size_t>((max_x_range_ - min_x_range_) / voxel_x_size_);
														
 
															+    grid_y_size_ = static_cast<std::size_t>((max_y_range_ - min_y_range_) / voxel_y_size_);
														
 
															+    grid_z_size_ = static_cast<std::size_t>((max_z_range_ - min_z_range_) / voxel_z_size_);
														
 
															+
														
 
															+    feature_x_size_ = grid_x_size_ / out_size_factor_;
														
 
															+    feature_y_size_ = grid_y_size_ / out_size_factor_;
														
 
															+  }
														
 
															+
														
 
															+  ///// INPUT PARAMETERS /////
														
 
															+  std::size_t cloud_capacity_{};
														
 
															+  ///// KERNEL PARAMETERS /////
														
 
															+  const std::size_t threads_for_voxel_{256};  // threads number for a block
														
 
															+  const std::size_t points_per_voxel_{20};
														
 
															+  const std::size_t warp_size_{32};          // one warp(32 threads) for one pillar
														
 
															+  const std::size_t pillars_per_block_{64};  // one thread deals with one pillar
														
 
															+                                             // and a block has pillars_per_block threads
														
 
															+  const std::size_t pillar_feature_size_{64};
														
 
															+  std::size_t max_voxels_{60000};
														
 
															+
														
 
															+  ///// NETWORK PARAMETERS /////
														
 
															+  const std::size_t batch_size_{1};
														
 
															+  const std::size_t num_classes_{5};
														
 
															+  const std::size_t num_point_feature_size_{5};  // x, y, z, intensity, lag
														
 
															+  // the dimension of the input cloud
														
 
															+  float min_x_range_{-76.8};
														
 
															+  float max_x_range_{76.8};
														
 
															+  float min_y_range_{-76.8};
														
 
															+  float max_y_range_{76.8};
														
 
															+  float min_z_range_{-3.0};
														
 
															+  float max_z_range_{5.0};
														
 
															+  // the size of a pillar
														
 
															+  float voxel_x_size_{0.3};
														
 
															+  float voxel_y_size_{0.3};
														
 
															+  float voxel_z_size_{8.0};
														
 
															+  const std::size_t out_size_factor_{4};
														
 
															+  const std::size_t max_num_points_per_pillar_{points_per_voxel_};
														
 
															+  const std::size_t num_point_values_{4};
														
 
															+  std::size_t num_proposals_{200};
														
 
															+  // the number of feature maps for pillar scatter
														
 
															+  const std::size_t num_feature_scatter_{pillar_feature_size_};
														
 
															+  // the score threshold for classification
														
 
															+  float score_threshold_{0.2};
														
 
															+  float circle_nms_dist_threshold_{0.5};
														
 
															+  std::vector<float> yaw_norm_thresholds_{0.3, 0.3, 0.3, 0.3, 0.0};
														
 
															+  std::size_t max_num_pillars_{max_voxels_};
														
 
															+  const std::size_t pillar_points_bev_{max_num_points_per_pillar_ * max_num_pillars_};
														
 
															+  // the detected boxes result decode by (x, y, z, w, l, h, yaw)
														
 
															+  const std::size_t num_box_values_{8};
														
 
															+  // the input size of the 2D backbone network
														
 
															+  std::size_t grid_x_size_{512};
														
 
															+  std::size_t grid_y_size_{512};
														
 
															+  std::size_t grid_z_size_{1};
														
 
															+  // the output size of the 2D backbone network
														
 
															+  std::size_t feature_x_size_{grid_x_size_ / out_size_factor_};
														
 
															+  std::size_t feature_y_size_{grid_y_size_ / out_size_factor_};
														
 
															+
														
 
															+  ///// RUNTIME DIMENSIONS /////
														
 
															+  std::vector<std::size_t> voxels_num_{5000, 30000, 60000};
														
 
															+  // voxels
														
 
															+  std::size_t min_voxel_size_{voxels_num_[0]};
														
 
															+  std::size_t opt_voxel_size_{voxels_num_[1]};
														
 
															+  std::size_t max_voxel_size_{voxels_num_[2]};
														
 
															+
														
 
															+  std::size_t min_point_in_voxel_size_{points_per_voxel_};
														
 
															+  std::size_t opt_point_in_voxel_size_{points_per_voxel_};
														
 
															+  std::size_t max_point_in_voxel_size_{points_per_voxel_};
														
 
															+
														
 
															+  std::size_t min_network_feature_size_{num_point_feature_size_};
														
 
															+  std::size_t opt_network_feature_size_{num_point_feature_size_};
														
 
															+  std::size_t max_network_feature_size_{num_point_feature_size_};
														
 
															+
														
 
															+  // num_points
														
 
															+  std::size_t min_points_size_{voxels_num_[0]};
														
 
															+  std::size_t opt_points_size_{voxels_num_[1]};
														
 
															+  std::size_t max_points_size_{voxels_num_[2]};
														
 
															+
														
 
															+  // coors
														
 
															+  std::size_t min_coors_size_{voxels_num_[0]};
														
 
															+  std::size_t opt_coors_size_{voxels_num_[1]};
														
 
															+  std::size_t max_coors_size_{voxels_num_[2]};
														
 
															+
														
 
															+  std::size_t min_coors_dim_size_{num_point_values_};
														
 
															+  std::size_t opt_coors_dim_size_{num_point_values_};
														
 
															+  std::size_t max_coors_dim_size_{num_point_values_};
														
 
															+};
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__TRANSFUSION_CONFIG_HPP_
														
--- a/src/detection/detection_lidar_transfusion/utils.hpp
+++ b/src/detection/detection_lidar_transfusion/utils.hpp
@@ -0,0 +1,57 @@
 
															+// Copyright 2024 TIER IV, Inc.
														
 
															+//
														
 
															+// Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+// you may not use this file except in compliance with the License.
														
 
															+// You may obtain a copy of the License at
														
 
															+//
														
 
															+//     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+//
														
 
															+// Unless required by applicable law or agreed to in writing, software
														
 
															+// distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+// See the License for the specific language governing permissions and
														
 
															+// limitations under the License.
														
 
															+
														
 
															+#ifndef AUTOWARE__LIDAR_TRANSFUSION__UTILS_HPP_
														
 
															+#define AUTOWARE__LIDAR_TRANSFUSION__UTILS_HPP_
														
 
															+
														
 
															+#include <cstddef>
														
 
															+#include <iostream>
														
 
															+#include <stdexcept>
														
 
															+#include <unordered_map>
														
 
															+
														
 
															+namespace autoware::lidar_transfusion
														
 
															+{
														
 
															+
														
 
															+struct Box3D
														
 
															+{
														
 
															+  int label;
														
 
															+  float score;
														
 
															+  float x;
														
 
															+  float y;
														
 
															+  float z;
														
 
															+  float width;
														
 
															+  float length;
														
 
															+  float height;
														
 
															+  float yaw;
														
 
															+};
														
 
															+
														
 
															+enum NetworkIO { voxels = 0, num_points, coors, cls_score, dir_pred, bbox_pred, ENUM_SIZE };
														
 
															+
														
 
															+// cspell: ignore divup
														
 
															+template <typename T1, typename T2>
														
 
															+unsigned int divup(const T1 a, const T2 b)
														
 
															+{
														
 
															+  if (a == 0) {
														
 
															+    throw std::runtime_error("A dividend of divup isn't positive.");
														
 
															+  }
														
 
															+  if (b == 0) {
														
 
															+    throw std::runtime_error("A divisor of divup isn't positive.");
														
 
															+  }
														
 
															+
														
 
															+  return (a + b - 1) / b;
														
 
															+}
														
 
															+
														
 
															+}  // namespace autoware::lidar_transfusion
														
 
															+
														
 
															+#endif  // AUTOWARE__LIDAR_TRANSFUSION__UTILS_HPP_