// Copyright 2021 TIER IV, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "lidar_centerpoint/centerpoint_trt.hpp" #include #include #include //#include #include #include #include #include namespace centerpoint { CenterPointTRT::CenterPointTRT( const NetworkParam & encoder_param, const NetworkParam & head_param, const DensificationParam & densification_param, const CenterPointConfig & config) : config_(config) { vg_ptr_ = std::make_unique(densification_param, config_); post_proc_ptr_ = std::make_unique(config_); // encoder encoder_trt_ptr_ = std::make_unique(config_, verbose_); encoder_trt_ptr_->init( encoder_param.onnx_path(), encoder_param.engine_path(), encoder_param.trt_precision()); encoder_trt_ptr_->context_->setBindingDimensions( 0, nvinfer1::Dims3( config_.max_voxel_size_, config_.max_point_in_voxel_size_, config_.encoder_in_feature_size_)); // head std::vector out_channel_sizes = { config_.class_size_, config_.head_out_offset_size_, config_.head_out_z_size_, config_.head_out_dim_size_, config_.head_out_rot_size_, config_.head_out_vel_size_}; head_trt_ptr_ = std::make_unique(out_channel_sizes, config_, verbose_); head_trt_ptr_->init(head_param.onnx_path(), head_param.engine_path(), head_param.trt_precision()); head_trt_ptr_->context_->setBindingDimensions( 0, nvinfer1::Dims4( config_.batch_size_, config_.encoder_out_feature_size_, config_.grid_size_y_, config_.grid_size_x_)); initPtr(); cudaStreamCreate(&stream_); } CenterPointTRT::~CenterPointTRT() { if (stream_) { cudaStreamSynchronize(stream_); cudaStreamDestroy(stream_); } } void CenterPointTRT::initPtr() { const auto voxels_size = config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_; const auto coordinates_size = config_.max_voxel_size_ * config_.point_dim_size_; encoder_in_feature_size_ = config_.max_voxel_size_ * config_.max_point_in_voxel_size_ * config_.encoder_in_feature_size_; const auto pillar_features_size = config_.max_voxel_size_ * config_.encoder_out_feature_size_; spatial_features_size_ = config_.grid_size_x_ * config_.grid_size_y_ * config_.encoder_out_feature_size_; const auto grid_xy_size = config_.down_grid_size_x_ * config_.down_grid_size_y_; // host voxels_.resize(voxels_size); coordinates_.resize(coordinates_size); num_points_per_voxel_.resize(config_.max_voxel_size_); // device voxels_d_ = cuda::make_unique(voxels_size); coordinates_d_ = cuda::make_unique(coordinates_size); num_points_per_voxel_d_ = cuda::make_unique(config_.max_voxel_size_); encoder_in_features_d_ = cuda::make_unique(encoder_in_feature_size_); pillar_features_d_ = cuda::make_unique(pillar_features_size); spatial_features_d_ = cuda::make_unique(spatial_features_size_); head_out_heatmap_d_ = cuda::make_unique(grid_xy_size * config_.class_size_); head_out_offset_d_ = cuda::make_unique(grid_xy_size * config_.head_out_offset_size_); head_out_z_d_ = cuda::make_unique(grid_xy_size * config_.head_out_z_size_); head_out_dim_d_ = cuda::make_unique(grid_xy_size * config_.head_out_dim_size_); head_out_rot_d_ = cuda::make_unique(grid_xy_size * config_.head_out_rot_size_); head_out_vel_d_ = cuda::make_unique(grid_xy_size * config_.head_out_vel_size_); } bool CenterPointTRT::detect( const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer, std::vector & det_boxes3d) { std::fill(voxels_.begin(), voxels_.end(), 0); std::fill(coordinates_.begin(), coordinates_.end(), -1); std::fill(num_points_per_voxel_.begin(), num_points_per_voxel_.end(), 0); CHECK_CUDA_ERROR(cudaMemsetAsync( encoder_in_features_d_.get(), 0, encoder_in_feature_size_ * sizeof(float), stream_)); CHECK_CUDA_ERROR( cudaMemsetAsync(spatial_features_d_.get(), 0, spatial_features_size_ * sizeof(float), stream_)); if (!preprocess(input_pointcloud_msg, tf_buffer)) { RCLCPP_WARN_STREAM( rclcpp::get_logger("lidar_centerpoint"), "Fail to preprocess and skip to detect."); return false; } inference(); postProcess(det_boxes3d); return true; } bool CenterPointTRT::preprocess( const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer) { bool is_success = vg_ptr_->enqueuePointCloud(input_pointcloud_msg, tf_buffer); if (!is_success) { return false; } num_voxels_ = vg_ptr_->pointsToVoxels(voxels_, coordinates_, num_points_per_voxel_); if (num_voxels_ == 0) { return false; } const auto voxels_size = num_voxels_ * config_.max_point_in_voxel_size_ * config_.point_feature_size_; const auto coordinates_size = num_voxels_ * config_.point_dim_size_; // memcpy from host to device (not copy empty voxels) CHECK_CUDA_ERROR(cudaMemcpyAsync( voxels_d_.get(), voxels_.data(), voxels_size * sizeof(float), cudaMemcpyHostToDevice)); CHECK_CUDA_ERROR(cudaMemcpyAsync( coordinates_d_.get(), coordinates_.data(), coordinates_size * sizeof(int), cudaMemcpyHostToDevice)); CHECK_CUDA_ERROR(cudaMemcpyAsync( num_points_per_voxel_d_.get(), num_points_per_voxel_.data(), num_voxels_ * sizeof(float), cudaMemcpyHostToDevice)); CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); CHECK_CUDA_ERROR(generateFeatures_launch( voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_, config_.max_voxel_size_, config_.voxel_size_x_, config_.voxel_size_y_, config_.voxel_size_z_, config_.range_min_x_, config_.range_min_y_, config_.range_min_z_, encoder_in_features_d_.get(), stream_)); return true; } void CenterPointTRT::inference() { if (!encoder_trt_ptr_->context_ || !head_trt_ptr_->context_) { throw std::runtime_error("Failed to create tensorrt context."); } // pillar encoder network std::vector encoder_buffers{encoder_in_features_d_.get(), pillar_features_d_.get()}; encoder_trt_ptr_->context_->enqueueV2(encoder_buffers.data(), stream_, nullptr); // scatter CHECK_CUDA_ERROR(scatterFeatures_launch( pillar_features_d_.get(), coordinates_d_.get(), num_voxels_, config_.max_voxel_size_, config_.encoder_out_feature_size_, config_.grid_size_x_, config_.grid_size_y_, spatial_features_d_.get(), stream_)); // head network std::vector head_buffers = {spatial_features_d_.get(), head_out_heatmap_d_.get(), head_out_offset_d_.get(), head_out_z_d_.get(), head_out_dim_d_.get(), head_out_rot_d_.get(), head_out_vel_d_.get()}; head_trt_ptr_->context_->enqueueV2(head_buffers.data(), stream_, nullptr); } void CenterPointTRT::postProcess(std::vector & det_boxes3d) { CHECK_CUDA_ERROR(post_proc_ptr_->generateDetectedBoxes3D_launch( head_out_heatmap_d_.get(), head_out_offset_d_.get(), head_out_z_d_.get(), head_out_dim_d_.get(), head_out_rot_d_.get(), head_out_vel_d_.get(), det_boxes3d, stream_)); if (det_boxes3d.size() == 0) { RCLCPP_WARN_STREAM(rclcpp::get_logger("lidar_centerpoint"), "No detected boxes."); } } } // namespace centerpoint