123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- // Copyright 2022 TIER IV, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- #include "lidar_centerpoint/postprocess/circle_nms_kernel.hpp"
- #include <lidar_centerpoint/postprocess/postprocess_kernel.hpp>
- #include <thrust/count.h>
- #include <thrust/sort.h>
- namespace
- {
- const std::size_t THREADS_PER_BLOCK = 32;
- } // namespace
- namespace centerpoint
- {
- struct is_score_greater
- {
- is_score_greater(float t) : t_(t) {}
- __device__ bool operator()(const Box3D & b) { return b.score > t_; }
- private:
- float t_{0.0};
- };
- struct is_kept
- {
- __device__ bool operator()(const bool keep) { return keep; }
- };
- struct score_greater
- {
- __device__ bool operator()(const Box3D & lb, const Box3D & rb) { return lb.score > rb.score; }
- };
- __device__ inline float sigmoid(float x) { return 1.0f / expf(-x); }
- __global__ void generateBoxes3D_kernel(
- const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
- const float * out_rot, const float * out_vel, const float voxel_size_x, const float voxel_size_y,
- const float range_min_x, const float range_min_y, const std::size_t down_grid_size_x,
- const std::size_t down_grid_size_y, const std::size_t downsample_factor, const int class_size,
- Box3D * det_boxes3d)
- {
- // generate boxes3d from the outputs of the network.
- // shape of out_*: (N, DOWN_GRID_SIZE_Y, DOWN_GRID_SIZE_X)
- // heatmap: N = class_size, offset: N = 2, z: N = 1, dim: N = 3, rot: N = 2, vel: N = 2
- const auto yi = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x;
- const auto xi = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y;
- const auto idx = down_grid_size_x * yi + xi;
- const auto down_grid_size = down_grid_size_y * down_grid_size_x;
- if (yi >= down_grid_size_y || xi >= down_grid_size_x) {
- return;
- }
- int label = -1;
- float max_score = -1;
- for (int ci = 0; ci < class_size; ci++) {
- float score = sigmoid(out_heatmap[down_grid_size * ci + idx]);
- if (score > max_score) {
- label = ci;
- max_score = score;
- }
- }
- const float offset_x = out_offset[down_grid_size * 0 + idx];
- const float offset_y = out_offset[down_grid_size * 1 + idx];
- const float x = voxel_size_x * downsample_factor * (xi + offset_x) + range_min_x;
- const float y = voxel_size_y * downsample_factor * (yi + offset_y) + range_min_y;
- const float z = out_z[idx];
- const float w = out_dim[down_grid_size * 0 + idx];
- const float l = out_dim[down_grid_size * 1 + idx];
- const float h = out_dim[down_grid_size * 2 + idx];
- const float yaw_sin = out_rot[down_grid_size * 0 + idx];
- const float yaw_cos = out_rot[down_grid_size * 1 + idx];
- const float vel_x = out_vel[down_grid_size * 0 + idx];
- const float vel_y = out_vel[down_grid_size * 1 + idx];
- det_boxes3d[idx].label = label;
- det_boxes3d[idx].score = max_score;
- det_boxes3d[idx].x = x;
- det_boxes3d[idx].y = y;
- det_boxes3d[idx].z = z;
- det_boxes3d[idx].length = expf(l);
- det_boxes3d[idx].width = expf(w);
- det_boxes3d[idx].height = expf(h);
- det_boxes3d[idx].yaw = atan2f(yaw_sin, yaw_cos);
- det_boxes3d[idx].vel_x = vel_x;
- det_boxes3d[idx].vel_y = vel_y;
- }
- PostProcessCUDA::PostProcessCUDA(const CenterPointConfig & config) : config_(config)
- {
- const auto num_raw_boxes3d = config.down_grid_size_y_ * config.down_grid_size_x_;
- boxes3d_d_ = thrust::device_vector<Box3D>(num_raw_boxes3d);
- }
- cudaError_t PostProcessCUDA::generateDetectedBoxes3D_launch(
- const float * out_heatmap, const float * out_offset, const float * out_z, const float * out_dim,
- const float * out_rot, const float * out_vel, std::vector<Box3D> & det_boxes3d,
- cudaStream_t stream)
- {
- dim3 blocks(
- divup(config_.down_grid_size_y_, THREADS_PER_BLOCK),
- divup(config_.down_grid_size_x_, THREADS_PER_BLOCK));
- dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
- generateBoxes3D_kernel<<<blocks, threads, 0, stream>>>(
- out_heatmap, out_offset, out_z, out_dim, out_rot, out_vel, config_.voxel_size_x_,
- config_.voxel_size_y_, config_.range_min_x_, config_.range_min_y_, config_.down_grid_size_x_,
- config_.down_grid_size_y_, config_.downsample_factor_, config_.class_size_,
- thrust::raw_pointer_cast(boxes3d_d_.data()));
- // suppress by socre
- const auto num_det_boxes3d = thrust::count_if(
- thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(),
- is_score_greater(config_.score_threshold_));
- if (num_det_boxes3d == 0) {
- return cudaGetLastError();
- }
- thrust::device_vector<Box3D> det_boxes3d_d(num_det_boxes3d);
- thrust::copy_if(
- thrust::device, boxes3d_d_.begin(), boxes3d_d_.end(), det_boxes3d_d.begin(),
- is_score_greater(config_.score_threshold_));
- // sort by score
- thrust::sort(det_boxes3d_d.begin(), det_boxes3d_d.end(), score_greater());
- // supress by NMS
- thrust::device_vector<bool> final_keep_mask_d(num_det_boxes3d);
- const auto num_final_det_boxes3d =
- circleNMS(det_boxes3d_d, config_.circle_nms_dist_threshold_, final_keep_mask_d, stream);
- thrust::device_vector<Box3D> final_det_boxes3d_d(num_final_det_boxes3d);
- thrust::copy_if(
- thrust::device, det_boxes3d_d.begin(), det_boxes3d_d.end(), final_keep_mask_d.begin(),
- final_det_boxes3d_d.begin(), is_kept());
- // memcpy device to host
- det_boxes3d.resize(num_final_det_boxes3d);
- thrust::copy(final_det_boxes3d_d.begin(), final_det_boxes3d_d.end(), det_boxes3d.begin());
- return cudaGetLastError();
- }
- } // namespace centerpoint
|