
first commit label_sign_detection

liyupeng, 5 months ago
commit 54dcac5a93

+ 114 - 0
src/detection/label_sign_detection/README.md

@@ -0,0 +1,114 @@
+## Bilibili Tutorial Video
+
+https://www.bilibili.com/video/BV1Pa4y1N7HS
+
+## Introduction
+
+- Accelerates **YOLOv8** with **TensorRT**; this project uses the **ONNX-to-TensorRT** conversion approach
+- Supports **Windows 10** and **Linux**
+- Supports **Python/C++**
+
+## YOLOv8
+
+<div align="center">
+<img src="assets/1.png" width=800>
+</div>
+
+## Environment
+
+- **TensorRT 8.4.3**
+- **CUDA 11.6, cuDNN 8.4.1**
+- **onnx 1.12.0**
+
+## Quick Start
+
+Install the **yolov8** package and download the official model.
+
+```
+pip install ultralytics==8.0.5
+pip install onnx==1.12.0
+# download official weights (".pt" file)
+https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt
+```
+
+Export the **ONNX model** with the official command.
+
+```
+yolo mode=export model=yolov8n.pt format=onnx dynamic=False
+```
+
+Convert the official ONNX model with this repository's **v8_transform.py**; yolov8n.transd.onnx is generated automatically.
+
+```
+python v8_transform.py yolov8n.onnx
+```
+
+Copy the generated **ONNX** model into the **tensorrt/bin** folder and convert it with the official **trtexec** tool. **For FP32 inference, simply remove the `--fp16` flag.**
+
+```
+trtexec --onnx=yolov8n.transd.onnx --saveEngine=yolov8n_fp16.trt --fp16
+```
+
+## C++
+
+Set up the **OpenCV** and **TensorRT** environments; for details see https://github.com/Monday-Leo/Yolov5_Tensorrt_Win10
+
+Open this repository's **CMakeLists.txt**, change the **OpenCV** and **TensorRT** paths to your own, then run cmake.
+
+```
+# change to your own paths
+##################################################
+set(OpenCV_DIR "E:/opencv/build")  
+set(TRT_DIR "E:/TensorRT-8.4.3.1")  
+##################################################
+```
+
+<div align="center">
+<img src="assets/2.png" width=600>
+</div>
+
+Put the **test image zidane.jpg and the model yolov8n_fp16.trt into the exe folder** and run the program directly. **No warmup inference is performed, so the first inference time is not accurate**; modify the code to add a warmup pass if you need precise timing. **To change the model path or image path, edit the main program.**
+
+```
+int main() {
+	std::string img_path = "zidane.jpg";
+	std::string model_path = "yolov8n_fp16.trt";
+	single_inference(img_path,model_path);
+	return 0;
+}
+```
+
+<div align="center">
+<img src="assets/3.jpg" width=600>
+</div>
+
+## Python
+
+In the C++ project from the previous step, right-click the yolov8 target, open Properties, and change the configuration type to a **dynamic link library (DLL)**.
+
+<div align="center">
+<img src="assets/4.png" width=600>
+</div>
+
+Copy this repository's **python_trt.py** into the dll folder.
+
+<div align="center">
+<img src="assets/5.png" width=600>
+</div>
+
+Set the model path, the **dll** path, and the path of the image to predict. Note in particular that the model **path must be passed as a bytes literal (b'')**.
+
+```
+det = Detector(model_path=b"./yolov8n_fp16.trt",dll_path="./yolov8.dll")  # b'' is needed
+img = cv2.imread("./zidane.jpg")
+```
+
+<div align="center">
+<img src="assets/6.png" width=600>
+</div>
+
+## Reference
+
+https://github.com/ultralytics/ultralytics
+
+https://github.com/shouxieai/infer

+ 36 - 0
src/detection/label_sign_detection/include/Hungarian.h

@@ -0,0 +1,36 @@
+//
+// Created by lqx on 20-4-23.
+//
+
+#ifndef TRACK_SORT_HUNGARIAN_H_H
+#define TRACK_SORT_HUNGARIAN_H_H
+
+#include <iostream>
+#include <vector>
+
+using namespace std;
+
+class HungarianAlgorithm
+{
+public:
+    HungarianAlgorithm();
+    ~HungarianAlgorithm();
+    double Solve(vector<vector<double>>& DistMatrix, vector<int>& Assignment);
+
+private:
+    void assignmentoptimal(int *assignment, double *cost, double *distMatrix, int nOfRows, int nOfColumns);
+    void buildassignmentvector(int *assignment, bool *starMatrix, int nOfRows, int nOfColumns);
+    void computeassignmentcost(int *assignment, double *cost, double *distMatrix, int nOfRows);
+    void step2a(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+                bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim);
+    void step2b(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+                bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim);
+    void step3(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+               bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim);
+    void step4(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+               bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim, int row, int col);
+    void step5(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+               bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim);
+};
+
+#endif //TRACK_SORT_HUNGARIAN_H_H
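
A minimal usage sketch for `HungarianAlgorithm::Solve` (not part of the committed files; the cost values are made up for illustration). Rows are existing tracks, columns are new detections, and each entry is a matching cost such as `1 - IoU`:

```
#include <iostream>
#include <vector>
#include "Hungarian.h"

int main() {
    // rows = tracks, cols = detections; entries are matching costs (e.g. 1 - IoU)
    std::vector<std::vector<double>> cost = {
        {0.1, 0.9, 0.8},
        {0.7, 0.2, 0.6}
    };

    HungarianAlgorithm solver;
    std::vector<int> assignment;                 // assignment[row] = matched column, or -1
    double total = solver.Solve(cost, assignment);

    for (size_t row = 0; row < assignment.size(); ++row)
        std::cout << "track " << row << " -> detection " << assignment[row] << std::endl;
    std::cout << "total cost: " << total << std::endl;
    return 0;
}
```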

+ 90 - 0
src/detection/label_sign_detection/include/KalmanTracker.h

@@ -0,0 +1,90 @@
+//
+// Created by lqx on 20-4-23.
+//
+
+#ifndef TRACK_SORT_KALMANTRACKER_H
+#define TRACK_SORT_KALMANTRACKER_H
+
+///////////////////////////////////////////////////////////////////////////////
+// KalmanTracker.h: KalmanTracker Class Declaration
+
+#include "opencv2/video/tracking.hpp"
+#include "opencv2/highgui/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+
+#define StateType Rect_<float>
+
+
+// This class represents the internal state of an individual tracked object, observed as a bounding box.
+class KalmanTracker
+{
+public:
+    KalmanTracker()
+    {
+        init_kf(StateType());
+        m_time_since_update = 0;
+        m_hits = 0;
+        m_hit_streak = 0;
+        m_age = 0;
+        m_id = kf_count;
+        //kf_count++;
+    }
+    KalmanTracker(StateType initRect)
+    {
+        init_kf(initRect);
+        m_time_since_update = 0;
+        m_hits = 0;
+        m_hit_streak = 0;
+        m_age = 0;
+        m_id = kf_count;
+        //kf_count++;
+    }
+
+    KalmanTracker(StateType initRect, int classId,float prob)
+    {
+        init_kf(initRect);
+        m_time_since_update = 0;
+        m_hits = 0;
+        m_hit_streak = 0;
+        m_age = 0;
+        m_id = kf_count;
+        //kf_count++;
+        m_class_id = classId;
+        m_prob = prob;
+    }
+
+    ~KalmanTracker()
+    {
+        m_history.clear();
+        m_class_history.clear();
+    }
+
+    StateType predict();
+    void update(StateType stateMat,int classId, float prob);
+
+    StateType get_state();
+    StateType get_rect_xysr(float cx, float cy, float s, float r);
+
+    static int kf_count;
+
+    int m_time_since_update;
+    int m_hits;
+    int m_hit_streak;
+    int m_age;
+    int m_id;
+    int m_class_id;
+    std::vector<int> m_class_history;
+    float m_prob;
+
+private:
+    void init_kf(StateType stateMat);
+
+    cv::KalmanFilter kf;
+    cv::Mat measurement;
+
+    std::vector<StateType> m_history;
+};
+
+#endif //TRACK_SORT_KALMANTRACKER_H
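
A rough sketch of the predict/update cycle a single `KalmanTracker` goes through (in this commit the cycle is driven by `od::TrackObstacle`); the box coordinates below are placeholders:

```
#include "KalmanTracker.h"

// hypothetical single-track example: create a tracker from the first detection,
// then predict and correct it on the next frame
void track_one_box() {
    KalmanTracker tracker(Rect_<float>(100.f, 50.f, 40.f, 40.f), /*classId=*/1, /*prob=*/0.9f);

    // next frame: propagate the box forward in time
    Rect_<float> predicted = tracker.predict();

    // associate a new detection (e.g. via Hungarian matching) and correct the filter
    Rect_<float> detection(104.f, 52.f, 40.f, 40.f);
    tracker.update(detection, /*classId=*/1, /*prob=*/0.85f);

    Rect_<float> current = tracker.get_state();
    (void)predicted; (void)current;
}
```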

+ 153 - 0
src/detection/label_sign_detection/include/cpm.hpp

@@ -0,0 +1,153 @@
+#ifndef __CPM_HPP__
+#define __CPM_HPP__
+
+// Consumer-Producer Model
+
+#include <algorithm>
+#include <condition_variable>
+#include <future>
+#include <memory>
+#include <queue>
+#include <thread>
+
+namespace cpm {
+
+template <typename Result, typename Input, typename Model>
+class Instance {
+ protected:
+  struct Item {
+    Input input;
+    std::shared_ptr<std::promise<Result>> pro;
+  };
+
+  std::condition_variable cond_;
+  std::queue<Item> input_queue_;
+  std::mutex queue_lock_;
+  std::shared_ptr<std::thread> worker_;
+  volatile bool run_ = false;
+  volatile int max_items_processed_ = 0;
+  void *stream_ = nullptr;
+
+ public:
+  virtual ~Instance() { stop(); }
+
+  void stop() {
+    run_ = false;
+    cond_.notify_one();
+    {
+      std::unique_lock<std::mutex> l(queue_lock_);
+      while (!input_queue_.empty()) {
+        auto &item = input_queue_.front();
+        if (item.pro) item.pro->set_value(Result());
+        input_queue_.pop();
+      }
+    };
+
+    if (worker_) {
+      worker_->join();
+      worker_.reset();
+    }
+  }
+
+  virtual std::shared_future<Result> commit(const Input &input) {
+    Item item;
+    item.input = input;
+    item.pro.reset(new std::promise<Result>());
+    {
+      std::unique_lock<std::mutex> __lock_(queue_lock_);
+      input_queue_.push(item);
+    }
+    cond_.notify_one();
+    return item.pro->get_future();
+  }
+
+  virtual std::vector<std::shared_future<Result>> commits(const std::vector<Input> &inputs) {
+    std::vector<std::shared_future<Result>> output;
+    {
+      std::unique_lock<std::mutex> __lock_(queue_lock_);
+      for (int i = 0; i < (int)inputs.size(); ++i) {
+        Item item;
+        item.input = inputs[i];
+        item.pro.reset(new std::promise<Result>());
+        output.emplace_back(item.pro->get_future());
+        input_queue_.push(item);
+      }
+    }
+    cond_.notify_one();
+    return output;
+  }
+
+  template <typename LoadMethod>
+  bool start(const LoadMethod &loadmethod, int max_items_processed = 1, void *stream = nullptr) {
+    stop();
+
+    this->stream_ = stream;
+    this->max_items_processed_ = max_items_processed;
+    std::promise<bool> status;
+    worker_ = std::make_shared<std::thread>(&Instance::worker<LoadMethod>, this,
+                                            std::ref(loadmethod), std::ref(status));
+    return status.get_future().get();
+  }
+
+ private:
+  template <typename LoadMethod>
+  void worker(const LoadMethod &loadmethod, std::promise<bool> &status) {
+    std::shared_ptr<Model> model = loadmethod();
+    if (model == nullptr) {
+      status.set_value(false);
+      return;
+    }
+
+    run_ = true;
+    status.set_value(true);
+
+    std::vector<Item> fetch_items;
+    std::vector<Input> inputs;
+    while (get_items_and_wait(fetch_items, max_items_processed_)) {
+      inputs.resize(fetch_items.size());
+      std::transform(fetch_items.begin(), fetch_items.end(), inputs.begin(),
+                     [](Item &item) { return item.input; });
+
+      auto ret = model->forwards(inputs, stream_);
+      for (int i = 0; i < (int)fetch_items.size(); ++i) {
+        if (i < (int)ret.size()) {
+          fetch_items[i].pro->set_value(ret[i]);
+        } else {
+          fetch_items[i].pro->set_value(Result());
+        }
+      }
+      inputs.clear();
+      fetch_items.clear();
+    }
+    model.reset();
+    run_ = false;
+  }
+
+  virtual bool get_items_and_wait(std::vector<Item> &fetch_items, int max_size) {
+    std::unique_lock<std::mutex> l(queue_lock_);
+    cond_.wait(l, [&]() { return !run_ || !input_queue_.empty(); });
+
+    if (!run_) return false;
+
+    fetch_items.clear();
+    for (int i = 0; i < max_size && !input_queue_.empty(); ++i) {
+      fetch_items.emplace_back(std::move(input_queue_.front()));
+      input_queue_.pop();
+    }
+    return true;
+  }
+
+  virtual bool get_item_and_wait(Item &fetch_item) {
+    std::unique_lock<std::mutex> l(queue_lock_);
+    cond_.wait(l, [&]() { return !run_ || !input_queue_.empty(); });
+
+    if (!run_) return false;
+
+    fetch_item = std::move(input_queue_.front());
+    input_queue_.pop();
+    return true;
+  }
+};
+};  // namespace cpm
+
+#endif  // __CPM_HPP__
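
A sketch of how this consumer-producer wrapper is typically paired with the `yolo::Infer` interface added later in this commit: one worker thread owns the engine and batches queued inputs, while producers simply commit images and wait on futures. The engine file name is illustrative:

```
#include <iostream>
#include <opencv2/opencv.hpp>
#include "cpm.hpp"
#include "yolo.hpp"

int main() {
    // Result = BoxArray, Input = Image, Model = yolo::Infer
    cpm::Instance<yolo::BoxArray, yolo::Image, yolo::Infer> engine;

    // the engine is loaded inside the worker thread; up to 4 queued items per batch
    bool ok = engine.start([] {
        return yolo::load("yolov8n_fp16.trt", yolo::Type::V8);
    }, /*max_items_processed=*/4);
    if (!ok) return -1;

    cv::Mat frame = cv::imread("zidane.jpg");
    // commit returns a future; the worker batches pending inputs and calls forwards()
    auto fut = engine.commit(yolo::Image(frame.data, frame.cols, frame.rows));
    yolo::BoxArray boxes = fut.get();

    for (auto &box : boxes)
        std::cout << box.class_label << " " << box.confidence << std::endl;
    return 0;
}
```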

+ 54 - 0
src/detection/label_sign_detection/include/detect_obstacle.h

@@ -0,0 +1,54 @@
+#ifndef DETECT_TURNSTILE_H
+#define DETECT_TURNSTILE_H
+
+#include "Hungarian.h"
+#include "KalmanTracker.h"
+
+namespace od{
+
+const int COLOR_MAP[3][3]={{0, 0, 255},{0, 255, 0},{0,255,255}};
+const int max_age = 3;
+const int min_hits = 3;
+const double iouThreshold = 0.5;
+
+struct bbox_t {
+    unsigned int x, y, w, h;       // (x,y) - top-left corner, (w, h) - width & height of bounding box
+    float prob;                    // confidence - probability that the object was found correctly
+    unsigned int obj_id;           // class of object - from range [0, classes-1]
+    unsigned int track_id;         // tracking id for video (0 - untracked, 1 - inf - tracked object)
+    unsigned int frames_counter;   // counter of frames on which the object was detected
+    float x_3d, y_3d, z_3d;        // center of object (in Meters) if ZED 3D Camera is used
+};
+
+typedef struct TrackingBox
+{
+    int frame;
+    int id;
+    int class_id;
+    float prob;
+    Rect_<float> box;
+    vector<int> class_history;
+}TrackingBox;
+
+// yolo data to DetectBox
+typedef struct DetectBox
+{
+    int class_id;
+    float prob;
+    Rect_<float> box;
+}DetectBox;
+
+//Computes IOU between two bounding boxes
+double GetIOU(Rect_<float> bb_test, Rect_<float> bb_gt);
+// draw the detection box and related info
+void DrawBoxes(Mat &frame, vector<string> classes, int classId, int turnstileId,float conf, int left, int top, int right, int bottom);
+
+// draw detection results on an image
+void Drawer(Mat &frame, vector<bbox_t> outs, vector<string> classes);
+// draw detection results for a video (tracked objects)
+void Drawer(Mat &frame, vector<od::TrackingBox> &track_result, vector<string> &classes);
+//tracking obstacle
+bool TrackObstacle(int frame_count,vector<KalmanTracker> &trackers,vector<bbox_t> &outs,vector<od::TrackingBox> &track_result);
+}
+
+#endif // DETECT_TURNSTILE_H
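
A condensed sketch of the per-frame tracking step these declarations support (main.cpp in this commit contains the full version): detections converted to `od::bbox_t` go in, smoothed `od::TrackingBox` results come out:

```
#include <iostream>
#include <vector>
#include "detect_obstacle.h"

// hypothetical per-frame tracking step; detections come from the detector
void track_frame(int frame_count,
                 std::vector<KalmanTracker> &trackers,
                 std::vector<od::bbox_t> &detections)
{
    std::vector<od::TrackingBox> track_result;
    if (od::TrackObstacle(frame_count, trackers, detections, track_result)) {
        for (const auto &t : track_result)
            std::cout << "id " << t.id << " class " << t.class_id
                      << " prob " << t.prob << std::endl;
    }
}
```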

+ 82 - 0
src/detection/label_sign_detection/include/imageBuffer.h

@@ -0,0 +1,82 @@
+#ifndef IMAGEBUFFER_H
+#define IMAGEBUFFER_H
+
+#include <opencv2/opencv.hpp>
+#include <mutex>
+#include <condition_variable>
+#include <queue>
+template<typename T>
+class ConsumerProducerQueue
+{
+
+public:
+    ConsumerProducerQueue(int mxsz,bool dropFrame) :
+            maxSize(mxsz),dropFrame(dropFrame)
+    { }
+
+    bool add(T request)
+    {
+        std::unique_lock<std::mutex> lock(mutex);
+        if(dropFrame && isFull())
+        {
+            //lock.unlock();
+            //return false;
+            cpq.pop();
+            cpq.push(request);
+            cond.notify_all();
+            return true;
+        }
+        else {
+            cond.wait(lock, [this]() { return !isFull(); });
+            cpq.push(request);
+            //lock.unlock();
+            cond.notify_all();
+            return true;
+        }
+    }
+    void consume(T &request)
+    {
+        std::unique_lock<std::mutex> lock(mutex);
+        cond.wait(lock, [this]()
+        { return !isEmpty(); });
+        request = cpq.front();
+        cpq.pop();
+        //lock.unlock();
+        cond.notify_all();
+
+    }
+
+    bool isFull() const
+    {
+        return cpq.size() >= maxSize;
+    }
+
+    bool isEmpty() const
+    {
+        return cpq.size() == 0;
+    }
+
+    int length() const
+    {
+        return cpq.size();
+    }
+
+    void clear()
+    {
+        std::unique_lock<std::mutex> lock(mutex);
+        while (!isEmpty())
+        {
+            cpq.pop();
+        }
+        lock.unlock();
+        cond.notify_all();
+    }
+
+private:
+    std::condition_variable cond;  // condition variable used to synchronize producer and consumer threads
+    std::mutex mutex;     // mutex protecting the queue from data races
+    std::queue<T> cpq;    // underlying FIFO queue
+    int maxSize;
+    bool dropFrame;
+};
+#endif // IMAGEBUFFER_H
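
A minimal producer/consumer sketch with this queue (the same pattern main.cpp uses: a capture thread adds frames, the inference loop consumes them); the camera index and frame count are illustrative:

```
#include <thread>
#include <opencv2/opencv.hpp>
#include "imageBuffer.h"

int main() {
    // buffer at most 3 frames; drop the oldest frame when full
    ConsumerProducerQueue<cv::Mat> buffer(3, true);

    std::thread producer([&buffer] {
        cv::VideoCapture cap(0);          // illustrative camera index
        cv::Mat frame;
        while (cap.read(frame))
            buffer.add(frame.clone());    // clone so the queued Mat owns its data
    });

    for (int i = 0; i < 100; ++i) {       // consume a fixed number of frames for the sketch
        cv::Mat frame;
        buffer.consume(frame);            // blocks until a frame is available
        cv::imshow("frame", frame);
        cv::waitKey(1);
    }
    producer.detach();
    return 0;
}
```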

+ 98 - 0
src/detection/label_sign_detection/include/infer.hpp

@@ -0,0 +1,98 @@
+#ifndef __INFER_HPP__
+#define __INFER_HPP__
+
+#include <initializer_list>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace trt {
+
+#define INFO(...) trt::__log_func(__FILE__, __LINE__, __VA_ARGS__)
+void __log_func(const char *file, int line, const char *fmt, ...);
+
+enum class DType : int { FLOAT = 0, HALF = 1, INT8 = 2, INT32 = 3, BOOL = 4, UINT8 = 5 };
+
+class Timer {
+ public:
+  Timer();
+  virtual ~Timer();
+  void start(void *stream = nullptr);
+  float stop(const char *prefix = "Timer", bool print = true);
+
+ private:
+  void *start_, *stop_;
+  void *stream_;
+};
+
+class BaseMemory {
+ public:
+  BaseMemory() = default;
+  BaseMemory(void *cpu, size_t cpu_bytes, void *gpu, size_t gpu_bytes);
+  virtual ~BaseMemory();
+  virtual void *gpu_realloc(size_t bytes);
+  virtual void *cpu_realloc(size_t bytes);
+  void release_gpu();
+  void release_cpu();
+  void release();
+  inline bool owner_gpu() const { return owner_gpu_; }
+  inline bool owner_cpu() const { return owner_cpu_; }
+  inline size_t cpu_bytes() const { return cpu_bytes_; }
+  inline size_t gpu_bytes() const { return gpu_bytes_; }
+  virtual inline void *get_gpu() const { return gpu_; }
+  virtual inline void *get_cpu() const { return cpu_; }
+  void reference(void *cpu, size_t cpu_bytes, void *gpu, size_t gpu_bytes);
+
+ protected:
+  void *cpu_ = nullptr;
+  size_t cpu_bytes_ = 0, cpu_capacity_ = 0;
+  bool owner_cpu_ = true;
+
+  void *gpu_ = nullptr;
+  size_t gpu_bytes_ = 0, gpu_capacity_ = 0;
+  bool owner_gpu_ = true;
+};
+
+template <typename _DT>
+class Memory : public BaseMemory {
+ public:
+  Memory() = default;
+  Memory(const Memory &other) = delete;
+  Memory &operator=(const Memory &other) = delete;
+  virtual _DT *gpu(size_t size) { return (_DT *)BaseMemory::gpu_realloc(size * sizeof(_DT)); }
+  virtual _DT *cpu(size_t size) { return (_DT *)BaseMemory::cpu_realloc(size * sizeof(_DT)); }
+
+  inline size_t cpu_size() const { return cpu_bytes_ / sizeof(_DT); }
+  inline size_t gpu_size() const { return gpu_bytes_ / sizeof(_DT); }
+
+  virtual inline _DT *gpu() const { return (_DT *)gpu_; }
+  virtual inline _DT *cpu() const { return (_DT *)cpu_; }
+};
+
+class Infer {
+ public:
+  virtual bool forward(const std::vector<void *> &bindings, void *stream = nullptr,
+                       void *input_consum_event = nullptr) = 0;
+  virtual int index(const std::string &name) = 0;
+  virtual std::vector<int> run_dims(const std::string &name) = 0;
+  virtual std::vector<int> run_dims(int ibinding) = 0;
+  virtual std::vector<int> static_dims(const std::string &name) = 0;
+  virtual std::vector<int> static_dims(int ibinding) = 0;
+  virtual int numel(const std::string &name) = 0;
+  virtual int numel(int ibinding) = 0;
+  virtual int num_bindings() = 0;
+  virtual bool is_input(int ibinding) = 0;
+  virtual bool set_run_dims(const std::string &name, const std::vector<int> &dims) = 0;
+  virtual bool set_run_dims(int ibinding, const std::vector<int> &dims) = 0;
+  virtual DType dtype(const std::string &name) = 0;
+  virtual DType dtype(int ibinding) = 0;
+  virtual bool has_dynamic_dim() = 0;
+  virtual void print() = 0;
+};
+
+std::shared_ptr<Infer> load(const std::string &file);
+std::string format_shape(const std::vector<int> &shape);
+
+}  // namespace trt
+
+#endif  // __INFER_HPP__
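
A sketch of the low-level `trt::Infer` interface on its own (the yolo wrapper in this commit builds on top of it): load a serialized engine and inspect its bindings. The engine path is illustrative:

```
#include <iostream>
#include "infer.hpp"

int main() {
    // load a serialized TensorRT engine (path is illustrative)
    auto engine = trt::load("yolov8n_fp16.trt");
    if (engine == nullptr) {
        std::cout << "failed to load engine" << std::endl;
        return -1;
    }

    engine->print();  // dump binding names, shapes and dtypes
    for (int i = 0; i < engine->num_bindings(); ++i) {
        std::cout << (engine->is_input(i) ? "input  " : "output ")
                  << trt::format_shape(engine->static_dims(i)) << std::endl;
    }
    return 0;
}
```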

+ 75 - 0
src/detection/label_sign_detection/include/yolo.hpp

@@ -0,0 +1,75 @@
+#ifndef __YOLO_HPP__
+#define __YOLO_HPP__
+
+#include <future>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace yolo {
+
+enum class Type : int {
+  V5 = 0,
+  X = 1,
+  V3 = 2,
+  V7 = 3,
+  V8 = 5,
+  V8Seg = 6  // yolov8 instance segmentation
+};
+
+struct InstanceSegmentMap {
+  int width = 0, height = 0;      // width % 8 == 0
+  unsigned char *data = nullptr;  // is width * height memory
+
+  InstanceSegmentMap(int width, int height);
+  virtual ~InstanceSegmentMap();
+};
+
+struct Box {
+  float left, top, right, bottom, confidence;
+  int class_label;
+  std::shared_ptr<InstanceSegmentMap> seg;  // valid only in segment task
+
+  Box() = default;
+  Box(float left, float top, float right, float bottom, float confidence, int class_label)
+      : left(left),
+        top(top),
+        right(right),
+        bottom(bottom),
+        confidence(confidence),
+        class_label(class_label) {}
+};
+
+struct Image {
+  const void *bgrptr = nullptr;
+  int width = 0, height = 0;
+
+  Image() = default;
+  Image(const void *bgrptr, int width, int height) : bgrptr(bgrptr), width(width), height(height) {}
+};
+
+typedef std::vector<Box> BoxArray;
+
+// [Preprocess]: 0.50736 ms
+// [Forward]: 3.96410 ms
+// [BoxDecode]: 0.12016 ms
+// [SegmentDecode]: 0.15610 ms
+class Infer {
+ public:
+  virtual BoxArray forward(const Image &image, void *stream = nullptr) = 0;
+  virtual std::vector<BoxArray> forwards(const std::vector<Image> &images,
+                                         void *stream = nullptr) = 0;
+};
+
+std::shared_ptr<Infer> load(const std::string &engine_file, Type type,
+                            float confidence_threshold = 0.25f, float nms_threshold = 0.5f);
+
+Infer *loadraw(const std::string &engine_file, Type type,
+               float confidence_threshold = 0.25f, float nms_threshold = 0.5f);
+
+const char *type_name(Type type);
+std::tuple<uint8_t, uint8_t, uint8_t> hsv2bgr(float h, float s, float v);
+std::tuple<uint8_t, uint8_t, uint8_t> random_color(int id);
+};  // namespace yolo
+
+#endif  // __YOLO_HPP__

+ 25 - 0
src/detection/label_sign_detection/label_sign_config/label_sign_config.yaml

@@ -0,0 +1,25 @@
+%YAML:1.0
+---
+imshow_flag: true
+ivlog_flag: true
+test_video:
+  open: false
+  video_path: /media/nvidia/Elements-lqx/saveVideoPy/1685957551.1323013.avi
+trackstate: true
+conf_thr: 0.4
+nms_thr : 0.4
+model:
+  onnx_path: ./label_sign_config/best.transd.onnx
+  engine_path: ./label_sign_config/best.transd.engine
+camera:
+  cropstate: true
+  crop_height: 
+    start: 0
+    end: 720
+  crop_width:
+    start: 280
+    end: 1000
+
+signlabels:
+  - "Speed5"
+  - "Park"

+ 134 - 0
src/detection/label_sign_detection/label_signdetect.pro

@@ -0,0 +1,134 @@
+QT -= gui
+
+CONFIG += c++11 console
+CONFIG -= app_bundle
+
+# The following define makes your compiler emit warnings if you use
+# any Qt feature that has been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+
+# You can also make your code fail to compile if it uses deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
+
+# Default rules for deployment.
+qnx: target.path = /tmp/$${TARGET}/bin
+else: unix:!android: target.path = /opt/$${TARGET}/bin
+!isEmpty(target.path): INSTALLS += target
+
+
+
+SOURCES += main.cpp \
+           src/Hungarian.cpp \
+           src/KalmanTracker.cpp \
+           src/detect_obstacle.cpp\
+           $$PWD/../../include/msgtype/rawpic.pb.cc \
+           $$PWD/../../include/msgtype/cameraobjectarray.pb.cc \
+           $$PWD/../../include/msgtype/cameraobject.pb.cc
+
+
+
+INCLUDEPATH += include
+
+HEADERS += include/yolo.hpp \
+           include/infer.hpp \
+           include/cpm.hpp \
+           include/imageBuffer.h \
+           include/Hungarian.h \
+           include/KalmanTracker.h \
+           include/detect_obstacle.h
+
+
+CUDA_SOURCES += src/infer.cu \
+                src/yolo.cu
+
+DISTFILES +=  src/infer.cu \
+              src/yolo.cu
+
+LIBS += -L"/usr/local/lib" \
+        -lprotobuf
+
+# opencv
+INCLUDEPATH += /usr/include/opencv4
+LIBS += /usr/lib/aarch64-linux-gnu/libopencv_*.so
+
+
+# tensorrt
+INCLUDEPATH += /usr/src/tensorrt/include \
+               /usr/include/aarch64-linux-gnu \
+               /usr/local/cuda/include \
+               /usr/src/tensorrt/samples/common \
+               /usr/local/include
+
+LIBS += /usr/lib/aarch64-linux-gnu/libnvparsers.so \
+        /usr/lib/aarch64-linux-gnu/libnvinfer.so \
+        /usr/lib/aarch64-linux-gnu/libnvonnxparser.so \
+        /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so
+
+# c++
+LIBS += -L/usr/lib/aarch64-linux-gnu -lstdc++fs
+# cuda
+CUDA_SDK = "/usr/local/cuda"   # Path to cuda SDK install
+CUDA_DIR = "/usr/local/cuda"            # Path to cuda toolkit install
+
+
+INCLUDEPATH += ../../../include
+
+
+#LIBS +=  $$PWD/../../../bin/libxmlparam.so \
+#         $$PWD/../../../bin/libivlog.so \
+#         $$PWD/../../../bin/libivfault.so \
+#         $$PWD/../../../bin/libmodulecomm.so
+
+!include(../../../include/common.pri ) {
+    error( "Couldn't find the common.pri file!" )
+}
+
+!include(../../../include/ivprotobuf.pri ) {
+    error( "Couldn't find the ivprotobuf.pri file!" )
+}
+!include(../../../include/ivopencv.pri ) {
+    error( "Couldn't find the ivopencv.pri file!" )
+}
+
+INCLUDEPATH += $$PWD/../../include/msgtype
+LIBS += -L/usr/lib/aarch64-linux-gnu/ -lglog
+
+##### system type and compute capability ###########
+SYSTEM_NAME = linux         # Depending on your system either 'Win32', 'x64', or 'Win64'
+SYSTEM_TYPE = 64            # '32' or '64', depending on your system
+CUDA_ARCH = sm_72           # Type of CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'
+NVCC_OPTIONS = --use_fast_math
+
+INCLUDEPATH += $$CUDA_DIR/include
+QMAKE_LIBDIR += $$CUDA_DIR/lib64/
+
+CUDA_OBJECTS_DIR = ./
+
+# Add the necessary libraries
+CUDA_LIBS = -lcuda -lcudart #-lcublas
+
+# The following makes sure all path names (which often include spaces) are put between quotation marks
+CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
+LIBS += $$CUDA_LIBS
+
+# Configuration of the Cuda compiler
+CONFIG(debug, debug|release) {
+    # Debug mode
+    cuda_d.input = CUDA_SOURCES
+    cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}.o
+    cuda_d.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+    cuda_d.dependency_type = TYPE_C
+    QMAKE_EXTRA_COMPILERS += cuda_d
+}
+else {
+    # Release mode
+    cuda.input = CUDA_SOURCES
+    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}.o
+    cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+    cuda.dependency_type = TYPE_C
+    QMAKE_EXTRA_COMPILERS += cuda
+}

+ 553 - 0
src/detection/label_sign_detection/main.cpp

@@ -0,0 +1,553 @@
+#include "NvInfer.h"
+#include "cuda_runtime_api.h"
+#include <fstream>
+#include <iostream>
+#include <opencv2/opencv.hpp>
+#include "cpm.hpp"
+#include "infer.hpp"
+#include "yolo.hpp"
+
+#include <QCoreApplication>
+#include "modulecomm.h"
+#include "xmlparam.h"
+#include "ivfault.h"
+#include "ivlog.h"
+#include "ivexit.h"
+#include "ivversion.h"
+#include "rawpic.pb.h"
+#include "cameraobject.pb.h"
+#include "cameraobjectarray.pb.h"
+#include <thread>
+#include "qmutex.h"
+#include "condition_variable"
+#include "imageBuffer.h"
+#include "detect_obstacle.h"
+// ONNX parser header, used to build the engine from an ONNX model
+#include "NvOnnxParser.h"
+
+
+using namespace nvonnxparser;
+using namespace nvinfer1;
+using namespace std;
+
+static const char *cocolabels[] = {"Speed10","Speed5","Left","Right","Nopassing","Park"};
+
+std::vector<std::string> signlabels;
+string config_file = "./label_sign_config/label_sign_config.yaml";
+string onnx_path = "./label_sign_config/sign_best.onnx";
+string engine_path = "./label_sign_config/sign_best.engine"; // engine paths
+
+bool imshow_flag = true;
+bool ivlog_flag = false;
+
+
+float conf_thr = 0.5;
+float nms_thr = 0.4;
+
+bool calibrationstate = false;
+string calibration_yamlpath = "./yaml/camera_middle_640_360.yaml";
+
+bool cropstate = true;
+cv::Range crop_height = cv::Range(0,720);
+cv::Range crop_width = cv::Range(280,1000);
+
+
+bool test_video = false;
+//string video_path = "v4l2src device=/dev/video0 ! video/x-raw, width=(int)1280, height=(int)720 ! videoconvert ! appsink";
+string video_path = "/home/nvidia/code/modularization/src/detection/trafficlight_detection/data/camera_test3.mp4";
+
+// whether tracking is enabled
+bool trackstate = true;
+
+// camera input
+void * gpcamera;
+string cameraname="image00";
+ConsumerProducerQueue<cv::Mat> * imageBuffer =  new ConsumerProducerQueue<cv::Mat>(3,true);
+
+// shared memory channels
+void * gpdetect;
+string detectname = "signarray";  // detection results
+
+void * gpresultimg;
+string resultimgname = "signimage"; // result image with detections drawn
+
+iv::Ivfault *gfault = nullptr;
+iv::Ivlog *givlog = nullptr;
+
+cv::Mat camera_matrix,dist_coe,map1,map2;  // calibration parameters
+
+// read video frames in a separate thread
+void ReadFunc(int n)
+{
+    cv::VideoCapture cap(video_path);
+    if(!cap.isOpened())
+    {
+        cout<<"camera failed to open"<<endl;
+    }
+    while(1)
+    {
+        cv::Mat frame;
+        // needed when reading from a video file; remove when reading from a camera
+        if(imageBuffer->isFull())
+        {
+            continue;
+        }
+        if(cap.read(frame))
+        {
+            if(calibrationstate)
+                cv::remap(frame,frame,map1,map2,cv::INTER_LINEAR,cv::BORDER_CONSTANT);
+            if(cropstate)
+                frame = frame(crop_height,crop_width);
+            imageBuffer->add(frame);
+        }
+        else
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        }
+    }
+}
+void Listenpic(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
+{
+    if(nSize<1000)return;
+    iv::vision::rawpic pic;
+    if(false == pic.ParseFromArray(strdata,nSize))
+    {
+        std::cout<<"picview Listenpic fail."<<std::endl;
+        return;
+    }
+    cv::Mat mat(pic.height(),pic.width(),pic.mattype());
+    if(pic.type() == 1)
+        memcpy(mat.data,pic.picdata().data(),mat.rows*mat.cols*mat.elemSize());
+    else
+    {
+        //     mat.release();
+        std::vector<unsigned char> buff(pic.picdata().data(),pic.picdata().data() + pic.picdata().size());
+        mat = cv::imdecode(buff,cv::IMREAD_COLOR);
+    }
+    if(calibrationstate)
+        cv::remap(mat,mat,map1,map2,cv::INTER_LINEAR,cv::BORDER_CONSTANT);
+    if(cropstate)
+        mat = mat(crop_height,crop_width);
+    imageBuffer->add(mat);
+    mat.release();
+}
+void shareSignMsg(vector<yolo::Box> results)
+{
+    iv::vision::cameraobjectarray sign_array;   // results to publish via shared memory
+    int index=0;
+    for (auto &result : results)
+    {
+        float width = result.right - result.left;
+        float height = result.bottom - result.top;
+        float center_x = result.left + 0.5*width;
+        float center_y = result.top + 0.5*height;
+        /*---------------protobuf----------------*/
+        iv::vision::cameraobject *object = sign_array.add_obj();
+        object->set_id(index);
+        auto type = signlabels[result.class_label];
+        object->set_type(type);
+        object->set_con(result.confidence);
+        object->set_w(width);
+        object->set_h(height);
+        object->set_x(result.left);
+        object->set_y(result.top);
+
+        //cout<<"sign id label: "<<index<<" "<<type<<endl;
+
+        if(ivlog_flag)
+            givlog->verbose("sign id label: %d %s",index,type.data());
+        index ++;
+    }
+    int size = sign_array.ByteSize();
+    char * strdata = new char[sign_array.ByteSize()];
+    if(sign_array.SerializeToArray(strdata, size))
+    {
+        iv::modulecomm::ModuleSendMsg(gpdetect, strdata, size);
+    }
+    else
+    {
+        std::cout<<"sign_array serialize error."<<std::endl;
+    }
+    sign_array.Clear();
+    delete[] strdata;
+
+    /*--------------------test ParseFromArray-------------------*/
+
+//    iv::vision::cameraobjectarray light_array1;
+//    light_array1.ParseFromArray(strdata,size);
+//    cout<<"parsefromarray:"<<std::endl;
+//    cout<<"light_size:"<<light_array1.obj_size()<<endl;
+//    for (int i=0;i<light_array1.obj_size();i++) {
+//        std::cout<<"index: "<<light_array1.obj(i).id()
+//                <<" type: "<<light_array1.obj(i).type()
+//                <<" lefttop: "<<light_array1.obj(i).x()
+//                <<" "<<light_array1.obj(i).y()
+//                <<" "<<light_array1.obj(i).w()
+//                <<" "<<light_array1.obj(i).h()<<std::endl;
+//    }
+//    delete strdata;
+
+
+}
+
+void SendResultImg(cv::Mat &resultimg, void* g_name)
+{
+    iv::vision::rawpic cameraPic;
+    //cameraPic.set_time(img_info.timestamp);
+    cameraPic.set_elemsize(resultimg.elemSize());
+    cameraPic.set_width(resultimg.cols);
+    cameraPic.set_height(resultimg.rows);
+    cameraPic.set_mattype(resultimg.type());
+    std::vector<int> param = std::vector<int>(2);
+    param[0] = cv::IMWRITE_JPEG_QUALITY;
+    param[1] = 95; // default(95) 0-100
+    std::vector<unsigned char> buff;
+    cv::imencode(".jpg",resultimg,buff,param);
+    cameraPic.set_picdata(buff.data(),buff.size());
+    buff.clear();
+    cameraPic.set_type(2);
+    std::string out_img = cameraPic.SerializeAsString();
+    iv::modulecomm::ModuleSendMsg(g_name,out_img.data(),out_img.length());
+}
+
+yolo::Image cvimg(const cv::Mat &image) { return yolo::Image(image.data, image.cols, image.rows); }
+
+
+//static Logger gLogger;
+// TensorRT logger
+class Logger : public ILogger
+{
+    void log(Severity severity, const char* msg) noexcept override
+    {
+        // suppress info-level messages
+        if (severity <= Severity::kWARNING)
+            std::cout << msg << std::endl;
+    }
+} gLogger;
+
+// Create the engine by parsing the ONNX model.
+ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config)
+{
+
+    INetworkDefinition* network = builder->createNetworkV2(1U); // important: 1U (explicit batch); 0U causes problems here
+
+    IParser* parser = createParser(*network, gLogger);
+    parser->parseFromFile(onnx_path.c_str(), static_cast<int32_t>(ILogger::Severity::kWARNING));
+    // report parsing errors, if any
+    for (int32_t i = 0; i < parser->getNbErrors(); ++i) { std::cout << parser->getError(i)->desc() << std::endl; }
+    //std::cout << "successfully parse the onnx model" << std::endl;
+
+    // Build engine
+    builder->setMaxBatchSize(maxBatchSize);
+    config->setMaxWorkspaceSize(1 << 20);
+    //config->setFlag(nvinfer1::BuilderFlag::kFP16); // enable reduced-precision (FP16) mode
+    //config->setFlag(nvinfer1::BuilderFlag::kINT8);
+    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
+    //std::cout << "successfully  convert onnx to  engine!!! " << std::endl;
+
+    // clean up
+    network->destroy();
+    //parser->destroy();
+
+    return engine;
+}
+
+void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
+{
+
+    // Create builder
+    IBuilder* builder = createInferBuilder(gLogger);
+    IBuilderConfig* config = builder->createBuilderConfig();
+
+    // Create model to populate the network, then set the outputs and create an engine
+    ICudaEngine* engine = createEngine(maxBatchSize, builder, config);
+
+    assert(engine != nullptr);
+
+
+
+    // Serialize the engine
+    (*modelStream) = engine->serialize();
+    // Close everything down
+    engine->destroy();
+    builder->destroy();
+    config->destroy();
+
+}
+
+int get_trtengine() {
+
+    IHostMemory* modelStream{ nullptr };
+    APIToModel(1, &modelStream);
+    assert(modelStream != nullptr);
+
+    std::ofstream p(engine_path, std::ios::binary);
+    if (!p)
+    {
+        std::cerr << "could not open plan output file" << std::endl;
+        return -1;
+    }
+    p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
+    modelStream->destroy();
+
+    return 0;
+
+}
+
+
+int main(int argc, char** argv)
+{
+    showversion("yolov8");
+
+    QCoreApplication a(argc, argv);
+
+    gfault = new iv::Ivfault("sign_detection");
+    givlog = new iv::Ivlog("sign_detection");
+    gfault->SetFaultState(0,0,"yolov8 initialize.");
+
+    cv::FileStorage config(config_file, cv::FileStorage::READ);
+    bool config_isOpened = config.isOpened();
+    //const char* onnx_path_;
+    if(config_isOpened)
+    {
+        onnx_path = string(config["model"]["onnx_path"]);
+        engine_path = string(config["model"]["engine_path"]);
+        imshow_flag = (string(config["imshow_flag"]) == "true");
+        ivlog_flag = (string(config["ivlog_flag"]) == "true");
+        test_video = (string(config["test_video"]["open"]) == "true");
+        video_path = string(config["test_video"]["video_path"]);
+        trackstate = (string(config["trackstate"]) == "true");
+        conf_thr = float(config["conf_thr"]);
+        nms_thr = float(config["nms_thr"]);
+        cropstate = (string(config["camera"]["cropstate"]) == "true");
+        crop_height = cv::Range(int(config["camera"]["crop_height"]["start"]),
+                int(config["camera"]["crop_height"]["end"]));
+
+        crop_width = cv::Range(int(config["camera"]["crop_width"]["start"]),
+                int(config["camera"]["crop_width"]["end"]));
+
+        cv::FileNode signarraynode = config["signlabels"];
+        if(signarraynode.type() == cv::FileNode::SEQ)
+        {
+            cv::FileNodeIterator it = signarraynode.begin(),it_end = signarraynode.end();
+            for (; it != it_end; it++)
+            {
+                std::string value = (std::string)*it;
+                signlabels.push_back(value);
+            }
+        }
+    }
+    else
+    {
+        std::cout<<"Error: Can't find the label_sign_config.yaml file"<<std::endl;
+        if(ivlog_flag)
+            givlog->verbose("Error: Can't find the label_sign_config.yaml file");
+        return 0;
+    }
+    config.release();
+    if(test_video)
+        std::thread * readthread = new std::thread(ReadFunc,1);
+    else
+        gpcamera= iv::modulecomm::RegisterRecv(&cameraname[0],Listenpic);
+
+    //================================== camera calib init ==========================
+    if (calibrationstate)
+    {
+        cv::FileStorage calib_file(calibration_yamlpath, cv::FileStorage::READ);
+        calib_file["cameraMatrix"]>>camera_matrix;
+        calib_file["distCoeffs"]>>dist_coe;
+        cv::Mat R = cv::Mat::eye(3, 3, CV_64F);
+        cv::Size imgsize=cv::Size(1280,720);
+        cv::initUndistortRectifyMap(camera_matrix, dist_coe, R, camera_matrix,imgsize,CV_16SC2,map1,map2);
+    }
+
+    //共享内存,检测结果
+    gpdetect = iv::modulecomm::RegisterSend(&detectname[0],10000,1);
+
+    //gpresultimg = iv::modulecomm::RegisterSend(&resultimgname[0],1000000,1);
+
+
+    // check whether the engine file exists; build it if not
+    std::ifstream file(engine_path, std::ios::binary);
+    if(!file)
+    {
+        cout<<engine_path<<" not found!"<<endl;
+        cout<<"Build engine to "<< engine_path <<endl;
+        cout<<"Please wait a few minutes !!!"<<endl;
+        if(ivlog_flag)
+            givlog->verbose("Build engine now,Please wait a few minutes !!!");
+        get_trtengine();
+        cout << "Build engine done !!!!"<<endl;
+        if(ivlog_flag)
+            givlog->verbose("Build engine done !!!!");
+    }
+    else file.close();
+
+    // initialize the model
+    auto yolo = yolo::load(engine_path, yolo::Type::V8, conf_thr, nms_thr);
+    if (yolo == nullptr)
+    {
+
+        std::cout <<"Error: load engine failed,yolo == nullptr"<<std::endl;
+        if(ivlog_flag)
+            givlog->verbose("Error: load engine failed,yolo == nullptr");
+        return 0;
+    }
+
+
+    vector<KalmanTracker> trackers_90;
+    KalmanTracker::kf_count = 0; // tracking id relies on this, so we have to reset it in each seq.
+    int frame_count = 0;
+    double waittime = (double)cv::getTickCount();
+    while (1)
+    {
+        if(imageBuffer->isEmpty())
+        {
+            double waittotal = (double)cv::getTickCount() - waittime;
+            double totaltime = waittotal/cv::getTickFrequency();
+            //                    if(totaltime>10.0)
+            //                    {
+            //                        cout<<"Cant't get frame and quit"<<endl;
+            //                        lightstart = false;
+            //                        cv::destroyAllWindows();
+            //                        std::cout<<"------end program------"<<std::endl;
+            //                        break;
+            //                    }
+            cout<<"Wait for frame "<<totaltime<<"s"<<endl;
+            if(ivlog_flag)
+                givlog->verbose("Wait for frame %f s",totaltime);
+            std::this_thread::sleep_for(std::chrono::milliseconds(10));
+            continue;
+        }        
+        auto start = std::chrono::system_clock::now();  // start timer
+        cv::Mat frame;
+        imageBuffer->consume(frame);
+        frame_count++;
+        cv::Mat image;
+        frame.copyTo(image);
+        auto objs = yolo->forward(cvimg(image));
+        auto end = std::chrono::system_clock::now();  // end timer
+
+//        std::cout <<"sign detection infer time: "<<
+//                    std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
+//                 << "ms" << std::endl;
+        //================================== track ==========================
+        std::vector<yolo::Box> objs_track;
+        if (trackstate)
+        {
+            auto start_track = std::chrono::system_clock::now();  // start timer
+            od::bbox_t bbox_t_90;   // converted to the tracking format
+            vector<od::bbox_t> outs_90;
+            for (auto &obj : objs)
+            {
+                //------------- validate the sign box size -----------
+                //------------------ the tracker uses the center point plus width/height --------------------
+                float width = obj.right - obj.left;
+                float height = obj.bottom - obj.top;
+                if(width<0 || height<0)
+                {
+                    std::cout<<"Error: bbox width<0 || height<0 "<<std::endl;
+                    if(ivlog_flag)
+                        givlog->verbose("Error: bbox width<0 || height<0 ");
+                    continue;
+                }
+                bbox_t_90.x = obj.left + 0.5*width;
+                bbox_t_90.y = obj.top + 0.5*height;
+                bbox_t_90.w = width;
+                bbox_t_90.h = height;
+                bbox_t_90.prob = obj.confidence;
+                bbox_t_90.obj_id = obj.class_label;
+                outs_90.push_back(bbox_t_90);
+            }
+            vector<od::TrackingBox>track_result_90;
+            bool track_flag_90 = od::TrackObstacle(frame_count,trackers_90,outs_90,track_result_90);
+            //sort(track_result_90.begin(), track_result_90.end(), comp);  // track ids are already in descending order
+            for(unsigned int i=0;i < track_result_90.size(); i++)
+            {
+                yolo::Box obstacle;
+                float width = track_result_90[i].box.width;
+                float height = track_result_90[i].box.height;
+                obstacle.left = track_result_90[i].box.x - 0.5*width;
+                obstacle.top = track_result_90[i].box.y - 0.5*height;
+                obstacle.right = track_result_90[i].box.x + 0.5*width;
+                obstacle.bottom = track_result_90[i].box.y + 0.5*height;
+                //cout<<"11111: "<<track_result_90[i].id<<endl;
+                //----------------------- decide the final class by voting over the recent frame history (about 5 frames); parameters are set in KalmanTracker.h -------------
+                vector<int> class_history;
+                class_history = track_result_90[i].class_history;
+                if(class_history.size()>0)
+                {
+                    vector<int> color_num(6);   // 6 classes in total
+                    for(int j=0;j<class_history.size();j++)
+                    {
+                        int class_id = class_history[j];
+                        color_num[class_id] += 1;
+                    }
+                    std::vector<int>::iterator biggest = std::max_element(std::begin(color_num),std::end(color_num));
+                    int maxindex = std::distance(std::begin(color_num),biggest);
+                    obstacle.class_label = maxindex;
+                }
+                else {obstacle.class_label = track_result_90[i].class_id;}
+                obstacle.confidence = track_result_90[i].prob;
+                objs_track.push_back(obstacle);
+                //-------------------------------------------------------
+            }
+            auto end_track = std::chrono::system_clock::now();  // end timer
+            //std::cout <<"track: "<< std::chrono::duration_cast<std::chrono::milliseconds>(end_track - start_track).count() << "ms" << std::endl;
+
+        }
+        //================================== track ==========================
+
+        vector<yolo::Box>results_final;
+        results_final = (trackstate)?objs_track:objs;
+        shareSignMsg(results_final);
+        auto end_final = std::chrono::system_clock::now();  // end timer
+        std::cout <<"sign detection total time: "<<
+                    std::chrono::duration_cast<std::chrono::milliseconds>
+                                          (end_final - start).count() << "ms" << std::endl;
+        // visualize the results
+        for (auto &result : results_final)
+        {
+            uint8_t b, g, r;
+            tie(b, g, r) = yolo::random_color(result.class_label);
+            cv::rectangle(image, cv::Point(result.left, result.top),
+                          cv::Point(result.right, result.bottom),
+                          cv::Scalar(b, g, r), 2);
+
+            auto name = signlabels[result.class_label];
+            auto caption = cv::format("%s %.2f", name.data(), result.confidence);
+            int width = int(cv::getTextSize(caption, 0, 1, 1, nullptr).width*0.8);
+            cv::rectangle(image, cv::Point(result.left, result.top - 25),
+                          cv::Point(result.left + width, result.top), cv::Scalar(b, g, r), -1);
+            cv::putText(image, caption, cv::Point(result.left, result.top - 5), 0, 0.7, cv::Scalar::all(0), 1, 16);
+
+        }
+        // send the result image to shared memory
+        //SendResultImg(image,gpresultimg);
+
+
+        if (imshow_flag)
+        {
+
+            cv::namedWindow("Result",cv::WINDOW_NORMAL);
+            cv::imshow("Result",image);
+            if(cv::waitKey(10) == 'q')
+            {
+                cv::destroyAllWindows();
+                //yolo_context->destroy();
+                //start_up = false;
+                break;
+            }
+            if(cv::waitKey(1) == 's')
+                cv::waitKey(0);
+        }
+
+        //writer << frame;
+        waittime = (double)cv::getTickCount();
+
+    }
+
+}
+
+
+

+ 398 - 0
src/detection/label_sign_detection/src/Hungarian.cpp

@@ -0,0 +1,398 @@
+//
+// Created by lqx on 20-4-23.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Hungarian.cpp: Implementation file for Class HungarianAlgorithm.
+//
+// This is a C++ wrapper with slight modification of a hungarian algorithm implementation by Markus Buehren.
+// The original implementation is a few mex-functions for use in MATLAB, found here:
+// http://www.mathworks.com/matlabcentral/fileexchange/6543-functions-for-the-rectangular-assignment-problem
+//
+// Both this code and the original code are published under the BSD license.
+// by Cong Ma, 2016
+//
+
+#include <math.h>
+#include <cfloat>
+#include "Hungarian.h"
+
+
+HungarianAlgorithm::HungarianAlgorithm(){}
+HungarianAlgorithm::~HungarianAlgorithm(){}
+
+
+//********************************************************//
+// A single function wrapper for solving assignment problem.
+//********************************************************//
+double HungarianAlgorithm::Solve(vector<vector<double>>& DistMatrix, vector<int>& Assignment)
+{
+    unsigned int nRows = DistMatrix.size();
+    unsigned int nCols = DistMatrix[0].size();
+
+    double *distMatrixIn = new double[nRows * nCols];
+    int *assignment = new int[nRows];
+    double cost = 0.0;
+
+    // Fill in the distMatrixIn. Mind the index is "i + nRows * j".
+    // Here the cost matrix of size MxN is defined as a double precision array of N*M elements.
+    // In the solving functions matrices are seen to be saved MATLAB-internally in row-order.
+    // (i.e. the matrix [1 2; 3 4] will be stored as a vector [1 3 2 4], NOT [1 2 3 4]).
+    for (unsigned int i = 0; i < nRows; i++)
+        for (unsigned int j = 0; j < nCols; j++)
+            distMatrixIn[i + nRows * j] = DistMatrix[i][j];
+
+    // call solving function
+    assignmentoptimal(assignment, &cost, distMatrixIn, nRows, nCols);
+
+    Assignment.clear();
+    for (unsigned int r = 0; r < nRows; r++)
+        Assignment.push_back(assignment[r]);
+
+    delete[] distMatrixIn;
+    delete[] assignment;
+    return cost;
+}
+
+
+//********************************************************//
+// Solve optimal solution for assignment problem using Munkres algorithm, also known as Hungarian Algorithm.
+//********************************************************//
+void HungarianAlgorithm::assignmentoptimal(int *assignment, double *cost, double *distMatrixIn, int nOfRows, int nOfColumns)
+{
+    double *distMatrix, *distMatrixTemp, *distMatrixEnd, *columnEnd, value, minValue;
+    bool *coveredColumns, *coveredRows, *starMatrix, *newStarMatrix, *primeMatrix;
+    int nOfElements, minDim, row, col;
+
+    /* initialization */
+    *cost = 0;
+    for (row = 0; row<nOfRows; row++)
+        assignment[row] = -1;
+
+    /* generate working copy of distance Matrix */
+    /* check if all matrix elements are positive */
+    nOfElements = nOfRows * nOfColumns;
+    distMatrix = (double *)malloc(nOfElements * sizeof(double));
+    distMatrixEnd = distMatrix + nOfElements;
+
+    for (row = 0; row<nOfElements; row++)
+    {
+        value = distMatrixIn[row];
+        if (value < 0)
+            cerr << "All matrix elements have to be non-negative." << endl;
+        distMatrix[row] = value;
+    }
+
+
+    /* memory allocation */
+    coveredColumns = (bool *)calloc(nOfColumns, sizeof(bool));
+    coveredRows = (bool *)calloc(nOfRows, sizeof(bool));
+    starMatrix = (bool *)calloc(nOfElements, sizeof(bool));
+    primeMatrix = (bool *)calloc(nOfElements, sizeof(bool));
+    newStarMatrix = (bool *)calloc(nOfElements, sizeof(bool)); /* used in step4 */
+
+    /* preliminary steps */
+    if (nOfRows <= nOfColumns)
+    {
+        minDim = nOfRows;
+
+        for (row = 0; row<nOfRows; row++)
+        {
+            /* find the smallest element in the row */
+            distMatrixTemp = distMatrix + row;
+            minValue = *distMatrixTemp;
+            distMatrixTemp += nOfRows;
+            while (distMatrixTemp < distMatrixEnd)
+            {
+                value = *distMatrixTemp;
+                if (value < minValue)
+                    minValue = value;
+                distMatrixTemp += nOfRows;
+            }
+
+            /* subtract the smallest element from each element of the row */
+            distMatrixTemp = distMatrix + row;
+            while (distMatrixTemp < distMatrixEnd)
+            {
+                *distMatrixTemp -= minValue;
+                distMatrixTemp += nOfRows;
+            }
+        }
+
+        /* Steps 1 and 2a */
+        for (row = 0; row<nOfRows; row++)
+            for (col = 0; col<nOfColumns; col++)
+                if (fabs(distMatrix[row + nOfRows*col]) < DBL_EPSILON)
+                    if (!coveredColumns[col])
+                    {
+                        starMatrix[row + nOfRows*col] = true;
+                        coveredColumns[col] = true;
+                        break;
+                    }
+    }
+    else /* if(nOfRows > nOfColumns) */
+    {
+        minDim = nOfColumns;
+
+        for (col = 0; col<nOfColumns; col++)
+        {
+            /* find the smallest element in the column */
+            distMatrixTemp = distMatrix + nOfRows*col;
+            columnEnd = distMatrixTemp + nOfRows;
+
+            minValue = *distMatrixTemp++;
+            while (distMatrixTemp < columnEnd)
+            {
+                value = *distMatrixTemp++;
+                if (value < minValue)
+                    minValue = value;
+            }
+
+            /* subtract the smallest element from each element of the column */
+            distMatrixTemp = distMatrix + nOfRows*col;
+            while (distMatrixTemp < columnEnd)
+                *distMatrixTemp++ -= minValue;
+        }
+
+        /* Steps 1 and 2a */
+        for (col = 0; col<nOfColumns; col++)
+            for (row = 0; row<nOfRows; row++)
+                if (fabs(distMatrix[row + nOfRows*col]) < DBL_EPSILON)
+                    if (!coveredRows[row])
+                    {
+                        starMatrix[row + nOfRows*col] = true;
+                        coveredColumns[col] = true;
+                        coveredRows[row] = true;
+                        break;
+                    }
+        for (row = 0; row<nOfRows; row++)
+            coveredRows[row] = false;
+
+    }
+
+    /* move to step 2b */
+    step2b(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+
+    /* compute cost and remove invalid assignments */
+    computeassignmentcost(assignment, cost, distMatrixIn, nOfRows);
+
+    /* free allocated memory */
+    free(distMatrix);
+    free(coveredColumns);
+    free(coveredRows);
+    free(starMatrix);
+    free(primeMatrix);
+    free(newStarMatrix);
+
+    return;
+}
+
+/********************************************************/
+void HungarianAlgorithm::buildassignmentvector(int *assignment, bool *starMatrix, int nOfRows, int nOfColumns)
+{
+    int row, col;
+
+    for (row = 0; row<nOfRows; row++)
+        for (col = 0; col<nOfColumns; col++)
+            if (starMatrix[row + nOfRows*col])
+            {
+#ifdef ONE_INDEXING
+                assignment[row] = col + 1; /* MATLAB-Indexing */
+#else
+                assignment[row] = col;
+#endif
+                break;
+            }
+}
+
+/********************************************************/
+void HungarianAlgorithm::computeassignmentcost(int *assignment, double *cost, double *distMatrix, int nOfRows)
+{
+    int row, col;
+
+    for (row = 0; row<nOfRows; row++)
+    {
+        col = assignment[row];
+        if (col >= 0)
+            *cost += distMatrix[row + nOfRows*col];
+    }
+}
+
+/********************************************************/
+void HungarianAlgorithm::step2a(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim)
+{
+    bool *starMatrixTemp, *columnEnd;
+    int col;
+
+    /* cover every column containing a starred zero */
+    for (col = 0; col<nOfColumns; col++)
+    {
+        starMatrixTemp = starMatrix + nOfRows*col;
+        columnEnd = starMatrixTemp + nOfRows;
+        while (starMatrixTemp < columnEnd){
+            if (*starMatrixTemp++)
+            {
+                coveredColumns[col] = true;
+                break;
+            }
+        }
+    }
+
+    /* move to step 3 */
+    step2b(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+
+/********************************************************/
+void HungarianAlgorithm::step2b(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim)
+{
+    int col, nOfCoveredColumns;
+
+    /* count covered columns */
+    nOfCoveredColumns = 0;
+    for (col = 0; col<nOfColumns; col++)
+        if (coveredColumns[col])
+            nOfCoveredColumns++;
+
+    if (nOfCoveredColumns == minDim)
+    {
+        /* algorithm finished */
+        buildassignmentvector(assignment, starMatrix, nOfRows, nOfColumns);
+    }
+    else
+    {
+        /* move to step 3 */
+        step3(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+    }
+
+}
+
+/********************************************************/
+void HungarianAlgorithm::step3(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim)
+{
+    bool zerosFound;
+    int row, col, starCol;
+
+    zerosFound = true;
+    while (zerosFound)
+    {
+        zerosFound = false;
+        for (col = 0; col<nOfColumns; col++)
+            if (!coveredColumns[col])
+                for (row = 0; row<nOfRows; row++)
+                    if ((!coveredRows[row]) && (fabs(distMatrix[row + nOfRows*col]) < DBL_EPSILON))
+                    {
+                        /* prime zero */
+                        primeMatrix[row + nOfRows*col] = true;
+
+                        /* find starred zero in current row */
+                        for (starCol = 0; starCol<nOfColumns; starCol++)
+                            if (starMatrix[row + nOfRows*starCol])
+                                break;
+
+                        if (starCol == nOfColumns) /* no starred zero found */
+                        {
+                            /* move to step 4 */
+                            step4(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim, row, col);
+                            return;
+                        }
+                        else
+                        {
+                            coveredRows[row] = true;
+                            coveredColumns[starCol] = false;
+                            zerosFound = true;
+                            break;
+                        }
+                    }
+    }
+
+    /* move to step 5 */
+    step5(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+
+/********************************************************/
+void HungarianAlgorithm::step4(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim, int row, int col)
+{
+    int n, starRow, starCol, primeRow, primeCol;
+    int nOfElements = nOfRows*nOfColumns;
+
+    /* generate temporary copy of starMatrix */
+    for (n = 0; n<nOfElements; n++)
+        newStarMatrix[n] = starMatrix[n];
+
+    /* star current zero */
+    newStarMatrix[row + nOfRows*col] = true;
+
+    /* find starred zero in current column */
+    starCol = col;
+    for (starRow = 0; starRow<nOfRows; starRow++)
+        if (starMatrix[starRow + nOfRows*starCol])
+            break;
+
+    while (starRow<nOfRows)
+    {
+        /* unstar the starred zero */
+        newStarMatrix[starRow + nOfRows*starCol] = false;
+
+        /* find primed zero in current row */
+        primeRow = starRow;
+        for (primeCol = 0; primeCol<nOfColumns; primeCol++)
+            if (primeMatrix[primeRow + nOfRows*primeCol])
+                break;
+
+        /* star the primed zero */
+        newStarMatrix[primeRow + nOfRows*primeCol] = true;
+
+        /* find starred zero in current column */
+        starCol = primeCol;
+        for (starRow = 0; starRow<nOfRows; starRow++)
+            if (starMatrix[starRow + nOfRows*starCol])
+                break;
+    }
+
+    /* use temporary copy as new starMatrix */
+    /* delete all primes, uncover all rows */
+    for (n = 0; n<nOfElements; n++)
+    {
+        primeMatrix[n] = false;
+        starMatrix[n] = newStarMatrix[n];
+    }
+    for (n = 0; n<nOfRows; n++)
+        coveredRows[n] = false;
+
+    /* move to step 2a */
+    step2a(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+
+/********************************************************/
+void HungarianAlgorithm::step5(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim)
+{
+    double h, value;
+    int row, col;
+
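+    /* Munkres step 5: adding h to every covered row and subtracting it from every uncovered
+       column is equivalent to subtracting h from all uncovered elements (and adding it to the
+       doubly-covered ones), which exposes at least one new uncovered zero while preserving
+       the optimal assignment. */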
+    /* find smallest uncovered element h */
+    h = DBL_MAX;
+    for (row = 0; row<nOfRows; row++)
+        if (!coveredRows[row])
+            for (col = 0; col<nOfColumns; col++)
+                if (!coveredColumns[col])
+                {
+                    value = distMatrix[row + nOfRows*col];
+                    if (value < h)
+                        h = value;
+                }
+
+    /* add h to each covered row */
+    for (row = 0; row<nOfRows; row++)
+        if (coveredRows[row])
+            for (col = 0; col<nOfColumns; col++)
+                distMatrix[row + nOfRows*col] += h;
+
+    /* subtract h from each uncovered column */
+    for (col = 0; col<nOfColumns; col++)
+        if (!coveredColumns[col])
+            for (row = 0; row<nOfRows; row++)
+                distMatrix[row + nOfRows*col] -= h;
+
+    /* move to step 3 */
+    step3(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+

+ 107 - 0
src/detection/label_sign_detection/src/KalmanTracker.cpp

@@ -0,0 +1,107 @@
+//
+// Created by lqx on 20-4-23.
+//
+
+///////////////////////////////////////////////////////////////////////////////
+// KalmanTracker.cpp: KalmanTracker Class Implementation
+
+#include "KalmanTracker.h"
+#include "detect_obstacle.h"
+#include <iostream>
+
+int KalmanTracker::kf_count = 0;
+
+// initialize Kalman filter
+void KalmanTracker::init_kf(StateType stateMat)
+{
+    int stateNum = 7;
+    int measureNum = 4;
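+    // state vector: [cx, cy, s, r, vx, vy, vs] -- box center, area and aspect ratio plus their
+    // velocities; the transition matrix below encodes a constant-velocity model in which the
+    // aspect ratio r is assumed constant (as in the original SORT tracker)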
+    kf = KalmanFilter(stateNum, measureNum, 0);
+
+    measurement = Mat::zeros(measureNum, 1, CV_32F);
+
+    kf.transitionMatrix = (Mat_<float>(stateNum, stateNum) <<
+            1, 0, 0, 0, 1, 0, 0,
+            0, 1, 0, 0, 0, 1, 0,
+            0, 0, 1, 0, 0, 0, 1,
+            0, 0, 0, 1, 0, 0, 0,
+            0, 0, 0, 0, 1, 0, 0,
+            0, 0, 0, 0, 0, 1, 0,
+            0, 0, 0, 0, 0, 0, 1);
+
+    setIdentity(kf.measurementMatrix);
+    setIdentity(kf.processNoiseCov, Scalar::all(1e-2));
+    setIdentity(kf.measurementNoiseCov, Scalar::all(1e-1));
+    setIdentity(kf.errorCovPost, Scalar::all(1));
+
+    // initialize state vector with bounding box in [cx,cy,s,r] style
+    kf.statePost.at<float>(0, 0) = stateMat.x + stateMat.width / 2;
+    kf.statePost.at<float>(1, 0) = stateMat.y + stateMat.height / 2;
+    kf.statePost.at<float>(2, 0) = stateMat.area();
+    kf.statePost.at<float>(3, 0) = stateMat.width / stateMat.height;
+}
+
+// Predict the estimated bounding box.
+StateType KalmanTracker::predict()
+{
+    // predict
+    Mat p = kf.predict();
+    m_age += 1;
+
+    if (m_time_since_update > od::max_age)
+        m_hit_streak = 0;
+    m_time_since_update += 1;
+    StateType predictBox = get_rect_xysr(p.at<float>(0, 0), p.at<float>(1, 0), p.at<float>(2, 0), p.at<float>(3, 0));
+    m_history.push_back(predictBox);
+    return m_history.back();
+}
+
+// Update the state vector with observed bounding box.
+void KalmanTracker::update(StateType stateMat, int classId, float prob)
+{
+    m_time_since_update = 0;
+    m_history.clear();
+    m_hits += 1;
+    m_hit_streak += 1;
+    m_class_id = classId;
+
+    //-------------- keep only the detection classes from the most recent 5 frames --------------
+    if (m_class_history.size() >= 5)
+        m_class_history.erase(m_class_history.begin());
+    m_class_history.push_back(classId);
+    //-------------- keep only the detection classes from the most recent 5 frames --------------
+
+    m_prob = prob;
+    // measurement
+    measurement.at<float>(0, 0) = stateMat.x + stateMat.width / 2;
+    measurement.at<float>(1, 0) = stateMat.y + stateMat.height / 2;
+    measurement.at<float>(2, 0) = stateMat.area();
+    measurement.at<float>(3, 0) = stateMat.width / stateMat.height;
+
+    // update
+    kf.correct(measurement);
+}
+
+// Return the current state vector
+StateType KalmanTracker::get_state()
+{
+    Mat s = kf.statePost;
+    return get_rect_xysr(s.at<float>(0, 0), s.at<float>(1, 0), s.at<float>(2, 0), s.at<float>(3, 0));
+}
+
+
+// Convert bounding box from [cx,cy,s,r] to [x,y,w,h] style.
+StateType KalmanTracker::get_rect_xysr(float cx, float cy, float s, float r)
+{
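+    // from s = w*h and r = w/h it follows that w = sqrt(s*r) and h = s/w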
+    float w = sqrt(s * r);
+    float h = s / w;
+    float x = (cx - w / 2);
+    float y = (cy - h / 2);
+
+    if (x < 0 && cx > 0)
+        x = 0;
+    if (y < 0 && cy > 0)
+        y = 0;
+
+    return StateType(x, y, w, h);
+}

+ 260 - 0
src/detection/label_sign_detection/src/detect_obstacle.cpp

@@ -0,0 +1,260 @@
+
+#include <set>
+#include "detect_obstacle.h"
+
+namespace od{
+// Computes IOU between two bounding boxes
+double GetIOU(Rect_<float> bb_test, Rect_<float> bb_gt)
+{
+    float in = (bb_test & bb_gt).area();
+    float un = bb_test.area() + bb_gt.area() - in;
+
+    if (un < DBL_EPSILON)
+        return 0;
+
+    return (double)(in / un);
+}
+
+
+// Track obstacles across frames: Kalman prediction + Hungarian (IOU) association, SORT-style.
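+// Usage sketch (assumed caller, not part of this file): keep `trackers` alive across frames
+// and pass the current frame's detections (a std::vector<bbox_t>) in `outs`; confirmed tracks
+// come back in `track_result`:
+//   static std::vector<KalmanTracker> trackers;
+//   std::vector<od::TrackingBox> results;
+//   bool has_tracks = od::TrackObstacle(frame_count, trackers, detections, results);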
+bool TrackObstacle(int frame_count,vector<KalmanTracker> &trackers,vector<bbox_t> &outs,vector<od::TrackingBox> &track_result)
+{
+    // working variables for the association step
+    vector<Rect_<float>> predictedBoxes;
+    vector<vector<double>> iouMatrix;
+    vector<int> assignment;
+    set<int> unmatchedDetections;
+    set<int> unmatchedTrajectories;
+    set<int> allItems;
+    set<int> matchedItems;
+    vector<cv::Point> matchedPairs;
+    unsigned int trkNum = 0;
+    unsigned int detNum = 0;
+    vector<od::DetectBox> detect_outs;
+    //bbox_t to Detect_box
+    for(unsigned int i=0;i<outs.size();i++)
+    {
+        od::DetectBox detect_temp;
+        detect_temp.class_id = outs[i].obj_id;
+        detect_temp.prob = outs[i].prob;
+        float tpx = outs[i].x;
+        float tpy = outs[i].y;
+        float tpw = outs[i].w;
+        float tph = outs[i].h;
+        //detect_temp.box = Rect_<float>(Point_<float>(tpx, tpy),Point_<float>(tpx + tpw, tpy + tph));
+        detect_temp.box = Rect_<float>(tpx,tpy,tpw,tph);
+        detect_outs.push_back(detect_temp);
+    }
+    //tracking
+    if (trackers.size() == 0) // no existing trackers yet (first frame)
+    {
+        // initialize kalman trackers using first detections.
+        for (unsigned int i = 0; i < outs.size(); i++)
+        {
+            KalmanTracker trk = KalmanTracker(detect_outs[i].box,
+                                              detect_outs[i].class_id,
+                                              detect_outs[i].prob);
+            trackers.push_back(trk);
+        }
+        return false;
+    }
+    ///////////////////////////////////////
+    // 3.1. get predicted locations from existing trackers.
+    predictedBoxes.clear();
+
+    for (auto it = trackers.begin(); it != trackers.end();)
+    {
+        Rect_<float> pBox = (*it).predict();
+        if (pBox.x >= 0 && pBox.y >= 0)
+        {
+            predictedBoxes.push_back(pBox);
+            it++;
+        }
+        else
+        {
+            cerr << "Box invalid at frame: " << frame_count << " id " << (*it).m_id + 1 << endl;
+            it = trackers.erase(it);
+        }
+    }
+
+    if (trackers.size() == 0 || detect_outs.size() == 0) return false;
+
+    ///////////////////////////////////////
+    // 3.2. associate detections to tracked objects (both represented as bounding boxes)
+    trkNum = predictedBoxes.size();
+    detNum = outs.size();
+
+    iouMatrix.clear();
+    iouMatrix.resize(trkNum, vector<double>(detNum, 0));
+
+    for (unsigned int i = 0; i < trkNum; i++) // compute iou matrix as a distance matrix
+    {
+        for (unsigned int j = 0; j < detNum; j++)
+        {
+            // use 1-iou because the hungarian algorithm computes a minimum-cost assignment.
+            iouMatrix[i][j] = 1 - GetIOU(predictedBoxes[i], detect_outs[j].box);
+        }
+    }
+    // solve the assignment problem using hungarian algorithm.
+    // the resulting assignment is [track(prediction) : detection], with len=preNum
+    HungarianAlgorithm HungAlgo;
+    assignment.clear();
+    HungAlgo.Solve(iouMatrix, assignment);
+
+    // find matches, unmatched_detections and unmatched_predictions
+    unmatchedTrajectories.clear();
+    unmatchedDetections.clear();
+    allItems.clear();
+    matchedItems.clear();
+    if (detNum > trkNum) //	there are unmatched detections
+    {
+        for (unsigned int n = 0; n < detNum; n++)
+            allItems.insert(n);
+
+        for (unsigned int i = 0; i < trkNum; ++i)
+            matchedItems.insert(assignment[i]);
+
+        set_difference(allItems.begin(), allItems.end(),
+                       matchedItems.begin(), matchedItems.end(),
+                       insert_iterator<set<int>>(unmatchedDetections, unmatchedDetections.begin()));
+    }
+    else if (detNum < trkNum) // there are unmatched trajectory/predictions
+    {
+        for (unsigned int i = 0; i < trkNum; ++i)
+            if (assignment[i] == -1) // unassigned label will be set as -1 in the assignment algorithm
+                unmatchedTrajectories.insert(i);
+    }
+    // filter out matches with low IOU
+    matchedPairs.clear();
+    for (unsigned int i = 0; i < trkNum; ++i)
+    {
+        if (assignment[i] == -1) // pass over invalid values
+            continue;
+        if (1 - iouMatrix[i][assignment[i]] < od::iouThreshold)
+        {
+            unmatchedTrajectories.insert(i);
+            unmatchedDetections.insert(assignment[i]);
+        }
+        else
+            matchedPairs.push_back(cv::Point(i, assignment[i]));
+    }
+    ///////////////////////////////////////
+    // 3.3. updating trackers
+    // update matched trackers with assigned detections.
+    // each prediction is corresponding to a tracker
+    int detIdx, trkIdx;
+    for (unsigned int i = 0; i < matchedPairs.size(); i++)
+    {
+        trkIdx = matchedPairs[i].x;
+        detIdx = matchedPairs[i].y;
+        trackers[trkIdx].update(detect_outs[detIdx].box,
+                                detect_outs[detIdx].class_id,
+                                detect_outs[detIdx].prob);
+    }
+    // create and initialise new trackers for unmatched detections
+    for (auto umd : unmatchedDetections)
+    {
+        KalmanTracker tracker = KalmanTracker(detect_outs[umd].box,
+                                              detect_outs[umd].class_id,
+                                              detect_outs[umd].prob);
+        trackers.push_back(tracker);
+    }
+
+#if 0
+    // get unique trackers: merge duplicate trackers that overlap heavily
+    unsigned int trackers_num = trackers.size();
+    iouMatrix.clear();
+    iouMatrix.resize(trackers_num, vector<double>(trackers_num, 0));
+    for (unsigned int i = 0; i < trackers_num; i++) // compute iou matrix as a distance matrix
+    {
+        for (unsigned int j = 0; j < trackers_num; j++)
+        {
+            // use 1-iou because the hungarian algorithm computes a minimum-cost assignment.
+            if(j==i)
+                iouMatrix[i][j] = 1;
+            else
+                iouMatrix[i][j] = 1 - GetIOU(trackers[i].get_state(), trackers[j].get_state());
+        }
+    }
+    // solve the assignment problem using hungarian algorithm.
+    // the resulting assignment is [track(prediction) : detection], with len=preNum
+    assignment.clear();
+    HungAlgo.Solve(iouMatrix, assignment);
+    // filter out matches with low IOU
+    matchedPairs.clear();
+    for (unsigned int i = 0; i < trackers_num; ++i)
+    {
+        if (assignment[i] == -1) // pass over invalid values
+            continue;
+        if (iouMatrix[i][assignment[i]] < od::iouThreshold)
+        {
+            matchedPairs.push_back(cv::Point(i, assignment[i]));
+        }
+    }
+    int index1,index2;
+    vector<int> delete_index;
+    for (unsigned int i = 0; i < matchedPairs.size(); i++)
+    {
+        index1 = matchedPairs[i].x;
+        index2 = matchedPairs[i].y;
+        if(index1 >= index2)
+            continue;
+        if((trackers[index1].m_id > trackers[index2].m_id) && (trackers[index1].m_class_history.size()>0))
+        {
+            trackers[index1].m_id = trackers[index2].m_id;
+            trackers[index1].m_class_history.insert(trackers[index1].m_class_history.begin(),
+                                                    trackers[index2].m_class_history.begin(),trackers[index2].m_class_history.end());
+            delete_index.push_back(index2);
+        }
+        else if((trackers[index2].m_id > trackers[index1].m_id) && (trackers[index2].m_class_history.size()>0))
+        {
+            trackers[index2].m_id = trackers[index1].m_id;
+            trackers[index2].m_class_history.insert(trackers[index2].m_class_history.begin(),
+                                                    trackers[index1].m_class_history.begin(),trackers[index1].m_class_history.end());
+            delete_index.push_back(index1);
+
+        }
+    }
+    for(unsigned int i = 0; i < delete_index.size(); i++)
+    {
+        int idx = delete_index[i] - i;
+        trackers.erase(trackers.begin() + idx);
+    }
+#endif
+
+    // get trackers' output
+    track_result.clear();
+    for (auto it = trackers.begin(); it != trackers.end();)
+    {
+        if (((*it).m_time_since_update <= od::max_age) &&
+                ((*it).m_hit_streak >= od::min_hits || frame_count <= od::min_hits))
+        {
+            od::TrackingBox res;
+            res.box = (*it).get_state();
+            res.id = (*it).m_id + 1;
+            res.frame = frame_count;
+            res.class_id = (*it).m_class_id;
+            res.prob = (*it).m_prob;
+            res.class_history = (*it).m_class_history;
+            track_result.push_back(res);
+            it++;
+        }
+        else
+            it++;
+        // remove dead tracklet
+        if(it != trackers.end() && (*it).m_time_since_update > od::max_age)
+        {
+            it = trackers.erase(it);
+        }
+    }
+    return track_result.size() > 0;
+}
+}
+
+
+

+ 443 - 0
src/detection/label_sign_detection/src/infer.cu

@@ -0,0 +1,443 @@
+
+#include <NvInfer.h>
+#include <cuda_runtime.h>
+#include <stdarg.h>
+#include <fstream>
+#include <numeric>
+#include <sstream>
+#include <unordered_map>
+#include "../include/infer.hpp"
+
+
+namespace trt {
+
+using namespace std;
+using namespace nvinfer1;
+
+#define checkRuntime(call)                                                                 \
+  do {                                                                                     \
+    auto ___call__ret_code__ = (call);                                                     \
+    if (___call__ret_code__ != cudaSuccess) {                                              \
+      INFO("CUDA Runtime error💥 %s # %s, code = %s [ %d ]", #call,                         \
+           cudaGetErrorString(___call__ret_code__), cudaGetErrorName(___call__ret_code__), \
+           ___call__ret_code__);                                                           \
+      abort();                                                                             \
+    }                                                                                      \
+  } while (0)
+
+#define checkKernel(...)                 \
+  do {                                   \
+    { (__VA_ARGS__); }                   \
+    checkRuntime(cudaPeekAtLastError()); \
+  } while (0)
+
+#define Assert(op)                 \
+  do {                             \
+    bool cond = !(!(op));          \
+    if (!cond) {                   \
+      INFO("Assert failed, " #op); \
+      abort();                     \
+    }                              \
+  } while (0)
+
+#define Assertf(op, ...)                             \
+  do {                                               \
+    bool cond = !(!(op));                            \
+    if (!cond) {                                     \
+      INFO("Assert failed, " #op " : " __VA_ARGS__); \
+      abort();                                       \
+    }                                                \
+  } while (0)
+
+static string file_name(const string &path, bool include_suffix) {
+  if (path.empty()) return "";
+
+  int p = path.rfind('/');
+  int e = path.rfind('\\');
+  p = max(p, e);
+  p += 1;
+
+  // include suffix
+  if (include_suffix) return path.substr(p);
+
+  int u = path.rfind('.');
+  if (u == -1) return path.substr(p);
+
+  if (u <= p) u = path.size();
+  return path.substr(p, u - p);
+}
+
+void __log_func(const char *file, int line, const char *fmt, ...) {
+  va_list vl;
+  va_start(vl, fmt);
+  char buffer[2048];
+  string filename = file_name(file, true);
+  int n = snprintf(buffer, sizeof(buffer), "[%s:%d]: ", filename.c_str(), line);
+  vsnprintf(buffer + n, sizeof(buffer) - n, fmt, vl);
+  fprintf(stdout, "%s\n", buffer);
+}
+
+static std::string format_shape(const Dims &shape) {
+  stringstream output;
+  char buf[64];
+  const char *fmts[] = {"%d", "x%d"};
+  for (int i = 0; i < shape.nbDims; ++i) {
+    snprintf(buf, sizeof(buf), fmts[i != 0], shape.d[i]);
+    output << buf;
+  }
+  return output.str();
+}
+
+Timer::Timer() {
+  checkRuntime(cudaEventCreate((cudaEvent_t *)&start_));
+  checkRuntime(cudaEventCreate((cudaEvent_t *)&stop_));
+}
+
+Timer::~Timer() {
+  checkRuntime(cudaEventDestroy((cudaEvent_t)start_));
+  checkRuntime(cudaEventDestroy((cudaEvent_t)stop_));
+}
+
+void Timer::start(void *stream) {
+  stream_ = stream;
+  checkRuntime(cudaEventRecord((cudaEvent_t)start_, (cudaStream_t)stream_));
+}
+
+float Timer::stop(const char *prefix, bool print) {
+  checkRuntime(cudaEventRecord((cudaEvent_t)stop_, (cudaStream_t)stream_));
+  checkRuntime(cudaEventSynchronize((cudaEvent_t)stop_));
+
+  float latency = 0;
+  checkRuntime(cudaEventElapsedTime(&latency, (cudaEvent_t)start_, (cudaEvent_t)stop_));
+
+  if (print) {
+    printf("[%s]: %.5f ms\n", prefix, latency);
+  }
+  return latency;
+}
+
+BaseMemory::BaseMemory(void *cpu, size_t cpu_bytes, void *gpu, size_t gpu_bytes) {
+  reference(cpu, cpu_bytes, gpu, gpu_bytes);
+}
+
+void BaseMemory::reference(void *cpu, size_t cpu_bytes, void *gpu, size_t gpu_bytes) {
+  release();
+
+  if (cpu == nullptr || cpu_bytes == 0) {
+    cpu = nullptr;
+    cpu_bytes = 0;
+  }
+
+  if (gpu == nullptr || gpu_bytes == 0) {
+    gpu = nullptr;
+    gpu_bytes = 0;
+  }
+
+  this->cpu_ = cpu;
+  this->cpu_capacity_ = cpu_bytes;
+  this->cpu_bytes_ = cpu_bytes;
+  this->gpu_ = gpu;
+  this->gpu_capacity_ = gpu_bytes;
+  this->gpu_bytes_ = gpu_bytes;
+
+  this->owner_cpu_ = !(cpu && cpu_bytes > 0);
+  this->owner_gpu_ = !(gpu && gpu_bytes > 0);
+}
+
+BaseMemory::~BaseMemory() { release(); }
+
+void *BaseMemory::gpu_realloc(size_t bytes) {
+  if (gpu_capacity_ < bytes) {
+    release_gpu();
+
+    gpu_capacity_ = bytes;
+    checkRuntime(cudaMalloc(&gpu_, bytes));
+    // checkRuntime(cudaMemset(gpu_, 0, size));
+  }
+  gpu_bytes_ = bytes;
+  return gpu_;
+}
+
+void *BaseMemory::cpu_realloc(size_t bytes) {
+  if (cpu_capacity_ < bytes) {
+    release_cpu();
+
+    cpu_capacity_ = bytes;
+    checkRuntime(cudaMallocHost(&cpu_, bytes));
+    Assert(cpu_ != nullptr);
+    // memset(cpu_, 0, size);
+  }
+  cpu_bytes_ = bytes;
+  return cpu_;
+}
+
+void BaseMemory::release_cpu() {
+  if (cpu_) {
+    if (owner_cpu_) {
+      checkRuntime(cudaFreeHost(cpu_));
+    }
+    cpu_ = nullptr;
+  }
+  cpu_capacity_ = 0;
+  cpu_bytes_ = 0;
+}
+
+void BaseMemory::release_gpu() {
+  if (gpu_) {
+    if (owner_gpu_) {
+      checkRuntime(cudaFree(gpu_));
+    }
+    gpu_ = nullptr;
+  }
+  gpu_capacity_ = 0;
+  gpu_bytes_ = 0;
+}
+
+void BaseMemory::release() {
+  release_cpu();
+  release_gpu();
+}
+
+class __native_nvinfer_logger : public ILogger {
+ public:
+  virtual void log(Severity severity, const char *msg) noexcept override {
+    if (severity == Severity::kINTERNAL_ERROR) {
+      INFO("NVInfer INTERNAL_ERROR: %s", msg);
+      abort();
+    } else if (severity == Severity::kERROR) {
+      INFO("NVInfer: %s", msg);
+    }
+    // else  if (severity == Severity::kWARNING) {
+    //     INFO("NVInfer: %s", msg);
+    // }
+    // else  if (severity == Severity::kINFO) {
+    //     INFO("NVInfer: %s", msg);
+    // }
+    // else {
+    //     INFO("%s", msg);
+    // }
+  }
+};
+static __native_nvinfer_logger gLogger;
+
+template <typename _T>
+static void destroy_nvidia_pointer(_T *ptr) {
+  if (ptr) ptr->destroy();
+}
+
+static std::vector<uint8_t> load_file(const string &file) {
+  ifstream in(file, ios::in | ios::binary);
+  if (!in.is_open()) return {};
+
+  in.seekg(0, ios::end);
+  size_t length = in.tellg();
+
+  std::vector<uint8_t> data;
+  if (length > 0) {
+    in.seekg(0, ios::beg);
+    data.resize(length);
+
+    in.read((char *)&data[0], length);
+  }
+  in.close();
+  return data;
+}
+
+class __native_engine_context {
+ public:
+  virtual ~__native_engine_context() { destroy(); }
+
+  bool construct(const void *pdata, size_t size) {
+    destroy();
+
+    if (pdata == nullptr || size == 0) return false;
+
+    runtime_ = shared_ptr<IRuntime>(createInferRuntime(gLogger), destroy_nvidia_pointer<IRuntime>);
+    if (runtime_ == nullptr) return false;
+
+    engine_ = shared_ptr<ICudaEngine>(runtime_->deserializeCudaEngine(pdata, size, nullptr),
+                                      destroy_nvidia_pointer<ICudaEngine>);
+    if (engine_ == nullptr) return false;
+
+    context_ = shared_ptr<IExecutionContext>(engine_->createExecutionContext(),
+                                             destroy_nvidia_pointer<IExecutionContext>);
+    return context_ != nullptr;
+  }
+
+ private:
+  void destroy() {
+    context_.reset();
+    engine_.reset();
+    runtime_.reset();
+  }
+
+ public:
+  shared_ptr<IExecutionContext> context_;
+  shared_ptr<ICudaEngine> engine_;
+  shared_ptr<IRuntime> runtime_ = nullptr;
+};
+
+class InferImpl : public Infer {
+ public:
+  shared_ptr<__native_engine_context> context_;
+  unordered_map<string, int> binding_name_to_index_;
+
+  virtual ~InferImpl() = default;
+
+  bool construct(const void *data, size_t size) {
+    context_ = make_shared<__native_engine_context>();
+    if (!context_->construct(data, size)) {
+      return false;
+    }
+
+    setup();
+    return true;
+  }
+
+  bool load(const string &file) {
+    auto data = load_file(file);
+    if (data.empty()) {
+      INFO("An empty file has been loaded. Please confirm your file path: %s", file.c_str());
+      return false;
+    }
+    return this->construct(data.data(), data.size());
+  }
+
+  void setup() {
+    auto engine = this->context_->engine_;
+    int nbBindings = engine->getNbBindings();
+
+    binding_name_to_index_.clear();
+    for (int i = 0; i < nbBindings; ++i) {
+      const char *bindingName = engine->getBindingName(i);
+      binding_name_to_index_[bindingName] = i;
+    }
+  }
+
+  virtual int index(const std::string &name) override {
+    auto iter = binding_name_to_index_.find(name);
+    Assertf(iter != binding_name_to_index_.end(), "Cannot find the binding name: %s",
+            name.c_str());
+    return iter->second;
+  }
+
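+  // Note: `bindings` must be ordered by engine binding index (inputs and outputs exactly as the
+  // engine reports them), because enqueueV2 interprets the pointer array positionally.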
+  virtual bool forward(const std::vector<void *> &bindings, void *stream,
+                       void *input_consum_event) override {
+    return this->context_->context_->enqueueV2((void**)bindings.data(), (cudaStream_t)stream,
+                                               (cudaEvent_t *)input_consum_event);
+  }
+
+  virtual std::vector<int> run_dims(const std::string &name) override {
+    return run_dims(index(name));
+  }
+
+  virtual std::vector<int> run_dims(int ibinding) override {
+    auto dim = this->context_->context_->getBindingDimensions(ibinding);
+    return std::vector<int>(dim.d, dim.d + dim.nbDims);
+  }
+
+  virtual std::vector<int> static_dims(const std::string &name) override {
+    return static_dims(index(name));
+  }
+
+  virtual std::vector<int> static_dims(int ibinding) override {
+    auto dim = this->context_->engine_->getBindingDimensions(ibinding);
+    return std::vector<int>(dim.d, dim.d + dim.nbDims);
+  }
+
+  virtual int num_bindings() override { return this->context_->engine_->getNbBindings(); }
+
+  virtual bool is_input(int ibinding) override {
+    return this->context_->engine_->bindingIsInput(ibinding);
+  }
+
+  virtual bool set_run_dims(const std::string &name, const std::vector<int> &dims) override {
+    return this->set_run_dims(index(name), dims);
+  }
+
+  virtual bool set_run_dims(int ibinding, const std::vector<int> &dims) override {
+    Dims d;
+    memcpy(d.d, dims.data(), sizeof(int) * dims.size());
+    d.nbDims = dims.size();
+    return this->context_->context_->setBindingDimensions(ibinding, d);
+  }
+
+  virtual int numel(const std::string &name) override { return numel(index(name)); }
+
+  virtual int numel(int ibinding) override {
+    auto dim = this->context_->context_->getBindingDimensions(ibinding);
+    return std::accumulate(dim.d, dim.d + dim.nbDims, 1, std::multiplies<int>());
+  }
+
+  virtual DType dtype(const std::string &name) override { return dtype(index(name)); }
+
+  virtual DType dtype(int ibinding) override {
+    return (DType)this->context_->engine_->getBindingDataType(ibinding);
+  }
+
+  virtual bool has_dynamic_dim() override {
+    // check if any input or output bindings have dynamic shapes
+    // code from ChatGPT
+    int numBindings = this->context_->engine_->getNbBindings();
+    for (int i = 0; i < numBindings; ++i) {
+      nvinfer1::Dims dims = this->context_->engine_->getBindingDimensions(i);
+      for (int j = 0; j < dims.nbDims; ++j) {
+        if (dims.d[j] == -1) return true;
+      }
+    }
+    return false;
+  }
+
+  virtual void print() override {
+    INFO("Infer %p [%s]", this, has_dynamic_dim() ? "DynamicShape" : "StaticShape");
+
+    int num_input = 0;
+    int num_output = 0;
+    auto engine = this->context_->engine_;
+    for (int i = 0; i < engine->getNbBindings(); ++i) {
+      if (engine->bindingIsInput(i))
+        num_input++;
+      else
+        num_output++;
+    }
+
+    INFO("Inputs: %d", num_input);
+    for (int i = 0; i < num_input; ++i) {
+      auto name = engine->getBindingName(i);
+      auto dim = engine->getBindingDimensions(i);
+      INFO("\t%d.%s : shape {%s}", i, name, format_shape(dim).c_str());
+    }
+
+    INFO("Outputs: %d", num_output);
+    for (int i = 0; i < num_output; ++i) {
+      auto name = engine->getBindingName(i + num_input);
+      auto dim = engine->getBindingDimensions(i + num_input);
+      INFO("\t%d.%s : shape {%s}", i, name, format_shape(dim).c_str());
+    }
+  }
+};
+
+Infer *loadraw(const std::string &file) {
+  InferImpl *impl = new InferImpl();
+  if (!impl->load(file)) {
+    delete impl;
+    impl = nullptr;
+  }
+  return impl;
+}
+
+std::shared_ptr<Infer> load(const std::string &file) {
+  return std::shared_ptr<InferImpl>((InferImpl *)loadraw(file));
+}
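+// Usage sketch (assumed caller; the engine path is a placeholder):
+//   auto engine = trt::load("yolov8n_fp16.trt");
+//   if (engine) engine->print();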
+
+std::string format_shape(const std::vector<int> &shape) {
+  stringstream output;
+  char buf[64];
+  const char *fmts[] = {"%d", "x%d"};
+  for (int i = 0; i < (int)shape.size(); ++i) {
+    snprintf(buf, sizeof(buf), fmts[i != 0], shape[i]);
+    output << buf;
+  }
+  return output.str();
+}
+};  // namespace trt

+ 740 - 0
src/detection/label_sign_detection/src/yolo.cu

@@ -0,0 +1,740 @@
+#include "../include/infer.hpp"
+#include "../include/yolo.hpp"
+
+namespace yolo {
+
+using namespace std;
+
+#define GPU_BLOCK_THREADS 512
+#define checkRuntime(call)                                                                 \
+  do {                                                                                     \
+    auto ___call__ret_code__ = (call);                                                     \
+    if (___call__ret_code__ != cudaSuccess) {                                              \
+      INFO("CUDA Runtime error💥 %s # %s, code = %s [ %d ]", #call,                         \
+           cudaGetErrorString(___call__ret_code__), cudaGetErrorName(___call__ret_code__), \
+           ___call__ret_code__);                                                           \
+      abort();                                                                             \
+    }                                                                                      \
+  } while (0)
+
+#define checkKernel(...)                 \
+  do {                                   \
+    { (__VA_ARGS__); }                   \
+    checkRuntime(cudaPeekAtLastError()); \
+  } while (0)
+
+enum class NormType : int { None = 0, MeanStd = 1, AlphaBeta = 2 };
+
+enum class ChannelType : int { None = 0, SwapRB = 1 };
+
+/* Normalization options: mean/std, alpha/beta, and RB channel swap */
+struct Norm {
+  float mean[3];
+  float std[3];
+  float alpha, beta;
+  NormType type = NormType::None;
+  ChannelType channel_type = ChannelType::None;
+
+  // out = (x * alpha - mean) / std
+  static Norm mean_std(const float mean[3], const float std[3], float alpha = 1 / 255.0f,
+                       ChannelType channel_type = ChannelType::None);
+
+  // out = x * alpha + beta
+  static Norm alpha_beta(float alpha, float beta = 0, ChannelType channel_type = ChannelType::None);
+
+  // None
+  static Norm None();
+};
+
+Norm Norm::mean_std(const float mean[3], const float std[3], float alpha,
+                    ChannelType channel_type) {
+  Norm out;
+  out.type = NormType::MeanStd;
+  out.alpha = alpha;
+  out.channel_type = channel_type;
+  memcpy(out.mean, mean, sizeof(out.mean));
+  memcpy(out.std, std, sizeof(out.std));
+  return out;
+}
+
+Norm Norm::alpha_beta(float alpha, float beta, ChannelType channel_type) {
+  Norm out;
+  out.type = NormType::AlphaBeta;
+  out.alpha = alpha;
+  out.beta = beta;
+  out.channel_type = channel_type;
+  return out;
+}
+
+Norm Norm::None() { return Norm(); }
+
+const int NUM_BOX_ELEMENT = 8;  // left, top, right, bottom, confidence, class,
+                                // keepflag, row_index(output)
+const int MAX_IMAGE_BOXES = 1024;
+inline int upbound(int n, int align = 32) { return (n + align - 1) / align * align; }
+static __host__ __device__ void affine_project(float *matrix, float x, float y, float *ox,
+                                               float *oy) {
+  *ox = matrix[0] * x + matrix[1] * y + matrix[2];
+  *oy = matrix[3] * x + matrix[4] * y + matrix[5];
+}
+
+static __global__ void decode_kernel_common(float *predict, int num_bboxes, int num_classes,
+                                            int output_cdim, float confidence_threshold,
+                                            float *invert_affine_matrix, float *parray,
+                                            int MAX_IMAGE_BOXES) {
+  int position = blockDim.x * blockIdx.x + threadIdx.x;
+  if (position >= num_bboxes) return;
+
+  float *pitem = predict + output_cdim * position;
+  float objectness = pitem[4];
+  if (objectness < confidence_threshold) return;
+
+  float *class_confidence = pitem + 5;
+  float confidence = *class_confidence++;
+  int label = 0;
+  for (int i = 1; i < num_classes; ++i, ++class_confidence) {
+    if (*class_confidence > confidence) {
+      confidence = *class_confidence;
+      label = i;
+    }
+  }
+
+  confidence *= objectness;
+  if (confidence < confidence_threshold) return;
+
+  int index = atomicAdd(parray, 1);
+  if (index >= MAX_IMAGE_BOXES) return;
+
+  float cx = *pitem++;
+  float cy = *pitem++;
+  float width = *pitem++;
+  float height = *pitem++;
+  float left = cx - width * 0.5f;
+  float top = cy - height * 0.5f;
+  float right = cx + width * 0.5f;
+  float bottom = cy + height * 0.5f;
+  affine_project(invert_affine_matrix, left, top, &left, &top);
+  affine_project(invert_affine_matrix, right, bottom, &right, &bottom);
+
+  float *pout_item = parray + 1 + index * NUM_BOX_ELEMENT;
+  *pout_item++ = left;
+  *pout_item++ = top;
+  *pout_item++ = right;
+  *pout_item++ = bottom;
+  *pout_item++ = confidence;
+  *pout_item++ = label;
+  *pout_item++ = 1;  // 1 = keep, 0 = ignore
+}
+
+static __global__ void decode_kernel_v8(float *predict, int num_bboxes, int num_classes,
+                                        int output_cdim, float confidence_threshold,
+                                        float *invert_affine_matrix, float *parray,
+                                        int MAX_IMAGE_BOXES) {
+  int position = blockDim.x * blockIdx.x + threadIdx.x;
+  if (position >= num_bboxes) return;
+
+  float *pitem = predict + output_cdim * position;
+  float *class_confidence = pitem + 4;
+  float confidence = *class_confidence++;
+  int label = 0;
+  for (int i = 1; i < num_classes; ++i, ++class_confidence) {
+    if (*class_confidence > confidence) {
+      confidence = *class_confidence;
+      label = i;
+    }
+  }
+  if (confidence < confidence_threshold) return;
+
+  int index = atomicAdd(parray, 1);
+  if (index >= MAX_IMAGE_BOXES) return;
+
+  float cx = *pitem++;
+  float cy = *pitem++;
+  float width = *pitem++;
+  float height = *pitem++;
+  float left = cx - width * 0.5f;
+  float top = cy - height * 0.5f;
+  float right = cx + width * 0.5f;
+  float bottom = cy + height * 0.5f;
+  affine_project(invert_affine_matrix, left, top, &left, &top);
+  affine_project(invert_affine_matrix, right, bottom, &right, &bottom);
+
+  float *pout_item = parray + 1 + index * NUM_BOX_ELEMENT;
+  *pout_item++ = left;
+  *pout_item++ = top;
+  *pout_item++ = right;
+  *pout_item++ = bottom;
+  *pout_item++ = confidence;
+  *pout_item++ = label;
+  *pout_item++ = 1;  // 1 = keep, 0 = ignore
+  *pout_item++ = position;
+}
+
+static __device__ float box_iou(float aleft, float atop, float aright, float abottom, float bleft,
+                                float btop, float bright, float bbottom) {
+  float cleft = max(aleft, bleft);
+  float ctop = max(atop, btop);
+  float cright = min(aright, bright);
+  float cbottom = min(abottom, bbottom);
+
+  float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f);
+  if (c_area == 0.0f) return 0.0f;
+
+  float a_area = max(0.0f, aright - aleft) * max(0.0f, abottom - atop);
+  float b_area = max(0.0f, bright - bleft) * max(0.0f, bbottom - btop);
+  return c_area / (a_area + b_area - c_area);
+}
+
+static __global__ void fast_nms_kernel(float *bboxes, int MAX_IMAGE_BOXES, float threshold) {
+  int position = (blockDim.x * blockIdx.x + threadIdx.x);
+  int count = min((int)*bboxes, MAX_IMAGE_BOXES);
+  if (position >= count) return;
+
+  // left, top, right, bottom, confidence, class, keepflag
+  float *pcurrent = bboxes + 1 + position * NUM_BOX_ELEMENT;
+  for (int i = 0; i < count; ++i) {
+    float *pitem = bboxes + 1 + i * NUM_BOX_ELEMENT;
+    if (i == position || pcurrent[5] != pitem[5]) continue;
+
+    if (pitem[4] >= pcurrent[4]) {
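+      // equal-confidence tie-break: only the later-indexed candidate suppresses, so exactly one
+      // of two identical-score overlapping boxes survives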
+      if (pitem[4] == pcurrent[4] && i < position) continue;
+
+      float iou = box_iou(pcurrent[0], pcurrent[1], pcurrent[2], pcurrent[3], pitem[0], pitem[1],
+                          pitem[2], pitem[3]);
+
+      if (iou > threshold) {
+        pcurrent[6] = 0;  // 1=keep, 0=ignore
+        return;
+      }
+    }
+  }
+}
+
+static dim3 grid_dims(int numJobs) {
+  int numBlockThreads = numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS;
+  return dim3(((numJobs + numBlockThreads - 1) / (float)numBlockThreads));
+}
+
+static dim3 block_dims(int numJobs) {
+  return numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS;
+}
+
+static void decode_kernel_invoker(float *predict, int num_bboxes, int num_classes, int output_cdim,
+                                  float confidence_threshold, float nms_threshold,
+                                  float *invert_affine_matrix, float *parray, int MAX_IMAGE_BOXES,
+                                  Type type, cudaStream_t stream) {
+  auto grid = grid_dims(num_bboxes);
+  auto block = block_dims(num_bboxes);
+
+  if (type == Type::V8 || type == Type::V8Seg) {
+    checkKernel(decode_kernel_v8<<<grid, block, 0, stream>>>(
+        predict, num_bboxes, num_classes, output_cdim, confidence_threshold, invert_affine_matrix,
+        parray, MAX_IMAGE_BOXES));
+  } else {
+    checkKernel(decode_kernel_common<<<grid, block, 0, stream>>>(
+        predict, num_bboxes, num_classes, output_cdim, confidence_threshold, invert_affine_matrix,
+        parray, MAX_IMAGE_BOXES));
+  }
+
+  grid = grid_dims(MAX_IMAGE_BOXES);
+  block = block_dims(MAX_IMAGE_BOXES);
+  checkKernel(fast_nms_kernel<<<grid, block, 0, stream>>>(parray, MAX_IMAGE_BOXES, nms_threshold));
+}
+
+static __global__ void warp_affine_bilinear_and_normalize_plane_kernel(
+    uint8_t *src, int src_line_size, int src_width, int src_height, float *dst, int dst_width,
+    int dst_height, uint8_t const_value_st, float *warp_affine_matrix_2_3, Norm norm) {
+  int dx = blockDim.x * blockIdx.x + threadIdx.x;
+  int dy = blockDim.y * blockIdx.y + threadIdx.y;
+  if (dx >= dst_width || dy >= dst_height) return;
+
+  float m_x1 = warp_affine_matrix_2_3[0];
+  float m_y1 = warp_affine_matrix_2_3[1];
+  float m_z1 = warp_affine_matrix_2_3[2];
+  float m_x2 = warp_affine_matrix_2_3[3];
+  float m_y2 = warp_affine_matrix_2_3[4];
+  float m_z2 = warp_affine_matrix_2_3[5];
+
+  float src_x = m_x1 * dx + m_y1 * dy + m_z1;
+  float src_y = m_x2 * dx + m_y2 * dy + m_z2;
+  float c0, c1, c2;
+
+  if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height) {
+    // out of range
+    c0 = const_value_st;
+    c1 = const_value_st;
+    c2 = const_value_st;
+  } else {
+    int y_low = floorf(src_y);
+    int x_low = floorf(src_x);
+    int y_high = y_low + 1;
+    int x_high = x_low + 1;
+
+    uint8_t const_value[] = {const_value_st, const_value_st, const_value_st};
+    float ly = src_y - y_low;
+    float lx = src_x - x_low;
+    float hy = 1 - ly;
+    float hx = 1 - lx;
+    float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+    uint8_t *v1 = const_value;
+    uint8_t *v2 = const_value;
+    uint8_t *v3 = const_value;
+    uint8_t *v4 = const_value;
+    if (y_low >= 0) {
+      if (x_low >= 0) v1 = src + y_low * src_line_size + x_low * 3;
+
+      if (x_high < src_width) v2 = src + y_low * src_line_size + x_high * 3;
+    }
+
+    if (y_high < src_height) {
+      if (x_low >= 0) v3 = src + y_high * src_line_size + x_low * 3;
+
+      if (x_high < src_width) v4 = src + y_high * src_line_size + x_high * 3;
+    }
+
+    // same rounding as OpenCV
+    c0 = floorf(w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0] + 0.5f);
+    c1 = floorf(w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1] + 0.5f);
+    c2 = floorf(w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2] + 0.5f);
+  }
+
+  if (norm.channel_type == ChannelType::SwapRB) {
+    float t = c2;
+    c2 = c0;
+    c0 = t;
+  }
+
+  if (norm.type == NormType::MeanStd) {
+    c0 = (c0 * norm.alpha - norm.mean[0]) / norm.std[0];
+    c1 = (c1 * norm.alpha - norm.mean[1]) / norm.std[1];
+    c2 = (c2 * norm.alpha - norm.mean[2]) / norm.std[2];
+  } else if (norm.type == NormType::AlphaBeta) {
+    c0 = c0 * norm.alpha + norm.beta;
+    c1 = c1 * norm.alpha + norm.beta;
+    c2 = c2 * norm.alpha + norm.beta;
+  }
+
+  int area = dst_width * dst_height;
+  float *pdst_c0 = dst + dy * dst_width + dx;
+  float *pdst_c1 = pdst_c0 + area;
+  float *pdst_c2 = pdst_c1 + area;
+  *pdst_c0 = c0;
+  *pdst_c1 = c1;
+  *pdst_c2 = c2;
+}
+
+static void warp_affine_bilinear_and_normalize_plane(uint8_t *src, int src_line_size, int src_width,
+                                                     int src_height, float *dst, int dst_width,
+                                                     int dst_height, float *matrix_2_3,
+                                                     uint8_t const_value, const Norm &norm,
+                                                     cudaStream_t stream) {
+  dim3 grid((dst_width + 31) / 32, (dst_height + 31) / 32);
+  dim3 block(32, 32);
+
+  checkKernel(warp_affine_bilinear_and_normalize_plane_kernel<<<grid, block, 0, stream>>>(
+      src, src_line_size, src_width, src_height, dst, dst_width, dst_height, const_value,
+      matrix_2_3, norm));
+}
+
+static __global__ void decode_single_mask_kernel(int left, int top, float *mask_weights,
+                                                 float *mask_predict, int mask_width,
+                                                 int mask_height, unsigned char *mask_out,
+                                                 int mask_dim, int out_width, int out_height) {
+  // mask_predict to mask_out
+  // mask_weights @ mask_predict
+  int dx = blockDim.x * blockIdx.x + threadIdx.x;
+  int dy = blockDim.y * blockIdx.y + threadIdx.y;
+  if (dx >= out_width || dy >= out_height) return;
+
+  int sx = left + dx;
+  int sy = top + dy;
+  if (sx < 0 || sx >= mask_width || sy < 0 || sy >= mask_height) {
+    mask_out[dy * out_width + dx] = 0;
+    return;
+  }
+
+  float cumprod = 0;
+  for (int ic = 0; ic < mask_dim; ++ic) {
+    float cval = mask_predict[(ic * mask_height + sy) * mask_width + sx];
+    float wval = mask_weights[ic];
+    cumprod += cval * wval;
+  }
+
+  float alpha = 1.0f / (1.0f + exp(-cumprod));
+  mask_out[dy * out_width + dx] = alpha * 255;
+}
+
+static void decode_single_mask(float left, float top, float *mask_weights, float *mask_predict,
+                               int mask_width, int mask_height, unsigned char *mask_out,
+                               int mask_dim, int out_width, int out_height, cudaStream_t stream) {
+  // mask_weights is mask_dim(32 element) gpu pointer
+  dim3 grid((out_width + 31) / 32, (out_height + 31) / 32);
+  dim3 block(32, 32);
+
+  checkKernel(decode_single_mask_kernel<<<grid, block, 0, stream>>>(
+      left, top, mask_weights, mask_predict, mask_width, mask_height, mask_out, mask_dim, out_width,
+      out_height));
+}
+
+const char *type_name(Type type) {
+  switch (type) {
+    case Type::V5:
+      return "YoloV5";
+    case Type::V3:
+      return "YoloV3";
+    case Type::V7:
+      return "YoloV7";
+    case Type::X:
+      return "YoloX";
+    case Type::V8:
+      return "YoloV8";
+    default:
+      return "Unknow";
+  }
+}
+
+struct AffineMatrix {
+  float i2d[6];  // image to dst(network), 2x3 matrix
+  float d2i[6];  // dst to image, 2x3 matrix
+
+  void compute(const std::tuple<int, int> &from, const std::tuple<int, int> &to) {
+    float scale_x = get<0>(to) / (float)get<0>(from);
+    float scale_y = get<1>(to) / (float)get<1>(from);
+    float scale = std::min(scale_x, scale_y);
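+    // letterbox mapping: scale uniformly to fit the network input, center the image, and shift
+    // by scale*0.5 - 0.5 so that pixel centers (rather than corners) stay aligned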
+    i2d[0] = scale;
+    i2d[1] = 0;
+    i2d[2] = -scale * get<0>(from) * 0.5 + get<0>(to) * 0.5 + scale * 0.5 - 0.5;
+    i2d[3] = 0;
+    i2d[4] = scale;
+    i2d[5] = -scale * get<1>(from) * 0.5 + get<1>(to) * 0.5 + scale * 0.5 - 0.5;
+
+    double D = i2d[0] * i2d[4] - i2d[1] * i2d[3];
+    D = D != 0. ? double(1.) / D : double(0.);
+    double A11 = i2d[4] * D, A22 = i2d[0] * D, A12 = -i2d[1] * D, A21 = -i2d[3] * D;
+    double b1 = -A11 * i2d[2] - A12 * i2d[5];
+    double b2 = -A21 * i2d[2] - A22 * i2d[5];
+
+    d2i[0] = A11;
+    d2i[1] = A12;
+    d2i[2] = b1;
+    d2i[3] = A21;
+    d2i[4] = A22;
+    d2i[5] = b2;
+  }
+};
+
+InstanceSegmentMap::InstanceSegmentMap(int width, int height) {
+  this->width = width;
+  this->height = height;
+  checkRuntime(cudaMallocHost(&this->data, width * height));
+}
+
+InstanceSegmentMap::~InstanceSegmentMap() {
+  if (this->data) {
+    checkRuntime(cudaFreeHost(this->data));
+    this->data = nullptr;
+  }
+  this->width = 0;
+  this->height = 0;
+}
+
+class InferImpl : public Infer {
+ public:
+  shared_ptr<trt::Infer> trt_;
+  string engine_file_;
+  Type type_;
+  float confidence_threshold_;
+  float nms_threshold_;
+  vector<shared_ptr<trt::Memory<unsigned char>>> preprocess_buffers_;
+  trt::Memory<float> input_buffer_, bbox_predict_, output_boxarray_;
+  trt::Memory<float> segment_predict_;
+  int network_input_width_, network_input_height_;
+  Norm normalize_;
+  vector<int> bbox_head_dims_;
+  vector<int> segment_head_dims_;
+  int num_classes_ = 0;
+  bool has_segment_ = false;
+  bool isdynamic_model_ = false;
+  vector<shared_ptr<trt::Memory<unsigned char>>> box_segment_cache_;
+
+  virtual ~InferImpl() = default;
+
+  void adjust_memory(int batch_size) {
+    // the inference batch_size
+    size_t input_numel = network_input_width_ * network_input_height_ * 3;
+    input_buffer_.gpu(batch_size * input_numel);
+    bbox_predict_.gpu(batch_size * bbox_head_dims_[1] * bbox_head_dims_[2]);
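+    // per-image output layout: the first float is the box counter written by atomicAdd in the
+    // decode kernels, followed by up to MAX_IMAGE_BOXES boxes of NUM_BOX_ELEMENT floats; the
+    // 32-float header leaves headroom around the counter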
+    output_boxarray_.gpu(batch_size * (32 + MAX_IMAGE_BOXES * NUM_BOX_ELEMENT));
+    output_boxarray_.cpu(batch_size * (32 + MAX_IMAGE_BOXES * NUM_BOX_ELEMENT));
+
+    if (has_segment_)
+      segment_predict_.gpu(batch_size * segment_head_dims_[1] * segment_head_dims_[2] *
+                           segment_head_dims_[3]);
+
+    if ((int)preprocess_buffers_.size() < batch_size) {
+      for (int i = preprocess_buffers_.size(); i < batch_size; ++i)
+        preprocess_buffers_.push_back(make_shared<trt::Memory<unsigned char>>());
+    }
+  }
+
+  void preprocess(int ibatch, const Image &image,
+                  shared_ptr<trt::Memory<unsigned char>> preprocess_buffer, AffineMatrix &affine,
+                  void *stream = nullptr) {
+    affine.compute(make_tuple(image.width, image.height),
+                   make_tuple(network_input_width_, network_input_height_));
+
+    size_t input_numel = network_input_width_ * network_input_height_ * 3;
+    float *input_device = input_buffer_.gpu() + ibatch * input_numel;
+    size_t size_image = image.width * image.height * 3;
+    size_t size_matrix = upbound(sizeof(affine.d2i), 32);
+    uint8_t *gpu_workspace = preprocess_buffer->gpu(size_matrix + size_image);
+    float *affine_matrix_device = (float *)gpu_workspace;
+    uint8_t *image_device = gpu_workspace + size_matrix;
+
+    uint8_t *cpu_workspace = preprocess_buffer->cpu(size_matrix + size_image);
+    float *affine_matrix_host = (float *)cpu_workspace;
+    uint8_t *image_host = cpu_workspace + size_matrix;
+
+    // copy into pinned host buffers, then issue async H2D copies on the user stream
+    cudaStream_t stream_ = (cudaStream_t)stream;
+    memcpy(image_host, image.bgrptr, size_image);
+    memcpy(affine_matrix_host, affine.d2i, sizeof(affine.d2i));
+    checkRuntime(
+        cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream_));
+    checkRuntime(cudaMemcpyAsync(affine_matrix_device, affine_matrix_host, sizeof(affine.d2i),
+                                 cudaMemcpyHostToDevice, stream_));
+
+    warp_affine_bilinear_and_normalize_plane(image_device, image.width * 3, image.width,
+                                             image.height, input_device, network_input_width_,
+                                             network_input_height_, affine_matrix_device, 114,
+                                             normalize_, stream_);
+  }
+
+  bool load(const string &engine_file, Type type, float confidence_threshold, float nms_threshold) {
+    trt_ = trt::load(engine_file);
+    if (trt_ == nullptr) return false;
+
+    trt_->print();
+
+    this->type_ = type;
+    this->confidence_threshold_ = confidence_threshold;
+    this->nms_threshold_ = nms_threshold;
+
+    auto input_dim = trt_->static_dims(0);
+    bbox_head_dims_ = trt_->static_dims(1);
+    has_segment_ = type == Type::V8Seg;
+    if (has_segment_) {
+      bbox_head_dims_ = trt_->static_dims(2);
+      segment_head_dims_ = trt_->static_dims(1);
+    }
+    network_input_width_ = input_dim[3];
+    network_input_height_ = input_dim[2];
+    isdynamic_model_ = trt_->has_dynamic_dim();
+
+    if (type == Type::V5 || type == Type::V3 || type == Type::V7) {
+      normalize_ = Norm::alpha_beta(1 / 255.0f, 0.0f, ChannelType::SwapRB);
+      num_classes_ = bbox_head_dims_[2] - 5;
+    } else if (type == Type::V8) {
+      normalize_ = Norm::alpha_beta(1 / 255.0f, 0.0f, ChannelType::SwapRB);
+      num_classes_ = bbox_head_dims_[2] - 4;
+    } else if (type == Type::V8Seg) {
+      normalize_ = Norm::alpha_beta(1 / 255.0f, 0.0f, ChannelType::SwapRB);
+      num_classes_ = bbox_head_dims_[2] - 4 - segment_head_dims_[1];
+    } else if (type == Type::X) {
+      // float mean[] = {0.485, 0.456, 0.406};
+      // float std[]  = {0.229, 0.224, 0.225};
+      // normalize_ = Norm::mean_std(mean, std, 1/255.0f, ChannelType::SwapRB);
+      normalize_ = Norm::None();
+      num_classes_ = bbox_head_dims_[2] - 5;
+    } else {
+      INFO("Unsupport type %d", type);
+    }
+    return true;
+  }
+
+  virtual BoxArray forward(const Image &image, void *stream = nullptr) override {
+    auto output = forwards({image}, stream);
+    if (output.empty()) return {};
+    return output[0];
+  }
+
+  virtual vector<BoxArray> forwards(const vector<Image> &images, void *stream = nullptr) override {
+    int num_image = images.size();
+    if (num_image == 0) return {};
+
+    auto input_dims = trt_->static_dims(0);
+    int infer_batch_size = input_dims[0];
+    if (infer_batch_size != num_image) {
+      if (isdynamic_model_) {
+        infer_batch_size = num_image;
+        input_dims[0] = num_image;
+        if (!trt_->set_run_dims(0, input_dims)) return {};
+      } else {
+        if (infer_batch_size < num_image) {
+          INFO(
+              "When using static shape model, number of images[%d] must be "
+              "less than or equal to the maximum batch[%d].",
+              num_image, infer_batch_size);
+          return {};
+        }
+      }
+    }
+    adjust_memory(infer_batch_size);
+
+    vector<AffineMatrix> affine_matrixs(num_image);
+    cudaStream_t stream_ = (cudaStream_t)stream;
+    for (int i = 0; i < num_image; ++i)
+      preprocess(i, images[i], preprocess_buffers_[i], affine_matrixs[i], stream);
+
+    float *bbox_output_device = bbox_predict_.gpu();
+    vector<void *> bindings{input_buffer_.gpu(), bbox_output_device};
+
+    if (has_segment_) {
+      bindings = {input_buffer_.gpu(), segment_predict_.gpu(), bbox_output_device};
+    }
+
+    if (!trt_->forward(bindings, stream)) {
+      INFO("Failed to tensorRT forward.");
+      return {};
+    }
+
+    for (int ib = 0; ib < num_image; ++ib) {
+      float *boxarray_device =
+          output_boxarray_.gpu() + ib * (32 + MAX_IMAGE_BOXES * NUM_BOX_ELEMENT);
+      float *affine_matrix_device = (float *)preprocess_buffers_[ib]->gpu();
+      float *image_based_bbox_output =
+          bbox_output_device + ib * (bbox_head_dims_[1] * bbox_head_dims_[2]);
+      checkRuntime(cudaMemsetAsync(boxarray_device, 0, sizeof(int), stream_));
+      decode_kernel_invoker(image_based_bbox_output, bbox_head_dims_[1], num_classes_,
+                            bbox_head_dims_[2], confidence_threshold_, nms_threshold_,
+                            affine_matrix_device, boxarray_device, MAX_IMAGE_BOXES, type_, stream_);
+    }
+    checkRuntime(cudaMemcpyAsync(output_boxarray_.cpu(), output_boxarray_.gpu(),
+                                 output_boxarray_.gpu_bytes(), cudaMemcpyDeviceToHost, stream_));
+    checkRuntime(cudaStreamSynchronize(stream_));
+
+    vector<BoxArray> arrout(num_image);
+    int imemory = 0;
+    for (int ib = 0; ib < num_image; ++ib) {
+      float *parray = output_boxarray_.cpu() + ib * (32 + MAX_IMAGE_BOXES * NUM_BOX_ELEMENT);
+      int count = min(MAX_IMAGE_BOXES, (int)*parray);
+      BoxArray &output = arrout[ib];
+      output.reserve(count);
+      for (int i = 0; i < count; ++i) {
+        float *pbox = parray + 1 + i * NUM_BOX_ELEMENT;
+        int label = pbox[5];
+        int keepflag = pbox[6];
+        if (keepflag == 1) {
+          Box result_object_box(pbox[0], pbox[1], pbox[2], pbox[3], pbox[4], label);
+          if (has_segment_) {
+            int row_index = pbox[7];
+            int mask_dim = segment_head_dims_[1];
+            float *mask_weights = bbox_output_device +
+                                  (ib * bbox_head_dims_[1] + row_index) * bbox_head_dims_[2] +
+                                  num_classes_ + 4;
+
+            float *mask_head_predict = segment_predict_.gpu();
+            float left, top, right, bottom;
+            float *i2d = affine_matrixs[ib].i2d;
+            affine_project(i2d, pbox[0], pbox[1], &left, &top);
+            affine_project(i2d, pbox[2], pbox[3], &right, &bottom);
+
+            float box_width = right - left;
+            float box_height = bottom - top;
+
+            float scale_to_predict_x = segment_head_dims_[3] / (float)network_input_width_;
+            float scale_to_predict_y = segment_head_dims_[2] / (float)network_input_height_;
+            int mask_out_width = box_width * scale_to_predict_x + 0.5f;
+            int mask_out_height = box_height * scale_to_predict_y + 0.5f;
+
+            if (mask_out_width > 0 && mask_out_height > 0) {
+              if (imemory >= (int)box_segment_cache_.size()) {
+                box_segment_cache_.push_back(std::make_shared<trt::Memory<unsigned char>>());
+              }
+
+              int bytes_of_mask_out = mask_out_width * mask_out_height;
+              auto box_segment_output_memory = box_segment_cache_[imemory];
+              result_object_box.seg =
+                  make_shared<InstanceSegmentMap>(mask_out_width, mask_out_height);
+
+              unsigned char *mask_out_device = box_segment_output_memory->gpu(bytes_of_mask_out);
+              unsigned char *mask_out_host = result_object_box.seg->data;
+              decode_single_mask(left * scale_to_predict_x, top * scale_to_predict_y, mask_weights,
+                                 mask_head_predict + ib * segment_head_dims_[1] *
+                                                         segment_head_dims_[2] *
+                                                         segment_head_dims_[3],
+                                 segment_head_dims_[3], segment_head_dims_[2], mask_out_device,
+                                 mask_dim, mask_out_width, mask_out_height, stream_);
+              checkRuntime(cudaMemcpyAsync(mask_out_host, mask_out_device,
+                                           box_segment_output_memory->gpu_bytes(),
+                                           cudaMemcpyDeviceToHost, stream_));
+            }
+          }
+          output.emplace_back(result_object_box);
+        }
+      }
+    }
+
+    if (has_segment_) checkRuntime(cudaStreamSynchronize(stream_));
+
+    return arrout;
+  }
+};
+
+Infer *loadraw(const std::string &engine_file, Type type, float confidence_threshold,
+               float nms_threshold) {
+  InferImpl *impl = new InferImpl();
+  if (!impl->load(engine_file, type, confidence_threshold, nms_threshold)) {
+    delete impl;
+    impl = nullptr;
+  }
+  return impl;
+}
+
+shared_ptr<Infer> load(const string &engine_file, Type type, float confidence_threshold,
+                       float nms_threshold) {
+  return std::shared_ptr<InferImpl>(
+      (InferImpl *)loadraw(engine_file, type, confidence_threshold, nms_threshold));
+}
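+// Usage sketch (assumed caller; engine path and thresholds are placeholders):
+//   auto detector = yolo::load("yolov8n_fp16.trt", yolo::Type::V8, 0.25f, 0.5f);
+//   auto boxes = detector->forward(image);  // `image` is a yolo::Image wrapping a BGR frame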
+
+std::tuple<uint8_t, uint8_t, uint8_t> hsv2bgr(float h, float s, float v) {
+  const int h_i = static_cast<int>(h * 6);
+  const float f = h * 6 - h_i;
+  const float p = v * (1 - s);
+  const float q = v * (1 - f * s);
+  const float t = v * (1 - (1 - f) * s);
+  float r, g, b;
+  switch (h_i) {
+    case 0:
+      r = v, g = t, b = p;
+      break;
+    case 1:
+      r = q, g = v, b = p;
+      break;
+    case 2:
+      r = p, g = v, b = t;
+      break;
+    case 3:
+      r = p, g = q, b = v;
+      break;
+    case 4:
+      r = t, g = p, b = v;
+      break;
+    case 5:
+      r = v, g = p, b = q;
+      break;
+    default:
+      r = 1, g = 1, b = 1;
+      break;
+  }
+  return make_tuple(static_cast<uint8_t>(b * 255), static_cast<uint8_t>(g * 255),
+                    static_cast<uint8_t>(r * 255));
+}
+
+std::tuple<uint8_t, uint8_t, uint8_t> random_color(int id) {
+  float h_plane = ((((unsigned int)id << 2) ^ 0x937151) % 100) / 100.0f;
+  float s_plane = ((((unsigned int)id << 3) ^ 0x315793) % 100) / 100.0f;
+  return hsv2bgr(h_plane, s_plane, 1);
+}
+
+};  // namespace yolo