Forráskód Böngészése

update pp+ukf+shigong

lijinliang 2 éve
szülő
commit
7af8fc75c7
46 módosított fájl, 9818 hozzáadás és 210 törlés
  1. 294 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Ctracker.cpp
  2. 292 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Ctracker.h
  3. 723 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/HungarianAlg.cpp
  4. 39 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/HungarianAlg.h
  5. 873 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Kalman.cpp
  6. 120 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Kalman.h
  7. 65 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/ShortPathCalculator.h
  8. 159 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Tracking.hpp
  9. 147 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/defines.h
  10. 492 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/track.cpp
  11. 303 0
      src/detection/detection_lidar_PointPillars_MultiHead/Tracker/track.h
  12. 26 8
      src/detection/detection_lidar_PointPillars_MultiHead/detection_lidar_PointPillars_MultiHead.pro
  13. 461 67
      src/detection/detection_lidar_PointPillars_MultiHead/main.cpp
  14. 36 82
      src/detection/detection_lidar_PointPillars_MultiHead/pointpillars.cc
  15. 1 5
      src/detection/detection_lidar_PointPillars_MultiHead/pointpillars.h
  16. 176 45
      src/detection/detection_lidar_PointPillars_MultiHead/postprocess.cu
  17. 25 0
      src/detection/detection_lidar_PointPillars_MultiHead/roiaware_pool3d.h
  18. 361 0
      src/detection/detection_lidar_PointPillars_MultiHead/roiaware_pool3d_kernel.cu
  19. 6 3
      src/detection/detection_lidar_ukf_pda/detection_lidar_ukf_pda.pro
  20. 100 0
      src/detection/detection_lidar_ukf_pda/imm_ukf_pda.h
  21. 36 0
      src/detection/detection_yolov4_shigong_monocular/include/Hungarian.h
  22. 90 0
      src/detection/detection_yolov4_shigong_monocular/include/KalmanTracker.h
  23. 54 0
      src/detection/detection_yolov4_shigong_monocular/include/detect_obstacle.h
  24. 82 0
      src/detection/detection_yolov4_shigong_monocular/include/imageBuffer.h
  25. 353 0
      src/detection/detection_yolov4_shigong_monocular/main.cpp
  26. 406 0
      src/detection/detection_yolov4_shigong_monocular/main_multibatch.cpp
  27. 28 0
      src/detection/detection_yolov4_shigong_monocular/proto/obstacles.proto
  28. 1 0
      src/detection/detection_yolov4_shigong_monocular/proto/protomake.sh
  29. 16 0
      src/detection/detection_yolov4_shigong_monocular/proto/rawpic.proto
  30. 398 0
      src/detection/detection_yolov4_shigong_monocular/src/Hungarian.cpp
  31. 101 0
      src/detection/detection_yolov4_shigong_monocular/src/KalmanTracker.cpp
  32. 309 0
      src/detection/detection_yolov4_shigong_monocular/src/detect_obstacle.cpp
  33. 94 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/Utils.h
  34. 503 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/logging.h
  35. 106 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/mish.h
  36. 70 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/trt_utils.h
  37. 167 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/yolo.h
  38. 56 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/yolodetect.h
  39. 126 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/yololayer.h
  40. 270 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/main.cpp
  41. 196 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/mish.cu
  42. 473 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/trt_utils.cpp
  43. 510 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/yolo.cpp
  44. 274 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/yolodetect.cpp
  45. 129 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/yololayer.cpp
  46. 271 0
      src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/yololayer.cu

+ 294 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Ctracker.cpp

@@ -0,0 +1,294 @@
+#include "Ctracker.h"
+///
+/// \brief CTracker::CTracker
+/// Tracker. Manage tracks. Create, remove, update.
+/// \param settings
+///
+CTracker::CTracker(TrackerSettings& settings)
+    // Delegate to the const-ref constructor so ALL members are initialized.
+    // The previous body only copied m_settings, leaving m_nextTrackID
+    // uninitialized and m_SPCalculator null -- the first call to Update()
+    // would then dereference a null ShortPathCalculator.
+    : CTracker(static_cast<const TrackerSettings&>(settings))
+{
+}
+///
+/// \brief CTracker::CTracker
+/// Tracker. Manage tracks. Create, remove, update.
+/// \param settings
+///
+CTracker::CTracker(const TrackerSettings& settings)
+    :
+      m_settings(settings),
+      m_nextTrackID(0)
+{
+    // Build the solver that matches detections (regions) to existing tracks.
+    ShortPathCalculator* spcalc = nullptr;
+    // SPSettings = { distance threshold, 12 }; the meaning of the hard-coded 12
+    // depends on the ShortPathCalculator implementation -- TODO confirm.
+    SPSettings spSettings = { settings.m_distThres, 12 };
+    switch (m_settings.m_matchType)
+    {
+    case tracking::MatchHungrian:
+        spcalc = new SPHungrian(spSettings);
+        break;
+        //    case tracking::MatchBipart:
+        //        spcalc = new SPBipart(spSettings);
+        //        break;
+    }
+    // Only MatchHungrian is implemented; any other enum value leaves spcalc
+    // null and trips this assert (which is a no-op in release builds).
+    assert(spcalc != nullptr);
+    m_SPCalculator = std::unique_ptr<ShortPathCalculator>(spcalc);
+}
+
+///
+/// \brief CTracker::~CTracker
+///
+// Defaulted: the unique_ptr / container members release themselves.
+CTracker::~CTracker(void) = default;
+///
+/// \brief CTracker::setSettings
+/// Tracker. Manage tracks. Create, remove, update.
+/// \param settings
+///
+void CTracker::setSettings(TrackerSettings& settings)
+{
+    m_settings = settings;
+    // Restart ID generation; existing tracks keep their current IDs.
+    m_nextTrackID = 0;
+    // Rebuild the detection<->track matcher exactly as the constructor does.
+    ShortPathCalculator* spcalc = nullptr;
+    SPSettings spSettings = { settings.m_distThres, 12 };
+    switch (m_settings.m_matchType)
+    {
+    case tracking::MatchHungrian:
+        spcalc = new SPHungrian(spSettings);
+        break;
+        //    case tracking::MatchBipart:
+        //        spcalc = new SPBipart(spSettings);
+        //        break;
+    }
+    // Only MatchHungrian is implemented; other values leave spcalc null.
+    assert(spcalc != nullptr);
+    m_SPCalculator = std::unique_ptr<ShortPathCalculator>(spcalc);
+}
+///
+/// \brief CTracker::Update
+/// \param regions
+/// \param currFrame
+/// \param fps
+///
+void CTracker::Update(
+        const regions_t& regions,
+        cv::UMat currFrame,
+        float fps
+        )
+{
+    // One tracking step: associate regions with tracks, create/remove tracks,
+    // and advance the per-track filters.
+    UpdateTrackingState(regions, currFrame, fps);
+
+    // Keep the frame so the next step can use it as the "previous" image
+    // (needed by lost-track types that track visually).
+    currFrame.copyTo(m_prevFrame);
+}
+
+///
+/// \brief CTracker::UpdateTrackingState
+/// \param regions
+/// \param currFrame
+/// \param fps
+///
+void CTracker::UpdateTrackingState(
+        const regions_t& regions,
+        cv::UMat currFrame,
+        float fps
+        )
+{
+    const size_t N = m_tracks.size();	// Tracking objects
+    const size_t M = regions.size();	// Detections or regions
+
+    // assignment[i] = index of the region matched to track i, or -1 if none.
+    assignments_t assignment(N, -1); // Assignments regions -> tracks
+
+    if (!m_tracks.empty())
+    {
+        // Distance matrix between all tracks to all regions
+        // Column-major layout: cost(track i, region j) == costMatrix[i + j * N].
+        distMatrix_t costMatrix(N * M);
+        const track_t maxPossibleCost = 1e3;//static_cast<track_t>(currFrame.cols * currFrame.rows);
+        track_t maxCost = 0;
+        CreateDistaceMatrix(regions, costMatrix, maxPossibleCost, maxCost, currFrame);
+
+        // Solving assignment problem (shortest paths)
+        m_SPCalculator->Solve(costMatrix, N, M, assignment, maxCost);//row->col(trackid->regionid)
+
+        // clean assignment from pairs with large distance
+        for (size_t i = 0; i < assignment.size(); i++)
+        {
+            if (assignment[i] != -1)
+            {
+#ifdef DEBUG_SHOW
+                std::cout<<costMatrix[i + assignment[i] * N]<<", ";
+#endif
+                if (costMatrix[i + assignment[i] * N] > m_settings.m_distThres)
+                {
+                    // Matched region is too far away: treat this track as
+                    // missed for the current frame.
+                    assignment[i] = -1;
+                    m_tracks[i]->SkippedFrames()++;
+                }
+            }
+            else
+            {
+#ifdef DEBUG_SHOW
+                std::cout<<-1<<", ";
+#endif
+                // If track have no assigned detect, then increment skipped frames counter.
+                m_tracks[i]->SkippedFrames()++;
+            }
+        }
+#ifdef DEBUG_SHOW
+                std::cout<<std::endl;
+#endif
+        // If track didn't get detects long time, remove it.
+        for (size_t i = 0; i < m_tracks.size();)
+        {
+            if (m_tracks[i]->SkippedFrames() > m_settings.m_maximumAllowedSkippedFrames ||
+                    m_tracks[i]->IsStaticTimeout(cvRound(fps * (m_settings.m_maxStaticTime - m_settings.m_minStaticTime))))
+            {
+                // Erase both containers in lockstep so indices stay aligned.
+                m_tracks.erase(m_tracks.begin() + i);
+                assignment.erase(assignment.begin() + i);
+            }
+            else
+            {
+                ++i;
+            }
+        }
+    }
+
+    // Search for unassigned detects and start new tracks for them.
+    for (size_t i = 0; i < regions.size(); ++i)
+    {
+        if (find(assignment.begin(), assignment.end(), i) == assignment.end())
+        {
+            // Track IDs wrap at 500 (m_nextTrackID++ % 500), so IDs can repeat
+            // over a long run.
+            m_tracks.push_back(std::make_unique<CTrack>(regions[i],
+                                                        m_settings.m_kalmanType,
+                                                        m_settings.m_dt,
+                                                        m_settings.m_accelNoiseMag,
+                                                        m_settings.m_useAcceleration,
+                                                        m_nextTrackID++%500,
+                                                        i,
+                                                        m_settings.m_filterGoal,
+                                                        m_settings.m_lostTrackType));
+        }
+    }
+
+    // Update Kalman Filters state
+    // Each iteration touches only track i, so the OpenMP loop is race-free.
+    const ptrdiff_t stop_i = static_cast<ptrdiff_t>(assignment.size());
+#pragma omp parallel for
+    for (ptrdiff_t i = 0; i < stop_i; ++i)
+    {
+        // If track updated less than one time, than filter state is not correct.
+        if (assignment[i] != -1) // If we have assigned detect, then update using its coordinates,
+        {
+            m_tracks[i]->DetectedFrames()++;
+            m_tracks[i]->SkippedFrames() = 0;
+            m_tracks[i]->Update(
+                        regions[assignment[i]], true,
+                    m_settings.m_maxTraceLength,
+                    m_prevFrame, currFrame,
+                    m_settings.m_useAbandonedDetection ? cvRound(m_settings.m_minStaticTime * fps) : 0);
+        }
+        else				     // if not continue using predictions
+        {
+            m_tracks[i]->Update(CRegion(), false, m_settings.m_maxTraceLength, m_prevFrame, currFrame, 0);
+        }
+        m_tracks[i]->m_regionID = assignment[i];
+    }
+}
+
+///
+/// \brief CTracker::CreateDistaceMatrix
+/// \param regions
+/// \param costMatrix
+/// \param maxPossibleCost
+/// \param maxCost
+///
+// Fills costMatrix (column-major, element (track i, region j) at [i + j * N])
+// with the weighted distance between every track and every region; pairs with
+// incompatible types keep maxPossibleCost. maxCost is set to the largest value
+// written. ("Distace" is the original spelling; renaming would break callers.)
+void CTracker::CreateDistaceMatrix(const regions_t& regions, distMatrix_t& costMatrix, track_t maxPossibleCost, track_t& maxCost, cv::UMat currFrame)
+{
+    const size_t N = m_tracks.size();	// Tracking objects
+    maxCost = 0;
+
+    for (size_t i = 0; i < N; ++i)
+    {
+        const auto& track = m_tracks[i];
+
+        // Calc predicted area for track
+//        cv::Size_<track_t> minRadius;
+//        if (m_settings.m_minAreaRadiusPix < 0)
+//        {
+//            minRadius.width = m_settings.m_minAreaRadiusK * track->LastRegion().m_rect.size.width;
+//            minRadius.height = m_settings.m_minAreaRadiusK * track->LastRegion().m_rect.size.height;
+//        }
+//        else
+//        {
+//            minRadius.width = m_settings.m_minAreaRadiusPix;
+//            minRadius.height = m_settings.m_minAreaRadiusPix;
+//        }
+        //cv::RotatedRect predictedArea = track->CalcPredictionEllipse(minRadius);
+
+        // Calc distance between track and regions
+        for (size_t j = 0; j < regions.size(); ++j)
+        {
+            const auto& reg = regions[j];
+
+            auto dist = maxPossibleCost;
+            // Type gate: regions with m_type == -1 are compared by type name,
+            // otherwise by numeric type (see TrackerSettings::CheckType).
+            if (reg.m_type==-1?(m_settings.CheckType(m_tracks[i]->LastRegion().m_type_name, reg.m_type_name)):(m_settings.CheckType(m_tracks[i]->LastRegion().m_type, reg.m_type)))
+            {
+                // Accumulate each enabled metric, weighted by m_distType[ind].
+                // ind walks DistCenters -> DistRects -> DistRect3Ds in order.
+                dist = 0;
+                size_t ind = 0;
+                if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistCenters)
+                {
+#if 0
+                    track_t ellipseDist = track->IsInsideArea(reg.m_rrect.center, predictedArea);
+                    if (ellipseDist > 1)
+                        dist += m_settings.m_distType[ind];
+                    else
+                        dist += ellipseDist * m_settings.m_distType[ind];
+#else
+                    dist += m_settings.m_distType[ind] * track->CalcDistCenter(reg);
+#endif
+                }
+                ++ind;
+
+                if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistRects)
+                {
+#if 0
+                    track_t ellipseDist = track->IsInsideArea(reg.m_rect.center, predictedArea);
+                    if (ellipseDist < 1)
+                    {
+                        track_t dw = track->WidthDist(reg);
+                        track_t dh = track->HeightDist(reg);
+                        dist += m_settings.m_distType[ind] * (1 - (1 - ellipseDist) * (dw + dh) * 0.5f);
+                    }
+                    else
+                    {
+                        dist += m_settings.m_distType[ind];
+                    }
+                    //std::cout << "dist = " << dist << ", ed = " << ellipseDist << ", dw = " << dw << ", dh = " << dh << std::endl;
+#else
+                    dist += m_settings.m_distType[ind] * track->CalcDistRect(reg);
+#endif
+                }
+                ++ind;
+                if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistRect3Ds)
+                {
+#if 0
+                    track_t ellipseDist = track->IsInsideArea(reg.m_rect.center, predictedArea);
+                    if (ellipseDist < 1)
+                    {
+                        track_t dw = track->WidthDist(reg);
+                        track_t dh = track->HeightDist(reg);
+                        dist += m_settings.m_distType[ind] * (1 - (1 - ellipseDist) * (dw + dh) * 0.5f);
+                    }
+                    else
+                    {
+                        dist += m_settings.m_distType[ind];
+                    }
+                    //std::cout << "dist = " << dist << ", ed = " << ellipseDist << ", dw = " << dw << ", dh = " << dh << std::endl;
+#else
+                    dist += m_settings.m_distType[ind] * track->CalcDistRect3D(reg);
+#endif
+                }
+                ++ind;
+                // Sanity check: we must have visited every metric exactly once.
+                assert(ind == tracking::DistsCount);
+            }
+            costMatrix[i + j * N] = dist;
+            if (dist > maxCost)
+                maxCost = dist;
+        }
+    }
+}

+ 292 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Ctracker.h

@@ -0,0 +1,292 @@
+#pragma once
+#include <iostream>
+#include <vector>
+#include <memory>
+#include <array>
+#include <deque>
+#include <numeric>
+#include <map>
+#include <set>
+
+#include "defines.h"
+#include "track.h"
+#include "ShortPathCalculator.h"
+
+// ----------------------------------------------------------------------
+
+///
+/// \brief The TrackerSettings struct
+///
+struct TrackerSettings
+{
+    // Core algorithm choices: Kalman model, what the filter estimates,
+    // lost-track handling, and the detection<->track matching algorithm.
+    tracking::KalmanType m_kalmanType = tracking::KalmanLinear;
+    tracking::FilterGoal m_filterGoal = tracking::FilterCenter;
+    tracking::LostTrackType m_lostTrackType = tracking::TrackNone;
+    tracking::MatchType m_matchType = tracking::MatchHungrian;
+
+	// Per-metric weights of the combined track<->region distance, indexed by
+	// tracking::DistType; must sum to 1 (enforced by CheckDistance()).
+	std::array<track_t, tracking::DistsCount> m_distType;
+
+    ///
+    /// \brief m_dt
+    /// Time step for Kalman
+    ///
+    track_t m_dt = 1.0f;
+
+    ///
+    /// \brief m_accelNoiseMag
+    /// Noise magnitude for Kalman
+    ///
+    track_t m_accelNoiseMag = 0.1f;
+
+	///
+	/// \brief m_useAcceleration
+	/// Constant velocity or constant acceleration motion model
+	///
+	bool m_useAcceleration = false;
+
+    ///
+    /// \brief m_distThres
+    /// Distance threshold for Assignment problem: from 0 to 1
+    ///
+    track_t m_distThres = 0.8f;
+
+    ///
+    /// \brief m_minAreaRadiusPix
+    /// Minimal area radius in pixels for objects centers
+    ///
+    track_t m_minAreaRadiusPix = 20.f;
+
+	///
+	/// \brief m_minAreaRadiusK
+	/// Minimal area radius as a ratio of the object size. Used if m_minAreaRadiusPix < 0
+	///
+	track_t m_minAreaRadiusK = 0.5f;
+
+    ///
+    /// \brief m_maximumAllowedSkippedFrames
+    /// If an object gets no assignment for more than this many frames, its track is removed
+    ///
+    size_t m_maximumAllowedSkippedFrames = 25;
+
+    ///
+    /// \brief m_maxTraceLength
+    /// The maximum trajectory length
+    ///
+    size_t m_maxTraceLength = 50;
+
+    ///
+    /// \brief m_useAbandonedDetection
+    /// Detection abandoned objects
+    ///
+    bool m_useAbandonedDetection = false;
+
+    ///
+    /// \brief m_minStaticTime
+    /// After this time (in seconds) the object is considered abandoned
+    ///
+    int m_minStaticTime = 5;
+    ///
+    /// \brief m_maxStaticTime
+    /// After this time (in seconds) the abandoned object will be removed
+    ///
+    int m_maxStaticTime = 25;
+
+	///
+	/// \brief m_nearTypes
+	/// Object types (numeric IDs) that can be matched while tracking
+	///
+    std::map<int, std::set<int>> m_nearTypes;
+
+    // Same relation keyed by type name (used when a region's numeric type is -1).
+    std::map<std::string, std::set<std::string>> m_nearTypeNames;
+
+	/// Default weights: centers and 2D rects at 0.5 each, 3D rects disabled.
+	TrackerSettings()
+	{
+        m_distType[tracking::DistCenters] = 0.5f;
+        m_distType[tracking::DistRects] = 0.5f;
+        m_distType[tracking::DistRect3Ds] = 0.f;
+
+		assert(CheckDistance());
+	}
+
+	/// Returns true when the distance weights sum to 1 (within a scaled epsilon).
+	bool CheckDistance() const
+	{
+		track_t sum = std::accumulate(m_distType.begin(), m_distType.end(), 0.0f);
+        track_t maxOne = std::max(1.0f, std::fabs(sum));
+		return std::fabs(sum - 1.0f) <= std::numeric_limits<track_t>::epsilon() * maxOne;
+	}
+
+	/// Replaces all weights at once; restores the old weights and returns
+	/// false if the new ones do not sum to 1.
+	bool SetDistances(std::array<track_t, tracking::DistsCount> distType)
+	{
+		bool res = true;
+		auto oldDists = m_distType;
+		m_distType = distType;
+		if (!CheckDistance())
+		{
+			m_distType = oldDists;
+			res = false;
+		}
+		return res;
+	}
+
+	/// Uses a single metric exclusively: weight 1 for distType, 0 for the rest.
+	bool SetDistance(tracking::DistType distType)
+	{
+		std::fill(m_distType.begin(), m_distType.end(), 0.0f);
+		m_distType[distType] = 1.f;
+		return true;
+	}
+
+	/// Declares numeric types type1 and type2 matchable while tracking
+	/// (in both directions when sym is true).
+    void AddNearTypes(const int& type1, const int& type2, bool sym)
+	{
+        auto AddOne = [&](const int& type1, const int& type2)
+		{
+			auto it = m_nearTypes.find(type1);
+			if (it == std::end(m_nearTypes))
+			{
+                m_nearTypes[type1] = std::set<int>{ type2 };
+			}
+			else
+			{
+				it->second.insert(type2);
+			}
+		};
+		AddOne(type1, type2);
+		if (sym)
+        {
+			AddOne(type2, type1);
+        }
+	}
+
+	/// True when the two numeric types may be matched: equal, either is the
+	/// -1 wildcard, or they were registered via AddNearTypes().
+    bool CheckType(const int& type1, const int& type2) const
+	{
+        bool res = type1==-1 || type2==-1 || (type1 == type2);
+		if (!res)
+		{
+			auto it = m_nearTypes.find(type1);
+			if (it != std::end(m_nearTypes))
+			{
+				res = it->second.find(type2) != std::end(it->second);
+			}
+		}
+		return res;
+	}
+
+    /// Name-keyed variant of AddNearTypes (see above).
+    void AddNearTypes(const std::string& type1, const std::string& type2, bool sym)
+    {
+        auto AddOne = [&](const std::string& type1, const std::string& type2)
+        {
+            auto it = m_nearTypeNames.find(type1);
+            if (it == std::end(m_nearTypeNames))
+            {
+                m_nearTypeNames[type1] = std::set<std::string>{ type2 };
+            }
+            else
+            {
+                it->second.insert(type2);
+            }
+        };
+        AddOne(type1, type2);
+        if (sym)
+        {
+            AddOne(type2, type1);
+        }
+    }
+
+    /// Name-keyed variant of CheckType; an empty name acts as the wildcard.
+    bool CheckType(const std::string& type1, const std::string& type2) const
+    {
+        bool res = type1.empty() || type2.empty() || (type1 == type2);
+        if (!res)
+        {
+            auto it = m_nearTypeNames.find(type1);
+            if (it != std::end(m_nearTypeNames))
+            {
+                res = it->second.find(type2) != std::end(it->second);
+            }
+        }
+        return res;
+    }
+};
+
+///
+/// \brief The CTracker class
+///
+class CTracker
+{
+public:
+    CTracker(TrackerSettings& settings);
+    CTracker(const TrackerSettings& settings);
+    // Non-copyable / non-movable: owns the solver and track list.
+    CTracker(const CTracker&) = delete;
+    CTracker(CTracker&&) = delete;
+	CTracker& operator=(const CTracker&) = delete;
+	CTracker& operator=(CTracker&&) = delete;
+	
+	~CTracker(void);
+    // Replaces the settings, resets ID generation and rebuilds the matcher.
+    void setSettings(TrackerSettings& settings);
+    // Runs one tracking step on the given detections/frame.
+    void Update(const regions_t& regions, cv::UMat currFrame, float fps);
+
+    ///
+    /// \brief CanGrayFrameToTrack
+    /// True when a grayscale frame suffices (no visual lost-track tracking).
+    /// \return
+    ///
+    bool CanGrayFrameToTrack() const
+    {
+        bool needColor = (m_settings.m_lostTrackType == tracking::LostTrackType::TrackNone);
+        return !needColor;
+    }
+
+	///
+	/// \brief CanColorFrameToTrack
+	/// Color frames are always accepted.
+	/// \return
+	///
+	bool CanColorFrameToTrack() const
+	{
+		return true;
+	}
+
+    ///
+    /// \brief GetTracksCount
+    /// \return number of currently live tracks
+    ///
+	size_t GetTracksCount() const
+	{
+		return m_tracks.size();
+	}
+    ///
+    /// \brief GetTracks
+    /// \return a snapshot of all live tracks as plain TrackingObject values
+    ///
+	std::vector<TrackingObject> GetTracks() const
+	{
+		std::vector<TrackingObject> tracks;
+		if (!m_tracks.empty())
+		{
+			tracks.reserve(m_tracks.size());
+			for (const auto& track : m_tracks)
+			{
+                tracks.push_back(track->ConstructObject());
+			}
+		}
+		return tracks;
+	}
+
+private:
+    TrackerSettings m_settings;
+
+	// Live tracks, owned by the tracker.
+	tracks_t m_tracks;
+
+    // Next track ID to hand out (wraps at 500 in UpdateTrackingState).
+    size_t m_nextTrackID;
+
+    // Previous frame, kept for visual lost-track modes.
+    cv::UMat m_prevFrame;
+
+    // Assignment-problem solver selected by m_settings.m_matchType.
+    std::unique_ptr<ShortPathCalculator> m_SPCalculator;
+
+    void CreateDistaceMatrix(const regions_t& regions, distMatrix_t& costMatrix, track_t maxPossibleCost, track_t& maxCost, cv::UMat currFrame);
+    void UpdateTrackingState(const regions_t& regions, cv::UMat currFrame, float fps);
+};

+ 723 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/HungarianAlg.cpp

@@ -0,0 +1,723 @@
+#include "HungarianAlg.h"
+#include <limits>
+
+// --------------------------------------------------------------------------
+//
+// --------------------------------------------------------------------------
+// Stateless between Solve() calls -- nothing to initialize.
+AssignmentProblemSolver::AssignmentProblemSolver() = default;
+
+// --------------------------------------------------------------------------
+//
+// --------------------------------------------------------------------------
+// No owned resources -- defaulted destructor.
+AssignmentProblemSolver::~AssignmentProblemSolver() = default;
+
+// --------------------------------------------------------------------------
+//
+// --------------------------------------------------------------------------
+// Solves the rows->columns assignment for the column-major cost matrix
+// (element (row, col) at distMatrixIn[row + nOfRows * col]). On return
+// assignment[row] is the chosen column or -1; the summed cost is returned.
+track_t AssignmentProblemSolver::Solve(
+	const distMatrix_t& distMatrixIn,
+	size_t nOfRows,
+	size_t nOfColumns,
+	std::vector<int>& assignment,
+	TMethod Method
+	)
+{
+	// NOTE(review): resize() only default-fills *newly created* elements, so a
+	// caller passing a non-empty vector keeps its old entries -- callers are
+	// expected to pass an empty vector. TODO confirm intended contract.
+	assignment.resize(nOfRows, -1);
+
+	track_t cost = 0;
+
+	// Dispatch to the exact Munkres solver or one of the greedy variants.
+	switch (Method)
+	{
+	case optimal:
+		assignmentoptimal(assignment, cost, distMatrixIn, nOfRows, nOfColumns);
+		break;
+
+	case many_forbidden_assignments:
+		assignmentsuboptimal1(assignment, cost, distMatrixIn, nOfRows, nOfColumns);
+		break;
+
+	case without_forbidden_assignments:
+		assignmentsuboptimal2(assignment, cost, distMatrixIn, nOfRows, nOfColumns);
+		break;
+	}
+
+	return cost;
+}
+// --------------------------------------------------------------------------
+// Computes the optimal assignment (minimum overall costs) using Munkres algorithm.
+// --------------------------------------------------------------------------
+// Munkres (Hungarian) algorithm: reduce rows or columns, star independent
+// zeros, then iterate steps 2b/3/4/5 via the step* helpers until every row
+// (or column, whichever is smaller) is assigned.
+void AssignmentProblemSolver::assignmentoptimal(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns)
+{
+	// Generate distance cv::Matrix 
+	// and check cv::Matrix elements positiveness :)
+
+	// Total elements number
+	size_t nOfElements = nOfRows * nOfColumns;
+	// Memory allocation
+	// Working copy of the cost matrix, column-major like the input:
+	// element (row, col) lives at distMatrix[row + nOfRows * col].
+	track_t* distMatrix = (track_t *)malloc(nOfElements * sizeof(track_t));
+
+    if (distMatrix == nullptr)
+    {
+        return;
+    }
+
+	// Pointer to last element
+	track_t* distMatrixEnd = distMatrix + nOfElements;
+
+	// Copy input (Munkres requires non-negative costs; debug-checked below).
+	// (The loop variable is named 'row' but iterates over all elements.)
+	for (size_t row = 0; row < nOfElements; row++)
+	{
+        track_t value = distMatrixIn[row];
+		assert(value >= 0);
+		distMatrix[row] = value;
+	}
+
+	// Memory allocation
+	// NOTE(review): these calloc results are not null-checked (only distMatrix
+	// above is); an allocation failure here would crash in the steps below.
+	bool* coveredColumns = (bool*)calloc(nOfColumns, sizeof(bool));
+	bool* coveredRows = (bool*)calloc(nOfRows, sizeof(bool));
+	bool* starMatrix = (bool*)calloc(nOfElements, sizeof(bool));
+	bool* primeMatrix = (bool*)calloc(nOfElements, sizeof(bool));
+	bool* newStarMatrix = (bool*)calloc(nOfElements, sizeof(bool)); /* used in step4 */
+
+	/* preliminary steps */
+	if (nOfRows <= nOfColumns)
+	{
+		for (size_t row = 0; row < nOfRows; row++)
+		{
+			/* find the smallest element in the row */
+            track_t* distMatrixTemp = distMatrix + row;
+			track_t  minValue = *distMatrixTemp;
+			distMatrixTemp += nOfRows;
+			while (distMatrixTemp < distMatrixEnd)
+			{
+				track_t value = *distMatrixTemp;
+				if (value < minValue)
+				{
+					minValue = value;
+				}
+				distMatrixTemp += nOfRows;
+			}
+			/* subtract the smallest element from each element of the row */
+			distMatrixTemp = distMatrix + row;
+			while (distMatrixTemp < distMatrixEnd)
+			{
+				*distMatrixTemp -= minValue;
+				distMatrixTemp += nOfRows;
+			}
+		}
+		/* Steps 1 and 2a */
+		/* star the first zero of each row whose column is still uncovered */
+		for (size_t row = 0; row < nOfRows; row++)
+		{
+			for (size_t col = 0; col < nOfColumns; col++)
+			{
+				if (distMatrix[row + nOfRows*col] == 0)
+				{
+					if (!coveredColumns[col])
+					{
+						starMatrix[row + nOfRows * col] = true;
+						coveredColumns[col] = true;
+						break;
+					}
+				}
+			}
+		}
+	}
+	else /* if(nOfRows > nOfColumns) */
+	{
+		for (size_t col = 0; col < nOfColumns; col++)
+		{
+			/* find the smallest element in the column */
+			track_t* distMatrixTemp = distMatrix + nOfRows*col;
+			track_t* columnEnd = distMatrixTemp + nOfRows;
+			track_t  minValue = *distMatrixTemp++;
+			while (distMatrixTemp < columnEnd)
+			{
+				track_t value = *distMatrixTemp++;
+				if (value < minValue)
+				{
+					minValue = value;
+				}
+			}
+			/* subtract the smallest element from each element of the column */
+			distMatrixTemp = distMatrix + nOfRows*col;
+			while (distMatrixTemp < columnEnd)
+			{
+				*distMatrixTemp++ -= minValue;
+			}
+		}
+		/* Steps 1 and 2a */
+		/* star independent zeros, covering their row and column */
+		for (size_t col = 0; col < nOfColumns; col++)
+		{
+			for (size_t row = 0; row < nOfRows; row++)
+			{
+				if (distMatrix[row + nOfRows*col] == 0)
+				{
+					if (!coveredRows[row])
+					{
+						starMatrix[row + nOfRows*col] = true;
+						coveredColumns[col] = true;
+						coveredRows[row] = true;
+						break;
+					}
+				}
+			}
+		}
+
+		/* row covers were only bookkeeping for the starring pass above */
+		for (size_t row = 0; row < nOfRows; row++)
+		{
+			coveredRows[row] = false;
+		}
+	}
+	/* move to step 2b */
+	step2b(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, (nOfRows <= nOfColumns) ? nOfRows : nOfColumns);
+	/* compute cost and remove invalid assignments */
+	computeassignmentcost(assignment, cost, distMatrixIn, nOfRows);
+	/* free allocated memory */
+	free(distMatrix);
+	free(coveredColumns);
+	free(coveredRows);
+	free(starMatrix);
+	free(primeMatrix);
+	free(newStarMatrix);
+	return;
+}
+// --------------------------------------------------------------------------
+//
+// --------------------------------------------------------------------------
+// Reads the final starred-zero matrix: for each row, the starred column (if
+// any) becomes assignment[row]; rows without a star keep their prior value.
+void AssignmentProblemSolver::buildassignmentvector(assignments_t& assignment, bool *starMatrix, size_t nOfRows, size_t nOfColumns)
+{
+    for (size_t row = 0; row < nOfRows; row++)
+	{
+        for (size_t col = 0; col < nOfColumns; col++)
+		{
+			if (starMatrix[row + nOfRows * col])
+			{
+				assignment[row] = static_cast<int>(col);
+				break;
+			}
+		}
+	}
+}
+// --------------------------------------------------------------------------
+//
+// --------------------------------------------------------------------------
+// Accumulates into `cost` the original (unreduced) cost of every assigned
+// pair; rows with assignment -1 contribute nothing. Note: adds to the
+// caller-provided value rather than resetting it.
+void AssignmentProblemSolver::computeassignmentcost(const assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows)
+{
+	for (size_t row = 0; row < nOfRows; row++)
+	{
+		const int col = assignment[row];
+		if (col >= 0)
+		{
+			cost += distMatrixIn[row + nOfRows * col];
+		}
+	}
+}
+
+// --------------------------------------------------------------------------
+//
+// --------------------------------------------------------------------------
+// Munkres step 2a: cover every column that contains a starred zero, then
+// fall through to step 2b to test for completion.
+void AssignmentProblemSolver::step2a(assignments_t& assignment, track_t *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim)
+{
+    bool *starMatrixTemp, *columnEnd;
+    /* cover every column containing a starred zero */
+    for (size_t col = 0; col < nOfColumns; col++)
+    {
+        starMatrixTemp = starMatrix + nOfRows * col;
+        columnEnd = starMatrixTemp + nOfRows;
+        while (starMatrixTemp < columnEnd)
+        {
+            if (*starMatrixTemp++)
+            {
+                coveredColumns[col] = true;
+                break;
+            }
+        }
+    }
+    /* move to step 3 */
+	step2b(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+
+// --------------------------------------------------------------------------
+//
+// --------------------------------------------------------------------------
+// Munkres step 2b: if all minDim columns are covered the solution is
+// complete (extract it); otherwise continue with steps 3/5.
+void AssignmentProblemSolver::step2b(assignments_t& assignment, track_t *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim)
+{
+	/* count covered columns */
+    size_t nOfCoveredColumns = 0;
+    for (size_t col = 0; col < nOfColumns; col++)
+	{
+		if (coveredColumns[col])
+		{
+			nOfCoveredColumns++;
+		}
+	}
+	if (nOfCoveredColumns == minDim)
+	{
+		/* algorithm finished */
+		buildassignmentvector(assignment, starMatrix, nOfRows, nOfColumns);
+	}
+	else
+	{
+		/* move to step 3 */
+		step3_5(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+	}
+}
+
+// --------------------------------------------------------------------------
+//
+// --------------------------------------------------------------------------
+// Munkres steps 3 and 5 combined: prime uncovered zeros (handing off to
+// step 4 when a row has a primed zero but no starred one); when no uncovered
+// zero remains, shift the matrix by its smallest uncovered value h and repeat.
+void AssignmentProblemSolver::step3_5(assignments_t& assignment, track_t *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim)
+{
+	for (;;)
+	{
+		/* step 3 */
+		bool zerosFound = true;
+		while (zerosFound)
+		{
+			zerosFound = false;
+			for (size_t col = 0; col < nOfColumns; col++)
+			{
+				if (!coveredColumns[col])
+				{
+					for (size_t row = 0; row < nOfRows; row++)
+					{
+						if ((!coveredRows[row]) && (distMatrix[row + nOfRows*col] == 0))
+						{
+							/* prime zero */
+							primeMatrix[row + nOfRows*col] = true;
+							/* find starred zero in current row */
+							size_t starCol = 0;
+							for (; starCol < nOfColumns; starCol++)
+							{
+								if (starMatrix[row + nOfRows * starCol])
+								{
+									break;
+								}
+							}
+							if (starCol == nOfColumns) /* no starred zero found */
+							{
+								/* move to step 4 */
+								step4(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim, row, col);
+								return;
+							}
+							else
+							{
+								/* swap covers: cover this row, uncover the starred column */
+								coveredRows[row] = true;
+								coveredColumns[starCol] = false;
+								zerosFound = true;
+								break;
+							}
+						}
+					}
+				}
+			}
+		}
+		/* step 5 */
+		/* h = smallest uncovered element; shifting by it creates a new zero */
+        track_t h = std::numeric_limits<track_t>::max();
+		for (size_t row = 0; row < nOfRows; row++)
+		{
+			if (!coveredRows[row])
+			{
+				for (size_t col = 0; col < nOfColumns; col++)
+				{
+					if (!coveredColumns[col])
+					{
+                        const track_t value = distMatrix[row + nOfRows*col];
+						if (value < h)
+						{
+							h = value;
+						}
+					}
+				}
+			}
+		}
+		/* add h to each covered row */
+		for (size_t row = 0; row < nOfRows; row++)
+		{
+			if (coveredRows[row])
+			{
+				for (size_t col = 0; col < nOfColumns; col++)
+				{
+					distMatrix[row + nOfRows*col] += h;
+				}
+			}
+		}
+		/* subtract h from each uncovered column */
+		for (size_t col = 0; col < nOfColumns; col++)
+		{
+			if (!coveredColumns[col])
+			{
+				for (size_t row = 0; row < nOfRows; row++)
+				{
+					distMatrix[row + nOfRows*col] -= h;
+				}
+			}
+		}
+	}
+}
+
+// --------------------------------------------------------------------------
+//
+// --------------------------------------------------------------------------
+// Munkres step 4: starting from the primed zero at (row, col), follow the
+// alternating chain star-in-column / prime-in-row, unstarring the stars and
+// starring the primes along it (this augments the matching by one zero).
+// Afterwards all primes are erased and all row covers cleared, and control
+// returns to step 2a to re-cover the columns of the new star set.
+void AssignmentProblemSolver::step4(assignments_t& assignment, track_t *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim, size_t row, size_t col)
+{
+	const size_t nOfElements = nOfRows * nOfColumns;
+	/* generate temporary copy of starMatrix */
+	for (size_t n = 0; n < nOfElements; n++)
+	{
+		newStarMatrix[n] = starMatrix[n];
+	}
+	/* star current zero */
+	newStarMatrix[row + nOfRows*col] = true;
+	/* find starred zero in current column */
+	size_t starCol = col;
+	size_t starRow = 0;
+	for (; starRow < nOfRows; starRow++)
+	{
+		if (starMatrix[starRow + nOfRows * starCol])
+		{
+			break;
+		}
+	}
+	while (starRow < nOfRows)
+	{
+		/* unstar the starred zero */
+		newStarMatrix[starRow + nOfRows*starCol] = false;
+		/* find primed zero in current row */
+		size_t primeRow = starRow;
+		size_t primeCol = 0;
+		for (; primeCol < nOfColumns; primeCol++)
+		{
+			if (primeMatrix[primeRow + nOfRows * primeCol])
+			{
+				break;
+			}
+		}
+		/* star the primed zero */
+		newStarMatrix[primeRow + nOfRows*primeCol] = true;
+		/* find starred zero in current column */
+		starCol = primeCol;
+		for (starRow = 0; starRow < nOfRows; starRow++)
+		{
+			if (starMatrix[starRow + nOfRows * starCol])
+			{
+				break;
+			}
+		}
+	}
+	/* use temporary copy as new starMatrix */
+	/* delete all primes, uncover all rows */
+    for (size_t n = 0; n < nOfElements; n++)
+	{
+		primeMatrix[n] = false;
+		starMatrix[n] = newStarMatrix[n];
+	}
+    for (size_t n = 0; n < nOfRows; n++)
+	{
+		coveredRows[n] = false;
+	}
+	/* move to step 2a */
+	step2a(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+
+// --------------------------------------------------------------------------
+// Computes a suboptimal solution. Good for cases without forbidden assignments.
+// --------------------------------------------------------------------------
+// Greedy suboptimal solver: repeatedly pick the globally smallest remaining
+// cost, assign that (row, col) pair, and invalidate its whole row and column
+// by setting them to max(). Fast, and adequate when no assignment is
+// forbidden. Matrix is column-major: element(row, col) = distMatrix[row + nOfRows*col].
+// The working copy is an RAII std::vector (the previous raw malloc was
+// unchecked and needed a manual free).
+void AssignmentProblemSolver::assignmentsuboptimal2(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns)
+{
+	/* make working copy of distance Matrix */
+	const size_t nOfElements = nOfRows * nOfColumns;
+	std::vector<track_t> distMatrix(nOfElements);
+	for (size_t n = 0; n < nOfElements; n++)
+	{
+		distMatrix[n] = distMatrixIn[n];
+	}
+
+	/* iteratively search for the minimum element and do the assignment */
+	for (;;)
+	{
+		/* find minimum distance observation-to-track pair */
+		track_t minValue = std::numeric_limits<track_t>::max();
+		size_t tmpRow = 0;
+		size_t tmpCol = 0;
+		for (size_t row = 0; row < nOfRows; row++)
+		{
+			for (size_t col = 0; col < nOfColumns; col++)
+			{
+				const track_t value = distMatrix[row + nOfRows*col];
+				if (value != std::numeric_limits<track_t>::max() && (value < minValue))
+				{
+					minValue = value;
+					tmpRow = row;
+					tmpCol = col;
+				}
+			}
+		}
+
+		if (minValue == std::numeric_limits<track_t>::max())
+		{
+			break; /* no assignable pair left */
+		}
+
+		assignment[tmpRow] = static_cast<int>(tmpCol);
+		cost += minValue;
+		/* forbid the assigned column and row for further use */
+		for (size_t n = 0; n < nOfRows; n++)
+		{
+			distMatrix[n + nOfRows*tmpCol] = std::numeric_limits<track_t>::max();
+		}
+		for (size_t n = 0; n < nOfColumns; n++)
+		{
+			distMatrix[tmpRow + nOfRows*n] = std::numeric_limits<track_t>::max();
+		}
+	}
+}
+// --------------------------------------------------------------------------
+// Computes a suboptimal solution. Good for cases with many forbidden assignments.
+// --------------------------------------------------------------------------
+// Greedy suboptimal solver tuned for matrices with many forbidden
+// (== std::numeric_limits<track_t>::max()) entries:
+//  1) iteratively reject assignments of multiply validated tracks to singly
+//     validated observations, and vice versa;
+//  2) resolve rows/columns that validate only with singly validated
+//     counterparts by minimum distance;
+//  3) greedily assign the remaining pairs by global minimum distance.
+// Matrix is column-major: element(row, col) = distMatrix[row + nOfRows*col].
+// RAII std::vector working buffers replace the original unchecked
+// malloc/calloc/free management (which also had a manually-freed early return).
+void AssignmentProblemSolver::assignmentsuboptimal1(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns)
+{
+	/* make working copy of distance Matrix */
+	const size_t nOfElements = nOfRows * nOfColumns;
+	std::vector<track_t> distMatrix(nOfElements);
+	for (size_t n = 0; n < nOfElements; n++)
+	{
+		distMatrix[n] = distMatrixIn[n];
+	}
+
+	/* number of finite entries per row (observation) and per column (track) */
+	std::vector<int> nOfValidObservations(nOfRows, 0);
+	std::vector<int> nOfValidTracks(nOfColumns, 0);
+
+	/* compute number of validations */
+	bool infiniteValueFound = false;
+	bool finiteValueFound = false;
+	for (size_t row = 0; row < nOfRows; row++)
+	{
+		for (size_t col = 0; col < nOfColumns; col++)
+		{
+			if (distMatrix[row + nOfRows*col] != std::numeric_limits<track_t>::max())
+			{
+				nOfValidTracks[col] += 1;
+				nOfValidObservations[row] += 1;
+				finiteValueFound = true;
+			}
+			else
+			{
+				infiniteValueFound = true;
+			}
+		}
+	}
+
+	if (infiniteValueFound)
+	{
+		if (!finiteValueFound)
+		{
+			/* every entry is forbidden - nothing to assign */
+			return;
+		}
+		bool repeatSteps = true;
+
+		while (repeatSteps)
+		{
+			repeatSteps = false;
+
+			/* step 1: reject assignments of multiply validated tracks to singly validated observations */
+			for (size_t col = 0; col < nOfColumns; col++)
+			{
+				bool singleValidationFound = false;
+				for (size_t row = 0; row < nOfRows; row++)
+				{
+					if (distMatrix[row + nOfRows * col] != std::numeric_limits<track_t>::max() && (nOfValidObservations[row] == 1))
+					{
+						singleValidationFound = true;
+						break;
+					}
+				}
+				if (singleValidationFound)
+				{
+					for (size_t nestedRow = 0; nestedRow < nOfRows; nestedRow++)
+					{
+						if ((nOfValidObservations[nestedRow] > 1) && distMatrix[nestedRow + nOfRows * col] != std::numeric_limits<track_t>::max())
+						{
+							distMatrix[nestedRow + nOfRows * col] = std::numeric_limits<track_t>::max();
+							nOfValidObservations[nestedRow] -= 1;
+							nOfValidTracks[col] -= 1;
+							repeatSteps = true;
+						}
+					}
+				}
+			}
+
+			/* step 2: reject assignments of multiply validated observations to singly validated tracks */
+			if (nOfColumns > 1)
+			{
+				for (size_t row = 0; row < nOfRows; row++)
+				{
+					bool singleValidationFound = false;
+					for (size_t col = 0; col < nOfColumns; col++)
+					{
+						if (distMatrix[row + nOfRows*col] != std::numeric_limits<track_t>::max() && (nOfValidTracks[col] == 1))
+						{
+							singleValidationFound = true;
+							break;
+						}
+					}
+
+					if (singleValidationFound)
+					{
+						for (size_t col = 0; col < nOfColumns; col++)
+						{
+							if ((nOfValidTracks[col] > 1) && distMatrix[row + nOfRows*col] != std::numeric_limits<track_t>::max())
+							{
+								distMatrix[row + nOfRows*col] = std::numeric_limits<track_t>::max();
+								nOfValidObservations[row] -= 1;
+								nOfValidTracks[col] -= 1;
+								repeatSteps = true;
+							}
+						}
+					}
+				}
+			}
+		} /* while(repeatSteps) */
+
+		/* for each multiply validated track that validates only with singly validated */
+		/* observations, choose the observation with minimum distance */
+		for (size_t row = 0; row < nOfRows; row++)
+		{
+			if (nOfValidObservations[row] > 1)
+			{
+				bool allSinglyValidated = true;
+				track_t minValue = std::numeric_limits<track_t>::max();
+				size_t tmpCol = 0;
+				for (size_t col = 0; col < nOfColumns; col++)
+				{
+					const track_t value = distMatrix[row + nOfRows*col];
+					if (value != std::numeric_limits<track_t>::max())
+					{
+						if (nOfValidTracks[col] > 1)
+						{
+							allSinglyValidated = false;
+							break;
+						}
+						else if ((nOfValidTracks[col] == 1) && (value < minValue))
+						{
+							tmpCol = col;
+							minValue = value;
+						}
+					}
+				}
+
+				if (allSinglyValidated)
+				{
+					assignment[row] = static_cast<int>(tmpCol);
+					cost += minValue;
+					for (size_t n = 0; n < nOfRows; n++)
+					{
+						distMatrix[n + nOfRows*tmpCol] = std::numeric_limits<track_t>::max();
+					}
+					for (size_t n = 0; n < nOfColumns; n++)
+					{
+						distMatrix[row + nOfRows*n] = std::numeric_limits<track_t>::max();
+					}
+				}
+			}
+		}
+
+		/* for each multiply validated observation that validates only with singly */
+		/* validated tracks, choose the track with minimum distance */
+		for (size_t col = 0; col < nOfColumns; col++)
+		{
+			if (nOfValidTracks[col] > 1)
+			{
+				bool allSinglyValidated = true;
+				track_t minValue = std::numeric_limits<track_t>::max();
+				size_t tmpRow = 0;
+				for (size_t row = 0; row < nOfRows; row++)
+				{
+					const track_t value = distMatrix[row + nOfRows*col];
+					if (value != std::numeric_limits<track_t>::max())
+					{
+						if (nOfValidObservations[row] > 1)
+						{
+							allSinglyValidated = false;
+							break;
+						}
+						else if ((nOfValidObservations[row] == 1) && (value < minValue))
+						{
+							tmpRow = row;
+							minValue = value;
+						}
+					}
+				}
+
+				if (allSinglyValidated)
+				{
+					assignment[tmpRow] = static_cast<int>(col);
+					cost += minValue;
+					for (size_t n = 0; n < nOfRows; n++)
+					{
+						distMatrix[n + nOfRows*col] = std::numeric_limits<track_t>::max();
+					}
+					for (size_t n = 0; n < nOfColumns; n++)
+					{
+						distMatrix[tmpRow + nOfRows*n] = std::numeric_limits<track_t>::max();
+					}
+				}
+			}
+		}
+	} /* if(infiniteValueFound) */
+
+	/* now, iteratively search for the minimum element and do the assignment */
+	for (;;)
+	{
+		/* find minimum distance observation-to-track pair */
+		track_t minValue = std::numeric_limits<track_t>::max();
+		size_t tmpRow = 0;
+		size_t tmpCol = 0;
+		for (size_t row = 0; row < nOfRows; row++)
+		{
+			for (size_t col = 0; col < nOfColumns; col++)
+			{
+				const track_t value = distMatrix[row + nOfRows*col];
+				if (value != std::numeric_limits<track_t>::max() && (value < minValue))
+				{
+					minValue = value;
+					tmpRow = row;
+					tmpCol = col;
+				}
+			}
+		}
+
+		if (minValue == std::numeric_limits<track_t>::max())
+		{
+			break; /* no assignable pair left */
+		}
+
+		assignment[tmpRow] = static_cast<int>(tmpCol);
+		cost += minValue;
+		/* forbid the assigned column and row for further use */
+		for (size_t n = 0; n < nOfRows; n++)
+		{
+			distMatrix[n + nOfRows*tmpCol] = std::numeric_limits<track_t>::max();
+		}
+		for (size_t n = 0; n < nOfColumns; n++)
+		{
+			distMatrix[tmpRow + nOfRows*n] = std::numeric_limits<track_t>::max();
+		}
+	}
+}

+ 39 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/HungarianAlg.h

@@ -0,0 +1,39 @@
+#include <vector>
+#include <iostream>
+#include <limits>
+#include <time.h>
+#include "defines.h"
+// http://community.topcoder.com/tc?module=Static&d1=tutorials&d2=hungarianAlgorithm
+
+///
+/// \brief The AssignmentProblemSolver class
+///
+class AssignmentProblemSolver
+{
+private:
+	// Computes the optimal assignment (minimum overall costs) using Munkres algorithm.
+	void assignmentoptimal(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns);
+	void buildassignmentvector(assignments_t& assignment, bool *starMatrix, size_t nOfRows, size_t nOfColumns);
+	void computeassignmentcost(const assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows);
+	void step2a(assignments_t& assignment, track_t *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim);
+	void step2b(assignments_t& assignment, track_t *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim);
+	void step3_5(assignments_t& assignment, track_t *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim);
+	void step4(assignments_t& assignment, track_t *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim, size_t row, size_t col);
+
+	// Computes a suboptimal (greedy) solution. Good for cases with many forbidden assignments.
+	void assignmentsuboptimal1(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns);
+	// Computes a suboptimal (greedy) solution. Good for cases without forbidden assignments.
+	void assignmentsuboptimal2(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns);
+
+public:
+	enum TMethod
+	{
+		optimal,                       // exact Munkres algorithm
+		many_forbidden_assignments,    // greedy, uses assignmentsuboptimal1
+		without_forbidden_assignments  // greedy, uses assignmentsuboptimal2
+	};
+
+	AssignmentProblemSolver();
+	~AssignmentProblemSolver();
+	track_t Solve(const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns, assignments_t& assignment, TMethod Method = optimal);
+};

+ 873 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Kalman.cpp

@@ -0,0 +1,873 @@
+#include "Kalman.h"
+#include <iostream>
+#include <vector>
+
+//---------------------------------------------------------------------------
+// Constructs the filter shell; the underlying cv::KalmanFilter is configured
+// later by one of the Create* methods (m_initialized stays false until then).
+// The usable prediction step range is [deltaTime, 2*deltaTime], discretized
+// into m_deltaStepsCount increments of m_deltaStep.
+TKalmanFilter::TKalmanFilter(
+        tracking::KalmanType type,
+	    bool useAcceleration,
+        track_t deltaTime, // time increment (lower values make the target more "massive")
+        track_t accelNoiseMag // process noise magnitude (how much the target can accelerate)
+        )
+    :
+      m_type(type),
+      m_initialized(false),
+      m_deltaTime(deltaTime),
+      m_deltaTimeMin(deltaTime),
+      m_deltaTimeMax(2 * deltaTime),
+      m_lastDist(0),
+      m_accelNoiseMag(accelNoiseMag),
+	  m_useAcceleration(useAcceleration)
+{
+    m_deltaStep = (m_deltaTimeMax - m_deltaTimeMin) / m_deltaStepsCount;
+}
+
+//---------------------------------------------------------------------------
+// Initializes a constant-velocity linear Kalman filter for a 2D point target.
+// State: (x, y, vx, vy); measurement: (x, y).
+// xy0 - initial position, xyv0 - initial velocity estimate.
+void TKalmanFilter::CreateLinear(cv::Point3f xy0, cv::Point3f xyv0)
+{
+    // We don't know acceleration, so, assume it to process noise.
+    // But we can guess the range of acceleration values which can be achieved by tracked object.
+    // Process noise. (standard deviation of acceleration: m/s^2)
+    // shows, how much target can accelerate.
+
+    // 4 state variables, 2 measurements
+    m_linearKalman.init(4, 2, 0, El_t);
+    // Transition cv::Matrix: x += vx*dt, y += vy*dt, velocity constant
+    m_linearKalman.transitionMatrix = (cv::Mat_<track_t>(4, 4) <<
+                                        1, 0, m_deltaTime, 0,
+                                        0, 1, 0, m_deltaTime,
+                                        0, 0, 1, 0,
+                                        0, 0, 0, 1);
+
+    // init...
+    m_lastPointResult = xy0;
+    m_linearKalman.statePre.at<track_t>(0) = xy0.x;  // x
+    m_linearKalman.statePre.at<track_t>(1) = xy0.y;  // y
+    m_linearKalman.statePre.at<track_t>(2) = xyv0.x; // vx
+    m_linearKalman.statePre.at<track_t>(3) = xyv0.y; // vy
+
+    m_linearKalman.statePost.at<track_t>(0) = xy0.x;
+    m_linearKalman.statePost.at<track_t>(1) = xy0.y;
+    m_linearKalman.statePost.at<track_t>(2) = xyv0.x;
+    m_linearKalman.statePost.at<track_t>(3) = xyv0.y;
+
+    cv::setIdentity(m_linearKalman.measurementMatrix);
+
+    // Discrete white-noise acceleration model: Q built from powers of dt
+    m_linearKalman.processNoiseCov = (cv::Mat_<track_t>(4, 4) <<
+                                       pow(m_deltaTime,4.0)/4.0	,0						,pow(m_deltaTime,3.0)/2.0		,0,
+                                       0						,pow(m_deltaTime,4.0)/4.0	,0							,pow(m_deltaTime,3.0)/2.0,
+                                       pow(m_deltaTime,3.0)/2.0	,0						,pow(m_deltaTime,2.0)			,0,
+                                       0						,pow(m_deltaTime,3.0)/2.0	,0							,pow(m_deltaTime,2.0));
+
+
+    m_linearKalman.processNoiseCov *= m_accelNoiseMag;
+
+    cv::setIdentity(m_linearKalman.measurementNoiseCov, cv::Scalar::all(0.1));
+
+    cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1));
+
+    m_initialized = true;
+}
+//---------------------------------------------------------------------------
+// Initializes a constant-velocity linear Kalman filter for a 2D bounding box.
+// State: (x, y, width, height, vx, vy, vw, vh); measurement: (x, y, width, height).
+// rect0 - initial box, rectv0 - initial (x, y) velocity estimate.
+void TKalmanFilter::CreateLinear(cv::Rect_<track_t> rect0, Point_t rectv0)
+{
+    // We don't know acceleration, so, assume it to process noise.
+    // But we can guess the range of acceleration values which can be achieved by tracked object.
+    // Process noise. (standard deviation of acceleration: m/s^2)
+    // shows, how much target can accelerate.
+
+    // 8 state variables (x, y, width, height, vx, vy, vw, vh), 4 measurements (x, y, width, height)
+    m_linearKalman.init(8, 4, 0, El_t);
+    // Transition cv::Matrix
+    m_linearKalman.transitionMatrix = (cv::Mat_<track_t>(8, 8) <<
+                                        1, 0, 0, 0, m_deltaTime, 0,           0,           0,
+                                        0, 1, 0, 0, 0,           m_deltaTime, 0,           0,
+                                        0, 0, 1, 0, 0,           0,           m_deltaTime, 0,
+                                        0, 0, 0, 1, 0,           0,           0,           m_deltaTime,
+                                        0, 0, 0, 0, 1,           0,           0,           0,
+                                        0, 0, 0, 0, 0,           1,           0,           0,
+                                        0, 0, 0, 0, 0,           0,           1,           0,
+                                        0, 0, 0, 0, 0,           0,           0,           1);
+
+    // init...
+    m_linearKalman.statePre.at<track_t>(0) = rect0.x;      // x
+    m_linearKalman.statePre.at<track_t>(1) = rect0.y;      // y
+    m_linearKalman.statePre.at<track_t>(2) = rect0.width;  // width
+    m_linearKalman.statePre.at<track_t>(3) = rect0.height; // height
+    m_linearKalman.statePre.at<track_t>(4) = rectv0.x;     // dx
+    m_linearKalman.statePre.at<track_t>(5) = rectv0.y;     // dy
+    m_linearKalman.statePre.at<track_t>(6) = 0;            // dw
+    m_linearKalman.statePre.at<track_t>(7) = 0;            // dh
+
+    m_linearKalman.statePost.at<track_t>(0) = rect0.x;
+    m_linearKalman.statePost.at<track_t>(1) = rect0.y;
+    m_linearKalman.statePost.at<track_t>(2) = rect0.width;
+    m_linearKalman.statePost.at<track_t>(3) = rect0.height;
+    m_linearKalman.statePost.at<track_t>(4) = rectv0.x;
+    m_linearKalman.statePost.at<track_t>(5) = rectv0.y;
+    m_linearKalman.statePost.at<track_t>(6) = 0;
+    m_linearKalman.statePost.at<track_t>(7) = 0;
+
+    cv::setIdentity(m_linearKalman.measurementMatrix);
+
+    // Discrete white-noise acceleration model coefficients
+    track_t n1 = pow(m_deltaTime, 4.f) / 4.f;
+    track_t n2 = pow(m_deltaTime, 3.f) / 2.f;
+    track_t n3 = pow(m_deltaTime, 2.f);
+    m_linearKalman.processNoiseCov = (cv::Mat_<track_t>(8, 8) <<
+                                       n1, 0,  0,  0,  n2, 0,  0,  0,
+                                       0,  n1, 0,  0,  0,  n2, 0,  0,
+                                       0,  0,  n1, 0,  0,  0,  n2, 0,
+                                       0,  0,  0,  n1, 0,  0,  0,  n2,
+                                       n2, 0,  0,  0,  n3, 0,  0,  0,
+                                       0,  n2, 0,  0,  0,  n3, 0,  0,
+                                       0,  0,  n2, 0,  0,  0,  n3, 0,
+                                       0,  0,  0,  n2, 0,  0,  0,  n3);
+
+    m_linearKalman.processNoiseCov *= m_accelNoiseMag;
+
+    cv::setIdentity(m_linearKalman.measurementNoiseCov, cv::Scalar::all(0.1));
+
+    cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1));
+
+    m_initialized = true;
+}
+//---------------------------------------------------------------------------
+// Initializes a constant-velocity linear Kalman filter for a 3D oriented box.
+// State: (x, y, z, width, height, length, yaw) plus their 7 rates;
+// measurement: (x, y, z, width, height, length, yaw).
+// rect0 - initial 3D box, rectv0 - initial (x, y) velocity estimate.
+void TKalmanFilter::CreateLinear3D(Rect3D rect0, cv::Point3f rectv0)
+{
+    // We don't know acceleration, so, assume it to process noise.
+    // But we can guess the range of acceleration values which can be achieved by tracked object.
+    // Process noise. (standard deviation of acceleration: m/s^2)
+    // shows, how much target can accelerate.
+
+    // 14 state variables (x, y, z, width, height, length, yaw, d...), 7 measurements (x, y, z, width, height, length, yaw)
+    m_linearKalman.init(14, 7, 0, El_t);
+    // Transition cv::Matrix
+    m_linearKalman.transitionMatrix = (cv::Mat_<track_t>(14, 14) <<
+                                        1, 0, 0, 0, 0, 0, 0, m_deltaTime, 0,           0,           0,           0,           0,           0,
+                                        0, 1, 0, 0, 0, 0, 0, 0,           m_deltaTime, 0,           0,           0,           0,           0,
+                                        0, 0, 1, 0, 0, 0, 0, 0,           0,           m_deltaTime, 0,           0,           0,           0,
+                                        0, 0, 0, 1, 0, 0, 0, 0,           0,           0,           m_deltaTime, 0,           0,           0,
+                                        0, 0, 0, 0, 1, 0, 0, 0,           0,           0,           0,           m_deltaTime, 0,           0,
+                                        0, 0, 0, 0, 0, 1, 0, 0,           0,           0,           0,           0,           m_deltaTime, 0,
+                                        0, 0, 0, 0, 0, 0, 1, 0,           0,           0,           0,           0,           0,           m_deltaTime,
+                                        0, 0, 0, 0, 0, 0, 0, 1,           0,           0,           0,           0,           0,           0,
+                                        0, 0, 0, 0, 0, 0, 0, 0,           1,           0,           0,           0,           0,           0,
+                                        0, 0, 0, 0, 0, 0, 0, 0,           0,           1,           0,           0,           0,           0,
+                                        0, 0, 0, 0, 0, 0, 0, 0,           0,           0,           1,           0,           0,           0,
+                                        0, 0, 0, 0, 0, 0, 0, 0,           0,           0,           0,           1,           0,           0,
+                                        0, 0, 0, 0, 0, 0, 0, 0,           0,           0,           0,           0,           1,           0,
+                                        0, 0, 0, 0, 0, 0, 0, 0,           0,           0,           0,           0,           0,           1);
+
+    // init...
+    m_linearKalman.statePre.at<track_t>(0) = rect0.center.x;      // x
+    m_linearKalman.statePre.at<track_t>(1) = rect0.center.y;      // y
+    m_linearKalman.statePre.at<track_t>(2) = rect0.center.z;      // z
+    m_linearKalman.statePre.at<track_t>(3) = rect0.size.width;  // width
+    m_linearKalman.statePre.at<track_t>(4) = rect0.size.height; // height
+    m_linearKalman.statePre.at<track_t>(5) = rect0.size.length; // length
+    m_linearKalman.statePre.at<track_t>(6) = rect0.yaw; // yaw
+    m_linearKalman.statePre.at<track_t>(7) = rectv0.x;     // dx
+    m_linearKalman.statePre.at<track_t>(8) = rectv0.y;     // dy
+    m_linearKalman.statePre.at<track_t>(9) = 0;            // dz
+    m_linearKalman.statePre.at<track_t>(10) = 0;            // dw
+    m_linearKalman.statePre.at<track_t>(11) = 0;            // dh
+    m_linearKalman.statePre.at<track_t>(12) = 0;            // dl
+    m_linearKalman.statePre.at<track_t>(13) = 0;            // dyaw
+
+    m_linearKalman.statePost.at<track_t>(0) = rect0.center.x;
+    m_linearKalman.statePost.at<track_t>(1) = rect0.center.y;
+    m_linearKalman.statePost.at<track_t>(2) = rect0.center.z;
+    m_linearKalman.statePost.at<track_t>(3) = rect0.size.width;
+    m_linearKalman.statePost.at<track_t>(4) = rect0.size.height;
+    m_linearKalman.statePost.at<track_t>(5) = rect0.size.length;
+    m_linearKalman.statePost.at<track_t>(6) = rect0.yaw;
+    m_linearKalman.statePost.at<track_t>(7) = rectv0.x;
+    m_linearKalman.statePost.at<track_t>(8) = rectv0.y;
+    m_linearKalman.statePost.at<track_t>(9) = 0;
+    m_linearKalman.statePost.at<track_t>(10) = 0;
+    m_linearKalman.statePost.at<track_t>(11) = 0;
+    m_linearKalman.statePost.at<track_t>(12) = 0;
+    m_linearKalman.statePost.at<track_t>(13) = 0;
+
+    cv::setIdentity(m_linearKalman.measurementMatrix);
+
+    // Discrete white-noise acceleration model coefficients
+    track_t n1 = pow(m_deltaTime, 4.f) / 4.f;
+    track_t n2 = pow(m_deltaTime, 3.f) / 2.f;
+    track_t n3 = pow(m_deltaTime, 2.f);
+    m_linearKalman.processNoiseCov = (cv::Mat_<track_t>(14, 14) <<
+                                       n1, 0,  0,  0,  0,  0,  0,  n2, 0,  0,  0,  0,  0,  0,
+                                       0,  n1, 0,  0,  0,  0,  0,  0,  n2, 0,  0,  0,  0,  0,
+                                       0,  0,  n1, 0,  0,  0,  0,  0,  0,  n2, 0,  0,  0,  0,
+                                       0,  0,  0,  n1, 0,  0,  0,  0,  0,  0,  n2, 0,  0,  0,
+                                       0,  0,  0,  0,  n1, 0,  0,  0,  0,  0,  0,  n2, 0,  0,
+                                       0,  0,  0,  0,  0,  n1, 0,  0,  0,  0,  0,  0,  n2, 0,
+                                       0,  0,  0,  0,  0,  0,  n1, 0,  0,  0,  0,  0,  0,  n2,
+                                       n2, 0,  0,  0,  0,  0,  0,  n3, 0,  0,  0,  0,  0,  0,
+                                       0,  n2, 0,  0,  0,  0,  0,  0,  n3, 0,  0,  0,  0,  0,
+                                       0,  0,  n2, 0,  0,  0,  0,  0,  0,  n3, 0,  0,  0,  0,
+                                       0,  0,  0,  n2, 0,  0,  0,  0,  0,  0,  n3, 0,  0,  0,
+                                       0,  0,  0,  0,  n2, 0,  0,  0,  0,  0,  0,  n3, 0,  0,
+                                       0,  0,  0,  0,  0,  n2, 0,  0,  0,  0,  0,  0,  n3, 0,
+                                       0,  0,  0,  0,  0,  0,  n2, 0,  0,  0,  0,  0,  0,  n3);
+
+    m_linearKalman.processNoiseCov *= m_accelNoiseMag;
+
+    cv::setIdentity(m_linearKalman.measurementNoiseCov, cv::Scalar::all(0.1));
+
+    cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1));
+
+    m_initialized = true;
+}
+
+//---------------------------------------------------------------------------
+// Initializes a constant-acceleration linear Kalman filter for a 2D point.
+// State: (x, y, vx, vy, ax, ay); measurement: (x, y).
+// xy0 - initial position, xyv0 - initial velocity estimate (acceleration starts at 0).
+void TKalmanFilter::CreateLinearAcceleration(cv::Point3f xy0, cv::Point3f xyv0)
+{
+	// 6 state variables, 2 measurements
+	m_linearKalman.init(6, 2, 0, El_t);
+	// Transition cv::Matrix: pos += v*dt + 0.5*a*dt^2, v += a*dt, a = const
+	const track_t dt = m_deltaTime;
+	const track_t dt2 = 0.5f * m_deltaTime * m_deltaTime;
+	m_linearKalman.transitionMatrix = (cv::Mat_<track_t>(6, 6) <<
+		1, 0, dt, 0,  dt2, 0,
+		0, 1, 0,  dt, 0,   dt2,
+		0, 0, 1,  0,  dt,  0,
+		0, 0, 0,  1,  0,   dt,
+	    0, 0, 0,  0,  1,   0,
+	    0, 0, 0,  0,  0,   1);
+
+	// init...
+	m_lastPointResult = xy0;
+	m_linearKalman.statePre.at<track_t>(0) = xy0.x;  // x
+	m_linearKalman.statePre.at<track_t>(1) = xy0.y;  // y
+	m_linearKalman.statePre.at<track_t>(2) = xyv0.x; // vx
+	m_linearKalman.statePre.at<track_t>(3) = xyv0.y; // vy
+	m_linearKalman.statePre.at<track_t>(4) = 0;      // ax
+	m_linearKalman.statePre.at<track_t>(5) = 0;      // ay
+
+	m_linearKalman.statePost.at<track_t>(0) = xy0.x;
+	m_linearKalman.statePost.at<track_t>(1) = xy0.y;
+	m_linearKalman.statePost.at<track_t>(2) = xyv0.x;
+	m_linearKalman.statePost.at<track_t>(3) = xyv0.y;
+	m_linearKalman.statePost.at<track_t>(4) = 0;
+	m_linearKalman.statePost.at<track_t>(5) = 0;
+
+	cv::setIdentity(m_linearKalman.measurementMatrix);
+
+	// Discrete white-noise acceleration model coefficients
+	track_t n1 = pow(m_deltaTime, 4.f) / 4.f;
+	track_t n2 = pow(m_deltaTime, 3.f) / 2.f;
+	track_t n3 = pow(m_deltaTime, 2.f);
+	m_linearKalman.processNoiseCov = (cv::Mat_<track_t>(6, 6) <<
+		n1, 0, n2, 0, n2, 0,
+		0, n1, 0, n2, 0, n2,
+		n2, 0, n3, 0, n3, 0,
+		0, n2, 0, n3, 0, n3,
+		0, 0, n2, 0, n3, 0,
+		0, 0, 0, n2, 0, n3);
+
+	m_linearKalman.processNoiseCov *= m_accelNoiseMag;
+
+	cv::setIdentity(m_linearKalman.measurementNoiseCov, cv::Scalar::all(0.1));
+
+	cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1));
+
+	m_initialized = true;
+}
+//---------------------------------------------------------------------------
+// Initializes a constant-acceleration linear Kalman filter for a 2D bounding box.
+// State: (x, y, width, height, vx, vy, vw, vh, ax, ay, aw, ah);
+// measurement: (x, y, width, height).
+// rect0 - initial box, rectv0 - initial (x, y) velocity estimate.
+// NOTE(review): the previous version fed both 12x12 comma-initializers an
+// incomplete element list (the transition matrix had only 11 rows, missing the
+// identity row for state 10, and rows 0/1 coupled position to the wrong
+// acceleration slots; the process-noise matrix had a short first row), which
+// breaks the cv::Mat_ initializer. Both matrices are completed here,
+// consistent with the 6-state CreateLinearAcceleration overload.
+void TKalmanFilter::CreateLinearAcceleration(cv::Rect_<track_t> rect0, Point_t rectv0)
+{
+    // 12 state variables (x, y, width, height, vx, vy, vw, vh, ax, ay, aw, ah), 4 measurements (x, y, width, height)
+    m_linearKalman.init(12, 4, 0, El_t);
+    // Transition cv::Matrix: pos += v*dt + 0.5*a*dt^2, v += a*dt, a = const
+    const track_t dt = m_deltaTime;
+    const track_t dt2 = 0.5f * m_deltaTime * m_deltaTime;
+    m_linearKalman.transitionMatrix = (cv::Mat_<track_t>(12, 12) <<
+        1, 0, 0, 0, dt, 0,  0,  0,  dt2, 0,   0,   0,
+        0, 1, 0, 0, 0,  dt, 0,  0,  0,   dt2, 0,   0,
+        0, 0, 1, 0, 0,  0,  dt, 0,  0,   0,   dt2, 0,
+        0, 0, 0, 1, 0,  0,  0,  dt, 0,   0,   0,   dt2,
+        0, 0, 0, 0, 1,  0,  0,  0,  dt,  0,   0,   0,
+        0, 0, 0, 0, 0,  1,  0,  0,  0,   dt,  0,   0,
+        0, 0, 0, 0, 0,  0,  1,  0,  0,   0,   dt,  0,
+        0, 0, 0, 0, 0,  0,  0,  1,  0,   0,   0,   dt,
+        0, 0, 0, 0, 0,  0,  0,  0,  1,   0,   0,   0,
+        0, 0, 0, 0, 0,  0,  0,  0,  0,   1,   0,   0,
+        0, 0, 0, 0, 0,  0,  0,  0,  0,   0,   1,   0,
+        0, 0, 0, 0, 0,  0,  0,  0,  0,   0,   0,   1);
+
+    // init...
+    m_linearKalman.statePre.at<track_t>(0) = rect0.x;      // x
+    m_linearKalman.statePre.at<track_t>(1) = rect0.y;      // y
+    m_linearKalman.statePre.at<track_t>(2) = rect0.width;  // width
+    m_linearKalman.statePre.at<track_t>(3) = rect0.height; // height
+    m_linearKalman.statePre.at<track_t>(4) = rectv0.x;     // dx
+    m_linearKalman.statePre.at<track_t>(5) = rectv0.y;     // dy
+    m_linearKalman.statePre.at<track_t>(6) = 0;            // dw
+    m_linearKalman.statePre.at<track_t>(7) = 0;            // dh
+    m_linearKalman.statePre.at<track_t>(8) = 0;            // ax
+    m_linearKalman.statePre.at<track_t>(9) = 0;            // ay
+    m_linearKalman.statePre.at<track_t>(10) = 0;           // aw
+    m_linearKalman.statePre.at<track_t>(11) = 0;           // ah
+
+    m_linearKalman.statePost.at<track_t>(0) = rect0.x;
+    m_linearKalman.statePost.at<track_t>(1) = rect0.y;
+    m_linearKalman.statePost.at<track_t>(2) = rect0.width;
+    m_linearKalman.statePost.at<track_t>(3) = rect0.height;
+    m_linearKalman.statePost.at<track_t>(4) = rectv0.x;
+    m_linearKalman.statePost.at<track_t>(5) = rectv0.y;
+    m_linearKalman.statePost.at<track_t>(6) = 0;
+    m_linearKalman.statePost.at<track_t>(7) = 0;
+    m_linearKalman.statePost.at<track_t>(8) = 0;
+    m_linearKalman.statePost.at<track_t>(9) = 0;
+    m_linearKalman.statePost.at<track_t>(10) = 0;
+    m_linearKalman.statePost.at<track_t>(11) = 0;
+
+    cv::setIdentity(m_linearKalman.measurementMatrix);
+
+    // Discrete white-noise acceleration model coefficients
+    track_t n1 = pow(m_deltaTime, 4.f) / 4.f;
+    track_t n2 = pow(m_deltaTime, 3.f) / 2.f;
+    track_t n3 = pow(m_deltaTime, 2.f);
+    // Process noise for the (position, velocity, acceleration) chain of each
+    // coordinate, mirroring the 6-state overload's block pattern.
+    m_linearKalman.processNoiseCov = (cv::Mat_<track_t>(12, 12) <<
+        n1, 0,  0,  0,  n2, 0,  0,  0,  n2, 0,  0,  0,
+        0,  n1, 0,  0,  0,  n2, 0,  0,  0,  n2, 0,  0,
+        0,  0,  n1, 0,  0,  0,  n2, 0,  0,  0,  n2, 0,
+        0,  0,  0,  n1, 0,  0,  0,  n2, 0,  0,  0,  n2,
+        n2, 0,  0,  0,  n3, 0,  0,  0,  n3, 0,  0,  0,
+        0,  n2, 0,  0,  0,  n3, 0,  0,  0,  n3, 0,  0,
+        0,  0,  n2, 0,  0,  0,  n3, 0,  0,  0,  n3, 0,
+        0,  0,  0,  n2, 0,  0,  0,  n3, 0,  0,  0,  n3,
+        n2, 0,  0,  0,  n3, 0,  0,  0,  n3, 0,  0,  0,
+        0,  n2, 0,  0,  0,  n3, 0,  0,  0,  n3, 0,  0,
+        0,  0,  n2, 0,  0,  0,  n3, 0,  0,  0,  n3, 0,
+        0,  0,  0,  n2, 0,  0,  0,  n3, 0,  0,  0,  n3);
+
+    m_linearKalman.processNoiseCov *= m_accelNoiseMag;
+
+    cv::setIdentity(m_linearKalman.measurementNoiseCov, cv::Scalar::all(0.1));
+
+    cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1));
+
+    m_initialized = true;
+}
+//---------------------------------------------------------------------------
+void TKalmanFilter::CreateLinearAcceleration3D(Rect3D rect0, cv::Point3f rectv0)
+{
+    // Constant-acceleration model for the 7-component 3D box measurement
+    // (x, y, z, width, height, length, yaw).
+    //
+    // State vector (21): [pose(7), velocity(7), acceleration(7)], so the
+    // indices used elsewhere stay valid:
+    //   * Update(Rect3D, bool) / GetRect3DPrediction() read pose at 0..6;
+    //   * GetVelocity() reads vx, vy at 7, 8 (statePre.rows > 8 branch).
+    //
+    // BUGFIX: the previous version initialized a 12-state / 4-measurement
+    // filter (copied from the 2D rect model) although Update(Rect3D, bool)
+    // builds a 7x1 measurement vector, and its 12x12 matrix literals were
+    // malformed (11 rows / 11-element rows), so correct() could never work.
+    const int kStateSize = 21;
+    const int kMeasSize = 7;
+    m_linearKalman.init(kStateSize, kMeasSize, 0, El_t);
+
+    const track_t dt = m_deltaTime;
+    const track_t dt2 = 0.5f * m_deltaTime * m_deltaTime;
+
+    // Transition matrix: p' = p + v*dt + a*dt^2/2;  v' = v + a*dt;  a' = a.
+    m_linearKalman.transitionMatrix = cv::Mat::eye(kStateSize, kStateSize, El_t);
+    for (int i = 0; i < kMeasSize; ++i)
+    {
+        m_linearKalman.transitionMatrix.at<track_t>(i, i + kMeasSize) = dt;
+        m_linearKalman.transitionMatrix.at<track_t>(i, i + 2 * kMeasSize) = dt2;
+        m_linearKalman.transitionMatrix.at<track_t>(i + kMeasSize, i + 2 * kMeasSize) = dt;
+    }
+
+    // Initial state: measured pose, regression-estimated planar velocity,
+    // zero for the remaining velocities and all accelerations.
+    m_linearKalman.statePre = cv::Mat::zeros(kStateSize, 1, El_t);
+    m_linearKalman.statePre.at<track_t>(0) = rect0.center.x;      // x
+    m_linearKalman.statePre.at<track_t>(1) = rect0.center.y;      // y
+    m_linearKalman.statePre.at<track_t>(2) = rect0.center.z;      // z
+    m_linearKalman.statePre.at<track_t>(3) = rect0.size.width;    // width
+    m_linearKalman.statePre.at<track_t>(4) = rect0.size.height;   // height
+    m_linearKalman.statePre.at<track_t>(5) = rect0.size.length;   // length
+    m_linearKalman.statePre.at<track_t>(6) = rect0.yaw;           // yaw
+    m_linearKalman.statePre.at<track_t>(7) = rectv0.x;            // vx
+    m_linearKalman.statePre.at<track_t>(8) = rectv0.y;            // vy
+    m_linearKalman.statePost = m_linearKalman.statePre.clone();
+
+    // Measurement picks the 7 pose components (7x21 identity prefix).
+    cv::setIdentity(m_linearKalman.measurementMatrix);
+
+    // Discrete white-noise acceleration covariance terms.
+    track_t n1 = pow(m_deltaTime, 4.f) / 4.f;
+    track_t n2 = pow(m_deltaTime, 3.f) / 2.f;
+    track_t n3 = pow(m_deltaTime, 2.f);
+    m_linearKalman.processNoiseCov = cv::Mat::zeros(kStateSize, kStateSize, El_t);
+    for (int i = 0; i < kMeasSize; ++i)
+    {
+        m_linearKalman.processNoiseCov.at<track_t>(i, i) = n1;
+        m_linearKalman.processNoiseCov.at<track_t>(i, i + kMeasSize) = n2;
+        m_linearKalman.processNoiseCov.at<track_t>(i + kMeasSize, i) = n2;
+        m_linearKalman.processNoiseCov.at<track_t>(i + kMeasSize, i + kMeasSize) = n3;
+        m_linearKalman.processNoiseCov.at<track_t>(i + 2 * kMeasSize, i + 2 * kMeasSize) = 1;
+    }
+    m_linearKalman.processNoiseCov *= m_accelNoiseMag;
+
+    cv::setIdentity(m_linearKalman.measurementNoiseCov, cv::Scalar::all(0.1));
+
+    cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1));
+
+    m_initialized = true;
+}
+
+//---------------------------------------------------------------------------
+cv::Point3f TKalmanFilter::GetPointPrediction()
+{
+    // Advance the filter one step and cache the predicted point. Before
+    // initialization the last cached value is returned unchanged.
+    if (!m_initialized)
+        return m_lastPointResult;
+
+    cv::Mat prediction;
+    switch (m_type)
+    {
+    case tracking::KalmanLinear:
+        prediction = m_linearKalman.predict();
+        break;
+    }
+
+    m_lastPointResult = cv::Point3f(prediction.at<track_t>(0),
+                                    prediction.at<track_t>(1),
+                                    prediction.at<track_t>(2));
+    return m_lastPointResult;
+}
+
+//---------------------------------------------------------------------------
+cv::Point3f TKalmanFilter::Update(cv::Point3f pt, bool dataCorrect)
+{
+    // Fuses a new point measurement into the filter.
+    //   pt          - measured position; only x and y are corrected, z is
+    //                 carried over from the last result.
+    //   dataCorrect - true when pt is a real measurement; false re-feeds the
+    //                 filter's own last estimate (coasting).
+    // Returns the current best position estimate.
+    if (!m_initialized)
+    {
+        // Bootstrap phase: collect MIN_INIT_VALS measurements, then create
+        // the filter from a linear regression over these initial points.
+        if (m_initialPoints.size() < MIN_INIT_VALS)
+        {
+            if (dataCorrect)
+            {
+                m_initialPoints.push_back(pt);
+                m_lastPointResult = pt;
+            }
+        }
+        if (m_initialPoints.size() == MIN_INIT_VALS)
+        {
+            track_t kx = 0;
+            track_t bx = 0;
+            track_t ky = 0;
+            track_t by = 0;
+            get_lin_regress_params(m_initialPoints, 0, MIN_INIT_VALS, kx, bx, ky, by);//predict p,v
+            // Initial position extrapolated to the newest sample; initial
+            // velocity taken from the regression slopes.
+            cv::Point3f xy0(kx * (MIN_INIT_VALS - 1) + bx, ky * (MIN_INIT_VALS - 1) + by, m_lastPointResult.z);
+            cv::Point3f xyv0(kx, ky,0);
+
+            switch (m_type)
+            {
+            case tracking::KalmanLinear:
+                if (m_useAcceleration)
+					CreateLinearAcceleration(xy0, xyv0);
+				else
+					CreateLinear(xy0, xyv0);
+                break;
+            }
+            m_lastDist = 0;
+        }
+    }
+
+    if (m_initialized)
+    {
+        // Only x and y are measured (2x1 measurement vector).
+        cv::Mat measurement(2, 1, Mat_t(1));
+        if (!dataCorrect)
+        {
+            measurement.at<track_t>(0) = m_lastPointResult.x;  //update using prediction
+            measurement.at<track_t>(1) = m_lastPointResult.y;
+        }
+        else
+        {
+            measurement.at<track_t>(0) = pt.x;  //update using measurements
+            measurement.at<track_t>(1) = pt.y;
+        }
+        // Correction
+        cv::Mat estimated;
+        switch (m_type)
+        {
+        case tracking::KalmanLinear:
+        {
+            estimated = m_linearKalman.correct(measurement);
+
+            // Inertia correction: adapt the effective time step — a growing
+            // residual enlarges dt (react faster), a shrinking one reduces it.
+			if (!m_useAcceleration)
+			{
+				track_t currDist = sqrtf(sqr(estimated.at<track_t>(0) - pt.x) + sqr(estimated.at<track_t>(1) - pt.y));
+				if (currDist > m_lastDist)
+				{
+					m_deltaTime = std::min(m_deltaTime + m_deltaStep, m_deltaTimeMax);
+				}
+				else
+				{
+					m_deltaTime = std::max(m_deltaTime - m_deltaStep, m_deltaTimeMin);
+				}
+				m_lastDist = currDist;
+
+				// NOTE(review): assumes the constant-velocity point model has
+				// its dt entries at (0,2)/(1,3) — confirm against CreateLinear().
+				m_linearKalman.transitionMatrix.at<track_t>(0, 2) = m_deltaTime;
+				m_linearKalman.transitionMatrix.at<track_t>(1, 3) = m_deltaTime;
+			}
+            break;
+        }
+        }
+
+        m_lastPointResult.x = estimated.at<track_t>(0);   //update using measurements
+        m_lastPointResult.y = estimated.at<track_t>(1);
+    }
+    else
+    {
+        // Filter not created yet: just echo the measurement.
+        if (dataCorrect)
+        {
+            m_lastPointResult = pt;
+        }
+    }
+    return m_lastPointResult;
+}
+//---------------------------------------------------------------------------
+cv::Rect TKalmanFilter::GetRectPrediction()
+{
+    // Advance the filter one step and cache the predicted rectangle.
+    // Before initialization the last cached value is returned unchanged.
+    if (m_initialized)
+    {
+        cv::Mat prediction;
+        switch (m_type)
+        {
+        case tracking::KalmanLinear:
+            prediction = m_linearKalman.predict();
+            break;
+        }
+        m_lastRectResult.x = prediction.at<track_t>(0);
+        m_lastRectResult.y = prediction.at<track_t>(1);
+        m_lastRectResult.width = prediction.at<track_t>(2);
+        m_lastRectResult.height = prediction.at<track_t>(3);
+    }
+    return cv::Rect(static_cast<int>(m_lastRectResult.x),
+                    static_cast<int>(m_lastRectResult.y),
+                    static_cast<int>(m_lastRectResult.width),
+                    static_cast<int>(m_lastRectResult.height));
+}
+
+//---------------------------------------------------------------------------
+cv::Rect TKalmanFilter::Update(cv::Rect rect, bool dataCorrect)
+{
+    // Fuses a new rectangle measurement (x, y, width, height).
+    //   dataCorrect - true when rect is a real measurement; false re-feeds
+    //                 the last estimate (coasting).
+    // Returns the current best rectangle estimate, truncated to int.
+    if (!m_initialized)
+    {
+        // Bootstrap phase: collect MIN_INIT_VALS rectangles, then create the
+        // filter from a regression of positions and the average size.
+        if (m_initialRects.size() < MIN_INIT_VALS)
+        {
+            if (dataCorrect)
+            {
+                m_initialRects.push_back(rect);
+                m_lastRectResult.x = static_cast<track_t>(rect.x);
+                m_lastRectResult.y = static_cast<track_t>(rect.y);
+                m_lastRectResult.width = static_cast<track_t>(rect.width);
+                m_lastRectResult.height = static_cast<track_t>(rect.height);
+            }
+        }
+        if (m_initialRects.size() == MIN_INIT_VALS)
+        {
+            std::vector<Point_t> initialPoints;
+            Point_t averageSize(0, 0);
+            for (const auto& r : m_initialRects)
+            {
+                initialPoints.emplace_back(static_cast<track_t>(r.x), static_cast<track_t>(r.y));
+                averageSize.x += r.width;
+                averageSize.y += r.height;
+            }
+            averageSize.x /= MIN_INIT_VALS;
+            averageSize.y /= MIN_INIT_VALS;
+
+            track_t kx = 0;
+            track_t bx = 0;
+            track_t ky = 0;
+            track_t by = 0;
+            get_lin_regress_params(initialPoints, 0, MIN_INIT_VALS, kx, bx, ky, by);
+            // Position extrapolated to the newest sample; velocity from the
+            // regression slopes; size from the average over the samples.
+            cv::Rect_<track_t> rect0(kx * (MIN_INIT_VALS - 1) + bx, ky * (MIN_INIT_VALS - 1) + by, averageSize.x, averageSize.y);
+            Point_t rectv0(kx, ky);
+
+            switch (m_type)
+            {
+            case tracking::KalmanLinear:
+                if (m_useAcceleration)
+                    CreateLinearAcceleration(rect0, rectv0);
+                else
+                    CreateLinear(rect0, rectv0);
+                break;
+            }
+        }
+    }
+
+    if (m_initialized)
+    {
+        // 4x1 measurement vector: x, y, width, height.
+        cv::Mat measurement(4, 1, Mat_t(1));
+        if (!dataCorrect)
+        {
+            measurement.at<track_t>(0) = m_lastRectResult.x;  // update using prediction
+            measurement.at<track_t>(1) = m_lastRectResult.y;
+            measurement.at<track_t>(2) = m_lastRectResult.width;
+            measurement.at<track_t>(3) = m_lastRectResult.height;
+        }
+        else
+        {
+            measurement.at<track_t>(0) = static_cast<track_t>(rect.x);  // update using measurements
+            measurement.at<track_t>(1) = static_cast<track_t>(rect.y);
+            measurement.at<track_t>(2) = static_cast<track_t>(rect.width);
+            measurement.at<track_t>(3) = static_cast<track_t>(rect.height);
+        }
+        // Correction
+        cv::Mat estimated;
+        switch (m_type)
+        {
+        case tracking::KalmanLinear:
+        {
+            estimated = m_linearKalman.correct(measurement);
+
+            m_lastRectResult.x = estimated.at<track_t>(0);   //update using measurements
+            m_lastRectResult.y = estimated.at<track_t>(1);
+            m_lastRectResult.width = estimated.at<track_t>(2);
+            m_lastRectResult.height = estimated.at<track_t>(3);
+
+            // Inertia correction: adapt the effective time step — a growing
+            // residual enlarges dt, a shrinking one reduces it.
+            if (!m_useAcceleration)
+            {
+                track_t currDist = sqrtf(sqr(estimated.at<track_t>(0) - rect.x) + sqr(estimated.at<track_t>(1) - rect.y) + sqr(estimated.at<track_t>(2) - rect.width) + sqr(estimated.at<track_t>(3) - rect.height));
+                if (currDist > m_lastDist)
+                {
+                    m_deltaTime = std::min(m_deltaTime + m_deltaStep, m_deltaTimeMax);
+                }
+                else
+                {
+                    m_deltaTime = std::max(m_deltaTime - m_deltaStep, m_deltaTimeMin);
+                }
+                m_lastDist = currDist;
+
+                // NOTE(review): assumes the constant-velocity rect model has
+                // dt entries at (i, i+4) — confirm against CreateLinear().
+                m_linearKalman.transitionMatrix.at<track_t>(0, 4) = m_deltaTime;
+                m_linearKalman.transitionMatrix.at<track_t>(1, 5) = m_deltaTime;
+                m_linearKalman.transitionMatrix.at<track_t>(2, 6) = m_deltaTime;
+                m_linearKalman.transitionMatrix.at<track_t>(3, 7) = m_deltaTime;
+            }
+            break;
+        }
+        }
+    }
+    else
+    {
+        // Filter not created yet: just echo the measurement.
+        if (dataCorrect)
+        {
+            m_lastRectResult.x = static_cast<track_t>(rect.x);
+            m_lastRectResult.y = static_cast<track_t>(rect.y);
+            m_lastRectResult.width = static_cast<track_t>(rect.width);
+            m_lastRectResult.height = static_cast<track_t>(rect.height);
+        }
+    }
+    return cv::Rect(static_cast<int>(m_lastRectResult.x), static_cast<int>(m_lastRectResult.y), static_cast<int>(m_lastRectResult.width), static_cast<int>(m_lastRectResult.height));
+}
+//---------------------------------------------------------------------------
+Rect3D TKalmanFilter::GetRect3DPrediction()
+{
+    // One prediction step for the 3D box; the result is cached so callers
+    // still get the last known estimate before initialization.
+    if (m_initialized)
+    {
+        cv::Mat prediction;
+        switch (m_type)
+        {
+        case tracking::KalmanLinear:
+            prediction = m_linearKalman.predict();
+            break;
+        }
+        const cv::Point3f center(prediction.at<track_t>(0),
+                                 prediction.at<track_t>(1),
+                                 prediction.at<track_t>(2));
+        const Size3D size(prediction.at<track_t>(3),
+                          prediction.at<track_t>(4),
+                          prediction.at<track_t>(5));
+        m_lastRect3DResult = Rect3D(center, size, prediction.at<track_t>(6));
+    }
+    return m_lastRect3DResult;
+}
+
+//---------------------------------------------------------------------------
+Rect3D TKalmanFilter::Update(Rect3D rect, bool dataCorrect)
+{
+    // Fuses a new 3D box measurement (center x/y/z, width, height, length, yaw).
+    //   dataCorrect - true when rect is a real measurement; false re-feeds
+    //                 the last estimate (coasting).
+    // Returns the current best 3D box estimate.
+    if (!m_initialized)
+    {
+        // Bootstrap phase: collect MIN_INIT_VALS boxes, then create the
+        // filter from a regression of the planar positions and the averages
+        // of z, size and yaw.
+        if (m_initialRect3Ds.size() < MIN_INIT_VALS)
+        {
+            if (dataCorrect)
+            {
+                m_initialRect3Ds.push_back(rect);
+                m_lastRect3DResult.center.x = static_cast<track_t>(rect.center.x);
+                m_lastRect3DResult.center.y = static_cast<track_t>(rect.center.y);
+                m_lastRect3DResult.center.z = static_cast<track_t>(rect.center.z);
+                m_lastRect3DResult.size.width = static_cast<track_t>(rect.size.width);
+                m_lastRect3DResult.size.height = static_cast<track_t>(rect.size.height);
+                m_lastRect3DResult.size.length = static_cast<track_t>(rect.size.length);
+                m_lastRect3DResult.yaw = static_cast<track_t>(rect.yaw);
+            }
+        }
+        if (m_initialRect3Ds.size() == MIN_INIT_VALS)
+        {
+            std::vector<Point_t> initialPoints;
+            cv::Point3f averageSize(0, 0, 0);
+            float averageZ = 0;
+            float averageYaw = 0;
+            for (const auto& r : m_initialRect3Ds)
+            {
+                initialPoints.emplace_back(static_cast<track_t>(r.center.x), static_cast<track_t>(r.center.y));
+                averageZ += r.center.z;
+                averageSize.x += r.size.width;
+                averageSize.y += r.size.height;
+                averageSize.z += r.size.length;
+                averageYaw += r.yaw;
+            }
+            averageZ /= MIN_INIT_VALS;
+            averageSize.x /= MIN_INIT_VALS;
+            averageSize.y /= MIN_INIT_VALS;
+            averageSize.z /= MIN_INIT_VALS;
+            averageYaw /= MIN_INIT_VALS;
+
+            track_t kx = 0;
+            track_t bx = 0;
+            track_t ky = 0;
+            track_t by = 0;
+            get_lin_regress_params(initialPoints, 0, MIN_INIT_VALS, kx, bx, ky, by);
+            // Planar position extrapolated to the newest sample; planar
+            // velocity from the regression slopes.
+            Rect3D rect0(cv::Point3f(kx * (MIN_INIT_VALS - 1) + bx, ky * (MIN_INIT_VALS - 1) + by, averageZ), Size3D(averageSize.x, averageSize.y,averageSize.z), averageYaw);
+            cv::Point3f rectv0(kx, ky, 0);
+
+            switch (m_type)
+            {
+            case tracking::KalmanLinear:
+				if (m_useAcceleration)
+                    CreateLinearAcceleration3D(rect0, rectv0);
+				else
+                    CreateLinear3D(rect0, rectv0);
+                break;
+            }
+        }
+    }
+
+    if (m_initialized)
+    {
+        // 7x1 measurement vector: x, y, z, width, height, length, yaw.
+        cv::Mat measurement(7, 1, Mat_t(1));
+        if (!dataCorrect)
+        {
+            measurement.at<track_t>(0) = m_lastRect3DResult.center.x;  // update using prediction
+            measurement.at<track_t>(1) = m_lastRect3DResult.center.y;
+            measurement.at<track_t>(2) = m_lastRect3DResult.center.z;
+            measurement.at<track_t>(3) = m_lastRect3DResult.size.width;
+            measurement.at<track_t>(4) = m_lastRect3DResult.size.height;
+            measurement.at<track_t>(5) = m_lastRect3DResult.size.length;
+            measurement.at<track_t>(6) = m_lastRect3DResult.yaw;
+        }
+        else
+        {
+            measurement.at<track_t>(0) = static_cast<track_t>(rect.center.x);  // update using measurements
+            measurement.at<track_t>(1) = static_cast<track_t>(rect.center.y);
+            measurement.at<track_t>(2) = static_cast<track_t>(rect.center.z);
+            measurement.at<track_t>(3) = static_cast<track_t>(rect.size.width);
+            measurement.at<track_t>(4) = static_cast<track_t>(rect.size.height);
+            measurement.at<track_t>(5) = static_cast<track_t>(rect.size.length);
+            measurement.at<track_t>(6) = static_cast<track_t>(rect.yaw);
+        }
+        // Correction
+        cv::Mat estimated;
+        switch (m_type)
+        {
+        case tracking::KalmanLinear:
+        {
+            estimated = m_linearKalman.correct(measurement);
+
+            m_lastRect3DResult.center.x = estimated.at<track_t>(0);   //update using measurements
+            m_lastRect3DResult.center.y = estimated.at<track_t>(1);
+            m_lastRect3DResult.center.z = estimated.at<track_t>(2);
+            m_lastRect3DResult.size.width = estimated.at<track_t>(3);
+            m_lastRect3DResult.size.height = estimated.at<track_t>(4);
+            m_lastRect3DResult.size.length = estimated.at<track_t>(5);
+            m_lastRect3DResult.yaw = estimated.at<track_t>(6);
+
+            // Inertia correction: adapt the effective time step — a growing
+            // residual enlarges dt, a shrinking one reduces it.
+			if (!m_useAcceleration)
+			{
+                track_t currDist = sqrtf(sqr(estimated.at<track_t>(0) - rect.center.x) + sqr(estimated.at<track_t>(1) - rect.center.y)+ sqr(estimated.at<track_t>(2) - rect.center.z) + sqr(estimated.at<track_t>(3) - rect.size.width) + sqr(estimated.at<track_t>(4) - rect.size.height) + sqr(estimated.at<track_t>(5) - rect.size.length) + sqr(estimated.at<track_t>(6) - rect.yaw));
+				if (currDist > m_lastDist)
+				{
+					m_deltaTime = std::min(m_deltaTime + m_deltaStep, m_deltaTimeMax);
+				}
+				else
+				{
+					m_deltaTime = std::max(m_deltaTime - m_deltaStep, m_deltaTimeMin);
+				}
+				m_lastDist = currDist;
+
+                // NOTE(review): these offsets assume a 14-state constant-
+                // velocity model ([7 pose, 7 velocity]) with dt entries at
+                // (i, i+7) — confirm against CreateLinear3D(), which is the
+                // model active on this (!m_useAcceleration) path.
+                m_linearKalman.transitionMatrix.at<track_t>(0, 7) = m_deltaTime;
+                m_linearKalman.transitionMatrix.at<track_t>(1, 8) = m_deltaTime;
+                m_linearKalman.transitionMatrix.at<track_t>(2, 9) = m_deltaTime;
+                m_linearKalman.transitionMatrix.at<track_t>(3, 10) = m_deltaTime;
+                m_linearKalman.transitionMatrix.at<track_t>(4, 11) = m_deltaTime;
+                m_linearKalman.transitionMatrix.at<track_t>(5, 12) = m_deltaTime;
+                m_linearKalman.transitionMatrix.at<track_t>(6, 13) = m_deltaTime;
+			}
+            break;
+        }
+        }
+    }
+    else
+    {
+        // Filter not created yet: just echo the measurement.
+        if (dataCorrect)
+        {
+            m_lastRect3DResult.center.x = static_cast<track_t>(rect.center.x);
+            m_lastRect3DResult.center.y = static_cast<track_t>(rect.center.y);
+            m_lastRect3DResult.center.z = static_cast<track_t>(rect.center.z);
+            m_lastRect3DResult.size.width = static_cast<track_t>(rect.size.width);
+            m_lastRect3DResult.size.height = static_cast<track_t>(rect.size.height);
+            m_lastRect3DResult.size.length = static_cast<track_t>(rect.size.length);
+            m_lastRect3DResult.yaw = static_cast<track_t>(rect.yaw);
+        }
+    }
+    return m_lastRect3DResult;
+}
+
+//---------------------------------------------------------------------------
+cv::Vec<track_t, 2> TKalmanFilter::GetVelocity() const
+{
+    // Returns the predicted (vx, vy) from the state vector, or (0, 0) when
+    // the filter is not initialized. The velocity offset inside statePre
+    // depends on which model was built:
+    //   4-state point model   -> vx, vy at 2, 3
+    //   6+ state rect model   -> vx, vy at 4, 5
+    //   9+ state 3D model     -> vx, vy at 7, 8
+    cv::Vec<track_t, 2> res(0, 0);
+    if (m_initialized)
+    {
+        switch (m_type)
+        {
+        case tracking::KalmanLinear:
+        {
+            if (m_linearKalman.statePre.rows > 3)
+            {
+                int indX = 2;
+                int indY = 3;
+                if (m_linearKalman.statePre.rows > 5)
+                {
+                    indX = 4;
+                    indY = 5;
+                    if (m_linearKalman.statePre.rows > 8)
+                    {
+                        indX = 7;
+                        indY = 8;
+                    }
+                }
+                // BUGFIX: these reads used to sit inside the "rows > 5"
+                // branch, so a 4-state filter always reported zero velocity
+                // even though indX/indY had been computed for it.
+                res[0] = m_linearKalman.statePre.at<track_t>(indX);
+                res[1] = m_linearKalman.statePre.at<track_t>(indY);
+            }
+            break;
+        }
+
+        }
+    }
+    return res;
+}

+ 120 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Kalman.h

@@ -0,0 +1,120 @@
+#pragma once
+#include "defines.h"
+#include <memory>
+#include <deque>
+
+#include <opencv2/opencv.hpp>
+
+///
+/// \brief The TKalmanFilter class
+/// Linear Kalman filter wrapper used by the tracker. It can filter a 3D
+/// point, a 2D rectangle or a 3D box (Rect3D); the concrete state layout is
+/// built lazily from the first MIN_INIT_VALS samples via linear regression.
+/// http://www.morethantechnical.com/2011/06/17/simple-kalman-filter-for-tracking-using-opencv-2-2-w-code/
+///
+class TKalmanFilter
+{
+public:
+    TKalmanFilter(tracking::KalmanType type, bool useAcceleration, track_t deltaTime, track_t accelNoiseMag);
+    ~TKalmanFilter() = default;
+
+    // Predict/update a 3D point (only x and y are corrected by measurements).
+    cv::Point3f GetPointPrediction();
+    cv::Point3f Update(cv::Point3f pt, bool dataCorrect);
+
+    // Predict/update an axis-aligned 2D rectangle.
+    cv::Rect GetRectPrediction();
+    cv::Rect Update(cv::Rect rect, bool dataCorrect);
+
+    // Predict/update an oriented 3D box (center, size, yaw).
+    Rect3D GetRect3DPrediction();
+    Rect3D Update(Rect3D rect, bool dataCorrect);
+
+	// Current (vx, vy) estimate taken from the predicted state.
+	cv::Vec<track_t, 2> GetVelocity() const;
+
+private:
+    tracking::KalmanType m_type = tracking::KalmanLinear;
+    cv::KalmanFilter m_linearKalman;
+    // Number of samples collected before the filter is actually created.
+    static const size_t MIN_INIT_VALS = 4;
+    std::deque<cv::Point3f> m_initialPoints;
+    cv::Point3f m_lastPointResult;
+
+    std::deque<cv::Rect> m_initialRects;
+    cv::Rect_<track_t> m_lastRectResult;
+    cv::Rect_<track_t> m_lastRect;
+
+    std::deque<Rect3D> m_initialRect3Ds;
+    Rect3D m_lastRect3DResult;
+    cv::Rect_<track_t> m_lastRect3D;
+
+    bool m_initialized = false;
+    // Adaptive time step: clamped to [m_deltaTimeMin, m_deltaTimeMax] and
+    // adjusted by m_deltaStep based on the innovation magnitude (see Update).
+    track_t m_deltaTime = 0.2f;
+    track_t m_deltaTimeMin = 0.2f;
+    track_t m_deltaTimeMax = 2 * 0.2f;
+    track_t m_lastDist = 0;
+    track_t m_deltaStep = 0;
+    static const int m_deltaStepsCount = 20;
+    track_t m_accelNoiseMag = 0.5f;
+	bool m_useAcceleration = false; // If set true then will be used motion model x(t) = x0 + v0 * t + a * t^2 / 2
+
+	// Constant velocity model
+    void CreateLinear(cv::Point3f xy0, cv::Point3f xyv0);
+    void CreateLinear(cv::Rect_<track_t> rect0, Point_t rectv0);
+    void CreateLinear3D(Rect3D rect0, cv::Point3f rectv0);
+
+	// Constant acceleration model
+	// https://www.mathworks.com/help/driving/ug/linear-kalman-filters.html
+    void CreateLinearAcceleration(cv::Point3f xy0, cv::Point3f xyv0);
+    void CreateLinearAcceleration(cv::Rect_<track_t> rect0, Point_t rectv0);
+    void CreateLinearAcceleration3D(Rect3D rect0, cv::Point3f rectv0);
+};
+
+//---------------------------------------------------------------------------
+///
+/// \brief sqr
+/// \param val
+/// \return
+///
+template<class T> inline
+T sqr(T val)
+{
+    // Square of the argument (kept in the argument's own type).
+    const T squared = val * val;
+    return squared;
+}
+
+///
+/// \brief get_lin_regress_params
+/// \param in_data
+/// \param start_pos
+/// \param in_data_size
+/// \param kx
+/// \param bx
+/// \param ky
+/// \param by
+///
+template<typename T, typename CONT>
+void get_lin_regress_params(
+        const CONT& in_data,
+        size_t start_pos,
+        size_t in_data_size,
+        T& kx, T& bx, T& ky, T& by)
+{
+    // Least-squares line fit of the x and y components against the sample
+    // index i over [start_pos, in_data_size):
+    //   x(i) ~= kx * i + bx,   y(i) ~= ky * i + by.
+    T sum_i(0.), sum_ii(0.);
+    T sum_x(0.), sum_ix(0.);
+    T sum_y(0.), sum_iy(0.);
+
+    const T el_count = static_cast<T>(in_data_size - start_pos);
+    for (size_t i = start_pos; i < in_data_size; ++i)
+    {
+        sum_i += i;
+        sum_ii += sqr(i);
+
+        sum_x += in_data[i].x;
+        sum_ix += i * in_data[i].x;
+
+        sum_y += in_data[i].y;
+        sum_iy += i * in_data[i].y;
+    }
+    // Inverse determinant of the 2x2 normal-equation matrix.
+    T det_1 = 1 / (el_count * sum_ii - sqr(sum_i));
+
+    sum_i *= -1;
+
+    kx = det_1 * (sum_i * sum_x + el_count * sum_ix);
+    bx = det_1 * (sum_ii * sum_x + sum_i * sum_ix);
+
+    ky = det_1 * (sum_i * sum_y + el_count * sum_iy);
+    by = det_1 * (sum_ii * sum_y + sum_i * sum_iy);
+}

+ 65 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/ShortPathCalculator.h

@@ -0,0 +1,65 @@
+#pragma once
+#include "defines.h"
+#include "HungarianAlg.h"
+
+///
+/// \brief The SPSettings struct
+///
+struct SPSettings
+{
+    track_t m_distThres = 0.8f;  // assignment cost threshold
+    size_t m_maxHistory = 10;    // presumably max history depth used by the solver — TODO confirm
+};
+
+///
+/// \brief The ShortPathCalculator class
+///
+class ShortPathCalculator
+{
+public:
+    // Abstract base for detection-to-track assignment solvers.
+    ShortPathCalculator(const SPSettings& settings)
+        : m_settings(settings)
+    {
+    }
+    virtual ~ShortPathCalculator()
+    {
+    }
+
+    // Solve the N x M assignment problem for the flattened cost matrix;
+    // assignment receives one matched column index per row (unmatched
+    // convention depends on the concrete solver — see HungarianAlg).
+    virtual void Solve(const distMatrix_t& costMatrix, size_t N, size_t M, assignments_t& assignment, track_t maxCost) = 0;
+protected:
+    SPSettings m_settings;
+};
+
+///
+/// \brief The SPHungrian class
+///
+class SPHungrian : public ShortPathCalculator
+{
+public:
+    // Assignment solver backed by the Hungarian (Kuhn-Munkres) algorithm.
+    SPHungrian(const SPSettings& settings)
+        : ShortPathCalculator(settings)
+    {
+    }
+
+    // maxCost is unused here: the optimal Hungarian method solves the
+    // complete cost matrix.
+    void Solve(const distMatrix_t& costMatrix, size_t N, size_t M, assignments_t& assignment, track_t /*maxCost*/)
+    {
+        m_solver.Solve(costMatrix, N, M, assignment, AssignmentProblemSolver::optimal);
+    }
+
+private:
+    AssignmentProblemSolver m_solver;
+};
+
+///
+/// \brief The SPBipart class
+///
+//class SPBipart : public ShortPathCalculator
+//{
+//public:
+//    SPBipart(const SPSettings& settings)
+//        : ShortPathCalculator(settings)
+//    {
+//    }
+
+//    void Solve(const distMatrix_t& costMatrix, size_t N, size_t M, assignments_t& assignment, track_t maxCost);
+//};

+ 159 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/Tracking.hpp

@@ -0,0 +1,159 @@
+#pragma once
+#include "objectarray.pb.h"
+#include "Ctracker.h"
+#include <iostream>
+#include <vector>
+// Frame rate used to convert per-frame displacement into velocity (see Tracking()).
+#define FPS 8 
+// A track must be detected in at least this many frames before it is reported.
+#define MIN_DETECTEDFRAMES 5
+///
+/// \brief Tracker settings initialization.
+///
+bool InitTracker(CTracker& tracker)
+{
+    TrackerSettings settings;
+    settings.SetDistance(tracking::DistRect3Ds); // cost metric: distance between the two 3D box centers
+    settings.m_kalmanType = tracking::KalmanLinear; // filter type: linear Kalman filter
+    settings.m_filterGoal = tracking::FilterRect3D; // filtered state: Rect3D
+    settings.m_lostTrackType = tracking::TrackNone; // lost targets are not tracked any further
+    settings.m_matchType = tracking::MatchHungrian; // assignment algorithm: Hungarian
+    settings.m_dt = 1.f; // Kalman filter time step
+    settings.m_accelNoiseMag = 0.5f; // Kalman process noise magnitude
+    settings.m_distThres = 30.f; // distance threshold used during matching
+    settings.m_minAreaRadiusPix = -1.f;//frame.rows / 20.f; // minimum area radius of a target (pixels)
+    settings.m_maximumAllowedSkippedFrames = 3; // max consecutive unmatched updates before a track is removed
+    settings.m_maxTraceLength = 5; // maximum trace length, i.e. how much history is kept per track
+
+    tracker.setSettings(settings);
+
+    return true;
+}
+
+///
+/// \brief Tracks the detected lidar objects and updates each object's state
+/// with the tracker's filtered optimal estimate.
+///
+iv::lidar::objectarray Tracking(iv::lidar::objectarray& lidarobjvec, CTracker& tracker)
+{
+#ifdef DEBUG_SHOW
+    std::cout<<"-------------------------------------------------"<<std::endl;
+#endif
+    iv::lidar::objectarray trackedobjvec;
+    trackedobjvec.clear_obj();
+    trackedobjvec.set_timestamp(lidarobjvec.timestamp());
+    // Convert every detection into a CRegion (3D box + class id + score).
+    regions_t regions;
+    cv::Point3f pointXYZ;
+    for(int i = 0;i<lidarobjvec.obj_size();i++)
+    {
+        pointXYZ.x = lidarobjvec.obj(i).centroid().x();
+        pointXYZ.y = lidarobjvec.obj(i).centroid().y();
+        pointXYZ.z = lidarobjvec.obj(i).centroid().z();
+        Rect3D rect;
+        rect.center = pointXYZ;
+        rect.size.width = lidarobjvec.obj(i).dimensions().x();//w
+        rect.size.height = lidarobjvec.obj(i).dimensions().y();//l
+        rect.size.length = lidarobjvec.obj(i).dimensions().z();//h
+        rect.yaw = lidarobjvec.obj(i).tyaw();
+        CRegion region = CRegion(rect,lidarobjvec.obj(i).mntype(),lidarobjvec.obj(i).score());
+        regions.push_back(region);
+#ifdef DEBUG_SHOW
+        std::cout<<"old id:"<<i<<std::endl;
+        std::cout<<"old type:"<<lidarobjvec.obj(i).mntype()<<std::endl;
+        std::cout<<"old x,y,z,w,h,l,yaw:"<<rect.center.x<<","<<rect.center.y<<","<<rect.center.z<<"  "<<rect.size.width<<","<<rect.size.height<<","<<rect.size.length<<"  "<<rect.yaw<<std::endl;
+#endif
+
+    }
+    tracker.Update(regions, cv::UMat(), 30);
+    auto tracks = tracker.GetTracks();
+#ifdef DEBUG_SHOW
+    std::cout<<"detect size, tracker size:"<<regions.size()<<","<<tracks.size()<<std::endl;
+#endif
+    for (size_t i = 0; i < tracks.size(); i++)
+    {
+        const auto& track = tracks[i];
+        // Suppress tracks that have not yet been confirmed often enough.
+        if(track.m_detectedFrames < MIN_DETECTEDFRAMES) continue;
+        int obj_id = track.m_regionID;
+        iv::lidar::lidarobject lidar_object;
+        if(obj_id != -1) // a detection matched this track: update the detection's state from the track's optimal estimate
+        {
+            lidar_object = lidarobjvec.obj(obj_id);
+            lidar_object.set_id(track.m_ID);
+
+            // Filtered center position.
+            iv::lidar::PointXYZ centroid;
+            iv::lidar::PointXYZ *centerpoint;
+            centroid.set_x(track.m_rect.center.x);
+            centroid.set_y(track.m_rect.center.y);
+            centroid.set_z(track.m_rect.center.z);
+            centerpoint=lidar_object.mutable_centroid();
+            centerpoint->CopyFrom(centroid);
+/* not update */
+//            iv::lidar::Dimension dimension;
+//            iv::lidar::Dimension *obj_dimension;
+//            dimension.set_x(track.m_rect.size.width);
+//            dimension.set_y(track.m_rect.size.height);
+//            dimension.set_z(track.m_rect.size.length);
+//            obj_dimension=lidar_object.mutable_dimensions();
+//            obj_dimension->CopyFrom(dimension);
+
+//            lidar_object.set_tyaw(track.m_rect.yaw);
+
+            // Per-frame displacement scaled by FPS -> relative velocity.
+            iv::lidar::VelXY vel_relative;
+            iv::lidar::VelXY *velrelative;
+            vel_relative.set_x(track.m_velocity[0]*FPS);
+            vel_relative.set_y(track.m_velocity[1]*FPS);
+            velrelative = lidar_object.mutable_vel_relative();
+            velrelative->CopyFrom(vel_relative);
+
+            iv::lidar::lidarobject *pe = trackedobjvec.add_obj();
+            pe->CopyFrom(lidar_object);
+        }else{ // no detection matched this existing track: append the track's prediction to the output instead
+            lidar_object.set_id(track.m_ID);
+            lidar_object.set_mntype(track.m_region.m_type);
+            lidar_object.set_score(track.m_region.m_confidence);
+
+            iv::lidar::PointXYZ centroid;
+            iv::lidar::PointXYZ *centerpoint;
+            centroid.set_x(track.m_rect.center.x);
+            centroid.set_y(track.m_rect.center.y);
+            centroid.set_z(track.m_rect.center.z);
+            centerpoint=lidar_object.mutable_centroid();
+            centerpoint->CopyFrom(centroid);
+
+            iv::lidar::Dimension dimension;
+            iv::lidar::Dimension *obj_dimension;
+            dimension.set_x(track.m_region.m_rect.size.width);
+            dimension.set_y(track.m_region.m_rect.size.height);
+            dimension.set_z(track.m_region.m_rect.size.length);
+            obj_dimension=lidar_object.mutable_dimensions();
+            obj_dimension->CopyFrom(dimension);
+
+            lidar_object.set_tyaw(track.m_region.m_rect.yaw);
+
+            iv::lidar::VelXY vel_relative;
+            iv::lidar::VelXY *velrelative;
+            vel_relative.set_x(track.m_velocity[0]*FPS);
+            vel_relative.set_y(track.m_velocity[1]*FPS);
+            velrelative = lidar_object.mutable_vel_relative();
+            velrelative->CopyFrom(vel_relative);
+
+            iv::lidar::lidarobject *pe = trackedobjvec.add_obj();
+            pe->CopyFrom(lidar_object);
+        }
+#ifdef DEBUG_SHOW
+        std::cout<<"id:"<<lidar_object.id()<<"  "<<obj_id<<std::endl;
+        std::cout<<"type:"<<lidar_object.mntype()<<std::endl;
+        std::cout<<"update x,y,z,w,h,l,yaw,vx,vy:"<<lidar_object.centroid().x()<<","<<lidar_object.centroid().y()<<","<<lidar_object.centroid().z()<<"  "<<lidar_object.dimensions().x()<<","<<lidar_object.dimensions().y()<<","<<lidar_object.dimensions().z()<<"  "<<lidar_object.tyaw()<<"  "<<lidar_object.vel_relative().x()<<","<<lidar_object.vel_relative().y()<<std::endl;
+#endif
+    }
+#ifdef DEBUG_SHOW
+    std::cout<<"trackedobjvec size:"<<trackedobjvec.obj_size()<<std::endl;
+#endif
+    //    for (size_t i = 0; i < trackedobjvec.obj_size(); i++)
+    //    {
+    //        iv::lidar::lidarobject lidar_object;
+    //        lidar_object = trackedobjvec.obj(i);
+    //        std::cout<<"historical size:"<<lidar_object.point_historical_size()<<std::endl;
+    //    }
+    regions.clear();
+    tracks.clear();
+    return trackedobjvec;
+}
+

+ 147 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/defines.h

@@ -0,0 +1,147 @@
+#pragma once
+
+#include <vector>
+#include <string>
+#include <map>
+#include <opencv2/opencv.hpp>
+
+
+// ---------------------------------------------------------------------------
+//
+// ---------------------------------------------------------------------------
+typedef float track_t;
+typedef cv::Point_<track_t> Point_t;
+#define El_t CV_32F
+#define Mat_t CV_32FC
+
+typedef std::vector<int> assignments_t;
+typedef std::vector<track_t> distMatrix_t;
+typedef struct Size3D{
+    // 3D box extents. The default constructor zeroes all members so a
+    // default-constructed Size3D never carries indeterminate values.
+    Size3D()
+        : length(0.f), width(0.f), height(0.f)
+    {
+    }
+    // Members are initialized in declaration order (length, width, height)
+    // to match the declarations below and avoid -Wreorder warnings.
+    Size3D(float w, float h, float l)
+        :
+          length(l),
+          width(w),
+          height(h)
+    {
+    }
+    float length;
+    float width;
+    float height;
+} Size3D;
+typedef struct Rect3D{
+    // Oriented 3D box: center point, extents, and yaw (rotation about z).
+    // The default constructor zeroes yaw so it is never indeterminate
+    // (cv::Point3f already default-constructs to (0,0,0)).
+    Rect3D()
+        : yaw(0.f)
+    {
+    }
+    Rect3D(cv::Point3f c, Size3D s, float y)
+        :
+          center(c),
+          size(s),
+          yaw(y)
+    {
+    }
+    cv::Point3f center;
+    Size3D size;
+    float yaw;
+} Rect3D;
+///
+/// \brief config_t
+///
+typedef std::multimap<std::string, std::string> config_t;
+
+///
+/// \brief The CRegion class
+/// One detection: a 3D box (center/size/yaw) plus the detector's class label
+/// and confidence. Also caches 2D projections of the box in the x/y plane
+/// (a rotated rect and an axis-aligned bounding rect).
+///
+class CRegion
+{
+public:
+    CRegion()
+        : m_type(-1), m_type_name(""), m_confidence(-1)
+    {
+    }
+
+    /// \param rect       3D box (center, size, yaw)
+    /// \param type       numeric class label
+    /// \param confidence detector confidence score
+    CRegion(const Rect3D& rect, const int& type, float confidence)
+        : m_rect(rect), m_type(type), m_confidence(confidence)
+    {
+        RBRect();
+    }
+
+    /// Same as above, but with a textual class name instead of a numeric label.
+    CRegion(const Rect3D& rect, const std::string& type, float confidence)
+        : m_rect(rect), m_type_name(type), m_confidence(confidence)
+    {
+        RBRect();
+    }
+    Rect3D m_rect;            // 3D box from the detector
+    cv::RotatedRect m_rrect;  // cached 2D rotated rect in x/y (angle fixed to 0)
+    cv::Rect m_brect;         // cached 2D axis-aligned bounding rect in x/y
+    int m_type = -1;          // numeric class label (-1 = unset)
+    std::string m_type_name = "";  // textual class label
+    float m_confidence = -1;  // detector confidence (-1 = unset)
+private:
+    ///
+    /// \brief RBRect
+    /// Rebuild the cached 2D rects (m_rrect, m_brect) from m_rect.
+    /// Note: the rotated rect is constructed with angle 0, i.e. yaw is not
+    /// propagated into m_rrect.
+    /// \return the rebuilt rotated rect
+    ///
+    cv::RotatedRect RBRect()
+    {
+        m_rrect = cv::RotatedRect(Point_t(m_rect.center.x,m_rect.center.y), cv::Size(m_rect.size.width,m_rect.size.height), 0);
+        m_brect = cv::Rect(m_rect.center.x-m_rect.size.width/2,m_rect.center.y-m_rect.size.height/2, m_rect.size.width,m_rect.size.height);
+        return m_rrect;
+    }
+};
+
+typedef std::vector<CRegion> regions_t;
+
+///
+/// Tracking-related enums shared across the tracker implementation.
+///
+namespace tracking
+{
+
+///
+/// \brief The DistType enum
+/// Metric used when matching detections to existing tracks.
+///
+enum DistType
+{
+    DistCenters,   // Euclidean distance between centers, pixels
+    DistRects,     // Euclidean distance between 2D rects (center/size/angle)
+    DistRect3Ds,     // Euclidean distance between 3D boxes (center/size)
+	DistsCount     // number of metrics; not a metric itself
+};
+
+///
+/// \brief The FilterGoal enum
+/// State representation tracked by the Kalman filter.
+///
+enum FilterGoal
+{
+    FilterCenter, // x,y
+    FilterRect,   // x,y,w,h
+    FilterRect3D  // x,y,z,w,h,l,yaw
+};
+
+///
+/// \brief The KalmanType enum
+/// Flavor of Kalman filter (only the linear model is provided).
+///
+enum KalmanType
+{
+    KalmanLinear
+};
+
+///
+/// \brief The MatchType enum
+/// Assignment solver for detection-to-track matching.
+///
+enum MatchType
+{
+    MatchHungrian
+};
+
+///
+/// \brief The LostTrackType enum
+/// External tracker used on lost tracks (none supported here).
+///
+enum LostTrackType
+{
+    TrackNone
+};
+}

+ 492 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/track.cpp

@@ -0,0 +1,492 @@
+#include "track.h"
+
+//#include "dat/dat_tracker.hpp"
+
+///
+/// \brief CTrack
+/// \param pt
+/// \param region
+/// \param deltaTime
+/// \param accelNoiseMag
+/// \param trackID
+/// \param filterObjectSize
+/// \param externalTrackerForLost
+///
+CTrack::CTrack(const CRegion& region,
+        tracking::KalmanType kalmanType,
+        track_t deltaTime,
+        track_t accelNoiseMag,
+        bool useAcceleration,
+        size_t trackID,
+        size_t regionID,
+        tracking::FilterGoal filterGoal,
+        tracking::LostTrackType externalTrackerForLost
+        )
+    :
+      m_trackID(trackID),
+      m_regionID(regionID),
+      m_skippedFrames(0),
+      m_lastRegion(region),
+      m_predictionPoint(region.m_rect.center),
+      m_predictionRect(region.m_rrect),
+      m_predictionRect3D(region.m_rect),
+      m_kalman(kalmanType, useAcceleration, deltaTime, accelNoiseMag),
+      m_filterGoal(filterGoal),
+      m_outOfTheFrame(false),
+      m_externalTrackerForLost(externalTrackerForLost)
+{
+    // Seed the Kalman filter with the first detection, using the state
+    // representation selected by filterGoal (2D rect, 3D rect, or center point).
+    if (filterGoal == tracking::FilterRect)
+        m_kalman.Update(region.m_brect, true);
+    else if(filterGoal == tracking::FilterRect3D)
+        m_kalman.Update(region.m_rect, true);
+    else
+        m_kalman.Update(m_predictionPoint, true);
+
+    // First trajectory point: prediction and raw measurement coincide.
+    m_trace.push_back(m_predictionPoint, m_predictionPoint);
+}
+
+///
+/// \brief CTrack::CalcDistCenter
+/// 2D Euclidean distance (x/y only; z is not included) between the track's
+/// predicted point and the detection's center.
+/// \param reg new detection
+/// \return distance
+///
+track_t CTrack::CalcDistCenter(const CRegion& reg) const
+{
+    const track_t dx = m_predictionPoint.x - reg.m_rect.center.x;
+    const track_t dy = m_predictionPoint.y - reg.m_rect.center.y;
+    return sqrtf(sqr(dx) + sqr(dy));
+}
+///
+/// \brief CTrack::CalcDistRect
+/// Distance between the last region's 2D rotated rect and a new detection:
+/// Euclidean norm over (dx, dy, dw, dh, dangle).
+/// \param reg new detection
+/// \return distance
+///
+track_t CTrack::CalcDistRect(const CRegion& reg) const
+{
+    const track_t dx = reg.m_rrect.center.x - m_lastRegion.m_rrect.center.x;
+    const track_t dy = reg.m_rrect.center.y - m_lastRegion.m_rrect.center.y;
+    const track_t dw = static_cast<track_t>(m_lastRegion.m_rrect.size.width - reg.m_rrect.size.width);
+    const track_t dh = static_cast<track_t>(m_lastRegion.m_rrect.size.height - reg.m_rrect.size.height);
+    const track_t da = static_cast<track_t>(m_lastRegion.m_rrect.angle - reg.m_rrect.angle);
+
+    return sqrtf(sqr(dx) + sqr(dy) + sqr(dw) + sqr(dh) + sqr(da));
+}
+///
+/// \brief CTrack::CalcDistRect3D
+/// Distance between the last region's 3D box and a new detection:
+/// Euclidean norm over (dx, dy, dw, dh, dl). The z and yaw terms were
+/// deliberately zeroed in the original implementation and are omitted here.
+/// \param reg new detection
+/// \return distance
+///
+track_t CTrack::CalcDistRect3D(const CRegion& reg) const
+{
+    const track_t dx = reg.m_rect.center.x - m_lastRegion.m_rect.center.x;
+    const track_t dy = reg.m_rect.center.y - m_lastRegion.m_rect.center.y;
+    const track_t dw = static_cast<track_t>(m_lastRegion.m_rect.size.width - reg.m_rect.size.width);
+    const track_t dh = static_cast<track_t>(m_lastRegion.m_rect.size.height - reg.m_rect.size.height);
+    const track_t dl = static_cast<track_t>(m_lastRegion.m_rect.size.length - reg.m_rect.size.length);
+
+    return sqrtf(sqr(dx) + sqr(dy) + sqr(dw) + sqr(dh) + sqr(dl));
+}
+
+///
+/// \brief CTrack::Update
+/// Run one tracking step: predict/correct the Kalman state with the matched
+/// detection (or coast when none was assigned) and append to the trajectory.
+/// \param region           matched detection (ignored when !dataCorrect)
+/// \param dataCorrect      true when a real detection was assigned this frame
+/// \param max_trace_length cap on stored trajectory length
+/// \param prevFrame        previous frame (reserved for external lost-trackers)
+/// \param currFrame        current frame
+/// \param trajLen          window for the static-object check (0 = disabled)
+///
+void CTrack::Update(
+        const CRegion& region,
+        bool dataCorrect,
+        size_t max_trace_length,
+        cv::UMat prevFrame,
+        cv::UMat currFrame,
+        int trajLen
+        )
+{
+    // The three filter goals are mutually exclusive, so exactly one update
+    // path must run. The previous "if / if / else" chain made the FilterRect
+    // branch ALSO fall into PointUpdate through the trailing else.
+    if (m_filterGoal == tracking::FilterRect) // Kalman filter for object coordinates and size
+        RectUpdate(region, dataCorrect, prevFrame, currFrame);
+    else if (m_filterGoal == tracking::FilterRect3D) // Kalman filter for 3D box (x,y,z,w,h,l,yaw)
+        Rect3DUpdate(region, dataCorrect, prevFrame, currFrame);
+    else // Kalman filter only for object center
+        PointUpdate(region.m_rect.center, region.m_rrect.size, dataCorrect, currFrame.size());
+
+    if (dataCorrect)
+    {
+        m_lastRegion = region;
+        m_trace.push_back(m_predictionPoint, region.m_rect.center);
+
+        CheckStatic(trajLen, currFrame, region);
+    }
+    else
+    {
+        // No detection this frame: store only the prediction.
+        m_trace.push_back(m_predictionPoint);
+    }
+
+    // Bound the stored trajectory length.
+    if (m_trace.size() > max_trace_length)
+        m_trace.pop_front(m_trace.size() - max_trace_length);
+}
+
+///
+/// \brief CTrack::IsStatic
+/// \return true when the static-object heuristic currently flags this track as not moving
+///
+bool CTrack::IsStatic() const
+{
+    return m_isStatic;
+}
+
+///
+/// \brief CTrack::IsStaticTimeout
+/// \param framesTime threshold in frames
+/// \return true when the track has been static for more than framesTime frames
+///
+bool CTrack::IsStaticTimeout(int framesTime) const
+{
+    return (m_staticFrames > framesTime);
+}
+
+///
+/// \brief CTrack::IsOutOfTheFrame
+/// \return true when the last prediction fell (partly) outside the frame bounds
+///
+bool CTrack::IsOutOfTheFrame() const
+{
+    return m_outOfTheFrame;
+}
+
+///
+//cv::RotatedRect CTrack::CalcPredictionEllipse(cv::Size_<track_t> minRadius) const
+//{
+//    // Move ellipse to velocity
+//    auto velocity = m_kalman.GetVelocity();
+//    Point_t d(3.f * velocity[0], 3.f * velocity[1]);
+
+//    cv::RotatedRect rrect(m_predictionPoint, cv::Size2f(std::max(minRadius.width, fabs(d.x)), std::max(minRadius.height, fabs(d.y))), 0);
+
+//    if (fabs(d.x) + fabs(d.y) > 4) // pix
+//    {
+//        if (fabs(d.x) > 0.0001f)
+//        {
+//            track_t l = std::min(rrect.size.width, rrect.size.height) / 3;
+
+//            track_t p2_l = sqrtf(sqr(d.x) + sqr(d.y));
+//            rrect.center.x = l * d.x / p2_l + m_predictionPoint.x;
+//            rrect.center.y = l * d.y / p2_l + m_predictionPoint.y;
+
+//            rrect.angle = atanf(d.y / d.x);
+//        }
+//        else
+//        {
+//            rrect.center.y += d.y / 3;
+//            rrect.angle = static_cast<float>(CV_PI / 2.);
+//        }
+//    }
+//    return rrect;
+//}
+
+///
+/// \brief CTrack::IsInsideArea
+///        Evaluates the ellipse equation for a point against the ellipse
+///        described by rrect (center, semi-axes = size, rotation).
+///        If result <= 1 the point lies inside the ellipse.
+/// \param pt    point to test
+/// \param rrect ellipse; note rrect.angle is used directly in radian math here
+/// \return normalized ellipse equation value (<= 1 means inside)
+///
+track_t CTrack::IsInsideArea(const Point_t& pt, const cv::RotatedRect& rrect) const
+{
+    // Translate into the ellipse frame, rotate by -angle, then evaluate
+    // (x/a)^2 + (y/b)^2.
+    Point_t pt_(pt.x - rrect.center.x, pt.y - rrect.center.y);
+    track_t r = sqrtf(sqr(pt_.x) + sqr(pt_.y));
+    track_t t = (r > 1) ? acosf(pt_.x / r) : 0;
+    track_t t_ = t - rrect.angle;
+    Point_t pt_rotated(r * cosf(t_), r * sinf(t_));
+
+    return sqr(pt_rotated.x) / sqr(rrect.size.width) + sqr(pt_rotated.y) / sqr(rrect.size.height);
+}
+
+///
+/// \brief CTrack::WidthDist
+/// Width similarity between the last region and a new detection,
+/// expressed as min(w1,w2)/max(w1,w2) in (0, 1].
+/// \param reg new detection
+/// \return width ratio (1 = identical widths)
+///
+track_t CTrack::WidthDist(const CRegion& reg) const
+{
+    const track_t prev = m_lastRegion.m_rect.size.width;
+    const track_t curr = reg.m_rect.size.width;
+    return (prev < curr) ? (prev / curr) : (curr / prev);
+}
+
+///
+/// \brief CTrack::HeightDist
+/// Height similarity between the last region and a new detection,
+/// expressed as min(h1,h2)/max(h1,h2) in (0, 1].
+/// \param reg new detection
+/// \return height ratio (1 = identical heights)
+///
+track_t CTrack::HeightDist(const CRegion& reg) const
+{
+    const track_t prev = m_lastRegion.m_rect.size.height;
+    const track_t curr = reg.m_rect.size.height;
+    return (prev < curr) ? (prev / curr) : (curr / prev);
+}
+
+///
+/// \brief CTrack::CheckStatic
+/// Classify the track as static/moving by fitting a linear regression to the
+/// last trajLen trajectory points and thresholding the implied displacement.
+/// \param trajLen   regression window length; 0 disables the check
+/// \param currFrame current frame; snapshotted when the track first turns static
+/// \param region    detection matched this frame; its box is snapshotted likewise
+/// \return current static flag (m_isStatic)
+///
+bool CTrack::CheckStatic(int trajLen, cv::UMat currFrame, const CRegion& region)
+{
+    if (!trajLen || static_cast<int>(m_trace.size()) < trajLen)
+    {
+        // Not enough history (or check disabled): reset the static state.
+        m_isStatic = false;
+        m_staticFrames = 0;
+        m_staticFrame = cv::UMat();
+    }
+    else
+    {
+        // Fit x(t) = kx*t + bx and y(t) = ky*t + by over the last trajLen points.
+        track_t kx = 0;
+        track_t bx = 0;
+        track_t ky = 0;
+        track_t by = 0;
+        get_lin_regress_params(m_trace, m_trace.size() - trajLen, m_trace.size(), kx, bx, ky, by);
+        // Displacement across the whole window, used as a speed proxy.
+        track_t speed = sqrt(sqr(kx * trajLen) + sqr(ky * trajLen));
+        const track_t speedThresh = 10;
+        if (speed < speedThresh)
+        {
+            if (!m_isStatic)
+            {
+                // Transition moving -> static: remember the frame and box.
+                m_staticFrame = currFrame.clone();
+                m_staticRect = region.m_rect;
+            }
+
+            ++m_staticFrames;
+            m_isStatic = true;
+        }
+        else
+        {
+            m_isStatic = false;
+            m_staticFrames = 0;
+            m_staticFrame = cv::UMat();
+        }
+    }
+
+    return m_isStatic;
+}
+
+///
+/// \brief GetLastRect
+/// Current filtered box in Rect3D form, shaped according to the active
+/// filter goal.
+/// \return 2D rect lifted to a Rect3D, the full 3D rect, or a zero-size box
+///         at the predicted center point
+///
+Rect3D CTrack::GetLastRect() const
+{
+    switch (m_filterGoal)
+    {
+    case tracking::FilterRect:
+        return Rect3D(cv::Point3f(m_predictionRect.center.x, m_predictionRect.center.y, 0),
+                      Size3D(m_predictionRect.boundingRect().width, m_predictionRect.boundingRect().height, 0), 0);
+    case tracking::FilterRect3D:
+        return m_predictionRect3D;
+    default:
+        return Rect3D(cv::Point3f(m_predictionPoint.x, m_predictionPoint.y, 0), Size3D(0, 0, 0), 0);
+    }
+}
+///
+/// \brief CTrack::LastRegion
+/// \return the most recent detection region assigned to this track
+///
+const CRegion& CTrack::LastRegion() const
+{
+    return m_lastRegion;
+}
+
+///
+/// \brief CTrack::ConstructObject
+/// Build an immutable snapshot of this track for downstream consumers.
+/// \return TrackingObject with the filtered box, last region, trajectory,
+///         status flags, velocity and frame counters
+///
+TrackingObject CTrack::ConstructObject() const
+{
+    return TrackingObject(GetLastRect(), m_lastRegion, m_trackID, m_regionID, m_trace, IsStatic(), IsOutOfTheFrame(), m_kalman.GetVelocity(), m_detectedFrames);
+}
+
+///
+/// \brief CTrack::SkippedFrames
+/// \return number of consecutive frames without a matched detection
+///
+size_t CTrack::SkippedFrames() const
+{
+    return m_skippedFrames;
+}
+
+///
+/// \brief CTrack::SkippedFrames
+/// \return mutable reference so the tracker can reset/increment the counter
+///
+size_t& CTrack::SkippedFrames()
+{
+    return m_skippedFrames;
+}
+///
+/// \brief CTrack::DetectedFrames
+/// \return mutable reference to the count of frames with a matched detection
+///
+size_t& CTrack::DetectedFrames()
+{
+    return m_detectedFrames;
+}
+///
+/// \brief RectUpdate
+/// Kalman update for the 2D-rect filter goal: predict, correct with the
+/// detection's bounding rect (when available), then refresh the cached
+/// rotated rect, the out-of-frame flag and the prediction point.
+/// \param region      matched detection (used only when dataCorrect)
+/// \param dataCorrect true when a real detection was assigned this frame
+/// \param prevFrame   previous frame (reserved for external lost-trackers)
+/// \param currFrame   current frame; its size bounds the clamping below
+///
+void CTrack::RectUpdate(
+        const CRegion& region,
+        bool dataCorrect,
+        cv::UMat prevFrame,
+        cv::UMat currFrame
+        )
+{
+    m_kalman.GetRectPrediction();
+
+    bool recalcPrediction = true;
+
+    // Clamp v into [0, hi-1-size]; returns the signed correction applied and
+    // may shrink size when the rect cannot fit inside [0, hi) at all.
+    auto Clamp = [](int& v, int& size, int hi) -> int
+    {
+        int res = 0;
+
+        if (size < 1)
+            size = 0;
+
+        if (v < 0)
+        {
+            res = v;
+            v = 0;
+            return res;
+        }
+        else if (v + size > hi - 1)
+        {
+            res = v;
+            v = hi - 1 - size;
+            if (v < 0)
+            {
+                size += v;
+                v = 0;
+            }
+            res -= v;
+            return res;
+        }
+        return res;
+    };
+
+    // Shift/scale the cached rotated rect by the delta between the previous
+    // and the newly filtered bounding rect.
+    auto UpdateRRect = [&](cv::Rect prevRect, cv::Rect newRect)
+    {
+        m_predictionRect.center.x += newRect.x - prevRect.x;
+        m_predictionRect.center.y += newRect.y - prevRect.y;
+        m_predictionRect.size.width *= newRect.width / static_cast<float>(prevRect.width);
+        m_predictionRect.size.height *= newRect.height / static_cast<float>(prevRect.height);
+    };
+
+    // Placeholder for external trackers on lost tracks; TrackNone keeps the
+    // pure Kalman path.
+    switch (m_externalTrackerForLost)
+    {
+    case tracking::TrackNone:
+        break;
+    }
+
+    if (recalcPrediction)
+        UpdateRRect(m_predictionRect.boundingRect(), m_kalman.Update(region.m_brect, dataCorrect));
+
+    // Clamp the bounding rect into the frame and flag near-degenerate or
+    // clipped rects as out-of-frame.
+    cv::Rect brect = m_predictionRect.boundingRect();
+    int dx = Clamp(brect.x, brect.width, currFrame.cols);
+    int dy = Clamp(brect.y, brect.height, currFrame.rows);
+
+    m_outOfTheFrame = (dx != 0) || (dy != 0) || (brect.width < 2) || (brect.height < 2);
+
+    m_predictionPoint = cv::Point3f(m_predictionRect.center.x,m_predictionRect.center.y,0);
+
+    //std::cout << "brect = " << brect << ", dx = " << dx << ", dy = " << dy << ", outOfTheFrame = " << m_outOfTheFrame << ", predictionPoint = " << m_predictionPoint << std::endl;
+}
+///
+/// \brief Rect3DUpdate
+/// Kalman update for the 3D-box filter goal: predict, then correct with the
+/// detection's Rect3D (when available) and refresh the prediction point.
+/// Unlike RectUpdate, no frame-bounds clamping is done on this path.
+/// \param region      matched detection (used only when dataCorrect)
+/// \param dataCorrect true when a real detection was assigned this frame
+/// \param prevFrame   previous frame (reserved for external lost-trackers)
+/// \param currFrame   current frame (unused on this path)
+///
+void CTrack::Rect3DUpdate(
+        const CRegion& region,
+        bool dataCorrect,
+        cv::UMat prevFrame,
+        cv::UMat currFrame
+        )
+{
+    m_kalman.GetRect3DPrediction();
+
+    // Placeholder for external trackers on lost tracks; TrackNone keeps the
+    // pure Kalman path.
+    switch (m_externalTrackerForLost)
+    {
+    case tracking::TrackNone:
+        break;
+    }
+
+    m_predictionRect3D = m_kalman.Update(region.m_rect, dataCorrect);
+
+    m_predictionPoint = m_predictionRect3D.center;
+
+    //std::cout << "brect = " << brect << ", dx = " << dx << ", dy = " << dy << ", outOfTheFrame = " << m_outOfTheFrame << ", predictionPoint = " << m_predictionPoint << std::endl;
+}
+
+///
+/// \brief PointUpdate
+/// Kalman update for the center-point filter goal: predict, correct with the
+/// detection center, and exponentially smooth the cached rect size.
+/// \param pt          detection center
+/// \param newObjSize  detection size used for the smoothed rect size
+/// \param dataCorrect true when a real detection was assigned this frame
+/// \param frameSize   frame bounds used for the out-of-frame test
+///
+void CTrack::PointUpdate(
+        const cv::Point3f& pt,
+        const cv::Size& newObjSize,
+        bool dataCorrect,
+        const cv::Size& frameSize
+        )
+{
+    m_kalman.GetPointPrediction();
+
+    m_predictionPoint = m_kalman.Update(pt, dataCorrect);
+
+    if (dataCorrect)
+    {
+        // Exponential smoothing of the rect size: 10% new measurement,
+        // 90% previous estimate.
+        const int a1 = 1;
+        const int a2 = 9;
+        m_predictionRect.size.width = (a1 * newObjSize.width + a2 * m_predictionRect.size.width) / (a1 + a2);
+        m_predictionRect.size.height = (a1 * newObjSize.height + a2 * m_predictionRect.size.height) / (a1 + a2);
+    }
+
+    // Clamp a coordinate into [0, hi-1]; returns whether clamping occurred.
+    auto Clamp = [](track_t& v, int hi) -> bool
+    {
+        if (v < 0)
+        {
+            v = 0;
+            return true;
+        }
+        else if (hi && v > hi - 1)
+        {
+            v = static_cast<track_t>(hi - 1);
+            return true;
+        }
+        return false;
+    };
+    // NOTE(review): Clamp operates on the copy `p`, so m_predictionPoint
+    // itself is never clamped here — only m_outOfTheFrame is derived from it.
+    // Presumably intentional for lidar input with no pixel frame; confirm
+    // before changing.
+    auto p = m_predictionPoint;
+    m_outOfTheFrame = Clamp(p.x, frameSize.width) || Clamp(p.y, frameSize.height) || (m_predictionRect.size.width < 2) || (m_predictionRect.size.height < 2);
+
+    //std::cout << "predictionRect = " << m_predictionRect.boundingRect() << ", outOfTheFrame = " << m_outOfTheFrame << ", predictionPoint = " << m_predictionPoint << std::endl;
+}

+ 303 - 0
src/detection/detection_lidar_PointPillars_MultiHead/Tracker/track.h

@@ -0,0 +1,303 @@
+#pragma once
+#include <iostream>
+#include <vector>
+#include <deque>
+#include <memory>
+#include <array>
+
+#ifdef USE_OCV_KCF
+#include <opencv2/tracking.hpp>
+#endif
+
+#include "defines.h"
+#include "Kalman.h"
+
+// --------------------------------------------------------------------------
+///
+/// \brief The TrajectoryPoint struct
+/// One trajectory sample: the filter prediction plus, optionally, the raw
+/// detection it was corrected with. m_hasRaw distinguishes the two cases.
+///
+struct TrajectoryPoint
+{
+    ///
+    /// \brief TrajectoryPoint
+    /// Default: empty sample with no raw measurement.
+    ///
+    TrajectoryPoint()
+        : m_hasRaw(false)
+    {
+    }
+
+    ///
+    /// \brief TrajectoryPoint
+    /// Prediction-only sample (frame without a matched detection).
+    /// \param prediction
+    ///
+    TrajectoryPoint(const cv::Point3f& prediction)
+        :
+          m_hasRaw(false),
+          m_prediction(prediction)
+    {
+    }
+
+    ///
+    /// \brief TrajectoryPoint
+    /// Sample carrying both the prediction and the raw detection.
+    /// \param prediction
+    /// \param raw
+    ///
+    TrajectoryPoint(const cv::Point3f& prediction, const cv::Point3f& raw)
+        :
+          m_hasRaw(true),
+          m_prediction(prediction),
+          m_raw(raw)
+    {
+    }
+
+    bool m_hasRaw = false;     // true when m_raw holds a real detection
+    cv::Point3f m_prediction;  // filtered/predicted position
+    cv::Point3f m_raw;         // raw detection position (valid iff m_hasRaw)
+};
+
+// --------------------------------------------------------------------------
+///
+/// \brief The Trace class
+/// Bounded history of TrajectoryPoint samples for one track, backed by a
+/// deque. Index access returns the prediction part of a sample.
+///
+class Trace
+{
+public:
+    ///
+    /// \brief operator []
+    /// \param i sample index (no bounds checking)
+    /// \return the prediction of sample i
+    ///
+    const cv::Point3f& operator[](size_t i) const
+    {
+        return m_trace[i].m_prediction;
+    }
+
+    ///
+    /// \brief operator []
+    /// \param i sample index (no bounds checking)
+    /// \return mutable prediction of sample i
+    ///
+    cv::Point3f& operator[](size_t i)
+    {
+        return m_trace[i].m_prediction;
+    }
+
+    ///
+    /// \brief at
+    /// \param i sample index (no bounds checking, despite the name)
+    /// \return the full TrajectoryPoint at index i
+    ///
+    const TrajectoryPoint& at(size_t i) const
+    {
+        return m_trace[i];
+    }
+
+    ///
+    /// \brief size
+    /// \return number of stored samples
+    ///
+    size_t size() const
+    {
+        return m_trace.size();
+    }
+
+    ///
+    /// \brief push_back
+    /// Append a prediction-only sample (no raw measurement).
+    /// \param prediction
+    ///
+    void push_back(const cv::Point3f& prediction)
+    {
+        m_trace.emplace_back(prediction);
+    }
+    /// Append a sample carrying both the prediction and the raw detection.
+    void push_back(const cv::Point3f& prediction, const cv::Point3f& raw)
+    {
+        m_trace.emplace_back(prediction, raw);
+    }
+
+    ///
+    /// \brief pop_front
+    /// Drop the oldest `count` samples (or everything when count >= size).
+    /// \param count
+    ///
+    void pop_front(size_t count)
+    {
+        if (count < size())
+            m_trace.erase(m_trace.begin(), m_trace.begin() + count);
+        else
+            m_trace.clear();
+    }
+
+    ///
+    /// \brief GetRawCount
+    /// Count how many of the last `lastPeriod` samples carry a raw detection.
+    /// \param lastPeriod window size, measured from the newest sample
+    /// \return number of samples with m_hasRaw set within the window
+    ///
+    size_t GetRawCount(size_t lastPeriod) const
+    {
+        size_t res = 0;
+
+        size_t i = 0;
+        if (lastPeriod < m_trace.size())
+            i = m_trace.size() - lastPeriod;
+
+        for (; i < m_trace.size(); ++i)
+        {
+            if (m_trace[i].m_hasRaw)
+                ++res;
+        }
+
+        return res;
+    }
+
+private:
+    std::deque<TrajectoryPoint> m_trace;  // samples, oldest first
+};
+
+// --------------------------------------------------------------------------
+///
+/// \brief The TrackingObject class
+/// Immutable snapshot of one track handed to consumers of the tracker.
+///
+struct TrackingObject
+{
+    Rect3D m_rect;           // Filtered box coordinates
+    CRegion m_region;         // detect region
+    Trace m_trace;                     // Trajectory
+    size_t m_ID = 0;                   // Track ID
+    size_t m_regionID = 0;                   // ID of the matched detection region
+    bool m_isStatic = false;           // Object is abandoned (static)
+    bool m_outOfTheFrame = false;      // Is object out of frame
+    cv::Vec<track_t, 2> m_velocity;    // pixels/sec
+    size_t m_detectedFrames = 0;       // detected frames' count
+    ///
+    /// Copies the trajectory sample by sample, preserving the has-raw flag.
+    TrackingObject(const Rect3D& rect, const CRegion& region, size_t ID, size_t regionID, const Trace& trace,
+                   bool isStatic, bool outOfTheFrame, cv::Vec<track_t, 2> velocity, size_t detectedFrames)
+        :
+          m_rect(rect), m_region(region), m_ID(ID), m_regionID(regionID), m_isStatic(isStatic), m_outOfTheFrame(outOfTheFrame), m_velocity(velocity), m_detectedFrames(detectedFrames)
+    {
+        for (size_t i = 0; i < trace.size(); ++i)
+        {
+            auto tp = trace.at(i);
+            if (tp.m_hasRaw)
+                m_trace.push_back(tp.m_prediction, tp.m_raw);
+            else
+                m_trace.push_back(tp.m_prediction);
+        }
+    }
+    ///
+    /// Heuristic quality gate: trace long enough, ratio of raw detections
+    /// above minRawRatio, optional width/height aspect-ratio bounds
+    /// (sizeRatio.width = lower bound, sizeRatio.height = upper bound),
+    /// and the object must not be out of frame.
+    bool IsRobust(int minTraceSize, float minRawRatio, cv::Size2f sizeRatio) const
+    {
+        bool res = m_trace.size() > static_cast<size_t>(minTraceSize);
+        res &= m_trace.GetRawCount(m_trace.size() - 1) / static_cast<float>(m_trace.size()) > minRawRatio;
+        if (sizeRatio.width + sizeRatio.height > 0)
+        {
+            float sr = m_rect.size.width / m_rect.size.height;
+            if (sizeRatio.width > 0)
+                res &= (sr > sizeRatio.width);
+
+            if (sizeRatio.height > 0)
+                res &= (sr < sizeRatio.height);
+        }
+        if (m_outOfTheFrame)
+            res = false;
+
+        return res;
+    }
+};
+
+// --------------------------------------------------------------------------
+///
+/// \brief The CTrack class
+/// One tracked object: Kalman-filtered state, trajectory, last matched
+/// detection, and the static / out-of-frame bookkeeping. Created and driven
+/// by the tracker (see Ctracker.*).
+///
+class CTrack
+{
+public:
+    CTrack(const CRegion& region,
+           tracking::KalmanType kalmanType,
+           track_t deltaTime,
+           track_t accelNoiseMag,
+           bool useAcceleration,
+           size_t trackID,
+           size_t regionID,
+           tracking::FilterGoal filterObjectGoal,
+           tracking::LostTrackType externalTrackerForLost);
+
+    ///
+    /// \brief CalcDistCenter
+    /// Euclidean distance in pixels between objects centres on two N and N+1 frames
+    /// \param reg
+    /// \return
+    ///
+    track_t CalcDistCenter(const CRegion& reg) const;
+    ///
+    /// \brief CalcDistRect
+    /// Euclidean distance in pixels between object 2D rects (center/size/angle)
+    /// on two N and N+1 frames
+    /// \param reg
+    /// \return
+    ///
+    track_t CalcDistRect(const CRegion& reg) const;
+    ///
+    /// \brief CalcDistRect3D
+    /// Euclidean distance between object 3D boxes (center/size) on two N and
+    /// N+1 frames
+    /// \param reg
+    /// \return
+    ///
+    track_t CalcDistRect3D(const CRegion& reg) const;
+
+    //cv::RotatedRect CalcPredictionEllipse(cv::Size_<track_t> minRadius) const;
+    ///
+    /// \brief IsInsideArea
+    /// Test point inside in prediction area: prediction area + object velocity
+    /// \param pt
+    /// \param rrect
+    /// \return ellipse equation value; <= 1 means inside
+    ///
+    track_t IsInsideArea(const Point_t& pt, const cv::RotatedRect& rrect) const;
+    track_t WidthDist(const CRegion& reg) const;
+    track_t HeightDist(const CRegion& reg) const;
+
+    /// One tracking step: Kalman predict/correct plus trajectory bookkeeping.
+    void Update(const CRegion& region, bool dataCorrect, size_t max_trace_length, cv::UMat prevFrame, cv::UMat currFrame, int trajLen);
+
+    bool IsStatic() const;
+    bool IsStaticTimeout(int framesTime) const;
+    bool IsOutOfTheFrame() const;
+
+    Rect3D GetLastRect() const;
+
+    const cv::Point3f& AveragePoint() const;
+    cv::Point3f& AveragePoint();
+    const CRegion& LastRegion() const;
+    size_t SkippedFrames() const;
+    size_t& SkippedFrames();
+    size_t& DetectedFrames();
+
+    TrackingObject ConstructObject() const;
+public:
+    size_t m_regionID = 0;           // ID of the last matched detection region
+private:
+    Trace m_trace;                   // trajectory history
+    size_t m_trackID = 0;            // unique track ID
+    size_t m_skippedFrames = 0;      // consecutive frames without a detection
+    size_t m_detectedFrames = 0;     // total frames with a detection
+    CRegion m_lastRegion;            // last matched detection
+
+    cv::Point3f m_predictionPoint;   // filtered center point
+    cv::RotatedRect m_predictionRect;   // filtered 2D rect (FilterRect goal)
+    Rect3D m_predictionRect3D;       // filtered 3D box (FilterRect3D goal)
+    TKalmanFilter m_kalman;          // underlying Kalman filter
+    tracking::FilterGoal m_filterGoal = tracking::FilterCenter;
+    bool m_outOfTheFrame = false;    // last prediction left the frame bounds
+
+    tracking::LostTrackType m_externalTrackerForLost;
+
+    void RectUpdate(const CRegion& region, bool dataCorrect, cv::UMat prevFrame, cv::UMat currFrame);
+    void Rect3DUpdate(const CRegion& region, bool dataCorrect, cv::UMat prevFrame, cv::UMat currFrame);
+    void PointUpdate(const cv::Point3f& pt, const cv::Size& newObjSize, bool dataCorrect, const cv::Size& frameSize);
+
+    bool CheckStatic(int trajLen, cv::UMat currFrame, const CRegion& region);
+    bool m_isStatic = false;         // current static flag
+    int m_staticFrames = 0;          // frames spent static
+    cv::UMat m_staticFrame;          // frame snapshot at the moving->static transition
+    Rect3D m_staticRect;             // box snapshot at the moving->static transition
+};
+
+typedef std::vector<std::unique_ptr<CTrack>> tracks_t;
+

+ 26 - 8
src/detection/detection_lidar_PointPillars_MultiHead/detection_lidar_PointPillars_MultiHead.pro

@@ -1,6 +1,6 @@
 QT -= gui
 
-CONFIG += c++11 console
+CONFIG += c++14 console
 CONFIG -= app_bundle
 
 QMAKE_CXXFLAGS += -std=gnu++17
@@ -11,7 +11,7 @@ QMAKE_LFLAGS += -no-pie  -Wl,--no-as-needed
 # depend on your compiler). Please consult the documentation of the
 # deprecated API in order to know how to port your code away from it.
 DEFINES += QT_DEPRECATED_WARNINGS
-
+#DEFINES += DEBUG_SHOW
 # You can also make your code fail to compile if you use deprecated APIs.
 # In order to do so, uncomment the following line.
 # You can also select to disable deprecated APIs only up to a certain version of Qt.
@@ -20,13 +20,18 @@ DEFINES += QT_DEPRECATED_WARNINGS
 SOURCES += main.cpp \
     pointpillars.cc \
     ../../include/msgtype/object.pb.cc \
-    ../../include/msgtype/objectarray.pb.cc
+    ../../include/msgtype/objectarray.pb.cc \
+    Tracker/Ctracker.cpp \
+    Tracker/HungarianAlg.cpp \
+    Tracker/Kalman.cpp \
+    Tracker/track.cpp
 
 DISTFILES += \
     nms.cu \
     postprocess.cu \
     preprocess.cu \
-    scatter.cu
+    scatter.cu \
+    roiaware_pool3d_kernel.cu
 
 HEADERS += \
     common.h \
@@ -36,14 +41,24 @@ HEADERS += \
     preprocess.h \
     scatter.h \
     ../../include/msgtype/object.pb.h \
-    ../../include/msgtype/objectarray.pb.h
-
+    ../../include/msgtype/objectarray.pb.h \
+    Tracker/Ctracker.h \
+    Tracker/defines.h \
+    Tracker/HungarianAlg.h \
+    Tracker/Kalman.h \
+    Tracker/ShortPathCalculator.h \
+    Tracker/track.h \
+    Tracker/Tracking.hpp \
+    roiaware_pool3d.h
+
+INCLUDEPATH+=Tracker
 
 CUDA_SOURCES +=  \
     nms.cu \
     postprocess.cu \
     preprocess.cu \
-    scatter.cu
+    scatter.cu \
+    roiaware_pool3d_kernel.cu
 
 CUDA_SDK = "/usr/local/cuda/"   # cudaSDK路径
 
@@ -128,7 +143,7 @@ LIBS += -lcudart -lcufft -lyaml-cpp
 
 #LIBS += -L/home/adc/soft/cudnn-10.2-linux-x64-v7.6.5.32/cuda/lib64 -lcudnn
 
-LIBS +=  -lmyelin -lnvinfer -lnvonnxparser -lnvcaffe_parser
+LIBS +=  -L/usr/lib/aarch64-linux-gnu/ -lmyelin -lnvinfer -lnvonnxparser -lnvcaffe_parser
 
 #LIBS += -L/home/nvidia/git/libtorch_gpu-1.6.0-linux-aarch64/lib -ltorch_cuda  -ltorch -lc10 -ltorch_cpu
 
@@ -164,3 +179,6 @@ unix:LIBS +=  -lpcl_common\
         -lpcl_surface\
         -lpcl_tracking\
         -lpcl_visualization
+
+INCLUDEPATH += /usr/include/opencv4/
+LIBS += /usr/lib/aarch64-linux-gnu/libopencv*.so

+ 461 - 67
src/detection/detection_lidar_PointPillars_MultiHead/main.cpp

@@ -16,6 +16,10 @@
 #include <thread>
 #include "objectarray.pb.h"
 //#include "ivbacktrace.h"
+#include "Tracking.hpp"
+
+#include "roiaware_pool3d.h"
+#include "Eigen/Dense"
 
 iv::Ivfault *gfault = nullptr;
 iv::Ivlog *givlog = nullptr;
@@ -29,33 +33,310 @@ const int kNumPointFeature = 5;
 const int kOutputNumBoxFeature = 7;
 std::string gstrinput;
 std::string gstroutput;
+Eigen::Matrix3d rotation_matrix;
+Eigen::Vector3d trans_matrix;
+TrackerSettings settings;
+CTracker tracker(settings);
+bool m_isTrackerInitialized = false;
+
+
+#include <random>
+////////////////////用于nms////////////////////
+#include<opencv2/opencv.hpp>
+#define rad2Angle(rad) ((rad) * 180.0 / M_PI)
+
+const float smallinbig_threshold = 0.8;
+const float distance_threshold = 0.1;
+const float secondnms_threshold = 0.1;
+
+typedef struct {
+    cv::RotatedRect box;
+    std::vector<float> detection;
+    std::vector<float> bbox_pts;
+    int label;
+    float score;
+}BBOX3D;
+
+//将检测结果转为RotatedRect以便于nms计算
+bool GetRotatedRect(std::vector<float> &out_detections,std::vector<int> &out_labels,
+                    std::vector<float> &out_scores, std::vector<BBOX3D> &results)
+{
+    int obj_size = out_detections.size()/kOutputNumBoxFeature;
+    if(out_labels.size()==obj_size && out_scores.size()==obj_size)
+    {
+        for(int i=0;i<obj_size;i++)
+        {
+            BBOX3D result;
+            result.box = cv::RotatedRect(cv::Point2f(out_detections.at(i*7),out_detections.at(i*7+1)),
+                                         cv::Size2f(out_detections.at(i*7+3),out_detections.at(i*7+4)),
+                                         rad2Angle(out_detections.at(i*7+6)));
+            for(int j=0;j<kOutputNumBoxFeature;j++)
+                result.detection.push_back(out_detections.at(i*7+j));
+            result.label = out_labels.at(i);
+            result.score = out_scores.at(i);
+            results.push_back(result);
+        }
+        return true;
+    }
+    else
+    {
+        std::cout<<"the size of detections labels scores is not equal !!!"<<std::endl;
+        return false;
+    }
+
+}
+
+bool sort_score(BBOX3D box1,BBOX3D box2)
+{
+    return (box1.score > box2.score);
+}
+
+//计算两个旋转矩形的IOU
+float calcIOU(cv::RotatedRect rect1, cv::RotatedRect rect2)
+{
+    float areaRect1 = rect1.size.width * rect1.size.height;
+    float areaRect2 = rect2.size.width * rect2.size.height;
+    vector<cv::Point2f> vertices;
+    int intersectionType = cv::rotatedRectangleIntersection(rect1, rect2, vertices);
+    if (vertices.size()==0)
+        return 0.0;
+    else{
+        vector<cv::Point2f> order_pts;
+        cv::convexHull(cv::Mat(vertices), order_pts, true);
+        double area = cv::contourArea(order_pts);
+        float inner = (float) (area / (areaRect1 + areaRect2 - area + 0.0001));
+        //排除小框完全在大框里面的case
+        float areaMin = (areaRect1 < areaRect2)?areaRect1:areaRect2;
+        float innerMin = (float)(area / (areaMin + 0.0001));
+        if(innerMin > smallinbig_threshold)
+            inner = innerMin;
+        return inner;
+    }
+}
+//计算两个点的欧式距离
+float calcdistance(cv::Point2f center1, cv::Point2f center2)
+{
+    float distance = sqrt((center1.x-center2.x)*(center1.x-center2.x)+
+                          (center1.y-center2.y)*(center1.y-center2.y));
+    return distance;
+}
+
+
+//nms
+void nms(std::vector<BBOX3D> &vec_boxs,float threshold,std::vector<BBOX3D> &results)
+{
+    std::sort(vec_boxs.begin(),vec_boxs.end(),sort_score);
+    while(vec_boxs.size() > 0)
+    {
+        results.push_back(vec_boxs[0]);
+        vec_boxs.erase(vec_boxs.begin());
+        for (auto it = vec_boxs.begin(); it != vec_boxs.end();)
+        {
+            float iou_value =calcIOU(results.back().box,(*it).box);
+            float distance_value = calcdistance(results.back().box.center,(*it).box.center);
+            if ((iou_value > threshold) || (distance_value<distance_threshold))
+                it = vec_boxs.erase(it);
+            else it++;
+        }
+
+//        std::cout<<"results: "<<results.back().detection.at(0)<<" "<<results.back().detection.at(1)<<
+//                   " "<<results.back().detection.at(2)<<std::endl;
+
+    }
+}
+void GetLidarObj(std::vector<BBOX3D> &results,iv::lidar::objectarray & lidarobjvec)
+{
+    int i;
+    int obj_size = results.size();
+    //    givlog->verbose("OBJ","object size is %d",obj_size);;
+    for(i=0;i<obj_size;i++)
+    {
+        iv::lidar::lidarobject lidarobj;
+        if (results.at(i).score < 0.10) {
+            std::cout<<"///////: "<<results.at(i).score<<std::endl;
+            continue;
+        }
+
+
+        //if (results.at(i).label == 5) continue;
+
+        vector<float>out_detection = results.at(i).detection;
+        lidarobj.set_tyaw(out_detection.at(6));
+        iv::lidar::PointXYZ centroid;
+        iv::lidar::PointXYZ * _centroid;
+        centroid.set_x(out_detection.at(0));
+        centroid.set_y(out_detection.at(1));
+        centroid.set_z(out_detection.at(2));
+        _centroid = lidarobj.mutable_centroid();
+        _centroid->CopyFrom(centroid);
+
+        iv::lidar::PointXYZ min_point;
+        iv::lidar::PointXYZ * _min_point;
+        min_point.set_x(0);
+        min_point.set_y(0);
+        min_point.set_z(0);
+        _min_point = lidarobj.mutable_min_point();
+        _min_point->CopyFrom(min_point);
+
+        iv::lidar::PointXYZ max_point;
+        iv::lidar::PointXYZ * _max_point;
+        max_point.set_x(0);
+        max_point.set_y(0);
+        max_point.set_z(0);
+        _max_point = lidarobj.mutable_max_point();
+        _max_point->CopyFrom(max_point);
+        iv::lidar::PointXYZ position;
+        iv::lidar::PointXYZ * _position;
+        position.set_x(out_detection.at(0));
+        position.set_y(out_detection.at(1));
+        position.set_z(out_detection.at(2));
+        _position = lidarobj.mutable_position();
+        _position->CopyFrom(position);
+        lidarobj.set_mntype(results.at(i).label);
+        // label 2  8
+        if(results.at(i).label==2){
+            lidarobj.set_mntype(8);
+        }else if(results.at(i).label==8){
+            lidarobj.set_mntype(2);
+        }
+        lidarobj.set_score(results.at(i).score);
+        lidarobj.add_type_probs(results.at(i).score);
+        iv::lidar::PointXYZI point_cloud;
+        iv::lidar::PointXYZI * _point_cloud;
+        point_cloud.set_x(out_detection.at(0));
+        point_cloud.set_y(out_detection.at(1));
+        point_cloud.set_z(out_detection.at(2));
+        point_cloud.set_i(results.at(i).label);
+        _point_cloud = lidarobj.add_cloud();
+        _point_cloud->CopyFrom(point_cloud);
+        iv::lidar::Dimension ld;
+        iv::lidar::Dimension * pld;
+        ld.set_x(out_detection.at(3));
+        ld.set_y(out_detection.at(4));
+        ld.set_z(out_detection.at(5));
+        pld = lidarobj.mutable_dimensions();
+        pld->CopyFrom(ld);
+
+//        std::cout<<"x y z:  "<<out_detection.at(0)<<" "<<out_detection.at(1)<<" "<<
+//        out_detection.at(2)<<" "<<out_detection.at(3)<<" "<< out_detection.at(4)<<" "
+//        <<out_detection.at(5)<<" "<<out_detection.at(6)<<std::endl;
+
+        iv::lidar::lidarobject * po = lidarobjvec.add_obj();
+        po->CopyFrom(lidarobj);
+    }
+
+//   std::cout<<"the lidarobjvec: "<<lidarobjvec.obj_size()<<std::endl;
+}
+////////////////////用于nms////////////////////
+
+
+////////////////////用于获得3dbbox中点云个数////////////////////
+#if 0
+inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rot_angle, float &local_x, float &local_y){
+    float cosa = cos(-rot_angle), sina = sin(-rot_angle);
+    local_x = shift_x * cosa + shift_y * (-sina);
+    local_y = shift_x * sina + shift_y * cosa;
+}
+
+
+inline int check_pt_in_box3d_cpu(const float *pt, std::vector<float> &box3d, float &local_x, float &local_y){
+    // param pt: (x, y, z)
+    // param box3d: [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center
+    const float MARGIN = 1e-2;
+    float x = pt[0], y = pt[1], z = pt[2];
+    float cx = box3d[0], cy = box3d[1], cz = box3d[2];
+    float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6];
+
+    if (fabsf(z - cz) > dz / 2.0) return 0;
+    lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y);
+    float in_flag = (fabs(local_x) < dx / 2.0 + MARGIN) & (fabs(local_y) < dy / 2.0 + MARGIN);
+    return in_flag;
+}
+
+int points_in_boxes_cpu(std::vector<BBOX3D>& boxes, float* pts_lidar,
+                        int pts_num, std::vector<std::vector<int>>& pts_indices){
+
+    std::vector<std::vector<float>> pts_bboxes;
+
+    int boxes_num = boxes.size();
+    float local_x = 0, local_y = 0;
+    for (int i = 0; i < boxes_num; i++){
+        std::vector<float>pts_bbox;
+        for (int j = 0; j < pts_num; j++){
+            int cur_in_flag = check_pt_in_box3d_cpu(pts_lidar + j * 5, boxes[i].detection, local_x, local_y);
+            pts_indices[i][j] = cur_in_flag;
+            if(cur_in_flag)
+            {
+                pts_bbox.push_back(pts_lidar[j*5+0]);
+                pts_bbox.push_back(pts_lidar[j*5+1]);
+                pts_bbox.push_back(pts_lidar[j*5+2]);
+                pts_bbox.push_back(pts_lidar[j*5+3]);
+                pts_bbox.push_back(pts_lidar[j*5+4]);
+            }
+
+        }
+        pts_bboxes.push_back(pts_bbox);
+
+        std::cout<<"the size of points: "<<i<<" : "<<pts_bbox.size() / 5 <<std::endl;
+
+        pts_bbox.clear();
+    }
+
+    return 1;
+}
+
+#endif
+////////////////////用于获得3dbbox中点云个数////////////////////
 
 
 void PclToArray(
-    const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr,
-    float* out_points_array, const float normalizing_factor) {
-  for (size_t i = 0; i < in_pcl_pc_ptr->size(); ++i) {
-    pcl::PointXYZI point = in_pcl_pc_ptr->at(i);
-    out_points_array[i * 4 + 0] = point.x;
-    out_points_array[i * 4 + 1] = point.y;
-    out_points_array[i * 4 + 2] = point.z;
-    out_points_array[i * 4 + 3] =
-        static_cast<float>(point.intensity / normalizing_factor);
-  }
+        const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr,
+        float* out_points_array, const float normalizing_factor) {
+    for (size_t i = 0; i < in_pcl_pc_ptr->size(); ++i) {
+        pcl::PointXYZI point = in_pcl_pc_ptr->at(i);
+        out_points_array[i * 4 + 0] = point.x;
+        out_points_array[i * 4 + 1] = point.y;
+        out_points_array[i * 4 + 2] = point.z;
+        out_points_array[i * 4 + 3] =
+                static_cast<float>(point.intensity / normalizing_factor);
+    }
 }
 
 void PclXYZITToArray(
-    const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr,
-    float* out_points_array, const float normalizing_factor) {
-  for (size_t i = 0; i < in_pcl_pc_ptr->size(); ++i) {
-    pcl::PointXYZI point = in_pcl_pc_ptr->at(i);
-    out_points_array[i * 5 + 0] = point.x;
-    out_points_array[i * 5 + 1] = point.y;
-    out_points_array[i * 5 + 2] = point.z;
-    out_points_array[i * 5 + 3] =
-        static_cast<float>(point.intensity / normalizing_factor);
-    out_points_array[i * 5 + 4] = 0;
-  }
+        const pcl::PointCloud<pcl::PointXYZI>::Ptr& in_pcl_pc_ptr,
+        float* out_points_array, const float normalizing_factor) {
+
+    /////shuffle the index array/////
+    bool shuffle = true;
+    int point_num = in_pcl_pc_ptr->size();
+    std::vector<int>indices(point_num);
+    std::iota(indices.begin(),indices.end(),0);
+    if(shuffle)
+    {
+//        unsigned seed = 0;
+//        std::shuffle(indices.begin(),indices.end(),std::default_random_engine(seed));
+
+        std::random_device rd;
+        std::mt19937 g(rd());
+        std::shuffle(indices.begin(),indices.end(),g);
+
+    }
+    /////shuffle the index array/////
+
+    for (size_t i = 0; i < in_pcl_pc_ptr->size(); ++i) {
+        //pcl::PointXYZI point = in_pcl_pc_ptr->at(i);
+        int indice = indices[i];
+        pcl::PointXYZI point = in_pcl_pc_ptr->at(indice);
+        Eigen::Vector3d new_point, old_point;
+        old_point<<point.x, point.y, point.z;
+        new_point = rotation_matrix * (old_point) + trans_matrix;
+        out_points_array[i * 5 + 0] = new_point[0];
+        out_points_array[i * 5 + 1] = new_point[1];
+        out_points_array[i * 5 + 2] = new_point[2];
+        out_points_array[i * 5 + 3] =
+                static_cast<float>(point.intensity / normalizing_factor);
+        out_points_array[i * 5 + 4] = 0;
+    }
 }
 
 void GetLidarObj(std::vector<float> out_detections,std::vector<int> out_labels,
@@ -67,8 +348,7 @@ void GetLidarObj(std::vector<float> out_detections,std::vector<int> out_labels,
     for(i=0;i<obj_size;i++)
     {
         iv::lidar::lidarobject lidarobj;
-        if (out_scores.at(i) < 0.12) continue;
-        if (out_labels.at(i) == 5) continue;
+        if (out_scores.at(i) < 0.10) continue;
 
         lidarobj.set_tyaw(out_detections.at(i*7+6));
         iv::lidar::PointXYZ centroid;
@@ -117,16 +397,16 @@ void GetLidarObj(std::vector<float> out_detections,std::vector<int> out_labels,
         point_cloud.set_x(out_detections.at(i*7));
         point_cloud.set_y(out_detections.at(i*7+1));
         point_cloud.set_z(out_detections.at(i*7+2));
-        point_cloud.set_i(out_detections.at(out_labels.at(i)));
+        point_cloud.set_i(0);
 
         _point_cloud = lidarobj.add_cloud();
         _point_cloud->CopyFrom(point_cloud);
 
         iv::lidar::Dimension ld;
         iv::lidar::Dimension * pld;
-        ld.set_x(out_detections.at(i*7+3));
-        ld.set_y(out_detections.at(i*7+4));
-        ld.set_z(out_detections.at(i*7+5));
+        ld.set_x(out_detections.at(i*7+3));// w
+        ld.set_y(out_detections.at(i*7+4));// l
+        ld.set_z(out_detections.at(i*7+5));// h
         pld = lidarobj.mutable_dimensions();
         pld->CopyFrom(ld);
 
@@ -134,15 +414,16 @@ void GetLidarObj(std::vector<float> out_detections,std::vector<int> out_labels,
         iv::lidar::lidarobject * po = lidarobjvec.add_obj();
         po->CopyFrom(lidarobj);
     }
+
 }
 
 void DectectOnePCD(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr)
 {
     std::shared_ptr<float> points_array_ptr = std::shared_ptr<float>(new float[pc_ptr->size() * kNumPointFeature]);
- //   float* points_array = new float[pc_ptr->size() * kNumPointFeature];
+    //   float* points_array = new float[pc_ptr->size() * kNumPointFeature];
     PclXYZITToArray(pc_ptr, points_array_ptr.get(), 1.0);
 
-    int    in_num_points = pc_ptr->width;
+    int in_num_points = pc_ptr->width;
 
     std::vector<float> out_detections;
     std::vector<int> out_labels;
@@ -151,34 +432,146 @@ void DectectOnePCD(const pcl::PointCloud<pcl::PointXYZI>::Ptr &pc_ptr)
     QTime xTime;
 
     xTime.start();
-
+    auto startTime = std::chrono::high_resolution_clock::now();
     cudaDeviceSynchronize();
     pPillars->DoInference(points_array_ptr.get(), in_num_points, &out_detections, &out_labels , &out_scores);
     cudaDeviceSynchronize();
-    int BoxFeature = 7;
-    int num_objects = out_detections.size() / BoxFeature;
+    auto endTime = std::chrono::high_resolution_clock::now();
+    double inferenceDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count()/1000000.0;
+//    std::cout<< "inferenceDuration Time: " << inferenceDuration << " ms"<< std::endl;
 
+
+//    int BoxFeature = 7;
+//    int num_objects = out_detections.size() / BoxFeature;
 //    givlog->verbose("obj size is %d", num_objects);
 //    std::cout<<"obj size is "<<num_objects<<std::endl;
 
-//    std::vector<iv::lidar::lidarobject> lidarobjvec;
+
+//    iv::lidar::objectarray lidarobjvec;
+//    GetLidarObj(out_detections,out_labels,out_scores,lidarobjvec);
+
+    //////////nms//////////
+    //startTime = std::chrono::high_resolution_clock::now();
+    std::vector<BBOX3D>results_rect;
+    GetRotatedRect(out_detections,out_labels,out_scores,results_rect);
+//    std::cout<<"results_rect size: "<<results_rect.size()<<std::endl;
+
+    std::vector<BBOX3D>results_bbox;
+    nms(results_rect,secondnms_threshold,results_bbox);
+//    std::cout<<"results_bbox size: "<<results_bbox.size()<<std::endl;
+
+      //get lidar points in 3Dbbox in cpu
+//    startTime = std::chrono::high_resolution_clock::now();
+//    //get lidar points in 3Dbbox
+//    int boxes_num = results_bbox.size();
+//    std::vector<std::vector<int>>pts_indices(boxes_num, vector<int>(in_num_points, 0));
+//    points_in_boxes_cpu(results_bbox,points_array_ptr.get(),in_num_points,pts_indices);
+//    endTime = std::chrono::high_resolution_clock::now();
+//    double nmsDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count()/1000000.0;
+//    std::cout <<"3DBoxDuration Time : "<<nmsDuration<< endl;
+
+
+
+    //get lidar points in 3Dbbox in gpu
+    startTime = std::chrono::high_resolution_clock::now();
+    float* dev_points;
+    GPU_CHECK(cudaMalloc(reinterpret_cast<void**>(&dev_points),
+                        in_num_points * 5 * sizeof(float))); // in_num_points , 5
+    GPU_CHECK(cudaMemset(dev_points, 0, in_num_points * 5 * sizeof(float)));
+    GPU_CHECK(cudaMemcpy(dev_points, points_array_ptr.get(),
+                        in_num_points * 5 * sizeof(float),
+                        cudaMemcpyHostToDevice));
+    int* box_idx_of_points_gpu;
+    GPU_CHECK(cudaMalloc(reinterpret_cast<void**>(&box_idx_of_points_gpu),
+                        in_num_points * sizeof(int))); // in_num_points , 5
+    GPU_CHECK(cudaMemset(box_idx_of_points_gpu, -1, in_num_points * sizeof(float)));
+
+    int boxes_num = results_bbox.size();
+    float *boxes_cpu = new float[boxes_num*7];
+    for(int i=0;i<boxes_num;i++)
+    {
+        for(int j=0;j<7;j++)
+            *(boxes_cpu + (i*7+j)) = results_bbox[i].detection[j];
+    }
+    float *boxes_gpu;
+    GPU_CHECK(cudaMalloc(reinterpret_cast<void**>(&boxes_gpu),
+                        boxes_num * 7 * sizeof(float))); // in_num_points , 5
+    GPU_CHECK(cudaMemset(boxes_gpu, 0, boxes_num * 7 * sizeof(float)));
+    GPU_CHECK(cudaMemcpy(boxes_gpu, boxes_cpu,boxes_num * 7 * sizeof(float),
+                        cudaMemcpyHostToDevice));
+    int batch_size = 1;
+    points_in_boxes_launcher(batch_size,boxes_num,in_num_points,boxes_gpu,dev_points,box_idx_of_points_gpu);
+    int* box_idx_of_points_cpu = new int[in_num_points]();
+    cudaMemcpy(box_idx_of_points_cpu, box_idx_of_points_gpu, in_num_points * sizeof(int), cudaMemcpyDeviceToHost);
+    //vector<int> box_idx_of_points(box_idx_of_points_cpu,box_idx_of_points_cpu+in_num_points);
+
+
+    //cv::Mat image=cv::Mat::zeros(1200,1200,CV_8UC3);
+    //存储bbox的点云
+    for(int i=0; i < in_num_points; i++)
+    {
+
+        for(int j=0; j<boxes_num; j++)
+        {
+            if (box_idx_of_points_cpu[i] == j)
+            {
+                for(int idx=0; idx<5; idx++)
+                    results_bbox[j].bbox_pts.push_back(points_array_ptr.get()[i*5+idx]);
+//                int x = int(points_array_ptr.get()[i*5]*10+600);
+//                int y = int(points_array_ptr.get()[i*5+1]*10+600);
+//                cv::circle(image,cv::Point(x,y),2,cv::Scalar(0,0,255));
+            }
+
+        }
+    }
+//    for(int j=0; j<boxes_num; j++)
+//    {
+
+//        std::cout<<"num points in bbox: "<<results_bbox[j].bbox_pts.size()/5<<std::endl;
+//    }
+
+
+    endTime = std::chrono::high_resolution_clock::now();
+    double gpuDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count()/1000000.0;
+//    std::cout <<"3DBoxDuration_gpu Time : "<<gpuDuration<< endl;
+
+
+    //endTime = std::chrono::high_resolution_clock::now();
+    //double nmsDuration = std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count()/1000000.0;
+    //std::cout <<"nmsDuration Time : "<<(double)(ends - start)/ CLOCKS_PER_SEC*1000 << endl;
+
     iv::lidar::objectarray lidarobjvec;
-    GetLidarObj(out_detections,out_labels,out_scores,lidarobjvec);
+    GetLidarObj(results_bbox,lidarobjvec);
+    //////////nms//////////
 
     double timex = pc_ptr->header.stamp;
     timex = timex/1000.0;
     lidarobjvec.set_timestamp(pc_ptr->header.stamp);
 
+    //---------------------------------------------  init tracker  -------------------------------------------------
+//    if (!m_isTrackerInitialized)
+//    {
+//        m_isTrackerInitialized = InitTracker(tracker);
+//        if (!m_isTrackerInitialized)
+//        {
+//            std::cerr << "Tracker initialize error!!!" << std::endl;
+//        }
+//    }
+//    iv::lidar::objectarray trackedobjvec = Tracking(lidarobjvec, tracker);
+
+    //--------------------------------------------  end tracking  --------------------------------------------------
+
     int ntlen;
     std::string out = lidarobjvec.SerializeAsString();
+
     iv::modulecomm::ModuleSendMsg(gpdetect,out.data(),out.length());
 
-//    givlog->verbose("lenth is %d",out.length());
+    //    givlog->verbose("lenth is %d",out.length());
 }
 
 void ListenPointCloud(const char *strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
 {
-//    std::cout<<" is  ok  ------------  "<<std::endl;
+    //    std::cout<<" is  ok  ------------  "<<std::endl;
     if(nSize <=16)return;
     unsigned int * pHeadSize = (unsigned int *)strdata;
     if(*pHeadSize > nSize)
@@ -210,9 +603,9 @@ void ListenPointCloud(const char *strdata,const unsigned int nSize,const unsigne
     {
         pcl::PointXYZI xp;
         memcpy(&xp,p,sizeof(pcl::PointXYZI));
-        xp.z = xp.z;
+        xp.z = xp.z;p++;
+        if (((abs(xp.x)<0.5)&&(xp.y<0.5 && xp.y > -1.0)) || xp.z > 3.0 ) continue;
         point_cloud->push_back(xp);
-        p++;
     }
 
     DectectOnePCD(point_cloud);
@@ -272,10 +665,7 @@ void exitfunc()
     iv::modulecomm::Unregister(gpdetect);
     std::cout<<"exit func complete"<<std::endl;
 }
-
-#include <QDir>
 #include <QFile>
-
 bool trtisexist(std::string strpfe,std::string strbackbone)
 {
     QFile xFile;
@@ -291,13 +681,12 @@ bool trtisexist(std::string strpfe,std::string strbackbone)
     }
     return true;
 }
-
 int main(int argc, char *argv[])
 {
     QCoreApplication a(argc, argv);
 
-//    RegisterIVBackTrace();
-
+    //    RegisterIVBackTrace();
+    tracker.setSettings(settings);
     gfault = new iv::Ivfault("lidar_pointpillar");
     givlog = new iv::Ivlog("lidar_pointpillar");
 
@@ -306,13 +695,11 @@ int main(int argc, char *argv[])
     char * strhome = getenv("HOME");
     std::string pfe_file = strhome;
     pfe_file += "/models/lidar/cbgs_pp_multihead_pfe.onnx";
-    std::string pfe_trt_file = strhome;
-    pfe_trt_file += "/models/lidar/cbgs_pp_multihead_pfe.trt";
+    std::string pfe_trt_file = pfe_file.substr(0, pfe_file.find(".")) + ".trt";
 
     std::string backbone_file = strhome;
     backbone_file += "/models/lidar/cbgs_pp_multihead_backbone.onnx";
-    std::string backbone_trt_file = strhome;
-    backbone_trt_file += "/models/lidar/cbgs_pp_multihead_backbone.trt";
+    std::string backbone_trt_file = backbone_file.substr(0, backbone_file.find(".")) + ".trt";
 
     bool btrtexist = trtisexist(pfe_trt_file,backbone_trt_file);
 
@@ -332,33 +719,40 @@ int main(int argc, char *argv[])
     backbone_file = xparam.GetParam("backbone_file",backbone_file.data());
     gstrinput = xparam.GetParam("input","lidar_pc");
     gstroutput = xparam.GetParam("output","lidar_pointpillar");
+
     if(btrtexist == false)
     {
 
-       std::cout<<"use onnx model."<<std::endl;
-    pPillars = new PointPillars(
-      0.15,
-      0.10,
-      true,
-      pfe_file,
-      backbone_file,
-      pp_config
-    );
+        std::cout<<"use onnx model."<<std::endl;
+        pPillars = new PointPillars(
+                    0.15,
+                    0.10,
+                    true,
+                    pfe_file,
+                    backbone_file,
+                    pp_config
+                    );
     }
     else
     {
-        std::cout<<"use engine mode."<<std::endl;
-    pPillars = new PointPillars(
-      0.15,
-      0.10,
-      false,
-      pfe_trt_file,
-      backbone_trt_file,
-      pp_config
-    );
+        std::cout<<"use trt model."<<std::endl;
+        pPillars = new PointPillars(
+                    0.15,
+                    0.10,
+                    false,
+                    pfe_trt_file,
+                    backbone_trt_file,
+                    pp_config
+                    );
     }
     std::cout<<"PointPillars Init OK."<<std::endl;
-
+    Eigen::AngleAxisd r_z ( 0.00654, Eigen::Vector3d ( 0,0,1 ) ); //沿 Z 轴旋转 yaw   +
+    Eigen::AngleAxisd r_y ( 0.13, Eigen::Vector3d ( 0,1,0 ) ); //沿 Y 轴旋转 roll  +
+    Eigen::AngleAxisd r_x ( -0.0377, Eigen::Vector3d ( 1,0,0 ) ); //沿 X 轴旋转 pitch -
+    Eigen::Quaterniond q_zyx = r_z*r_y*r_x; //ZYX旋转顺序(绕旋转后的轴接着旋转)
+    // 四元数-->>旋转矩阵
+    rotation_matrix = q_zyx.toRotationMatrix();
+    trans_matrix << 0, 1.1, 0.35;//x,y,z
     gpa = iv::modulecomm::RegisterRecv(gstrinput.data(),ListenPointCloud);
     gpdetect = iv::modulecomm::RegisterSend(gstroutput.data(), 10000000,1);
     gpthread = new std::thread(statethread);

+ 36 - 82
src/detection/detection_lidar_PointPillars_MultiHead/pointpillars.cc

@@ -224,13 +224,28 @@ void PointPillars::DeviceMemoryMalloc() {
 
     GPU_CHECK(cudaMalloc(&rpn_buffers_[0],  kRpnInputSize * sizeof(float)));
 
-    GPU_CHECK(cudaMalloc(&rpn_buffers_[1],  kNumAnchorPerCls  * sizeof(float)));  //classes
-    GPU_CHECK(cudaMalloc(&rpn_buffers_[2],  kNumAnchorPerCls  * 2 * 2 * sizeof(float)));
-    GPU_CHECK(cudaMalloc(&rpn_buffers_[3],  kNumAnchorPerCls  * 2 * 2 * sizeof(float)));
-    GPU_CHECK(cudaMalloc(&rpn_buffers_[4],  kNumAnchorPerCls  * sizeof(float)));
-    GPU_CHECK(cudaMalloc(&rpn_buffers_[5],  kNumAnchorPerCls  * 2 * 2 * sizeof(float)));
-    GPU_CHECK(cudaMalloc(&rpn_buffers_[6],  kNumAnchorPerCls  * 2 * 2 * sizeof(float)));
-    
+
+    if(kNumClass == 10)
+    {
+////class = 10
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[1],  kNumAnchorPerCls  * sizeof(float)));  //classes
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[2],  kNumAnchorPerCls  * 2 * 2 * sizeof(float)));
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[3],  kNumAnchorPerCls  * 2 * 2 * sizeof(float)));
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[4],  kNumAnchorPerCls  * sizeof(float)));
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[5],  kNumAnchorPerCls  * 2 * 2 * sizeof(float)));
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[6],  kNumAnchorPerCls  * 2 * 2 * sizeof(float)));
+    }
+    else
+    {
+////class = 7
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[1],  kNumAnchorPerCls  * sizeof(float)));  //classes
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[2],  kNumAnchorPerCls  * sizeof(float)));
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[3],  kNumAnchorPerCls  * sizeof(float)));
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[4],  kNumAnchorPerCls  * sizeof(float)));
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[5],  kNumAnchorPerCls  * sizeof(float)));
+        GPU_CHECK(cudaMalloc(&rpn_buffers_[6],  kNumAnchorPerCls  * 2 * 2 * sizeof(float)));
+    }
+
     GPU_CHECK(cudaMalloc(&rpn_buffers_[7],  kNumAnchorPerCls * kNumClass * kNumOutputBoxFeature * sizeof(float))); //boxes
 
     // for scatter kernel
@@ -283,33 +298,13 @@ void PointPillars::SetDeviceMemoryToZero() {
     GPU_CHECK(cudaMemset(dev_filter_count_,     0, kNumClass * sizeof(int)));
 }
 
-
-#include <fstream>
-#include <iostream>
-#
-
-void PointPillars::SaveEngine(nvinfer1::ICudaEngine* pengine,std::string strpath)
-{
-    nvinfer1::IHostMemory* data = pengine->serialize();
-    std::ofstream file;
-    file.open(strpath,std::ios::binary | std::ios::out);
-    if(!file.is_open())
-    {
-        std::cout << "read create engine file" << strpath <<" failed" << std::endl;
-        return;
-    }
-
-    file.write((const char*)data->data(), data->size());
-    file.close();
-}
-
 void PointPillars::InitTRT(const bool use_onnx) {
   if (use_onnx_) {
     // create a TensorRT model from the onnx model and load it into an engine
     OnnxToTRTModel(pfe_file_, &pfe_engine_);
-    SaveEngine(pfe_engine_,"/home/nvidia/models/lidar/cbgs_pp_multihead_pfe.trt");
+    SaveEngine(pfe_engine_, pfe_file_.substr(0, pfe_file_.find(".")) + ".trt");
     OnnxToTRTModel(backbone_file_, &backbone_engine_);
-    SaveEngine(backbone_engine_,"/home/nvidia/models/lidar/cbgs_pp_multihead_backbone.trt");
+    SaveEngine(backbone_engine_, backbone_file_.substr(0, backbone_file_.find(".")) + ".trt");
   }else {
     EngineToTRTModel(pfe_file_, &pfe_engine_);
     EngineToTRTModel(backbone_file_, &backbone_engine_);
@@ -363,58 +358,19 @@ void PointPillars::OnnxToTRTModel(
     builder->destroy();
 }
 
-void PointPillars::LoadEngineModel(const string &engine_file, nvinfer1::ICudaEngine **engine_ptr)
+void PointPillars::SaveEngine(const nvinfer1::ICudaEngine* engine, const std::string& engine_filepath)
 {
-//    using namespace std;
-//    fstream file;
-
-//    file.open(engine_file,ios::binary | ios::in);
-//    if(!file.is_open())
-//    {
-//        cout << "read engine file" << engine_file <<" failed" << endl;
-//        return;
-//    }
-//    file.seekg(0, ios::end);
-//    int length = file.tellg();
-//    file.seekg(0, ios::beg);
-//    std::unique_ptr<char[]> data(new char[length]);
-//    file.read(data.get(), length);
-
-//    file.close();
-
-//    std::cout << "deserializing" << std::endl;
-//    mTrtRunTime = createInferRuntime(gLogger);
-//    assert(mTrtRunTime != nullptr);
-//    mTrtEngine= mTrtRunTime->deserializeCudaEngine(data.get(), length, &mTrtPluginFactory);
-//    assert(mTrtEngine != nullptr);
-
-//    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(g_logger_);
-
-//    if (runtime == nullptr) {
-//        std::string msg("failed to build runtime parser");
-//        g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
-//        exit(EXIT_FAILURE);
-//    }
-
-
-
-//    std::cout << "                                                                  "<< std::endl;
-//    std::cout << "------------------------------------------------------------------"<< std::endl;
-//    std::cout << ">>>>                                                          >>>>"<< std::endl;
-//    std::cout << "                                                                  "<< std::endl;
-//    std::cout << "Input filename:   " << engine_file << std::endl;
-//    std::cout << "                                                                  "<< std::endl;
-//    std::cout << ">>>>                                                          >>>>"<< std::endl;
-//    std::cout << "------------------------------------------------------------------"<< std::endl;
-//    std::cout << "                                                                  "<< std::endl;
-
-//    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(modelMem, modelSize, NULL);
-//    if (engine == nullptr) {
-//        std::string msg("failed to build engine parser");
-//        g_logger_.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
-//        exit(EXIT_FAILURE);
-//    }
-//    *engine_ptr = engine;
+    // serialize the engine, then close everything down
+    nvinfer1::IHostMemory& trtModelStream = *(engine->serialize());
+    std::ofstream file;
+    file.open(engine_filepath, std::ios::binary | std::ios::out);
+    if(!file.is_open())
+    {
+        std::cout << "read create engine file" << engine_filepath <<" failed" << std::endl;
+        return;
+    }
+    file.write((const char*)trtModelStream.data(), std::streamsize(trtModelStream.size()));
+    file.close();
 }
 
 void PointPillars::EngineToTRTModel(
@@ -561,8 +517,6 @@ void PointPillars::DoInference(const float* in_points_array,
 //        std::cout << setiosflags(ios::left) << setw(14) << Modules[i]  << setw(8)  << Times[i] * 1000 << " ms" << resetiosflags(ios::left) << std::endl;
 //    }
 //    std::cout << "------------------------------------" << std::endl;
-
-    GPU_CHECK(cudaFree(dev_points));
     cudaStreamDestroy(stream);
 
 }

+ 1 - 5
src/detection/detection_lidar_PointPillars_MultiHead/pointpillars.h

@@ -225,6 +225,7 @@ class PointPillars {
      * @details Called in the constructor
      */
     void InitTRT(const bool use_onnx);
+    void SaveEngine(const nvinfer1::ICudaEngine* engine, const std::string& engine_filepath);
     /**
      * @brief Convert ONNX to TensorRT model
      * @param[in] model_file ONNX model file path
@@ -243,9 +244,6 @@ class PointPillars {
     void EngineToTRTModel(const std::string &engine_file ,     
                         nvinfer1::ICudaEngine** engine_ptr) ;
 
-    void LoadEngineModel(const std::string &engine_file ,
-                         nvinfer1::ICudaEngine** engine_ptr) ;
-
     /**
      * @brief Preproces points
      * @param[in] in_points_array Point cloud array
@@ -272,8 +270,6 @@ class PointPillars {
                 const std::string pp_config);
     ~PointPillars();
 
-    void SaveEngine(nvinfer1::ICudaEngine* pengine,std::string strpath);
-
     /**
      * @brief Call PointPillars for the inference
      * @param[in] in_points_array Point cloud array

+ 176 - 45
src/detection/detection_lidar_PointPillars_MultiHead/postprocess.cu

@@ -143,6 +143,7 @@ __global__ void sigmoid_filter_kernel(
     if( cls_score[ threadIdx.x ] > score_threshold) 
     {
         int counter = atomicAdd(&filter_count[blockIdx.z], 1);
+        //printf("counter : %d \n" , counter);
         if ( blockIdx.z == 0) {
             box_decode_warp(0 ,box_pred_0 , tid , num_anchors_per_head , counter , filtered_box);
             filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
@@ -188,38 +189,139 @@ __global__ void sigmoid_filter_kernel(
     __syncthreads();  
     if( cls_score[ threadIdx.x + blockDim.x ] > score_threshold)  {     
             int counter = atomicAdd(&filter_count[blockIdx.z], 1);
-            // printf("counter : %d \n" , counter);
+            //printf("counter : %d \n" , counter);
             if (blockIdx.z == 1) {
                 box_decode_warp(0 ,box_pred_2 , tid , num_anchors_per_head , counter , filtered_box);
-                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
             }else 
             if (blockIdx.z == 2) {
                 box_decode_warp(0 ,box_pred_2 , tid , num_anchors_per_head , counter , filtered_box);
-                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
             }else 
             if (blockIdx.z == 3) {
                 box_decode_warp(0 ,box_pred_4 , tid , num_anchors_per_head , counter , filtered_box);
-                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
             }else 
             if (blockIdx.z == 4) {
                 box_decode_warp(0 ,box_pred_4 , tid , num_anchors_per_head , counter , filtered_box);
-                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
             }else 
             if (blockIdx.z == 6) {
                 box_decode_warp(0 ,box_pred_7 , tid , num_anchors_per_head , counter , filtered_box);
-                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
             }else 
             if (blockIdx.z == 7) {
                 box_decode_warp(0 ,box_pred_7 , tid , num_anchors_per_head , counter , filtered_box);
-                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
             }else 
             if (blockIdx.z == 8) {
                 box_decode_warp(0 ,box_pred_9 , tid , num_anchors_per_head , counter , filtered_box);
-                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
             }else 
             if (blockIdx.z == 9) {
                 box_decode_warp(0 ,box_pred_9 , tid , num_anchors_per_head , counter , filtered_box);
-                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
+            }
+    }
+}
+
+// 7-class variant of sigmoid_filter_kernel (overload; the launch site picks
+// this one when num_class_ != 10).
+// Launch shape: grid (16, 4, 7), 512 threads/block, 512*2*sizeof(float)
+// dynamic shared memory. blockIdx.z selects one class head; each head covers
+// 16*4*512 = 32768 anchors. Heads 0..4 each have a dedicated score tensor
+// (cls_pred_0..cls_pred_4) and box tensor (box_pred_0..box_pred_4). Classes
+// 5 and 6 share cls_pred_56, whose scores are interleaved with stride 2 and
+// span two anchor groups (boxes in box_pred_5 for the first group,
+// box_pred_6 for the second), so blocks with blockIdx.z == 5 or 6 evaluate
+// 2*blockDim.x candidates using both halves of the shared buffer.
+// Anchors whose sigmoid score exceeds score_threshold are decoded (see
+// box_decode_warp) and compacted into filtered_box / filtered_score via a
+// per-class atomic slot counter in filter_count.
+// NOTE(review): for heads 5/6 up to 2*num_anchors_per_head candidates can
+// pass, so "blockIdx.z * num_anchors_per_head + counter" may run past this
+// head's segment of filtered_score/filtered_box -- confirm output capacity.
+__global__ void sigmoid_filter_kernel(
+
+    float* cls_pred_0,
+    float* cls_pred_1,
+    float* cls_pred_2,
+    float* cls_pred_3,
+    float* cls_pred_4,
+    float* cls_pred_56,
+
+    const float* box_pred_0,
+
+    const float* box_pred_1,
+
+    const float* box_pred_2,
+
+    const float* box_pred_3,
+
+    const float* box_pred_4,
+
+    const float* box_pred_5,
+    const float* box_pred_6,
+
+
+    float* filtered_box,
+    float* filtered_score,
+    int* filter_count,
+
+    const float score_threshold) {
+
+    // cls_pred_56 layout: (2 * 32768, 2) -- two anchor groups, two class
+    // channels interleaved with stride 2.
+
+    int num_anchors_per_head = gridDim.x * gridDim.y * blockDim.x;
+    // 16 * 4 * 512 = 32768
+    extern __shared__ float cls_score[];
+    // Upper half defaults to "no candidate" so heads 0..4, which never write
+    // it, are rejected by the second pass below.
+    cls_score[threadIdx.x + blockDim.x] = -1.0f;
+
+    int tid = blockIdx.x * gridDim.y * blockDim.x + blockIdx.y *  blockDim.x + threadIdx.x;
+
+
+    if ( blockIdx.z == 0) cls_score[ threadIdx.x ] = 1 / (1 + expf(-cls_pred_0[ tid ]));
+    if ( blockIdx.z == 1) cls_score[ threadIdx.x ] = 1 / (1 + expf(-cls_pred_1[ tid ]));
+    if ( blockIdx.z == 2) cls_score[ threadIdx.x ] = 1 / (1 + expf(-cls_pred_2[ tid ]));
+    if ( blockIdx.z == 3) cls_score[ threadIdx.x ] = 1 / (1 + expf(-cls_pred_3[ tid ]));
+    if ( blockIdx.z == 4) cls_score[ threadIdx.x ] = 1 / (1 + expf(-cls_pred_4[ tid ]));
+    if ( blockIdx.z == 5) {
+        cls_score[ threadIdx.x ] = 1 / (1 + expf(-cls_pred_56[ tid * 2 ]));
+        cls_score[ threadIdx.x + blockDim.x] = 1 / (1 + expf(-cls_pred_56[ (num_anchors_per_head + tid) * 2]));}
+    if ( blockIdx.z == 6) {
+        cls_score[ threadIdx.x ] = 1 / (1 + expf(-cls_pred_56[ tid * 2 + 1 ]));
+        cls_score[ threadIdx.x + blockDim.x] = 1 / (1 + expf(-cls_pred_56[ (num_anchors_per_head + tid) * 2 + 1]));}
+
+    __syncthreads();
+
+    // Pass 1: candidates from the first anchor group (all heads).
+    if( cls_score[ threadIdx.x ] > score_threshold)
+    {
+        int counter = atomicAdd(&filter_count[blockIdx.z], 1);
+        if ( blockIdx.z == 0) {
+            box_decode_warp(0 ,box_pred_0 , tid , num_anchors_per_head , counter , filtered_box);
+            filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+        }else
+        if ( blockIdx.z == 1) {
+            box_decode_warp(0 ,box_pred_1 , tid , num_anchors_per_head , counter , filtered_box);
+            filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+        }else
+        if ( blockIdx.z == 2) {
+            box_decode_warp(0 ,box_pred_2 , tid , num_anchors_per_head , counter , filtered_box);
+            filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+        }else
+        if ( blockIdx.z == 3) {
+            box_decode_warp(0 ,box_pred_3 , tid , num_anchors_per_head , counter , filtered_box);
+            filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+        }else
+        if (blockIdx.z == 4) {
+            box_decode_warp(0 ,box_pred_4 , tid , num_anchors_per_head , counter , filtered_box);
+            filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+        }else
+        if ( blockIdx.z == 5) {
+            box_decode_warp(0 ,box_pred_5 , tid , num_anchors_per_head , counter , filtered_box);
+            filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+        }else
+        if ( blockIdx.z == 6) {
+            // First anchor group of the shared 5/6 head also decodes from box_pred_5.
+            box_decode_warp(0 ,box_pred_5 , tid , num_anchors_per_head , counter , filtered_box);
+            filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x ];
+        }
+    }
+    __syncthreads();
+    // Pass 2: second anchor group; only heads 5/6 ever write scores here.
+    if( cls_score[ threadIdx.x + blockDim.x ] > score_threshold)  {
+            int counter = atomicAdd(&filter_count[blockIdx.z], 1);
+            // printf("counter : %d \n" , counter);
+            if (blockIdx.z == 5) {
+                box_decode_warp(0 ,box_pred_6 , tid , num_anchors_per_head , counter , filtered_box);
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
+            }else
+            if (blockIdx.z == 6) {
+                box_decode_warp(0 ,box_pred_6 , tid , num_anchors_per_head , counter , filtered_box);
+                filtered_score[blockIdx.z * num_anchors_per_head + counter] = cls_score[ threadIdx.x + blockDim.x];
+            }
+    }
+}
@@ -290,31 +392,62 @@ void PostprocessCuda::DoPostprocessCuda(
     std::vector<float>& out_detection, std::vector<int>& out_label , std::vector<float>& out_score) {
     // 在此之前,先进行rpn_box_output的concat. 
     // 128x128 的feature map, cls_pred 的shape为(32768,1),(32768,1),(32768,1),(65536,2),(32768,1)
-    dim3 gridsize(16, 4 , 10);  //16 *  4  * 512  = 32768 代表一个head的anchors
-    sigmoid_filter_kernel<<< gridsize, 512 , 512 * 2 * sizeof(float)>>>(
-        cls_pred_0,
-        cls_pred_12, 
-        cls_pred_34, 
-        cls_pred_5, 
-        cls_pred_67, 
-        cls_pred_89,
-
-        &box_preds[0 * 32768 * 9],
-        &box_preds[1 * 32768 * 9],
-        &box_preds[2 * 32768 * 9],
-        &box_preds[3 * 32768 * 9],
-        &box_preds[4 * 32768 * 9],
-        &box_preds[5 * 32768 * 9],
-        &box_preds[6 * 32768 * 9],
-        &box_preds[7 * 32768 * 9],
-        &box_preds[8 * 32768 * 9],
-        &box_preds[9 * 32768 * 9],
-
-        dev_filtered_box, 
-        dev_filtered_score,  
-        dev_filter_count, 
-    
-        score_threshold_);
+
+//////class = 10
+    if(num_class_ == 10)
+    {
+        dim3 gridsize(16, 4 , 10);  //16 *  4  * 512  = 32768 代表一个head的anchors
+        sigmoid_filter_kernel<<< gridsize, 512 , 512 * 2 * sizeof(float)>>>(
+            cls_pred_0,
+            cls_pred_12,
+            cls_pred_34,
+            cls_pred_5,
+            cls_pred_67,
+            cls_pred_89,
+
+            &box_preds[0 * 32768 * 9],
+            &box_preds[1 * 32768 * 9],
+            &box_preds[2 * 32768 * 9],
+            &box_preds[3 * 32768 * 9],
+            &box_preds[4 * 32768 * 9],
+            &box_preds[5 * 32768 * 9],
+            &box_preds[6 * 32768 * 9],
+            &box_preds[7 * 32768 * 9],
+            &box_preds[8 * 32768 * 9],
+            &box_preds[9 * 32768 * 9],
+
+            dev_filtered_box,
+            dev_filtered_score,
+            dev_filter_count,
+
+            score_threshold_);
+    }
+    else
+    {
+        ////class = 7
+            dim3 gridsize(16, 4 , 7);  //16 *  4  * 512  = 32768 代表一个head的anchors
+            sigmoid_filter_kernel<<< gridsize, 512 , 512 * 2 * sizeof(float)>>>(
+                 cls_pred_0,
+                 cls_pred_12,
+                 cls_pred_34,
+                 cls_pred_5,
+                 cls_pred_67,
+                 cls_pred_89,
+                 &box_preds[0 * 32768 * 9],
+                 &box_preds[1 * 32768 * 9],
+                 &box_preds[2 * 32768 * 9],
+                 &box_preds[3 * 32768 * 9],
+                 &box_preds[4 * 32768 * 9],
+                 &box_preds[5 * 32768 * 9],
+                 &box_preds[6 * 32768 * 9],
+
+                 dev_filtered_box,
+                 dev_filtered_score,
+                 dev_filter_count,
+
+                 score_threshold_);
+    }
+
     cudaDeviceSynchronize();
     
     int host_filter_count[num_class_] = {0};
@@ -362,17 +495,16 @@ void PostprocessCuda::DoPostprocessCuda(
 
         cudaMemcpy(host_filtered_box, dev_sorted_filtered_box, host_filter_count[i] * num_output_box_feature_ * sizeof(float), cudaMemcpyDeviceToHost);
         cudaMemcpy(host_filtered_scores, dev_sorted_filtered_scores, host_filter_count[i] * sizeof(float), cudaMemcpyDeviceToHost);
-        for (int i = 0; i < num_out; ++i)  {
-            out_detection.emplace_back(host_filtered_box[keep_inds[i] * num_output_box_feature_ + 0]);
-            out_detection.emplace_back(host_filtered_box[keep_inds[i] * num_output_box_feature_ + 1]);
-            out_detection.emplace_back(host_filtered_box[keep_inds[i] * num_output_box_feature_ + 2]);
-            out_detection.emplace_back(host_filtered_box[keep_inds[i] * num_output_box_feature_ + 3]);
-            out_detection.emplace_back(host_filtered_box[keep_inds[i] * num_output_box_feature_ + 4]);
-            out_detection.emplace_back(host_filtered_box[keep_inds[i] * num_output_box_feature_ + 5]);
-            out_detection.emplace_back(host_filtered_box[keep_inds[i] * num_output_box_feature_ + 6]);
-            out_score.emplace_back(host_filtered_scores[keep_inds[i]]);
+        for (int j = 0; j < num_out; ++j)  {
+            out_detection.emplace_back(host_filtered_box[keep_inds[j] * num_output_box_feature_ + 0]);
+            out_detection.emplace_back(host_filtered_box[keep_inds[j] * num_output_box_feature_ + 1]);
+            out_detection.emplace_back(host_filtered_box[keep_inds[j] * num_output_box_feature_ + 2]);
+            out_detection.emplace_back(host_filtered_box[keep_inds[j] * num_output_box_feature_ + 3]);
+            out_detection.emplace_back(host_filtered_box[keep_inds[j] * num_output_box_feature_ + 4]);
+            out_detection.emplace_back(host_filtered_box[keep_inds[j] * num_output_box_feature_ + 5]);
+            out_detection.emplace_back(host_filtered_box[keep_inds[j] * num_output_box_feature_ + 6]);
+            out_score.emplace_back(host_filtered_scores[keep_inds[j]]);
             out_label.emplace_back(i);
-
         }
         delete[] keep_inds;
         delete[] host_filtered_scores;
@@ -380,6 +512,5 @@ void PostprocessCuda::DoPostprocessCuda(
 
         GPU_CHECK(cudaFree(dev_indexes));
         GPU_CHECK(cudaFree(dev_sorted_filtered_box));
-        GPU_CHECK(cudaFree(dev_sorted_filtered_scores));
     }
 }

+ 25 - 0
src/detection/detection_lidar_PointPillars_MultiHead/roiaware_pool3d.h

@@ -0,0 +1,25 @@
+/*
+RoI-aware point cloud feature pooling
+Reference paper:  https://arxiv.org/abs/1907.03670
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2020.
+*/
+
+// NOTE(review): this header has no include guard / #pragma once -- add one
+// before it is included from more than one translation unit.
+
+#include <assert.h>
+
+
+//#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+//#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+//#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
+
+
+// Forward pooling: pools per-point features pts_feature (npoints, C) into a
+// (boxes_num, out_x, out_y, out_z, C) voxel grid per RoI box.
+// pool_method: 0 = max pool (fills argmax), 1 = avg pool.
+// All pointer arguments are device memory.
+void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_pts_each_voxel,
+    int out_x, int out_y, int out_z, const float *rois, const float *pts, const float *pts_feature,
+    int *argmax, int *pts_idx_of_voxels, float *pooled_features, int pool_method);
+
+// Backward of the pooling above: scatters grad_out back into grad_in
+// (npoints, C), using argmax for max pool or pts_idx_of_voxels for avg pool.
+void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel,
+    const int *pts_idx_of_voxels, const int *argmax, const float *grad_out, float *grad_in, int pool_method);
+
+// For each point, writes the index of the first box containing it into
+// box_idx_of_points (B, npoints); entries without a hit keep the value the
+// caller pre-filled (expected to be -1).
+void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const float *boxes,
+    const float *pts, int *box_idx_of_points);

+ 361 - 0
src/detection/detection_lidar_PointPillars_MultiHead/roiaware_pool3d_kernel.cu

@@ -0,0 +1,361 @@
+/*
+RoI-aware point cloud feature pooling
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2020.
+*/
+
+
+#include <math.h>
+#include <stdio.h>
+
+#include "roiaware_pool3d.h"
+
+#define THREADS_PER_BLOCK 256
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+// #define DEBUG
+
+
+// Rotate the (already box-centered) offset (shift_x, shift_y) by -rot_angle,
+// i.e. transform a LiDAR-frame offset into the box-local frame.
+// NOTE(review): cos()/sin() are the double-precision overloads applied to
+// float arguments; cosf()/sinf() would avoid the float->double round trip.
+__device__ inline void lidar_to_local_coords(float shift_x, float shift_y, float rot_angle, float &local_x, float &local_y){
+    float cosa = cos(-rot_angle), sina = sin(-rot_angle);
+    local_x = shift_x * cosa + shift_y * (-sina);
+    local_y = shift_x * sina + shift_y * cosa;
+}
+
+
+// Test whether point pt = (x, y, z) lies inside the rotated 3D box box3d.
+// Returns 1/0. On return, local_x/local_y hold the point's coordinates in
+// the box-local frame (valid whenever the z-extent check passed).
+__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y){
+    // param pt: (x, y, z)
+    // param box3d: [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
+
+    const float MARGIN = 1e-5;
+    float x = pt[0], y = pt[1], z = pt[2];
+    float cx = box3d[0], cy = box3d[1], cz = box3d[2];
+    float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6];
+
+    // Cheap reject on height before doing the 2D rotation.
+    if (fabsf(z - cz) > dz / 2.0) return 0;
+    lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
+    // Bitwise & of the two bool sub-tests, stored in a float and narrowed
+    // back to int on return -- yields the intended 0/1.
+    float in_flag = (fabs(local_x) < dx / 2.0 + MARGIN) & (fabs(local_y) < dy / 2.0 + MARGIN);
+    return in_flag;
+}
+
+
+// One thread per (box, point) pair; grid is (ceil(npoints/B), boxes_num).
+__global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, int out_x, int out_y, int out_z,
+    const float *rois, const float *pts, int *pts_mask){
+    // params rois: [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
+    // params pts: (npoints, 3) [x, y, z]
+    // params pts_mask: (N, npoints): -1 means the point is not in this box; otherwise
+    // the voxel coords packed as (x_idx << 16) | (y_idx << 8) | z_idx
+    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    int box_idx = blockIdx.y;
+    if (pt_idx >= pts_num || box_idx >= boxes_num) return;
+
+    pts += pt_idx * 3;
+    rois += box_idx * 7;
+    pts_mask += box_idx * pts_num + pt_idx;
+
+    float local_x = 0, local_y = 0;
+    int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y);
+
+    pts_mask[0] = -1;
+    if (cur_in_flag > 0){
+        float local_z = pts[2] - rois[2];
+        float dx = rois[3], dy = rois[4], dz = rois[5];
+
+        // Voxel size along each axis of this box's pooling grid.
+        float x_res = dx / out_x;
+        float y_res = dy / out_y;
+        float z_res = dz / out_z;
+
+        unsigned int x_idx = int((local_x + dx / 2) / x_res);
+        unsigned int y_idx = int((local_y + dy / 2) / y_res);
+        unsigned int z_idx = int((local_z + dz / 2) / z_res);
+
+        // NOTE(review): the indices are unsigned, so a slightly negative value
+        // wraps to a huge number before clamping; min() still bounds it to
+        // out-1, placing the point in the edge voxel.
+        x_idx = min(max(x_idx, 0), out_x - 1);
+        y_idx = min(max(y_idx, 0), out_y - 1);
+        z_idx = min(max(z_idx, 0), out_z - 1);
+
+        // Pack x into bits 16+, y into bits 8-15, z into bits 0-7 (the decoder
+        // masks each field with 0xFF, so out_* must be <= 256).
+        unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx;
+        pts_mask[0] = idx_encoding;
+    }
+}
+
+
+// One thread per box: serially scans all points and appends each in-box
+// point's index into its voxel's slot list.
+__global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, int max_pts_each_voxel,
+    int out_x, int out_y, int out_z, const int *pts_mask, int *pts_idx_of_voxels){
+    // params pts_mask: (N, npoints)  -1 or a packed voxel index (see generate_pts_mask_for_box3d)
+    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
+
+    int box_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (box_idx >= boxes_num) return;
+
+    int max_num_pts = max_pts_each_voxel - 1;  // index 0 is the counter
+    pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel;
+
+    for (int k = 0; k < pts_num; k++){
+        if (pts_mask[box_idx * pts_num + k] != -1){
+            // Unpack the voxel coordinates encoded by generate_pts_mask_for_box3d.
+            unsigned int idx_encoding = pts_mask[box_idx * pts_num + k];
+            unsigned int x_idx = (idx_encoding >> 16) & 0xFF;
+            unsigned int y_idx = (idx_encoding >> 8) & 0xFF;
+            unsigned int z_idx = idx_encoding & 0xFF;
+            unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel + y_idx * out_z * max_pts_each_voxel + z_idx * max_pts_each_voxel;
+            unsigned int cnt = pts_idx_of_voxels[base_offset];
+            // Slot 0 holds the count; points beyond the capacity are dropped.
+            if (cnt < max_num_pts){
+                pts_idx_of_voxels[base_offset + cnt + 1] = k;
+                pts_idx_of_voxels[base_offset]++;
+            }
+#ifdef DEBUG
+        printf("collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x\n",
+            k, x_idx, y_idx, z_idx, idx_encoding);
+#endif
+
+        }
+    }
+}
+
+
+// Max-pool within each RoI voxel. One thread per (voxel, channel) per box:
+// grid is (ceil(out_x*out_y*out_z / B), channels, boxes_num).
+__global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
+    int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features, int *argmax){
+    // params pts_feature: (npoints, C)
+    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
+    // params pooled_features: (N, out_x, out_y, out_z, C)
+    // params argmax: (N, out_x, out_y, out_z, C)
+
+    int box_idx = blockIdx.z;
+    int channel_idx = blockIdx.y;
+    int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
+
+    int x_idx = voxel_idx_flat / (out_y * out_z);
+    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
+    int z_idx = voxel_idx_flat % out_z;
+    if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;
+
+#ifdef DEBUG
+    printf("src pts_idx_of_voxels: (%p, ), argmax: %p\n", pts_idx_of_voxels, argmax);
+#endif
+
+    // Advance each pointer to this thread's (box, voxel, channel) element.
+    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
+    pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel;
+    pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
+    argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
+
+    int argmax_idx = -1;
+    // NOTE(review): -1e50 is outside float range (becomes -inf); it still
+    // works as a sentinel, but -FLT_MAX would be the well-defined choice.
+    float max_val = -1e50;
+
+    int total_pts = pts_idx_of_voxels[0];
+
+    for (int k = 1; k <= total_pts; k++){
+        if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val){
+            max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
+            argmax_idx = pts_idx_of_voxels[k];
+        }
+    }
+
+    // Empty voxels leave pooled_features untouched (assumes the caller
+    // pre-initialized the output buffer -- TODO confirm); argmax is always written.
+    if (argmax_idx != -1){
+        pooled_features[0] = max_val;
+    }
+    argmax[0] = argmax_idx;
+
+#ifdef DEBUG
+    printf("channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)\n",
+        channel_idx, x_idx, y_idx, z_idx, argmax_idx, max_val, total_pts, pts_idx_of_voxels, argmax, argmax_idx);
+#endif
+}
+
+
+// Average-pool within each RoI voxel. Same thread mapping as
+// roiaware_maxpool3d: one thread per (voxel, channel) per box.
+__global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
+    int out_y, int out_z, const float *pts_feature, const int *pts_idx_of_voxels, float *pooled_features){
+    // params pts_feature: (npoints, C)
+    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
+    // params pooled_features: (N, out_x, out_y, out_z, C)
+    // (no argmax output: average pooling needs no winner index)
+
+    int box_idx = blockIdx.z;
+    int channel_idx = blockIdx.y;
+    int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
+
+    int x_idx = voxel_idx_flat / (out_y * out_z);
+    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
+    int z_idx = voxel_idx_flat % out_z;
+    if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;
+
+    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
+    pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel;
+    pooled_features += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
+
+    float sum_val = 0;
+    int total_pts = pts_idx_of_voxels[0];
+
+    for (int k = 1; k <= total_pts; k++){
+        sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx];
+    }
+
+    // Empty voxels are left untouched (assumes the caller pre-initialized the
+    // output buffer -- TODO confirm).
+    if (total_pts > 0){
+        pooled_features[0] = sum_val / total_pts;
+    }
+}
+
+
+// Host-side driver for RoI-aware pooling (forward). Pipeline:
+//   1) mark, for every (box, point) pair, which voxel the point falls in;
+//   2) per box, gather point indices into pts_idx_of_voxels;
+//   3) max- or avg-pool the per-point features per voxel/channel.
+// NOTE(review): allocates/frees the temporary pts_mask on every call and
+// checks no CUDA return codes; consider a cached buffer plus error checking.
+void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, int out_y, int out_z,
+    const float *rois, const float *pts, const float *pts_feature, int *argmax, int *pts_idx_of_voxels, float *pooled_features, int pool_method){
+    // params rois: (N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
+    // params pts: (npoints, 3) [x, y, z]
+    // params pts_feature: (npoints, C)
+    // params argmax: (N, out_x, out_y, out_z, C)
+    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
+    // params pooled_features: (N, out_x, out_y, out_z, C)
+    // params pool_method: 0: max_pool 1: avg_pool
+
+    int *pts_mask = NULL;
+    cudaMalloc(&pts_mask, boxes_num * pts_num * sizeof(int));  // (N, M)
+    cudaMemset(pts_mask, -1, boxes_num * pts_num * sizeof(int));
+
+    dim3 blocks_mask(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num);
+    dim3 threads(THREADS_PER_BLOCK);
+    generate_pts_mask_for_box3d<<<blocks_mask, threads>>>(boxes_num, pts_num, out_x, out_y, out_z, rois, pts, pts_mask);
+
+    // TODO: Merge the collect and pool functions, SS
+
+    dim3 blocks_collect(DIVUP(boxes_num, THREADS_PER_BLOCK));
+    collect_inside_pts_for_box3d<<<blocks_collect, threads>>>(boxes_num, pts_num, max_pts_each_voxel,
+        out_x, out_y, out_z, pts_mask, pts_idx_of_voxels);
+
+    dim3 blocks_pool(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num);
+    if (pool_method == 0){
+        roiaware_maxpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
+            pts_feature, pts_idx_of_voxels, pooled_features, argmax);
+    }
+    else if (pool_method == 1){
+        roiaware_avgpool3d<<<blocks_pool, threads>>>(boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
+            pts_feature, pts_idx_of_voxels, pooled_features);
+    }
+
+
+    cudaFree(pts_mask);
+
+#ifdef DEBUG
+    cudaDeviceSynchronize();  // for using printf in kernel function
+#endif
+}
+
+
+// Backward of max pooling: routes each output gradient to the point that won
+// the max (argmax), accumulating with atomicAdd since one point can win in
+// several voxels/boxes. Thread mapping matches roiaware_maxpool3d.
+__global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z,
+    const int *argmax, const float *grad_out, float *grad_in){
+    // params argmax: (N, out_x, out_y, out_z, C)
+    // params grad_out: (N, out_x, out_y, out_z, C)
+    // params grad_in: (npoints, C), return value
+
+    int box_idx = blockIdx.z;
+    int channel_idx = blockIdx.y;
+    int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
+
+    int x_idx = voxel_idx_flat / (out_y * out_z);
+    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
+    int z_idx = voxel_idx_flat % out_z;
+    if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;
+
+    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
+    argmax += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
+    grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
+
+    // Empty voxel: no winner, nothing to propagate.
+    if (argmax[0] == -1) return;
+
+    atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1);
+}
+
+
+// Backward of average pooling: splits each output gradient evenly over the
+// voxel's contributing points (grad / total_pts), accumulated with atomicAdd.
+__global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int out_x, int out_y, int out_z,
+    int max_pts_each_voxel, const int *pts_idx_of_voxels, const float *grad_out, float *grad_in){
+    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
+    // params grad_out: (N, out_x, out_y, out_z, C)
+    // params grad_in: (npoints, C), return value
+
+    int box_idx = blockIdx.z;
+    int channel_idx = blockIdx.y;
+    int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x;
+
+    int x_idx = voxel_idx_flat / (out_y * out_z);
+    int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z;
+    int z_idx = voxel_idx_flat % out_z;
+    if (box_idx >= boxes_num || channel_idx >= channels|| x_idx >= out_x || y_idx >= out_y || z_idx >= out_z) return;
+
+    int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx;
+    pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + offset_base * max_pts_each_voxel;
+    grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx;
+
+
+    int total_pts = pts_idx_of_voxels[0];
+    // fmaxf guards division by zero for empty voxels (the loop below is a
+    // no-op in that case anyway).
+    float cur_grad = 1 / fmaxf(float(total_pts), 1.0);
+    for (int k = 1; k <= total_pts; k++){
+        atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx, grad_out[0] * cur_grad);
+    }
+}
+
+
+// Host-side driver for the pooling backward pass; dispatches to the max or
+// avg backward kernel.
+// NOTE(review): grad_in must be zero-initialized by the caller (both kernels
+// accumulate with atomicAdd); no CUDA error checking is done here.
+void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y, int out_z, int channels, int max_pts_each_voxel,
+    const int *pts_idx_of_voxels, const int *argmax, const float *grad_out, float *grad_in, int pool_method){
+    // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
+    // params argmax: (N, out_x, out_y, out_z, C)
+    // params grad_out: (N, out_x, out_y, out_z, C)
+    // params grad_in: (npoints, C), return value
+    // params pool_method: 0: max_pool, 1: avg_pool
+
+    dim3 blocks(DIVUP(out_x * out_y * out_z, THREADS_PER_BLOCK), channels, boxes_num);
+    dim3 threads(THREADS_PER_BLOCK);
+    if (pool_method == 0){
+        roiaware_maxpool3d_backward<<<blocks, threads>>>(
+            boxes_num, channels, out_x, out_y, out_z, argmax, grad_out, grad_in
+        );
+    }
+    else if (pool_method == 1){
+        roiaware_avgpool3d_backward<<<blocks, threads>>>(
+            boxes_num, channels, out_x, out_y, out_z, max_pts_each_voxel, pts_idx_of_voxels, grad_out, grad_in
+        );
+    }
+
+}
+
+
+// For each point, finds the first box (in index order) that contains it.
+// One thread per point per batch element.
+__global__ void points_in_boxes_kernel(int batch_size, int boxes_num, int pts_num, const float *boxes,
+    const float *pts, int *box_idx_of_points){
+    // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
+    // params pts: (B, npoints, 5) -- NOTE: points are strided by 5 floats here;
+    // only the first three (x, y, z, LiDAR coordinate) are read
+    // params boxes_idx_of_points: (B, npoints), default -1 (only written on a hit)
+
+    int bs_idx = blockIdx.y;
+    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (bs_idx >= batch_size || pt_idx >= pts_num) return;
+
+    boxes += bs_idx * boxes_num * 7;
+    pts += bs_idx * pts_num * 5 + pt_idx * 5;
+    box_idx_of_points += bs_idx * pts_num + pt_idx;
+
+    float local_x = 0, local_y = 0;
+    int cur_in_flag = 0;
+    for (int k = 0; k < boxes_num; k++){
+        cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);
+        if (cur_in_flag){
+            // First matching box wins; remaining boxes are skipped.
+            box_idx_of_points[0] = k;
+            break;
+        }
+    }
+}
+
+
+// Host-side driver for points_in_boxes_kernel. The caller must pre-fill
+// box_idx_of_points with -1 (the kernel writes only on a hit). This is the
+// only launcher in this file that checks cudaGetLastError().
+void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, const float *boxes,
+    const float *pts, int *box_idx_of_points){
+    // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
+    // params pts: (B, npoints, 5) -- 5 floats per point; only x, y, z are used
+    // params boxes_idx_of_points: (B, npoints), default -1
+    cudaError_t err;
+
+    dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size);
+    dim3 threads(THREADS_PER_BLOCK);
+    points_in_boxes_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points);
+
+    err = cudaGetLastError();
+    if (cudaSuccess != err) {
+        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
+        exit(-1);
+    }
+
+#ifdef DEBUG
+    cudaDeviceSynchronize();  // for using printf in kernel function
+#endif
+}

+ 6 - 3
src/detection/detection_lidar_ukf_pda/detection_lidar_ukf_pda.pro

@@ -543,7 +543,8 @@ void ImmUkfPda::staticClassification()
   for (size_t i = 0; i < targets_.size(); i++)
   {
     // targets_[i].x_merge_(2) is referred for estimated velocity
-    double current_velocity = std::abs(targets_[i].x_merge_(2));
+//    double current_velocity = std::abs(targets_[i].x_merge_(2));
+    double current_velocity = targets_[i].x_merge_(2);
     targets_[i].vel_history_.push_back(current_velocity);
     if (targets_[i].tracking_num_ == TrackingState::Stable && targets_[i].lifetime_ > life_time_thres_)
     {
@@ -696,7 +697,7 @@ void ImmUkfPda::makeOutput(const iv::lidar::objectarray & input,
 
 //    }
 
-    std::cout<<"          v     tyaw :      "<<tv<<"  "   <<tyaw<<std::endl;
+//    std::cout<<"          v     tyaw :      "<<tv<<"  "   <<tyaw<<std::endl;
 
     while (tyaw > M_PI)
       tyaw -= 2. * M_PI;
@@ -711,6 +712,8 @@ void ImmUkfPda::makeOutput(const iv::lidar::objectarray & input,
     dd.set_id(targets_[i].ukf_id_);
 //    dd.id = targets_[i].ukf_id_;
     dd.set_velocity_linear_x(tv);
+//    std::cout<<" -------------------------------------------------------"<<std::endl;
+//    std::cout<<"   vel    linear    "<<tv<<std::endl;
    // dd.velocity_linear_x = tv;
     dd.set_acceleration_linear_y(tyaw_rate);
    // dd.acceleration_linear_y = tyaw_rate;
@@ -901,7 +904,7 @@ void ImmUkfPda::tracker(const iv::lidar::objectarray & input,
       for(j=0;j<detected_objects_output.obj_size();j++)
       {
       iv::lidar::lidarobject obj = detected_objects_output.obj(j);
-      std::cout<<"obj "<<j<< " vel is "<<obj.velocity_linear_x()<<" id is "<<obj.id()<<std::endl;
+//      std::cout<<"obj "<<j<< " vel is "<<obj.velocity_linear_x()<<" id is "<<obj.id()<<std::endl;
 
       }
   }

+ 100 - 0
src/detection/detection_lidar_ukf_pda/imm_ukf_pda.h

@@ -0,0 +1,100 @@
+QT -= gui
+
+CONFIG += c++11 console
+CONFIG -= app_bundle
+# The following define makes your compiler emit warnings if you use
+# any feature of Qt which has been marked deprecated (the exact warnings
+# depend on your compiler). Please consult the documentation of the
+# deprecated API in order to know how to port your code away from it.
+DEFINES += QT_DEPRECATED_WARNINGS
+
+# You can also make your code fail to compile if you use deprecated APIs.
+# In order to do so, uncomment the following line.
+# You can also select to disable deprecated APIs only up to a certain version of Qt.
+#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0
+# project's file
+
+
+INCLUDEPATH += yolov4Tensorrt/include \
+               include
+
+SOURCES += yolov4Tensorrt/src/trt_utils.cpp \
+           yolov4Tensorrt/src/yolo.cpp \
+           yolov4Tensorrt/src/yolodetect.cpp
+
+CUDA_SOURCES += yolov4Tensorrt/src/mish.cu \
+                yolov4Tensorrt/src/yololayer.cu
+
+DISTFILES += yolov4Tensorrt/src/mish.cu \
+             yolov4Tensorrt/src/yololayer.cu
+
+SOURCES += main_multibatch.cpp \
+           src/Hungarian.cpp \
+           src/KalmanTracker.cpp \
+           src/detect_obstacle.cpp
+
+
+INCLUDEPATH += proto
+SOURCES +=  \
+        proto/rawpic.pb.cc \
+        proto/obstacles.pb.cc
+
+#INCLUDEPATH += modulecomm
+#LIBS += $$PWD/modulecomm/libmodulecomm.so
+
+#LIBS += -L"/usr/local/lib" \
+#        -lprotobuf
+!include(../../../include/common.pri ) {
+    error( "Couldn't find the common.pri file!" )
+}
+
+!include(../../../include/ivprotobuf.pri ) {
+    error( "Couldn't find the ivprotobuf.pri file!" )
+}
+# opencv
+INCLUDEPATH += /usr/include/opencv4
+LIBS += /usr/lib/aarch64-linux-gnu/libopencv_*.so
+
+# tensorrt
+LIBS += /usr/lib/aarch64-linux-gnu/libnvinfer.so \
+        /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so
+# c++
+LIBS += -L/usr/lib/aarch64-linux-gnu -lstdc++fs
+# cuda
+CUDA_SDK = "/usr/local/cuda"   # Path to cuda SDK install
+CUDA_DIR = "/usr/local/cuda"            # Path to cuda toolkit install
+#####系统类型,计算能力###########
+SYSTEM_NAME = linux         # Depending on your system either 'Win32', 'x64', or 'Win64'
+SYSTEM_TYPE = 64            # '32' or '64', depending on your system
+CUDA_ARCH = sm_72           # Type of CUDA architecture, for example 'compute_10', 'compute_11', 'sm_10'
+NVCC_OPTIONS = --use_fast_math
+
+INCLUDEPATH += $$CUDA_DIR/include
+QMAKE_LIBDIR += $$CUDA_DIR/lib64/
+
+CUDA_OBJECTS_DIR = ./
+
+# Add the necessary libraries
+CUDA_LIBS = -lcuda -lcudart #-lcublas
+
+# The following makes sure all path names (which often include spaces) are put between quotation marks
+CUDA_INC = $$join(INCLUDEPATH,'" -I"','-I"','"')
+LIBS += $$CUDA_LIBS
+
+# Configuration of the Cuda compiler
+CONFIG(debug, debug|release) {
+    # Debug mode
+    cuda_d.input = CUDA_SOURCES
+    cuda_d.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}.o
+    cuda_d.commands = $$CUDA_DIR/bin/nvcc -D_DEBUG $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+    cuda_d.dependency_type = TYPE_C
+    QMAKE_EXTRA_COMPILERS += cuda_d
+}
+else {
+    # Release mode
+    cuda.input = CUDA_SOURCES
+    cuda.output = $$CUDA_OBJECTS_DIR/${QMAKE_FILE_BASE}.o
+    cuda.commands = $$CUDA_DIR/bin/nvcc $$NVCC_OPTIONS $$CUDA_INC $$NVCC_LIBS --machine $$SYSTEM_TYPE -arch=$$CUDA_ARCH -c -o ${QMAKE_FILE_OUT} ${QMAKE_FILE_NAME}
+    cuda.dependency_type = TYPE_C
+    QMAKE_EXTRA_COMPILERS += cuda
+}

+ 36 - 0
src/detection/detection_yolov4_shigong_monocular/include/Hungarian.h

@@ -0,0 +1,36 @@
+//
+// Created by lqx on 20-4-23.
+//
+
+#ifndef TRACK_SORT_HUNGARIAN_H_H
+#define TRACK_SORT_HUNGARIAN_H_H
+
+#include <iostream>
+#include <vector>
+
+using namespace std;
+
+// Solver for the linear assignment problem (Hungarian / Munkres algorithm).
+// Typical use in tracking: assign detections to existing tracks so that the
+// total assignment cost (e.g. 1 - IOU) is minimized.
+class HungarianAlgorithm
+{
+public:
+    HungarianAlgorithm();
+    ~HungarianAlgorithm();
+    // Solve the assignment for the given square/rectangular cost matrix.
+    // DistMatrix: rows = workers/tracks, columns = jobs/detections.
+    // Assignment: filled with the chosen column index per row (implementation
+    // defines the value used for unassigned rows). Returns the total cost.
+    double Solve(vector<vector<double>>& DistMatrix, vector<int>& Assignment);
+
+private:
+    // Internal steps of the Munkres algorithm operating on a flattened
+    // (row-major) copy of the cost matrix with star/prime/cover bookkeeping.
+    void assignmentoptimal(int *assignment, double *cost, double *distMatrix, int nOfRows, int nOfColumns);
+    void buildassignmentvector(int *assignment, bool *starMatrix, int nOfRows, int nOfColumns);
+    void computeassignmentcost(int *assignment, double *cost, double *distMatrix, int nOfRows);
+    void step2a(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+                bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim);
+    void step2b(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+                bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim);
+    void step3(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+               bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim);
+    void step4(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+               bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim, int row, int col);
+    void step5(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix,
+               bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim);
+};
+
+#endif //TRACK_SORT_HUNGARIAN_H_H

+ 90 - 0
src/detection/detection_yolov4_shigong_monocular/include/KalmanTracker.h

@@ -0,0 +1,90 @@
+//
+// Created by lqx on 20-4-23.
+//
+
+#ifndef TRACK_SORT_KALMANTRACKER_H
+#define TRACK_SORT_KALMANTRACKER_H
+
+///////////////////////////////////////////////////////////////////////////////
+// KalmanTracker.h: KalmanTracker Class Declaration
+
+#include "opencv2/video/tracking.hpp"
+#include "opencv2/highgui/highgui.hpp"
+
+using namespace std;
+using namespace cv;
+
+#define StateType Rect_<float>
+
+
+// This class represents the internal state of an individual tracked object,
+// observed as a bounding box (StateType = Rect_<float>). A Kalman filter
+// smooths the box between detections; per-track counters support SORT-style
+// track management (age, hit streak, time since last update).
+class KalmanTracker
+{
+public:
+    // Default constructor: empty box. NOTE(review): this and the 1-arg
+    // constructor take m_id = kf_count but do NOT increment kf_count (the
+    // increment is commented out); only the 3-arg constructor increments it —
+    // confirm this asymmetry is intentional.
+    KalmanTracker()
+    {
+        init_kf(StateType());
+        m_time_since_update = 0;
+        m_hits = 0;
+        m_hit_streak = 0;
+        m_age = 0;
+        m_id = kf_count;
+        //kf_count++;
+    }
+    // Construct from an initial bounding box only (no class/confidence).
+    KalmanTracker(StateType initRect)
+    {
+        init_kf(initRect);
+        m_time_since_update = 0;
+        m_hits = 0;
+        m_hit_streak = 0;
+        m_age = 0;
+        m_id = kf_count;
+        //kf_count++;
+    }
+
+    // Construct from an initial box plus detected class id and confidence;
+    // this variant consumes a new global track id (kf_count++).
+    KalmanTracker(StateType initRect, int classId,float prob)
+    {
+        init_kf(initRect);
+        m_time_since_update = 0;
+        m_hits = 0;
+        m_hit_streak = 0;
+        m_age = 0;
+        m_id = kf_count;
+        kf_count++;
+        m_class_id = classId;
+        m_prob = prob;
+    }
+
+    ~KalmanTracker()
+    {
+        m_history.clear();
+        m_class_history.clear();
+    }
+
+    // Advance the filter one step and return the predicted box.
+    StateType predict();
+    // Correct the filter with a matched detection and record its class/score.
+    void update(StateType stateMat,int classId, float prob);
+
+    // Current (corrected) box estimate.
+    StateType get_state();
+    // Convert (center x, center y, scale/area, aspect ratio) back to a Rect.
+    StateType get_rect_xysr(float cx, float cy, float s, float r);
+
+    static int kf_count;            // global counter used to hand out track ids
+
+    int m_time_since_update;        // frames since last matched detection
+    int m_hits;                     // total number of matched detections
+    int m_hit_streak;               // consecutive frames with a match
+    int m_age;                      // total frames this track has existed
+    int m_id;                       // unique track id (from kf_count)
+    int m_class_id;                 // last/assigned object class
+    std::vector<int> m_class_history;  // class ids seen over the track's life
+    float m_prob;                   // last detection confidence
+
+private:
+    // Initialize the OpenCV Kalman filter from the first observed box.
+    void init_kf(StateType stateMat);
+
+    cv::KalmanFilter kf;
+    cv::Mat measurement;
+
+    std::vector<StateType> m_history;  // past predicted states
+};
+
+#endif //TRACK_SORT_KALMANTRACKER_H

+ 54 - 0
src/detection/detection_yolov4_shigong_monocular/include/detect_obstacle.h

@@ -0,0 +1,54 @@
+#ifndef DETECT_TURNSTILE_H
+#define DETECT_TURNSTILE_H
+
+#include "Hungarian.h"
+#include "KalmanTracker.h"
+
+// Obstacle-detection/tracking helpers (SORT-style tracking over YOLO output).
+namespace od{
+
+// BGR colors used when drawing boxes (index by class/track state).
+const int COLOR_MAP[2][3]={{0, 0, 255},{255, 0, 0}};
+const int max_age = 3;              // frames a track may go unmatched before removal
+const int min_hits = 3;             // consecutive hits before a track is reported
+const double iouThreshold = 0.5;    // minimum IOU for a detection/track match
+
+struct bbox_t {
+    unsigned int x, y, w, h;       // (x,y) - top-left corner, (w, h) - width & height of bounded box
+    float prob;                    // confidence - probability that the object was found correctly
+    unsigned int obj_id;           // class of object - from range [0, classes-1]
+    unsigned int track_id;         // tracking id for video (0 - untracked, 1 - inf - tracked object)
+    unsigned int frames_counter;   // counter of frames on which the object was detected
+    float x_3d, y_3d, z_3d;        // center of object (in Meters) if ZED 3D Camera is used
+};
+
+// A tracked object reported for one frame.
+typedef struct TrackingBox
+{
+    int frame;                     // frame number the box belongs to
+    int id;                        // track id
+    int class_id;                  // object class
+    float prob;                    // detection confidence
+    Rect_<float> box;              // bounding box in image coordinates
+    vector<int> class_history;     // class ids observed over the track's life
+}TrackingBox;
+
+// YOLO detection converted to a DetectBox (class, confidence, box).
+typedef struct DetectBox
+{
+    int class_id;
+    float prob;
+    Rect_<float> box;
+}DetectBox;
+
+//Computes IOU between two bounding boxes
+double GetIOU(Rect_<float> bb_test, Rect_<float> bb_gt);
+// Draw one detection box with its class/track id and confidence on the frame.
+void DrawBoxes(Mat &frame, vector<string> classes, int classId, int turnstileId,float conf, int left, int top, int right, int bottom);
+
+// Draw detection results on a single image (raw YOLO detections).
+void Drawer(Mat &frame, vector<bbox_t> outs, vector<string> classes);
+// Draw detection results on a video frame (tracked boxes with ids).
+void Drawer(Mat &frame, vector<od::TrackingBox> &track_result, vector<string> &classes);
+// Run one tracking step: match detections to trackers, update/create/remove
+// tracks, and fill track_result. Returns whether any track was produced.
+bool TrackObstacle(int frame_count,vector<KalmanTracker> &trackers,vector<bbox_t> &outs,vector<od::TrackingBox> &track_result);
+}
+
+#endif // DETECT_TURNSTILE_H

+ 82 - 0
src/detection/detection_yolov4_shigong_monocular/include/imageBuffer.h

@@ -0,0 +1,82 @@
+#ifndef IMAGEBUFFER_H
+#define IMAGEBUFFER_H
+
+#include <opencv2/opencv.hpp>
+#include <mutex>
+#include <condition_variable>
+#include <queue>
+// Bounded, thread-safe producer/consumer queue. When dropFrame is true, a
+// producer that finds the queue full discards the oldest element instead of
+// blocking — suited to live video where stale frames are worthless.
+template<typename T>
+class ConsumerProducerQueue
+{
+
+public:
+    // mxsz: maximum number of queued elements; dropFrame: drop-oldest policy.
+    ConsumerProducerQueue(int mxsz,bool dropFrame) :
+            maxSize(mxsz),dropFrame(dropFrame)
+    { }
+
+    // Enqueue one element. With dropFrame set, never blocks: evicts the front
+    // element when full. Otherwise blocks until space is available.
+    bool add(T request)
+    {
+        std::unique_lock<std::mutex> lock(mutex);
+        if(dropFrame && isFull())
+        {
+            //lock.unlock();
+            //return false;
+            cpq.pop();
+            cpq.push(request);
+            cond.notify_all();
+            return true;
+        }
+        else {
+            cond.wait(lock, [this]() { return !isFull(); });
+            cpq.push(request);
+            //lock.unlock();
+            cond.notify_all();
+            return true;
+        }
+    }
+    // Block until an element is available, then pop it into request.
+    void consume(T &request)
+    {
+        std::unique_lock<std::mutex> lock(mutex);
+        cond.wait(lock, [this]()
+        { return !isEmpty(); });
+        request = cpq.front();
+        cpq.pop();
+        //lock.unlock();
+        cond.notify_all();
+
+    }
+
+    // NOTE(review): the three accessors below read the queue without taking
+    // the mutex — results are advisory only under concurrent use.
+    bool isFull() const
+    {
+        return cpq.size() >= maxSize;
+    }
+
+    bool isEmpty() const
+    {
+        return cpq.size() == 0;
+    }
+
+    int length() const
+    {
+        return cpq.size();
+    }
+
+    // Remove all queued elements.
+    void clear()
+    {
+        std::unique_lock<std::mutex> lock(mutex);
+        while (!isEmpty())
+        {
+            cpq.pop();
+        }
+        lock.unlock();
+        cond.notify_all();
+    }
+
+private:
+    std::condition_variable cond;  // signals producers/consumers on state change
+    std::mutex mutex;     // guards all access to cpq
+    std::queue<T> cpq;    // underlying FIFO storage
+    int maxSize;          // capacity bound
+    bool dropFrame;       // true: drop oldest when full instead of blocking
+};
+#endif // IMAGEBUFFER_H

+ 353 - 0
src/detection/detection_yolov4_shigong_monocular/main.cpp

@@ -0,0 +1,353 @@
+#include <opencv2/opencv.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+#include <vector>
+#include <fstream>
+#include <thread>
+#include <cmath>
+
+#include "imageBuffer.h"
+#include "modulecomm.h"
+#include "rawpic.pb.h"
+#include "obstacles.pb.h"
+#include "yolodetect.h"
+#include "detect_obstacle.h"
+
+using namespace std;
+using namespace cv;
+
+uint max_batch_size = 1;
+
+string yaml_path="./model/external.yaml";
+
+bool test_video = false;
+string video_path = "20201231144029.avi";
+
+void * g_obstacle;
+string obstacledata="obstacledata";
+
+void * mpa_camera;
+string cameradata="picleft";
+
+typedef struct frame_info
+{
+    cv::Mat  frame;
+    long long timestamp;
+}frame_info;
+typedef struct obstacle_info
+{
+    string category;
+    uint64 class_id;
+    Point3f bbox_3d;
+    Rect_<float> bbox;
+}obstacle_info;
+
+typedef struct obstacles_info
+{
+    vector<obstacle_info> bboxes;
+    uint64 time;
+}obstacles_info;
+
+
+int capture_width=1280,capture_height=720;
+cv::Mat cameraMatrix = (cv::Mat_<double>(3,3)<< 1.0538648949999999e+03, 0., 9.7134830299999999e+02, 0.,
+                        9.6887771299999997e+02, 5.5096488099999999e+02, 0., 0., 1.);
+cv::Mat distCoeffs = (cv::Mat_<double>(1,5)<<-2.9078999999999999e-01, 6.0994000000000000e-02,
+                      3.6489999999999999e-03, -2.0590000000000001e-03, 0.);
+cv::Mat cameraMatrix_inv = (cv::Mat_<double>(3,3)<<9.4888823486240150e-04, 0., -9.2170097667025908e-01, 0.,
+                            1.0321219970099572e-03, -5.6866297326007331e-01, 0., 0., 1.);
+cv::Mat rvecM_inv = (cv::Mat_<double>(3,3)<<9.9361999532145029e-01, 1.5481286202195165e-02,
+                     1.1171228524619221e-01, -1.1071480863577153e-01,
+                     -5.4789655210038264e-02, 9.9234083098031878e-01,
+                     2.1483390005894807e-02, -9.9837789611924532e-01,
+                     -5.2726089314150459e-02 );
+cv::Mat tvec = (cv::Mat_<double>(3,1)<<1.5367854556890524e+02, 1.5365463964586140e+03,
+                -1.2444937322334890e+03);
+
+ConsumerProducerQueue<frame_info> * imageBuffer =  new ConsumerProducerQueue<frame_info>(3,true);
+ConsumerProducerQueue<frame_info> * imageBuffer_30 =  new ConsumerProducerQueue<frame_info>(3,true);
+ConsumerProducerQueue<frame_info> * imageBuffer_90 =  new ConsumerProducerQueue<frame_info>(3,true);
+
+// Return the current system time in milliseconds since the Unix epoch
+// (1970-01-01 00:00:00.000 UTC).
+long long getCurrentSystemTime()
+{
+    auto now = std::chrono::system_clock::now();
+    long long millseconds = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
+    /*
+    uint64_t dis_millseconds = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count()
+            - std::chrono::duration_cast<std::chrono::seconds>(now.time_since_epoch()).count() * 1000;
+    auto tt = std::chrono::system_clock::to_time_t(now);
+    struct tm* ptm = localtime(&tt);
+    char date[60] = { 0 };
+    sprintf(date, "%d/%02d/%02d/%02d/%02d/%02d/%03d",
+            (int)ptm->tm_year + 1900, (int)ptm->tm_mon + 1, (int)ptm->tm_mday,
+            (int)ptm->tm_hour, (int)ptm->tm_min, (int)ptm->tm_sec, (int)dis_millseconds);
+    return std::string(date);
+    */
+    return millseconds;
+}
+// Capture thread: read frames from the video file at video_path, timestamp
+// them, and push them into imageBuffer_30. Spins forever (never returns).
+void ReadFunc(int n)
+{
+    cv::VideoCapture cap_30(video_path);
+    if(!cap_30.isOpened())
+    {
+        cout<<"camera failed to open"<<endl;
+    }
+    while(1)
+    {
+        frame_info frameInfo_30;
+        cv::Mat frame_30;
+        // Needed when reading from a video file (pace the reader to the
+        // consumer); remove when reading from a live camera.
+        if(imageBuffer_30->isFull())
+        {
+            continue;
+        }
+        if(cap_30.read(frame_30))
+        {
+            long long timestamp = getCurrentSystemTime();
+            frameInfo_30.frame = frame_30;
+            frameInfo_30.timestamp = timestamp;
+            imageBuffer_30->add(frameInfo_30);
+        }
+        else
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        }
+    }
+}
+// Shared-memory callback: receive a camera frame (iv::vision::rawpic
+// protobuf), decode it (raw copy when pic.type()==1, else JPEG/PNG decode),
+// and push it into imageBuffer.
+// NOTE(review): this pushes into imageBuffer, while the main loop consumes
+// imageBuffer_30 — confirm the intended queue for shared-memory mode.
+void Listencamera(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
+{
+    if(nSize<1000)return;  // ignore obviously truncated messages
+    iv::vision::rawpic pic;
+    if(false == pic.ParseFromArray(strdata,nSize))
+    {
+        std::cout<<"picview Listenpic fail."<<std::endl;
+        return;
+    }
+    cv::Mat mat(pic.height(),pic.width(),pic.mattype());
+    if(pic.type() == 1)
+        memcpy(mat.data,pic.picdata().data(),mat.rows*mat.cols*mat.elemSize());
+    else
+    {
+        //     mat.release();
+        std::vector<unsigned char> buff(pic.picdata().data(),pic.picdata().data() + pic.picdata().size());
+        mat = cv::imdecode(buff,IMREAD_COLOR);
+    }
+
+    frame_info img_info;
+    img_info.frame = mat;
+    img_info.timestamp = pic.time();
+    imageBuffer->add(img_info);
+    mat.release();
+}
+// Publish obstacle-detection results to shared memory: serialize the given
+// obstacles (timestamp + per-obstacle class, 3-D position, 2-D box) into an
+// iv::vision::ObstacleInfo protobuf and send it on channel g_name.
+void SendDetectResult(obstacles_info &obstacles,void* g_name)
+{
+    iv::vision::ObstacleInfo detectResult;
+    detectResult.set_time(obstacles.time);
+    for(unsigned int i=0;i <obstacles.bboxes.size();i++)
+    {
+        iv::vision::Bbox3D obstacle;
+        obstacle.set_category(obstacles.bboxes[i].category);
+        obstacle.set_class_id(obstacles.bboxes[i].class_id);
+        obstacle.set_x_3d(obstacles.bboxes[i].bbox_3d.x);
+        obstacle.set_y_3d(obstacles.bboxes[i].bbox_3d.y);
+        obstacle.set_z_3d(obstacles.bboxes[i].bbox_3d.z);
+        obstacle.set_x(obstacles.bboxes[i].bbox.x);
+        obstacle.set_y(obstacles.bboxes[i].bbox.y);
+        obstacle.set_w(obstacles.bboxes[i].bbox.width);
+        obstacle.set_h(obstacles.bboxes[i].bbox.height);
+        iv::vision::Bbox3D * bb = detectResult.add_bbox_3d();
+        bb->CopyFrom(obstacle);
+    }
+    std::string out_result = detectResult.SerializeAsString();
+    iv::modulecomm::ModuleSendMsg(g_name,out_result.data(),out_result.length());
+}
+// Convert a pixel coordinate to a 3-D point in the vehicle/world frame using
+// the global inverse intrinsics (cameraMatrix_inv), inverse rotation
+// (rvecM_inv) and translation (tvec), assuming the point lies on the plane
+// Z = zConst (ground-plane back-projection).
+cv::Point3f CameraToWorld(cv::Point2f CameraInfo)
+{
+    cv::Mat imagePoint = cv::Mat::ones(3, 1, cv::DataType<double>::type);//u,v,1
+    cv::Mat tempMat, tempMat2;
+    // Given a 2-D image point, the scale factor s can be solved for.
+    imagePoint.at<double>(0,0)=double(CameraInfo.x);
+    imagePoint.at<double>(1,0)=double(CameraInfo.y);
+    double zConst = 0;// assumed world-frame height (Z) of the point
+    // Compute the scale factor s.
+    double s;
+    tempMat = rvecM_inv * cameraMatrix_inv * imagePoint;
+    tempMat2 = rvecM_inv * tvec;
+    s = zConst + tempMat2.at<double>(2, 0);
+    s /= tempMat.at<double>(2, 0);
+    //cout<<"s : "<<s<<endl;
+    //////////////////////camera_coordinates////////////////
+    // NOTE(review): camera_cordinates is computed but never used.
+    cv::Mat camera_cordinates=-rvecM_inv*tvec;
+    /////////////////////2D to 3D///////////////////////
+    cv::Mat wcPoint = rvecM_inv * (cameraMatrix_inv *s*imagePoint - tvec);
+    cv::Point3f worldPoint(wcPoint.at<double>(0, 0), wcPoint.at<double>(1, 0),
+                           wcPoint.at<double>(2, 0));
+    return worldPoint;
+}
+// Entry point: parse arguments, start the frame source (video-file thread or
+// shared-memory callback), load camera calibration and the YOLOv4 TensorRT
+// engine, then loop: detect -> track -> project to 3-D -> publish -> display.
+// argv[1] (optional): "true" to read a video file, anything else = shared memory.
+// argv[2] (optional): video path (when test_video) or shared-memory channel name.
+int main(int argc, char** argv )
+{
+
+    if(argc==3)
+    {
+        test_video = (strcmp(argv[1], "true") == 0)?true:false;
+        if(test_video)
+            video_path = argv[2];
+        else
+            cameradata = argv[2];
+    }
+    if(argc==2)
+        test_video = (strcmp(argv[1], "true") == 0)?true:false;
+
+    if(test_video)
+        std::thread * readthread = new std::thread(ReadFunc,1);
+    else
+        mpa_camera= iv::modulecomm::RegisterRecv(&cameradata[0],Listencamera);
+
+    g_obstacle = iv::modulecomm::RegisterSend(&obstacledata[0],1000000,1);
+
+    // Load camera intrinsics and distortion coefficients (replaces the
+    // hard-coded defaults when the YAML file opens successfully).
+    FileStorage file_storage(yaml_path, FileStorage::READ);
+    if( file_storage.isOpened())
+    {
+        file_storage["camera_matrix"] >> cameraMatrix;
+        file_storage["camera_matrix_inv"] >> cameraMatrix_inv;
+        file_storage["distortion_coefficients"] >> distCoeffs;
+        file_storage["RotationR_inv"] >> rvecM_inv;
+        file_storage["tvec"] >> tvec;
+        file_storage.release();
+    }
+    else{
+        cout << "Error: can not open the external parameters file"<<endl;
+        return 0;
+    }
+
+    NetworkInfo networkInfo;
+    networkInfo.networkType     = "yolov4";
+    networkInfo.configFilePath  = "model/yolov4-shigong.cfg";
+    networkInfo.wtsFilePath     = "model/yolov4-shigong_final";
+    networkInfo.deviceType      = "kGPU";
+    networkInfo.inputBlobName   = "data";
+    networkInfo.maxbatchSize    = max_batch_size;
+    std::string modelname = "model/yolov4_shigong.engine";
+    IExecutionContext* yolo_context{nullptr};
+    YoloDetect detector(networkInfo,modelname);
+
+    string classesFile = "model/coco.names";
+    // Load class names (file-based loading kept commented out for reference;
+    // a single hard-coded class is used instead).
+    /*
+    vector<string> classes;
+    ifstream ifs(classesFile.c_str());
+    string line;
+    while (getline(ifs, line)) classes.push_back(line);
+    */
+    vector<string> classes = {"shigong"};
+    // Load the network model; 0 selects the first GPU.
+    cudaSetDevice(0);
+    if(!detector.loadModel(yolo_context))
+    {
+        cout<<"load yolo model failed"<<endl;
+        return -1;
+    }
+
+    //Size size(cap.get(CV_CAP_PROP_FRAME_WIDTH), cap.get(CV_CAP_PROP_FRAME_HEIGHT));
+    //VideoWriter writer("./data/result.avi", cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), 25, size);
+
+    vector<KalmanTracker> trackers_30;
+    KalmanTracker::kf_count = 0; // tracking id relies on this, so we have to reset it in each seq.
+    int frame_count = 0;
+    frame_info frameInfo_30;
+    Mat frame_30;
+
+    long long millseconds; // timestamp (NOTE(review): currently unused)
+    double waittime = (double)getTickCount();
+
+    while (1)
+    {
+        if(imageBuffer_30->isEmpty())
+        {
+            // Give up after ~30 s without receiving any frame.
+            double waittotal = (double)getTickCount() - waittime;
+            if(waittotal/cv::getTickFrequency()>30.0)
+            {
+                cout<<"Cant't get frame and quit"<<endl;
+                break;
+            }
+            continue;
+        }
+
+        // NOTE(review): this loop consumes imageBuffer_30, but Listencamera
+        // (shared-memory mode) pushes into imageBuffer — verify which queue is
+        // intended when test_video == false.
+        imageBuffer_30->consume(frameInfo_30);
+        frame_30 = frameInfo_30.frame;
+
+        obstacles_info detect_result_info;
+        detect_result_info.time = frameInfo_30.timestamp;
+        frame_count++;
+
+        double start = (double)getTickCount();
+        // Forward inference thresholds: detection confidence and NMS IOU.
+        float ignore_thresh=0.4;
+        float nms_thresh = 0.4;
+
+        std::vector<Detection> result_30;
+        od::bbox_t bbox_t_30;
+        vector<od::bbox_t> outs_30;
+
+        if(detector.process(*yolo_context,frame_30,result_30,ignore_thresh,nms_thresh))
+        {
+            // Map network-space boxes back to image coordinates.
+            for (size_t i = 0; i < result_30.size(); i++) {
+                cv::Rect r = detector.get_rect(frame_30, result_30[i].bbox,detector.m_input_w,detector.m_input_h);
+                bbox_t_30.x = r.x;
+                bbox_t_30.y = r.y;
+                bbox_t_30.w = r.width;
+                bbox_t_30.h = r.height;
+                bbox_t_30.prob = result_30[i].det_confidence;
+                bbox_t_30.obj_id = result_30[i].class_id;
+                outs_30.push_back(bbox_t_30);
+            }
+        }
+
+//        double infertime = (double)getTickCount() - start;
+//        std::cout<< "Total Cost of infertime: "  <<infertime*1000.0/cv::getTickFrequency()<<" ms"<<std::endl;
+
+        vector<od::TrackingBox>track_result_30;
+        bool track_flag_30 = od::TrackObstacle(frame_count,trackers_30,outs_30,track_result_30);
+        if(track_flag_30)
+        {
+            vector<obstacle_info>obstacle_vector;
+            for(unsigned int i=0;i < track_result_30.size(); i++)
+            {
+                obstacle_info obstacle;
+                obstacle.category = classes[track_result_30[i].class_id];
+                obstacle.class_id = track_result_30[i].class_id;
+                // Ground-contact point of the bbox (bottom-center), used for
+                // the 2-D -> 3-D ground-plane projection.
+                Point2f bottom_center = Point2f(track_result_30[i].box.x+0.5*track_result_30[i].box.width,
+                                                track_result_30[i].box.y+track_result_30[i].box.height);
+                obstacle.bbox_3d = CameraToWorld(bottom_center);
+                obstacle.bbox = track_result_30[i].box;
+                obstacle_vector.push_back(obstacle);
+            }
+            detect_result_info.bboxes = obstacle_vector;
+            SendDetectResult(detect_result_info,g_obstacle);
+            od::Drawer(frame_30, track_result_30, classes);
+        }
+
+        double total = (double)getTickCount() - start;
+        std::cout<< "Total Cost of Detection: "  <<total*1000.0/cv::getTickFrequency()<<" ms"<<endl;
+
+        namedWindow("Result_30",WINDOW_NORMAL);
+        imshow("Result_30",frame_30);
+
+        // 'q' quits, 's' pauses until the next keypress.
+        if(waitKey(1) == 'q')
+            break;
+        if(waitKey(1) == 's')
+            waitKey(0);
+        waittime = (double)getTickCount();
+    }
+    destroyAllWindows();
+    trackers_30.clear();
+    yolo_context->destroy();
+    return 0;
+}

+ 406 - 0
src/detection/detection_yolov4_shigong_monocular/main_multibatch.cpp

@@ -0,0 +1,406 @@
+#include <opencv2/opencv.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+#include <vector>
+#include <fstream>
+#include <thread>
+#include <cmath>
+
+#include "imageBuffer.h"
+#include "modulecomm.h"
+#include "rawpic.pb.h"
+#include "obstacles.pb.h"
+#include "yolodetect.h"
+#include "detect_obstacle.h"
+
+using namespace std;
+using namespace cv;
+
+uint max_batch_size = 2;
+
+string yaml_path="/home/nvidia/models/camera/external.yaml";
+
+bool test_video = true;
+string video_path = "20201231144029.avi";
+
+void * g_obstacle;
+string obstacledata="obstacledata";
+
+void * mpa_camera;
+string cameradata="picleft";
+
+typedef struct frame_info
+{
+    cv::Mat  frame;
+    long long timestamp;
+}frame_info;
+typedef struct obstacle_info
+{
+    string category;
+    uint64 class_id;
+    Point3f bbox_3d;
+    Rect_<float> bbox;
+}obstacle_info;
+
+typedef struct obstacles_info
+{
+    vector<obstacle_info> bboxes;
+    uint64 time;
+}obstacles_info;
+
+
+int capture_width=1280,capture_height=720;
+cv::Mat cameraMatrix = (cv::Mat_<double>(3,3)<< 1.0538648949999999e+03, 0., 9.7134830299999999e+02, 0.,
+                        9.6887771299999997e+02, 5.5096488099999999e+02, 0., 0., 1.);
+cv::Mat distCoeffs = (cv::Mat_<double>(1,5)<<-2.9078999999999999e-01, 6.0994000000000000e-02,
+                      3.6489999999999999e-03, -2.0590000000000001e-03, 0.);
+cv::Mat cameraMatrix_inv = (cv::Mat_<double>(3,3)<<9.4888823486240150e-04, 0., -9.2170097667025908e-01, 0.,
+                            1.0321219970099572e-03, -5.6866297326007331e-01, 0., 0., 1.);
+cv::Mat rvecM_inv = (cv::Mat_<double>(3,3)<<9.9361999532145029e-01, 1.5481286202195165e-02,
+                     1.1171228524619221e-01, -1.1071480863577153e-01,
+                     -5.4789655210038264e-02, 9.9234083098031878e-01,
+                     2.1483390005894807e-02, -9.9837789611924532e-01,
+                     -5.2726089314150459e-02 );
+cv::Mat tvec = (cv::Mat_<double>(3,1)<<1.5367854556890524e+02, 1.5365463964586140e+03,
+                -1.2444937322334890e+03);
+
+ConsumerProducerQueue<frame_info> * imageBuffer =  new ConsumerProducerQueue<frame_info>(3,true);
+ConsumerProducerQueue<frame_info> * imageBuffer_30 =  new ConsumerProducerQueue<frame_info>(3,true);
+ConsumerProducerQueue<frame_info> * imageBuffer_90 =  new ConsumerProducerQueue<frame_info>(3,true);
+
+// Return the current system time in milliseconds since the Unix epoch
+// (1970-01-01 00:00:00.000 UTC).
+long long getCurrentSystemTime()
+{
+    auto now = std::chrono::system_clock::now();
+    long long millseconds = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
+    /*
+    uint64_t dis_millseconds = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count()
+            - std::chrono::duration_cast<std::chrono::seconds>(now.time_since_epoch()).count() * 1000;
+    auto tt = std::chrono::system_clock::to_time_t(now);
+    struct tm* ptm = localtime(&tt);
+    char date[60] = { 0 };
+    sprintf(date, "%d/%02d/%02d/%02d/%02d/%02d/%03d",
+            (int)ptm->tm_year + 1900, (int)ptm->tm_mon + 1, (int)ptm->tm_mday,
+            (int)ptm->tm_hour, (int)ptm->tm_min, (int)ptm->tm_sec, (int)dis_millseconds);
+    return std::string(date);
+    */
+    return millseconds;
+}
+// Capture thread: grab frame pairs from two cameras (device indices 0 and 1),
+// stamp both with the same timestamp, and push them into imageBuffer_30 /
+// imageBuffer_90. Spins forever (never returns).
+void ReadFunc(int n)
+{
+    cv::VideoCapture cap_30(0);
+    cv::VideoCapture cap_90(1);
+    // NOTE(review): bitwise '|' instead of logical '||' — same truth value for
+    // bools but no short-circuiting; confirm intent.
+    if(!cap_30.isOpened() | !cap_90.isOpened())
+    {
+        cout<<"camera failed to open"<<endl;
+    }
+    while(1)
+    {
+        frame_info frameInfo_30,frameInfo_90;
+        cv::Mat frame_30,frame_90;
+        // Needed when reading from a video file (pace the reader to the
+        // consumer); remove when reading from a live camera.
+        if(imageBuffer_30->isFull() | imageBuffer_90->isFull())
+        {
+            continue;
+        }
+        if(cap_30.read(frame_30) && cap_90.read(frame_90))
+        {
+            long long timestamp = getCurrentSystemTime();
+            frameInfo_30.frame = frame_30;
+            frameInfo_30.timestamp = timestamp;
+            imageBuffer_30->add(frameInfo_30);
+            frameInfo_90.frame = frame_90;
+            frameInfo_90.timestamp = timestamp;
+            imageBuffer_90->add(frameInfo_90);
+        }
+        else
+        {
+            std::this_thread::sleep_for(std::chrono::milliseconds(1));
+        }
+    }
+}
+// Shared-memory callback: receive a camera frame (iv::vision::rawpic
+// protobuf), decode it (raw copy when pic.type()==1, else JPEG/PNG decode),
+// and push it into imageBuffer.
+// NOTE(review): pushes into imageBuffer while ReadFunc fills imageBuffer_30 /
+// imageBuffer_90 — confirm which queue the consumer reads in this mode.
+void Listencamera(const char * strdata,const unsigned int nSize,const unsigned int index,const QDateTime * dt,const char * strmemname)
+{
+    if(nSize<1000)return;  // ignore obviously truncated messages
+    iv::vision::rawpic pic;
+    if(false == pic.ParseFromArray(strdata,nSize))
+    {
+        std::cout<<"picview Listenpic fail."<<std::endl;
+        return;
+    }
+    cv::Mat mat(pic.height(),pic.width(),pic.mattype());
+    if(pic.type() == 1)
+        memcpy(mat.data,pic.picdata().data(),mat.rows*mat.cols*mat.elemSize());
+    else
+    {
+        //     mat.release();
+        std::vector<unsigned char> buff(pic.picdata().data(),pic.picdata().data() + pic.picdata().size());
+        mat = cv::imdecode(buff,IMREAD_COLOR);
+    }
+
+    frame_info img_info;
+    img_info.frame = mat;
+    img_info.timestamp = pic.time();
+    imageBuffer->add(img_info);
+    mat.release();
+}
+// Publish obstacle-detection results to shared memory: serialize the given
+// obstacles (timestamp + per-obstacle class, 3-D position, 2-D box) into an
+// iv::vision::ObstacleInfo protobuf and send it on channel g_name.
+void SendDetectResult(obstacles_info &obstacles,void* g_name)
+{
+    iv::vision::ObstacleInfo detectResult;
+    detectResult.set_time(obstacles.time);
+    for(unsigned int i=0;i <obstacles.bboxes.size();i++)
+    {
+        iv::vision::Bbox3D obstacle;
+        obstacle.set_category(obstacles.bboxes[i].category);
+        obstacle.set_class_id(obstacles.bboxes[i].class_id);
+        obstacle.set_x_3d(obstacles.bboxes[i].bbox_3d.x);
+        obstacle.set_y_3d(obstacles.bboxes[i].bbox_3d.y);
+        obstacle.set_z_3d(obstacles.bboxes[i].bbox_3d.z);
+        obstacle.set_x(obstacles.bboxes[i].bbox.x);
+        obstacle.set_y(obstacles.bboxes[i].bbox.y);
+        obstacle.set_w(obstacles.bboxes[i].bbox.width);
+        obstacle.set_h(obstacles.bboxes[i].bbox.height);
+        iv::vision::Bbox3D * bb = detectResult.add_bbox_3d();
+        bb->CopyFrom(obstacle);
+    }
+    std::string out_result = detectResult.SerializeAsString();
+    iv::modulecomm::ModuleSendMsg(g_name,out_result.data(),out_result.length());
+}
+// Convert a pixel coordinate to a 3-D point in the vehicle/world frame using
+// the global inverse intrinsics (cameraMatrix_inv), inverse rotation
+// (rvecM_inv) and translation (tvec), assuming the point lies on the plane
+// Z = zConst (ground-plane back-projection).
+cv::Point3f CameraToWorld(cv::Point2f CameraInfo)
+{
+    cv::Mat imagePoint = cv::Mat::ones(3, 1, cv::DataType<double>::type);//u,v,1
+    cv::Mat tempMat, tempMat2;
+    // Given a 2-D image point, the scale factor s can be solved for.
+    imagePoint.at<double>(0,0)=double(CameraInfo.x);
+    imagePoint.at<double>(1,0)=double(CameraInfo.y);
+    double zConst = 0;// assumed world-frame height (Z) of the point
+    // Compute the scale factor s.
+    double s;
+    tempMat = rvecM_inv * cameraMatrix_inv * imagePoint;
+    tempMat2 = rvecM_inv * tvec;
+    s = zConst + tempMat2.at<double>(2, 0);
+    s /= tempMat.at<double>(2, 0);
+    //cout<<"s : "<<s<<endl;
+    //////////////////////camera_coordinates////////////////
+    // NOTE(review): camera_cordinates is computed but never used.
+    cv::Mat camera_cordinates=-rvecM_inv*tvec;
+    /////////////////////2D to 3D///////////////////////
+    cv::Mat wcPoint = rvecM_inv * (cameraMatrix_inv *s*imagePoint - tvec);
+    cv::Point3f worldPoint(wcPoint.at<double>(0, 0), wcPoint.at<double>(1, 0),
+                           wcPoint.at<double>(2, 0));
+    return worldPoint;
+}
+// Entry point: grab paired frames from the 30-degree and 90-degree cameras,
+// run batched YOLOv4 inference on both, track detections with SORT-style
+// Kalman trackers, project each tracked box's ground-contact pixel into
+// vehicle coordinates, publish the obstacle list and visualize both streams.
+// Usage: prog [use_video:"true"/"false"] [video_path | camera_channel_name]
+int main(int argc, char** argv )
+{
+
+    if(argc==3)
+    {
+        test_video = (strcmp(argv[1], "true") == 0);
+        if(test_video)
+            video_path = argv[2];
+        else
+            cameradata = argv[2];
+    }
+    if(argc==2)
+        test_video = (strcmp(argv[1], "true") == 0);
+
+    if(test_video)
+    {
+        // Thread object is deliberately leaked; it runs for the whole
+        // process lifetime feeding the image buffer.
+        new std::thread(ReadFunc,1);
+    }
+    else
+        mpa_camera= iv::modulecomm::RegisterRecv(&cameradata[0],Listencamera);
+
+    g_obstacle = iv::modulecomm::RegisterSend(&obstacledata[0],1000000,1);
+
+    // Load camera intrinsics, distortion coefficients and extrinsics
+    FileStorage file_storage(yaml_path, FileStorage::READ);
+    if( file_storage.isOpened())
+    {
+        file_storage["camera_matrix"] >> cameraMatrix;
+        file_storage["camera_matrix_inv"] >> cameraMatrix_inv;
+        file_storage["distortion_coefficients"] >> distCoeffs;
+        file_storage["RotationR_inv"] >> rvecM_inv;
+        file_storage["tvec"] >> tvec;
+        file_storage.release();
+    }
+    else{
+        cout << "Error: can not open the external parameters file"<<endl;
+        return 0;
+    }
+
+    NetworkInfo networkInfo;
+    networkInfo.networkType     = "yolov4";
+    networkInfo.configFilePath  = "/home/nvidia/models/camera/yolov4-shigong.cfg";
+    networkInfo.wtsFilePath     = "/home/nvidia/models/camera/yolov4-shigong_final.weights";
+    networkInfo.deviceType      = "kGPU";
+    networkInfo.inputBlobName   = "data";
+    networkInfo.maxbatchSize    = max_batch_size;
+    std::string modelname = "/home/nvidia/models/camera/yolov4_shigong.engine";
+    IExecutionContext* yolo_context{nullptr};
+    YoloDetect detector(networkInfo,modelname);
+
+    vector<string> classes = {"shigong"};
+    // Select GPU 0 and build/load the TensorRT execution context
+    cudaSetDevice(0);
+    if(!detector.loadModel(yolo_context))
+    {
+        cout<<"load yolo model failed"<<endl;
+        return -1;
+    }
+
+    vector<KalmanTracker> trackers_30,trackers_90;
+    KalmanTracker::kf_count = 0; // tracking id relies on this, so we have to reset it in each seq.
+    int frame_count = 0;
+    frame_info frameInfo_30,frameInfo_90;
+    Mat frame_30,frame_90;
+
+    double waittime = (double)getTickCount();
+
+    while (1)
+    {
+        // Wait until BOTH buffers have a frame; give up after 30 s of silence.
+        // (Fixed: logical ||, the original used bitwise | on bools.)
+        if(imageBuffer_30->isEmpty() || imageBuffer_90->isEmpty())
+        {
+            double waittotal = (double)getTickCount() - waittime;
+            if(waittotal/cv::getTickFrequency()>30.0)
+            {
+                cout<<"Can't get frame and quit"<<endl;
+                break;
+            }
+            continue;
+        }
+
+        imageBuffer_30->consume(frameInfo_30);
+        frame_30 = frameInfo_30.frame;
+        imageBuffer_90->consume(frameInfo_90);
+        frame_90 = frameInfo_90.frame;
+        vector<Mat> frame_vec;
+        frame_vec.push_back(frame_30);
+        frame_vec.push_back(frame_90);
+
+        obstacles_info detect_result_info;
+        detect_result_info.time = frameInfo_30.timestamp;
+        frame_count++;
+
+        double start = (double)getTickCount();
+        // Inference thresholds
+        float ignore_thresh=0.4;
+        float nms_thresh = 0.4;
+
+        // Batched results for the two cameras (index 0: 30 deg, index 1: 90 deg)
+        std::vector<std::vector<Detection>> detect_results_yolo;
+        std::vector<Detection> result_30,result_90;
+        detect_results_yolo.push_back(result_30);
+        detect_results_yolo.push_back(result_90);
+        od::bbox_t bbox_t_30;
+        od::bbox_t bbox_t_90;
+        vector<od::bbox_t> outs_30;
+        vector<od::bbox_t> outs_90;
+
+        if(detector.process(*yolo_context,max_batch_size,frame_vec,
+                            detect_results_yolo,ignore_thresh,nms_thresh))
+        {
+            for (size_t i = 0; i < detect_results_yolo[0].size(); i++) {
+                cv::Rect r = detector.get_rect(frame_30, detect_results_yolo[0][i].bbox,detector.m_input_w,detector.m_input_h);
+                bbox_t_30.x = r.x;
+                bbox_t_30.y = r.y;
+                bbox_t_30.w = r.width;
+                bbox_t_30.h = r.height;
+                bbox_t_30.prob = detect_results_yolo[0][i].det_confidence;
+                bbox_t_30.obj_id = detect_results_yolo[0][i].class_id;
+                outs_30.push_back(bbox_t_30);
+            }
+            for (size_t i = 0; i < detect_results_yolo[1].size(); i++) {
+                cv::Rect r = detector.get_rect(frame_90, detect_results_yolo[1][i].bbox,
+                                               detector.m_input_w,detector.m_input_h);
+                bbox_t_90.x = r.x;
+                bbox_t_90.y = r.y;
+                bbox_t_90.w = r.width;
+                bbox_t_90.h = r.height;
+                bbox_t_90.prob = detect_results_yolo[1][i].det_confidence;
+                bbox_t_90.obj_id = detect_results_yolo[1][i].class_id;
+                outs_90.push_back(bbox_t_90);
+            }
+            // Draw the raw 90-degree detections once, after they are all
+            // collected (the original redrew the whole list on every loop
+            // iteration).
+            od::Drawer(frame_90, outs_90, classes);
+        }
+
+        vector<od::TrackingBox>track_result_30,track_result_90;
+        bool track_flag_30 = od::TrackObstacle(frame_count,trackers_30,outs_30,track_result_30);
+        bool track_flag_90 = od::TrackObstacle(frame_count,trackers_90,outs_90,track_result_90);
+        if(track_flag_30)
+        {
+            vector<obstacle_info>obstacle_vector;
+            for(unsigned int i=0;i < track_result_30.size(); i++)
+            {
+                obstacle_info obstacle;
+                obstacle.category = classes[track_result_30[i].class_id];
+                obstacle.class_id = track_result_30[i].class_id;
+                // Ground-contact point: bottom-center of the bounding box
+                Point2f bottom_center = Point2f(track_result_30[i].box.x+0.5*track_result_30[i].box.width,
+                                                track_result_30[i].box.y+track_result_30[i].box.height);
+                obstacle.bbox_3d = CameraToWorld(bottom_center);
+                obstacle.bbox = track_result_30[i].box;
+                obstacle_vector.push_back(obstacle);
+            }
+            detect_result_info.bboxes = obstacle_vector;
+            SendDetectResult(detect_result_info,g_obstacle);
+            od::Drawer(frame_30, track_result_30, classes);
+        }
+        // NOTE(review): 90-degree results are only published when the
+        // 30-degree camera has no tracks this frame; confirm this camera
+        // priority is intentional (an independent `if` would publish both).
+        else if(track_flag_90)
+        {
+            vector<obstacle_info>obstacle_vector;
+            for(unsigned int i=0;i < track_result_90.size(); i++)
+            {
+                obstacle_info obstacle;
+                obstacle.category = classes[track_result_90[i].class_id];
+                obstacle.class_id = track_result_90[i].class_id;
+                // Ground-contact point: bottom-center of the bounding box
+                Point2f bottom_center = Point2f(track_result_90[i].box.x+0.5*track_result_90[i].box.width,
+                                                track_result_90[i].box.y+track_result_90[i].box.height);
+                obstacle.bbox_3d = CameraToWorld(bottom_center);
+                obstacle.bbox = track_result_90[i].box;
+                obstacle_vector.push_back(obstacle);
+            }
+            detect_result_info.bboxes = obstacle_vector;
+            SendDetectResult(detect_result_info,g_obstacle);
+            od::Drawer(frame_90, track_result_90, classes);
+        }
+        double total = (double)getTickCount() - start;
+        std::cout<< "Total Cost of Detection: "  <<total*1000.0/cv::getTickFrequency()<<" ms"<<endl;
+
+        namedWindow("Result_30",WINDOW_NORMAL);
+        imshow("Result_30",frame_30);
+
+        namedWindow("Result_90",WINDOW_NORMAL);
+        imshow("Result_90",frame_90);
+
+        // Read the key exactly once per frame: the original called waitKey(1)
+        // twice, so a single keypress could be consumed by the 'q' check and
+        // the 's' check would never see it.
+        int key = waitKey(1);
+        if(key == 'q')
+            break;
+        if(key == 's')
+            waitKey(0);   // pause until any key is pressed
+        waittime = (double)getTickCount();
+    }
+    destroyAllWindows();
+    trackers_30.clear();
+    trackers_90.clear();
+    yolo_context->destroy();
+    return 0;
+}

+ 28 - 0
src/detection/detection_yolov4_shigong_monocular/proto/obstacles.proto

@@ -0,0 +1,28 @@
+syntax = "proto2";
+package iv.vision;
+// One detected obstacle: class information, the 3D position of its
+// ground-contact point in vehicle coordinates, and its 2D pixel-space box.
+message Bbox3D
+{
+     optional string category = 1;
+     optional uint64 class_id = 2;
+     optional double x_3d = 3;
+     optional double y_3d = 4;
+     optional double z_3d = 5;
+     /*
+     Pixel coordinate frame of the 2D box below (origin at top-left):
+     `-------------->x
+      |
+      |
+      |
+      v
+      y
+     */
+     optional uint64 x = 6;
+     optional uint64 y = 7;
+     optional uint64 w = 8;
+     optional uint64 h = 9;
+}
+
+// Detection result for one frame: all obstacles plus the frame timestamp.
+message ObstacleInfo
+{
+     repeated Bbox3D bbox_3d = 1;
+     optional uint64 time = 2;
+}

+ 1 - 0
src/detection/detection_yolov4_shigong_monocular/proto/protomake.sh

@@ -0,0 +1 @@
+protoc *.proto -I=./ --cpp_out=./

+ 16 - 0
src/detection/detection_yolov4_shigong_monocular/proto/rawpic.proto

@@ -0,0 +1,16 @@
+syntax = "proto2";
+
+package iv.vision;
+
+
+// One raw camera frame as shipped over the shared-memory channel.
+message rawpic
+{
+  optional int64 time = 1; // number of milliseconds since 1970-01-01T00:00:00 Universal Coordinated Time
+  optional int32 index = 2;
+  optional int32 type = 3; // payload type: 1 = raw cv::Mat pixel data, 2 = jpg-encoded
+  optional int32 width = 4;
+  optional int32 height = 5;
+  optional int32 elemsize = 6;  // bytes per pixel element (cv::Mat::elemSize)
+  optional int32 mattype = 7;   // OpenCV matrix type code (e.g. CV_8UC3)
+  optional bytes picdata = 8;   // pixel bytes or jpg stream, per `type`
+};

+ 398 - 0
src/detection/detection_yolov4_shigong_monocular/src/Hungarian.cpp

@@ -0,0 +1,398 @@
+//
+// Created by lqx on 20-4-23.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Hungarian.cpp: Implementation file for Class HungarianAlgorithm.
+//
+// This is a C++ wrapper with slight modification of a hungarian algorithm implementation by Markus Buehren.
+// The original implementation is a few mex-functions for use in MATLAB, found here:
+// http://www.mathworks.com/matlabcentral/fileexchange/6543-functions-for-the-rectangular-assignment-problem
+//
+// Both this code and the orignal code are published under the BSD license.
+// by Cong Ma, 2016
+//
+
+#include <math.h>
+#include <cfloat>
+#include "Hungarian.h"
+
+
+// Trivial construction/destruction; the solver keeps no state between calls.
+HungarianAlgorithm::HungarianAlgorithm(){}
+HungarianAlgorithm::~HungarianAlgorithm(){}
+
+
+//********************************************************//
+// A single function wrapper for solving assignment problem.
+//********************************************************//
+// DistMatrix: nRows x nCols cost matrix (entries must be non-negative).
+// Assignment: output; Assignment[r] = column assigned to row r, or -1.
+// Returns the total cost of the optimal assignment.
+double HungarianAlgorithm::Solve(vector<vector<double>>& DistMatrix, vector<int>& Assignment)
+{
+    // Guard the empty case: the original dereferenced DistMatrix[0]
+    // unconditionally, which is undefined behavior for an empty matrix.
+    if (DistMatrix.empty() || DistMatrix[0].empty())
+    {
+        Assignment.clear();
+        return 0.0;
+    }
+
+    unsigned int nRows = DistMatrix.size();
+    unsigned int nCols = DistMatrix[0].size();
+
+    // RAII containers instead of raw new[]/delete[]: exception-safe, no leak.
+    std::vector<double> distMatrixIn(nRows * nCols);
+    std::vector<int> assignment(nRows, -1);
+    double cost = 0.0;
+
+    // Fill in distMatrixIn. Mind the index is "i + nRows * j":
+    // the solver expects the matrix in column-major (MATLAB) order,
+    // i.e. [1 2; 3 4] is stored as [1 3 2 4], NOT [1 2 3 4].
+    for (unsigned int i = 0; i < nRows; i++)
+        for (unsigned int j = 0; j < nCols; j++)
+            distMatrixIn[i + nRows * j] = DistMatrix[i][j];
+
+    // call solving function
+    assignmentoptimal(assignment.data(), &cost, distMatrixIn.data(), nRows, nCols);
+
+    Assignment.assign(assignment.begin(), assignment.end());
+    return cost;
+}
+
+
+//********************************************************//
+// Solve optimal solution for assignment problem using Munkres algorithm, also known as Hungarian Algorithm.
+//********************************************************//
+// distMatrixIn: nOfRows x nOfColumns cost matrix in COLUMN-MAJOR order
+//               (element (row, col) at index row + nOfRows*col).
+// assignment:   output; assignment[row] = assigned column, or -1 if none.
+// cost:         output; sum of costs of the chosen assignments.
+// NOTE(review): malloc/calloc results are not checked; a failed allocation
+// would crash below. Negative matrix entries only produce a cerr warning
+// and the algorithm proceeds anyway (result is then undefined).
+void HungarianAlgorithm::assignmentoptimal(int *assignment, double *cost, double *distMatrixIn, int nOfRows, int nOfColumns)
+{
+    double *distMatrix, *distMatrixTemp, *distMatrixEnd, *columnEnd, value, minValue;
+    bool *coveredColumns, *coveredRows, *starMatrix, *newStarMatrix, *primeMatrix;
+    int nOfElements, minDim, row, col;
+
+    /* initialization */
+    *cost = 0;
+    for (row = 0; row<nOfRows; row++)
+        assignment[row] = -1;
+
+    /* generate working copy of distance Matrix */
+    /* check if all matrix elements are positive */
+    nOfElements = nOfRows * nOfColumns;
+    distMatrix = (double *)malloc(nOfElements * sizeof(double));
+    distMatrixEnd = distMatrix + nOfElements;
+
+    for (row = 0; row<nOfElements; row++)
+    {
+        value = distMatrixIn[row];
+        if (value < 0)
+            cerr << "All matrix elements have to be non-negative." << endl;
+        distMatrix[row] = value;
+    }
+
+
+    /* memory allocation */
+    coveredColumns = (bool *)calloc(nOfColumns, sizeof(bool));
+    coveredRows = (bool *)calloc(nOfRows, sizeof(bool));
+    starMatrix = (bool *)calloc(nOfElements, sizeof(bool));
+    primeMatrix = (bool *)calloc(nOfElements, sizeof(bool));
+    newStarMatrix = (bool *)calloc(nOfElements, sizeof(bool)); /* used in step4 */
+
+    /* preliminary steps: row (or column) reduction, then star independent zeros */
+    if (nOfRows <= nOfColumns)
+    {
+        minDim = nOfRows;
+
+        for (row = 0; row<nOfRows; row++)
+        {
+            /* find the smallest element in the row */
+            distMatrixTemp = distMatrix + row;
+            minValue = *distMatrixTemp;
+            distMatrixTemp += nOfRows;
+            while (distMatrixTemp < distMatrixEnd)
+            {
+                value = *distMatrixTemp;
+                if (value < minValue)
+                    minValue = value;
+                distMatrixTemp += nOfRows;
+            }
+
+            /* subtract the smallest element from each element of the row */
+            distMatrixTemp = distMatrix + row;
+            while (distMatrixTemp < distMatrixEnd)
+            {
+                *distMatrixTemp -= minValue;
+                distMatrixTemp += nOfRows;
+            }
+        }
+
+        /* Steps 1 and 2a: star each zero that has no starred zero in its column */
+        for (row = 0; row<nOfRows; row++)
+            for (col = 0; col<nOfColumns; col++)
+                if (fabs(distMatrix[row + nOfRows*col]) < DBL_EPSILON)
+                    if (!coveredColumns[col])
+                    {
+                        starMatrix[row + nOfRows*col] = true;
+                        coveredColumns[col] = true;
+                        break;
+                    }
+    }
+    else /* if(nOfRows > nOfColumns) */
+    {
+        minDim = nOfColumns;
+
+        for (col = 0; col<nOfColumns; col++)
+        {
+            /* find the smallest element in the column */
+            distMatrixTemp = distMatrix + nOfRows*col;
+            columnEnd = distMatrixTemp + nOfRows;
+
+            minValue = *distMatrixTemp++;
+            while (distMatrixTemp < columnEnd)
+            {
+                value = *distMatrixTemp++;
+                if (value < minValue)
+                    minValue = value;
+            }
+
+            /* subtract the smallest element from each element of the column */
+            distMatrixTemp = distMatrix + nOfRows*col;
+            while (distMatrixTemp < columnEnd)
+                *distMatrixTemp++ -= minValue;
+        }
+
+        /* Steps 1 and 2a: star each zero in an uncovered row/column */
+        for (col = 0; col<nOfColumns; col++)
+            for (row = 0; row<nOfRows; row++)
+                if (fabs(distMatrix[row + nOfRows*col]) < DBL_EPSILON)
+                    if (!coveredRows[row])
+                    {
+                        starMatrix[row + nOfRows*col] = true;
+                        coveredColumns[col] = true;
+                        coveredRows[row] = true;
+                        break;
+                    }
+        for (row = 0; row<nOfRows; row++)
+            coveredRows[row] = false;
+
+    }
+
+    /* move to step 2b */
+    step2b(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+
+    /* compute cost and remove invalid assignments */
+    computeassignmentcost(assignment, cost, distMatrixIn, nOfRows);
+
+    /* free allocated memory */
+    free(distMatrix);
+    free(coveredColumns);
+    free(coveredRows);
+    free(starMatrix);
+    free(primeMatrix);
+    free(newStarMatrix);
+
+    return;
+}
+
+/********************************************************/
+// Translate the starred-zero matrix into the output assignment vector:
+// for each row, the column of its starred zero (at most one per row).
+void HungarianAlgorithm::buildassignmentvector(int *assignment, bool *starMatrix, int nOfRows, int nOfColumns)
+{
+    int row, col;
+
+    for (row = 0; row<nOfRows; row++)
+        for (col = 0; col<nOfColumns; col++)
+            if (starMatrix[row + nOfRows*col])
+            {
+#ifdef ONE_INDEXING
+                assignment[row] = col + 1; /* MATLAB-Indexing */
+#else
+                assignment[row] = col;
+#endif
+                break;
+            }
+}
+
+/********************************************************/
+// Accumulate into *cost the original (unreduced) cost of every assigned
+// row; rows with assignment -1 contribute nothing.
+void HungarianAlgorithm::computeassignmentcost(int *assignment, double *cost, double *distMatrix, int nOfRows)
+{
+    int row, col;
+
+    for (row = 0; row<nOfRows; row++)
+    {
+        col = assignment[row];
+        if (col >= 0)
+            *cost += distMatrix[row + nOfRows*col];
+    }
+}
+
+/********************************************************/
+// Munkres step 2a: cover every column containing a starred zero, then
+// continue with the termination check in step 2b.
+void HungarianAlgorithm::step2a(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim)
+{
+    bool *starMatrixTemp, *columnEnd;
+    int col;
+
+    /* cover every column containing a starred zero */
+    for (col = 0; col<nOfColumns; col++)
+    {
+        starMatrixTemp = starMatrix + nOfRows*col;
+        columnEnd = starMatrixTemp + nOfRows;
+        while (starMatrixTemp < columnEnd){
+            if (*starMatrixTemp++)
+            {
+                coveredColumns[col] = true;
+                break;
+            }
+        }
+    }
+
+    /* move to step 2b (the original comment said "step 3", which was wrong) */
+    step2b(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+
+/********************************************************/
+// Munkres step 2b: if all minDim columns are covered the assignment is
+// complete (build the output vector); otherwise continue with step 3.
+void HungarianAlgorithm::step2b(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim)
+{
+    int col, nOfCoveredColumns;
+
+    /* count covered columns */
+    nOfCoveredColumns = 0;
+    for (col = 0; col<nOfColumns; col++)
+        if (coveredColumns[col])
+            nOfCoveredColumns++;
+
+    if (nOfCoveredColumns == minDim)
+    {
+        /* algorithm finished */
+        buildassignmentvector(assignment, starMatrix, nOfRows, nOfColumns);
+    }
+    else
+    {
+        /* move to step 3 */
+        step3(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+    }
+
+}
+
+/********************************************************/
+// Munkres step 3: prime uncovered zeros. If the primed zero's row has no
+// starred zero, an augmenting path starts there (go to step 4); otherwise
+// cover that row and uncover the starred zero's column. When no uncovered
+// zeros remain, adjust the matrix in step 5.
+void HungarianAlgorithm::step3(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim)
+{
+    bool zerosFound;
+    int row, col, starCol;
+
+    zerosFound = true;
+    while (zerosFound)
+    {
+        zerosFound = false;
+        for (col = 0; col<nOfColumns; col++)
+            if (!coveredColumns[col])
+                for (row = 0; row<nOfRows; row++)
+                    if ((!coveredRows[row]) && (fabs(distMatrix[row + nOfRows*col]) < DBL_EPSILON))
+                    {
+                        /* prime zero */
+                        primeMatrix[row + nOfRows*col] = true;
+
+                        /* find starred zero in current row */
+                        for (starCol = 0; starCol<nOfColumns; starCol++)
+                            if (starMatrix[row + nOfRows*starCol])
+                                break;
+
+                        if (starCol == nOfColumns) /* no starred zero found */
+                        {
+                            /* move to step 4 */
+                            step4(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim, row, col);
+                            return;
+                        }
+                        else
+                        {
+                            coveredRows[row] = true;
+                            coveredColumns[starCol] = false;
+                            zerosFound = true;
+                            break;
+                        }
+                    }
+    }
+
+    /* move to step 5 */
+    step5(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+
+/********************************************************/
+// Munkres step 4: build an augmenting path of alternating primed and
+// starred zeros starting at the primed zero (row, col); flip stars along
+// the path, clear all primes, uncover all rows, then return to step 2a.
+void HungarianAlgorithm::step4(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim, int row, int col)
+{
+    int n, starRow, starCol, primeRow, primeCol;
+    int nOfElements = nOfRows*nOfColumns;
+
+    /* generate temporary copy of starMatrix */
+    for (n = 0; n<nOfElements; n++)
+        newStarMatrix[n] = starMatrix[n];
+
+    /* star current zero */
+    newStarMatrix[row + nOfRows*col] = true;
+
+    /* find starred zero in current column */
+    starCol = col;
+    for (starRow = 0; starRow<nOfRows; starRow++)
+        if (starMatrix[starRow + nOfRows*starCol])
+            break;
+
+    while (starRow<nOfRows)
+    {
+        /* unstar the starred zero */
+        newStarMatrix[starRow + nOfRows*starCol] = false;
+
+        /* find primed zero in current row */
+        primeRow = starRow;
+        for (primeCol = 0; primeCol<nOfColumns; primeCol++)
+            if (primeMatrix[primeRow + nOfRows*primeCol])
+                break;
+
+        /* star the primed zero */
+        newStarMatrix[primeRow + nOfRows*primeCol] = true;
+
+        /* find starred zero in current column */
+        starCol = primeCol;
+        for (starRow = 0; starRow<nOfRows; starRow++)
+            if (starMatrix[starRow + nOfRows*starCol])
+                break;
+    }
+
+    /* use temporary copy as new starMatrix */
+    /* delete all primes, uncover all rows */
+    for (n = 0; n<nOfElements; n++)
+    {
+        primeMatrix[n] = false;
+        starMatrix[n] = newStarMatrix[n];
+    }
+    for (n = 0; n<nOfRows; n++)
+        coveredRows[n] = false;
+
+    /* move to step 2a */
+    step2a(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+
+/********************************************************/
+// Munkres step 5: find the smallest uncovered element h, add it to every
+// covered row and subtract it from every uncovered column (creating at
+// least one new uncovered zero), then return to step 3.
+void HungarianAlgorithm::step5(int *assignment, double *distMatrix, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, int nOfRows, int nOfColumns, int minDim)
+{
+    double h, value;
+    int row, col;
+
+    /* find smallest uncovered element h */
+    h = DBL_MAX;
+    for (row = 0; row<nOfRows; row++)
+        if (!coveredRows[row])
+            for (col = 0; col<nOfColumns; col++)
+                if (!coveredColumns[col])
+                {
+                    value = distMatrix[row + nOfRows*col];
+                    if (value < h)
+                        h = value;
+                }
+
+    /* add h to each covered row */
+    for (row = 0; row<nOfRows; row++)
+        if (coveredRows[row])
+            for (col = 0; col<nOfColumns; col++)
+                distMatrix[row + nOfRows*col] += h;
+
+    /* subtract h from each uncovered column */
+    for (col = 0; col<nOfColumns; col++)
+        if (!coveredColumns[col])
+            for (row = 0; row<nOfRows; row++)
+                distMatrix[row + nOfRows*col] -= h;
+
+    /* move to step 3 */
+    step3(assignment, distMatrix, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim);
+}
+

+ 101 - 0
src/detection/detection_yolov4_shigong_monocular/src/KalmanTracker.cpp

@@ -0,0 +1,101 @@
+//
+// Created by lqx on 20-4-23.
+//
+
+///////////////////////////////////////////////////////////////////////////////
+// KalmanTracker.cpp: KalmanTracker Class Implementation Declaration
+
+#include "KalmanTracker.h"
+#include "detect_obstacle.h"
+#include <iostream>
+
+int KalmanTracker::kf_count = 0;
+
+// Initialize the Kalman filter for a new track from its first bounding box.
+// State (7): [cx, cy, s, r, vx, vy, vs] — box center, area s, aspect ratio r,
+// plus constant-velocity terms for cx, cy and s (r has no velocity, see the
+// transition matrix). Measurement (4): [cx, cy, s, r].
+void KalmanTracker::init_kf(StateType stateMat)
+{
+    int stateNum = 7;
+    int measureNum = 4;
+    kf = KalmanFilter(stateNum, measureNum, 0);
+
+    measurement = Mat::zeros(measureNum, 1, CV_32F);
+
+    // Constant-velocity transition: each of cx, cy, s is advanced by its
+    // velocity term; r and the velocities themselves are held constant.
+    kf.transitionMatrix = (Mat_<float>(stateNum, stateNum) <<
+                                                            1, 0, 0, 0, 1, 0, 0,
+            0, 1, 0, 0, 0, 1, 0,
+            0, 0, 1, 0, 0, 0, 1,
+            0, 0, 0, 1, 0, 0, 0,
+            0, 0, 0, 0, 1, 0, 0,
+            0, 0, 0, 0, 0, 1, 0,
+            0, 0, 0, 0, 0, 0, 1);
+
+    setIdentity(kf.measurementMatrix);
+    setIdentity(kf.processNoiseCov, Scalar::all(1e-2));
+    setIdentity(kf.measurementNoiseCov, Scalar::all(1e-1));
+    setIdentity(kf.errorCovPost, Scalar::all(1));
+
+    // initialize state vector with bounding box in [cx,cy,s,r] style
+    kf.statePost.at<float>(0, 0) = stateMat.x + stateMat.width / 2;
+    kf.statePost.at<float>(1, 0) = stateMat.y + stateMat.height / 2;
+    kf.statePost.at<float>(2, 0) = stateMat.area();
+    kf.statePost.at<float>(3, 0) = stateMat.width / stateMat.height;
+}
+
+// Predict the estimated bounding box.
+// Advances the filter one step, ages the track, and resets the hit streak
+// once the track has gone unmatched longer than od::max_age.
+// Returns the predicted box (also appended to m_history).
+StateType KalmanTracker::predict()
+{
+    // predict
+    Mat p = kf.predict();
+    m_age += 1;
+
+    if (m_time_since_update > od::max_age)
+        m_hit_streak = 0;
+    m_time_since_update += 1;
+    StateType predictBox = get_rect_xysr(p.at<float>(0, 0), p.at<float>(1, 0), p.at<float>(2, 0), p.at<float>(3, 0));
+    m_history.push_back(predictBox);
+    return m_history.back();
+}
+
+// Update the state vector with observed bounding box.
+// stateMat: matched detection box; classId/prob: its class and confidence.
+// Resets the staleness counter, bumps hit counters, records the class,
+// converts the box to [cx,cy,s,r] and feeds it to the Kalman correction.
+void KalmanTracker::update(StateType stateMat, int classId, float prob)
+{
+    m_time_since_update = 0;
+    m_history.clear();  // prediction history is only kept between updates
+    m_hits += 1;
+    m_hit_streak += 1;
+    m_class_id = classId;
+    m_class_history.push_back(classId);
+    m_prob = prob;
+    // measurement in [cx,cy,s,r] form
+    measurement.at<float>(0, 0) = stateMat.x + stateMat.width / 2;
+    measurement.at<float>(1, 0) = stateMat.y + stateMat.height / 2;
+    measurement.at<float>(2, 0) = stateMat.area();
+    measurement.at<float>(3, 0) = stateMat.width / stateMat.height;
+
+    // update
+    kf.correct(measurement);
+}
+
+// Return the current (corrected) state as an [x,y,w,h] bounding box.
+StateType KalmanTracker::get_state()
+{
+    Mat s = kf.statePost;
+    return get_rect_xysr(s.at<float>(0, 0), s.at<float>(1, 0), s.at<float>(2, 0), s.at<float>(3, 0));
+}
+
+
+// Convert bounding box from [cx,cy,s,r] to [x,y,w,h] style.
+// cx,cy: center; s: area; r: aspect ratio (w/h). The top-left corner is
+// clamped to 0 when it goes negative while the center is still in-frame.
+StateType KalmanTracker::get_rect_xysr(float cx, float cy, float s, float r)
+{
+    float w = sqrt(s * r);   // from s = w*h and r = w/h
+    float h = s / w;
+    float x = (cx - w / 2);
+    float y = (cy - h / 2);
+
+    if (x < 0 && cx > 0)
+        x = 0;
+    if (y < 0 && cy > 0)
+        y = 0;
+
+    return StateType(x, y, w, h);
+}

+ 309 - 0
src/detection/detection_yolov4_shigong_monocular/src/detect_obstacle.cpp

@@ -0,0 +1,309 @@
+
+#include <set>
+#include "detect_obstacle.h"
+
+namespace od{
+// Computes IOU between two bounding boxes
+double GetIOU(Rect_<float> bb_test, Rect_<float> bb_gt)
+{
+    float in = (bb_test & bb_gt).area();
+    float un = bb_test.area() + bb_gt.area() - in;
+
+    if (un < DBL_EPSILON)
+        return 0;
+
+    return (double)(in / un);
+}
+
+//画出检测框和相关信息
+void DrawBoxes(Mat &frame, vector<string> classes, int classId, int turnstileId, float conf, int left, int top, int right, int bottom)
+{
+    //画检测框
+    rectangle(frame, Point(left, top), Point(right, bottom), Scalar(od::COLOR_MAP[classId][0], od::COLOR_MAP[classId][1], od::COLOR_MAP[classId][2]), 2);
+    //该检测框对应的类别和置信度
+    //string label = format("%.2f", conf);
+    //string label = format("%d", turnstileId);
+
+    string label;
+    if (!classes.empty())
+    {
+        CV_Assert(classId < (int)classes.size());
+        //label = classes[classId] + ":" + label;
+        label = classes[classId];
+    }
+    //将标签显示在检测框顶部
+    int baseLine;
+    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
+    //top = max(top, labelSize.height);
+    rectangle(frame, Point(left, top), Point(left + round(1.1*labelSize.width), top - labelSize.height),
+              Scalar(od::COLOR_MAP[classId][0], od::COLOR_MAP[classId][1], od::COLOR_MAP[classId][2]), FILLED);
+    putText(frame, label, Point(left, top-baseLine*0.5), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255, 255, 255), 1);
+}
+
+//画出检测结果
+void Drawer(Mat &frame, vector<bbox_t> outs, vector<string> classes)
+{
+    //获取所有最佳检测框信息
+    for (int i = 0; i < outs.size(); i++)
+    {
+        DrawBoxes(frame, classes, outs[i].obj_id, outs[i].track_id, outs[i].prob, outs[i].x, outs[i].y,
+                  outs[i].x + outs[i].w, outs[i].y + outs[i].h);
+    }
+}
+
+//画出检测结果
+void Drawer(Mat &frame, vector<od::TrackingBox> &track_result, vector<string> &classes)
+{
+
+    //获取所有最佳检测框信息
+    for (int i = 0; i < track_result.size(); i++)
+    {
+        DrawBoxes(frame, classes, track_result[i].class_id, track_result[i].id, track_result[i].prob,
+                  int(track_result[i].box.x),int(track_result[i].box.y),
+                  int(track_result[i].box.x + track_result[i].box.width),
+                  int(track_result[i].box.y + track_result[i].box.height));
+    }
+}
+
// Run one SORT-style tracking step over the detections of a single frame:
//   1. convert raw detections (bbox_t) to od::DetectBox,
//   2. predict every existing tracker one frame forward,
//   3. associate predictions with detections (IOU cost + Hungarian assignment),
//   4. update matched trackers, spawn new trackers for unmatched detections,
//   5. emit confirmed tracks into track_result and drop stale trackers.
// Returns true iff at least one confirmed track was produced this frame.
// `trackers` carries state across calls and is owned by the caller.
bool TrackObstacle(int frame_count,vector<KalmanTracker> &trackers,vector<bbox_t> &outs,vector<od::TrackingBox> &track_result)
{
    // variables used in the for-loop
    vector<Rect_<float>> predictedBoxes;
    vector<vector<double>> iouMatrix;
    vector<int> assignment;
    set<int> unmatchedDetections;
    set<int> unmatchedTrajectories;
    set<int> allItems;
    set<int> matchedItems;
    vector<cv::Point> matchedPairs;
    unsigned int trkNum = 0;
    unsigned int detNum = 0;
    vector<od::DetectBox> detect_outs;
    // bbox_t to DetectBox: keep class id / probability and repackage the box
    // as a cv::Rect_<float>.
    for(unsigned int i=0;i<outs.size();i++)
    {
        od::DetectBox detect_temp;
        detect_temp.class_id = outs[i].obj_id;
        detect_temp.prob = outs[i].prob;
        float tpx = outs[i].x;
        float tpy = outs[i].y;
        float tpw = outs[i].w;
        float tph = outs[i].h;
        //detect_temp.box = Rect_<float>(Point_<float>(tpx, tpy),Point_<float>(tpx + tpw, tpy + tph));
        detect_temp.box = Rect_<float>(tpx,tpy,tpw,tph);
        detect_outs.push_back(detect_temp);
    }
    //tracking
    if (trackers.size() == 0) // the first frame met
    {
        // initialize kalman trackers using first detections.
        for (unsigned int i = 0; i < outs.size(); i++)
        {
            KalmanTracker trk = KalmanTracker(detect_outs[i].box,
                                              detect_outs[i].class_id,
                                              detect_outs[i].prob);
            trackers.push_back(trk);
        }
        // No association is possible on the very first frame.
        return false;
    }
    ///////////////////////////////////////
    // 3.1. get predicted locations from existing trackers.
    // Trackers whose prediction drifts off the top/left of the image are
    // treated as invalid and erased immediately.
    predictedBoxes.clear();

    for (auto it = trackers.begin(); it != trackers.end();)
    {
        Rect_<float> pBox = (*it).predict();
        if (pBox.x >= 0 && pBox.y >= 0)
        {
            predictedBoxes.push_back(pBox);
            it++;
        }
        else
        {

            cerr << "Box invalid at frame: " << frame_count <<" id "<<(*it).m_id+1<<endl;
            it = trackers.erase(it);

        }
    }

    if (trackers.size() == 0) return false;

    ///////////////////////////////////////
    // 3.2. associate detections to tracked object (both represented as bounding boxes)
    // dets : detFrameData[fi]
    trkNum = predictedBoxes.size();
    detNum = outs.size();

    iouMatrix.clear();
    iouMatrix.resize(trkNum, vector<double>(detNum, 0));

    for (unsigned int i = 0; i < trkNum; i++) // compute iou matrix as a distance matrix
    {
        for (unsigned int j = 0; j < detNum; j++)
        {
            // use 1-iou because the hungarian algorithm computes a minimum-cost assignment.
            iouMatrix[i][j] = 1 - GetIOU(predictedBoxes[i], detect_outs[j].box);
        }
    }
    // solve the assignment problem using hungarian algorithm.
    // the resulting assignment is [track(prediction) : detection], with len=preNum
    HungarianAlgorithm HungAlgo;
    assignment.clear();
    HungAlgo.Solve(iouMatrix, assignment);

    // find matches, unmatched_detections and unmatched_predictions
    unmatchedTrajectories.clear();
    unmatchedDetections.clear();
    allItems.clear();
    matchedItems.clear();
    if (detNum > trkNum) //	there are unmatched detections
    {
        for (unsigned int n = 0; n < detNum; n++)
            allItems.insert(n);

        for (unsigned int i = 0; i < trkNum; ++i)
            matchedItems.insert(assignment[i]);

        // unmatched detections = all detection indices minus the assigned ones.
        set_difference(allItems.begin(), allItems.end(),
                       matchedItems.begin(), matchedItems.end(),
                       insert_iterator<set<int>>(unmatchedDetections, unmatchedDetections.begin()));
    }
    else if (detNum < trkNum) // there are unmatched trajectory/predictions
    {
        for (unsigned int i = 0; i < trkNum; ++i)
            if (assignment[i] == -1) // unassigned label will be set as -1 in the assignment algorithm
                unmatchedTrajectories.insert(i);
    }
    // filter out matched with low IOU: a Hungarian match whose IOU falls below
    // od::iouThreshold is demoted back to unmatched on both sides.
    matchedPairs.clear();
    for (unsigned int i = 0; i < trkNum; ++i)
    {
        if (assignment[i] == -1) // pass over invalid values
            continue;
        if (1 - iouMatrix[i][assignment[i]] < od::iouThreshold)
        {
            unmatchedTrajectories.insert(i);
            unmatchedDetections.insert(assignment[i]);
        }
        else
            matchedPairs.push_back(cv::Point(i, assignment[i]));
    }
    ///////////////////////////////////////
    // 3.3. updating trackers
    // update matched trackers with assigned detections.
    // each prediction is corresponding to a tracker
    int detIdx, trkIdx;
    for (unsigned int i = 0; i < matchedPairs.size(); i++)
    {
        trkIdx = matchedPairs[i].x;
        detIdx = matchedPairs[i].y;
        trackers[trkIdx].update(detect_outs[detIdx].box,
                                detect_outs[detIdx].class_id,
                                detect_outs[detIdx].prob);
    }
    // create and initialise new trackers for unmatched detections
    for (auto umd : unmatchedDetections)
    {
        KalmanTracker tracker = KalmanTracker(detect_outs[umd].box,
                                              detect_outs[umd].class_id,
                                              detect_outs[umd].prob);
        trackers.push_back(tracker);
    }

// Disabled experiment: merge duplicate trackers whose current states overlap
// (IOU above threshold), keeping the older track id and concatenating class
// histories. Left compiled out via #if 0.
#if 0
    //get unique trackers,merg same trackers
    unsigned int trackers_num = trackers.size();
    iouMatrix.clear();
    iouMatrix.resize(trackers_num, vector<double>(trackers_num, 0));
    for (unsigned int i = 0; i < trackers_num; i++) // compute iou matrix as a distance matrix
    {
        for (unsigned int j = 0; j < trackers_num; j++)
        {
            // use 1-iou because the hungarian algorithm computes a minimum-cost assignment.
            if(j==i)
                iouMatrix[i][j] = 1;
            else
                iouMatrix[i][j] = 1 - GetIOU(trackers[i].get_state(), trackers[j].get_state());
        }
    }
    // solve the assignment problem using hungarian algorithm.
    // the resulting assignment is [track(prediction) : detection], with len=preNum
    assignment.clear();
    HungAlgo.Solve(iouMatrix, assignment);
    // filter out matched with low IOU
    matchedPairs.clear();
    for (unsigned int i = 0; i < trackers_num; ++i)
    {
        if (assignment[i] == -1) // pass over invalid values
            continue;
        if (iouMatrix[i][assignment[i]] < od::iouThreshold)
        {
            matchedPairs.push_back(cv::Point(i, assignment[i]));
        }
    }
    int index1,index2;
    vector<int> delete_index;
    for (unsigned int i = 0; i < matchedPairs.size(); i++)
    {
        index1 = matchedPairs[i].x;
        index2 = matchedPairs[i].y;
        if(index1 >= index2)
            continue;
        if((trackers[index1].m_id > trackers[index2].m_id) && (trackers[index1].m_class_history.size()>0))
        {
            trackers[index1].m_id = trackers[index2].m_id;
            trackers[index1].m_class_history.insert(trackers[index1].m_class_history.begin(),
                                                    trackers[index2].m_class_history.begin(),trackers[index2].m_class_history.end());
            delete_index.push_back(index2);
        }
        else if((trackers[index2].m_id > trackers[index1].m_id) && (trackers[index2].m_class_history.size()>0))
        {
            trackers[index2].m_id = trackers[index1].m_id;
            trackers[index2].m_class_history.insert(trackers[index2].m_class_history.begin(),
                                                    trackers[index1].m_class_history.begin(),trackers[index1].m_class_history.end());
            delete_index.push_back(index1);

        }
    }
    for(unsigned int i = 0; i < delete_index.size(); i++)
    {
        int idx = delete_index[i] - i;
        trackers.erase(trackers.begin() + idx);
    }
#endif

    // get trackers' output: a track is reported when it was recently updated
    // AND is either past its hit-streak warm-up or we are still in the first
    // od::min_hits frames.
    track_result.clear();
    for (auto it = trackers.begin(); it != trackers.end();)
    {
        if (((*it).m_time_since_update <= od::max_age) &&
                ((*it).m_hit_streak >= od::min_hits || frame_count <= od::min_hits))
        {
            od::TrackingBox res;
            res.box = (*it).get_state();
            res.id = (*it).m_id + 1;
            res.frame = frame_count;
            res.class_id = (*it).m_class_id;
            res.prob = (*it).m_prob;
            res.class_history = (*it).m_class_history;
            track_result.push_back(res);
            it++;
        }
        else
            it ++;
        //remove dead tracklet
        // NOTE(review): the stale check runs after the iterator has already
        // advanced, so it inspects the NEXT tracker, the first tracker is
        // never erased here, and a stale tracker may be erased before its own
        // output check. This mirrors the original C++ SORT sample — confirm
        // intent before restructuring.
        if(it != trackers.end() && (*it).m_time_since_update > od::max_age)
        {
            it = trackers.erase(it);
        }
    }
    if(track_result.size()>0)
        return true;
    else return false;
}
+}
+
+
+

+ 94 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/Utils.h

@@ -0,0 +1,94 @@
+#ifndef __TRT_UTILS_H_
+#define __TRT_UTILS_H_
+
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>

#include <cudnn.h>
+
#ifndef CUDA_CHECK

// Check the result of a CUDA runtime call and abort the process on failure.
// Wrapped in do { } while (0) so the macro expands to a single statement
// (safe inside un-braced if/else). Uses std::abort() instead of assert(0)
// so the failure path is NOT compiled out in NDEBUG (release) builds, and
// terminates the message with std::endl so it is flushed on its own line.
#define CUDA_CHECK(callstr)                                                                    \
    do {                                                                                       \
        cudaError_t error_code = callstr;                                                      \
        if (error_code != cudaSuccess) {                                                       \
            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__  \
                      << std::endl;                                                            \
            std::abort();                                                                      \
        }                                                                                      \
    } while (0)

#endif
+
+namespace Tn
+{
+    class Profiler : public nvinfer1::IProfiler
+    {
+    public:
+        void printLayerTimes(int itrationsTimes)
+        {
+            float totalTime = 0;
+            for (size_t i = 0; i < mProfile.size(); i++)
+            {
+                printf("%-40.40s %4.3fms\n", mProfile[i].first.c_str(), mProfile[i].second / itrationsTimes);
+                totalTime += mProfile[i].second;
+            }
+            printf("Time over all layers: %4.3f\n", totalTime / itrationsTimes);
+        }
+    private:
+        typedef std::pair<std::string, float> Record;
+        std::vector<Record> mProfile;
+
+        virtual void reportLayerTime(const char* layerName, float ms)
+        {
+            auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r){ return r.first == layerName; });
+            if (record == mProfile.end())
+                mProfile.push_back(std::make_pair(layerName, ms));
+            else
+                record->second += ms;
+        }
+    };
+
    // Logger for TensorRT info/warning/errors.
    // Messages less severe than reportableSeverity are dropped; everything
    // else is written to stderr with a severity prefix.
    class Logger : public nvinfer1::ILogger
    {
    public:

        // Defaults to reporting warnings and worse.
        Logger(): Logger(Severity::kWARNING) {}

        Logger(Severity severity): reportableSeverity(severity) {}

        // nvinfer1::ILogger callback invoked by TensorRT.
        void log(Severity severity, const char* msg) override
        {
            // suppress messages with severity enum value greater than the reportable
            if (severity > reportableSeverity) return;

            switch (severity)
            {
                case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: "; break;
                case Severity::kERROR: std::cerr << "ERROR: "; break;
                case Severity::kWARNING: std::cerr << "WARNING: "; break;
                case Severity::kINFO: std::cerr << "INFO: "; break;
                default: std::cerr << "UNKNOWN: "; break;
            }
            std::cerr << msg << std::endl;
        }

        // Minimum severity that will be emitted (public, so callers may tweak it).
        Severity reportableSeverity{Severity::kWARNING};
    };
+
+    template<typename T> 
+    void write(char*& buffer, const T& val)
+    {
+        *reinterpret_cast<T*>(buffer) = val;
+        buffer += sizeof(T);
+    }
+
+    template<typename T> 
+    void read(const char*& buffer, T& val)
+    {
+        val = *reinterpret_cast<const T*>(buffer);
+        buffer += sizeof(T);
+    }
+}
+
+#endif

+ 503 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/logging.h

@@ -0,0 +1,503 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORRT_LOGGING_H
+#define TENSORRT_LOGGING_H
+
+#include "NvInferRuntimeCommon.h"
+#include <cassert>
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+using Severity = nvinfer1::ILogger::Severity;
+
// stringbuf that flushes each accumulated line to a destination stream as
// "[timestamp] prefix message".
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
    // stream: destination for flushed log lines; prefix: severity tag
    // prepended to each line; shouldLog: when false, output is swallowed.
    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mOutput(stream)
        , mPrefix(prefix)
        , mShouldLog(shouldLog)
    {
    }

    // Move constructor: carry over ALL state. The previous version copied
    // only mOutput, leaving mPrefix empty and mShouldLog UNINITIALIZED
    // (undefined behavior the first time the moved-to buffer was consulted).
    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
        : mOutput(other.mOutput)
        , mPrefix(other.mPrefix)
        , mShouldLog(other.mShouldLog)
    {
    }

    ~LogStreamConsumerBuffer()
    {
        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
        // std::streambuf::pptr() gives a pointer to the current position of the output sequence
        // if the pointer to the beginning is not equal to the pointer to the current position,
        // call putOutput() to log the output to the stream
        if (pbase() != pptr())
        {
            putOutput();
        }
    }

    // synchronizes the stream buffer and returns 0 on success
    // synchronizing the stream buffer consists of inserting the buffer contents into the stream,
    // resetting the buffer and flushing the stream
    virtual int sync()
    {
        putOutput();
        return 0;
    }

    // Flush the accumulated contents as one prefixed, timestamped line.
    void putOutput()
    {
        if (mShouldLog)
        {
            // prepend timestamp; written to mOutput (not std::cout) so the
            // whole line stays on ONE stream — the previous version sent the
            // timestamp to cout while the message went to mOutput, splitting
            // log lines across streams when logging to stderr.
            std::time_t timestamp = std::time(nullptr);
            tm* tm_local = std::localtime(&timestamp);
            mOutput << "[";
            mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
            mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
            // insert the buffered contents, pre-appended by the severity prefix
            mOutput << mPrefix << str();
            // reset the accumulation buffer
            str("");
            // flush the destination stream
            mOutput.flush();
        }
    }

    void setShouldLog(bool shouldLog)
    {
        mShouldLog = shouldLog;
    }

private:
    std::ostream& mOutput;   // destination stream (cout or cerr)
    std::string mPrefix;     // severity tag, e.g. "[W] "
    bool mShouldLog;         // gate: false => drop everything
};
+
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
//! Base-from-member idiom: LogStreamConsumer must hand a fully-constructed
//! buffer to its std::ostream base, so the buffer lives in this base class,
//! which is listed before std::ostream in the derivation list.
//!
class LogStreamConsumerBase
{
public:
    LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mBuffer(stream, prefix, shouldLog)
    {
    }

protected:
    // Buffer shared with the derived class's std::ostream base.
    LogStreamConsumerBuffer mBuffer;
};
+
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//!  Order of base classes is LogStreamConsumerBase and then std::ostream.
//!  This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//!  in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//!  This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
//!  Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
{
public:
    //! \brief Creates a LogStreamConsumer which logs messages with level severity.
    //!  Reportable severity determines if the messages are severe enough to be logged.
    LogStreamConsumer(Severity reportableSeverity, Severity severity)
        : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
        , std::ostream(&mBuffer) // links the stream buffer with the stream
        , mShouldLog(severity <= reportableSeverity)
        , mSeverity(severity)
    {
    }

    // Move constructor: rebuilds a fresh buffer targeting the same severity
    // stream/prefix, inheriting the moved-from consumer's logging decision.
    LogStreamConsumer(LogStreamConsumer&& other)
        : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
        , std::ostream(&mBuffer) // links the stream buffer with the stream
        , mShouldLog(other.mShouldLog)
        , mSeverity(other.mSeverity)
    {
    }

    // Re-evaluate whether this consumer's severity clears the new threshold.
    void setReportableSeverity(Severity reportableSeverity)
    {
        mShouldLog = mSeverity <= reportableSeverity;
        mBuffer.setShouldLog(mShouldLog);
    }

private:
    // INFO and milder go to stdout; warnings and worse go to stderr.
    static std::ostream& severityOstream(Severity severity)
    {
        return severity >= Severity::kINFO ? std::cout : std::cerr;
    }

    // Short tag prepended to every line of this severity.
    static std::string severityPrefix(Severity severity)
    {
        switch (severity)
        {
        case Severity::kINTERNAL_ERROR: return "[F] ";
        case Severity::kERROR: return "[E] ";
        case Severity::kWARNING: return "[W] ";
        case Severity::kINFO: return "[I] ";
        case Severity::kVERBOSE: return "[V] ";
        default: assert(0); return "";
        }
    }

    bool mShouldLog;      // cached result of mSeverity <= reportable threshold
    Severity mSeverity;   // severity level this consumer emits at
};
+
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.

class Logger : public nvinfer1::ILogger
{
public:
    Logger(Severity severity = Severity::kWARNING)
        : mReportableSeverity(severity)
    {
    }

    //!
    //! \enum TestResult
    //! \brief Represents the state of a given test
    //!
    enum class TestResult
    {
        kRUNNING, //!< The test is running
        kPASSED,  //!< The test passed
        kFAILED,  //!< The test failed
        kWAIVED   //!< The test was waived
    };

    //!
    //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
    //! \return The nvinfer1::ILogger associated with this Logger
    //!
    //! TODO Once all samples are updated to use this method to register the logger with TensorRT,
    //! we can eliminate the inheritance of Logger from ILogger
    //!
    nvinfer1::ILogger& getTRTLogger()
    {
        return *this;
    }

    //!
    //! \brief Implementation of the nvinfer1::ILogger::log() virtual method
    //!
    //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
    //! inheritance from nvinfer1::ILogger
    //!
    //! Every message is tagged "[TRT] " and routed through a LogStreamConsumer,
    //! which adds the timestamp and severity prefix and picks stdout/stderr.
    //!
    void log(Severity severity, const char* msg) override
    {
        LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
    }

    //!
    //! \brief Method for controlling the verbosity of logging output
    //!
    //! \param severity The logger will only emit messages that have severity of this level or higher.
    //!
    void setReportableSeverity(Severity severity)
    {
        mReportableSeverity = severity;
    }

    //!
    //! \brief Opaque handle that holds logging information for a particular test
    //!
    //! This object is an opaque handle to information used by the Logger to print test results.
    //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
    //! with Logger::reportTest{Start,End}().
    //!
    class TestAtom
    {
    public:
        TestAtom(TestAtom&&) = default;

    private:
        friend class Logger;

        TestAtom(bool started, const std::string& name, const std::string& cmdline)
            : mStarted(started)
            , mName(name)
            , mCmdline(cmdline)
        {
        }

        bool mStarted;        // true once reportTestStart() has been called
        std::string mName;    // test name, e.g. "TensorRT.sample_googlenet"
        std::string mCmdline; // command line used to reproduce the test
    };

    //!
    //! \brief Define a test for logging
    //!
    //! \param[in] name The name of the test.  This should be a string starting with
    //!                  "TensorRT" and containing dot-separated strings containing
    //!                  the characters [A-Za-z0-9_].
    //!                  For example, "TensorRT.sample_googlenet"
    //! \param[in] cmdline The command line used to reproduce the test
    //
    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
    //!
    static TestAtom defineTest(const std::string& name, const std::string& cmdline)
    {
        return TestAtom(false, name, cmdline);
    }

    //!
    //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
    //!        as input
    //!
    //! \param[in] name The name of the test
    //! \param[in] argc The number of command-line arguments
    //! \param[in] argv The array of command-line arguments (given as C strings)
    //!
    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
    static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
    {
        auto cmdline = genCmdlineString(argc, argv);
        return defineTest(name, cmdline);
    }

    //!
    //! \brief Report that a test has started.
    //!
    //! \pre reportTestStart() has not been called yet for the given testAtom
    //!
    //! \param[in] testAtom The handle to the test that has started
    //!
    static void reportTestStart(TestAtom& testAtom)
    {
        reportTestResult(testAtom, TestResult::kRUNNING);
        assert(!testAtom.mStarted);
        testAtom.mStarted = true;
    }

    //!
    //! \brief Report that a test has ended.
    //!
    //! \pre reportTestStart() has been called for the given testAtom
    //!
    //! \param[in] testAtom The handle to the test that has ended
    //! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
    //!                   TestResult::kFAILED, TestResult::kWAIVED
    //!
    static void reportTestEnd(const TestAtom& testAtom, TestResult result)
    {
        assert(result != TestResult::kRUNNING);
        assert(testAtom.mStarted);
        reportTestResult(testAtom, result);
    }

    // Convenience wrappers returning process exit codes for a test outcome.
    static int reportPass(const TestAtom& testAtom)
    {
        reportTestEnd(testAtom, TestResult::kPASSED);
        return EXIT_SUCCESS;
    }

    static int reportFail(const TestAtom& testAtom)
    {
        reportTestEnd(testAtom, TestResult::kFAILED);
        return EXIT_FAILURE;
    }

    static int reportWaive(const TestAtom& testAtom)
    {
        reportTestEnd(testAtom, TestResult::kWAIVED);
        return EXIT_SUCCESS;
    }

    static int reportTest(const TestAtom& testAtom, bool pass)
    {
        return pass ? reportPass(testAtom) : reportFail(testAtom);
    }

    Severity getReportableSeverity() const
    {
        return mReportableSeverity;
    }

private:
    //!
    //! \brief returns an appropriate string for prefixing a log message with the given severity
    //!
    static const char* severityPrefix(Severity severity)
    {
        switch (severity)
        {
        case Severity::kINTERNAL_ERROR: return "[F] ";
        case Severity::kERROR: return "[E] ";
        case Severity::kWARNING: return "[W] ";
        case Severity::kINFO: return "[I] ";
        case Severity::kVERBOSE: return "[V] ";
        default: assert(0); return "";
        }
    }

    //!
    //! \brief returns an appropriate string for prefixing a test result message with the given result
    //!
    static const char* testResultString(TestResult result)
    {
        switch (result)
        {
        case TestResult::kRUNNING: return "RUNNING";
        case TestResult::kPASSED: return "PASSED";
        case TestResult::kFAILED: return "FAILED";
        case TestResult::kWAIVED: return "WAIVED";
        default: assert(0); return "";
        }
    }

    //!
    //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
    //!
    static std::ostream& severityOstream(Severity severity)
    {
        return severity >= Severity::kINFO ? std::cout : std::cerr;
    }

    //!
    //! \brief method that implements logging test results
    //!
    static void reportTestResult(const TestAtom& testAtom, TestResult result)
    {
        severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
                                         << testAtom.mCmdline << std::endl;
    }

    //!
    //! \brief generate a command line string from the given (argc, argv) values
    //!
    static std::string genCmdlineString(int argc, char const* const* argv)
    {
        std::stringstream ss;
        for (int i = 0; i < argc; i++)
        {
            if (i > 0)
                ss << " ";
            ss << argv[i];
        }
        return ss.str();
    }

    // Minimum severity that log() will emit.
    Severity mReportableSeverity;
};
+
namespace
{

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
//!
//! Example usage:
//!
//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
//!
//! Example usage:
//!
//!     LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
//!
//! Example usage:
//!
//!     LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
//!
//! Example usage:
//!
//!     LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}

//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
//!        ("fatal" severity)
//!
//! Example usage:
//!
//!     LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}

} // anonymous namespace
+
+#endif // TENSORRT_LOGGING_H

+ 106 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/mish.h

@@ -0,0 +1,106 @@
+#ifndef _MISH_PLUGIN_H
+#define _MISH_PLUGIN_H
+
+#include <string>
+#include <vector>
+#include "NvInfer.h"
+
+namespace nvinfer1
+{
+    // TensorRT IPluginV2IOExt implementing the Mish activation,
+    // out = x * tanh(softplus(x)). Single input, single output,
+    // linear-format FP32 only (see supportsFormatCombination).
+    class MishPlugin: public IPluginV2IOExt
+    {
+        public:
+            explicit MishPlugin();
+            // Deserialization ctor: restores input_size_ from a byte stream.
+            MishPlugin(const void* data, size_t length);
+
+            ~MishPlugin();
+
+            // Exactly one output tensor.
+            int getNbOutputs() const override
+            {
+                return 1;
+            }
+
+            Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
+
+            int initialize() override;
+
+            virtual void terminate() override {};
+
+            // The activation needs no scratch workspace.
+            virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;}
+
+            virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override;
+
+            virtual size_t getSerializationSize() const override;
+
+            virtual void serialize(void* buffer) const override;
+
+            // Accept only linear-layout FP32 tensors.
+            bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {
+                return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
+            }
+
+            const char* getPluginType() const override;
+
+            const char* getPluginVersion() const override;
+
+            void destroy() override;
+
+            IPluginV2IOExt* clone() const override;
+
+            void setPluginNamespace(const char* pluginNamespace) override;
+
+            const char* getPluginNamespace() const override;
+
+            DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
+
+            bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
+
+            bool canBroadcastInputAcrossBatch(int inputIndex) const override;
+
+            void attachToContext(
+                    cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
+
+            void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override;
+
+            void detachFromContext() override;
+
+            // Element count of one batch item; set in getOutputDimensions()
+            // and restored on deserialization. NOTE(review): public member.
+            int input_size_;
+        private:
+            void forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize = 1);
+            int thread_count_ = 256;        // CUDA threads per block for the kernel launch
+            const char* mPluginNamespace;
+    };
+
+    // IPluginCreator factory for MishPlugin; registered with TensorRT via
+    // REGISTER_TENSORRT_PLUGIN so engines referencing "Mish_TRT" can be
+    // built and deserialized.
+    class MishPluginCreator : public IPluginCreator
+    {
+        public:
+            MishPluginCreator();
+
+            ~MishPluginCreator() override = default;
+
+            const char* getPluginName() const override;
+
+            const char* getPluginVersion() const override;
+
+            const PluginFieldCollection* getFieldNames() override;
+
+            // Called while building the network definition.
+            IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;
+
+            // Called when loading a serialized engine.
+            IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
+
+            void setPluginNamespace(const char* libNamespace) override
+            {
+                mNamespace = libNamespace;
+            }
+
+            const char* getPluginNamespace() const override
+            {
+                return mNamespace.c_str();
+            }
+
+        private:
+            std::string mNamespace;
+            static PluginFieldCollection mFC;
+            static std::vector<PluginField> mPluginAttributes;
+    };
+};
+#endif 

+ 70 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/trt_utils.h

@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef __TRT_UTILS_H__
+#define __TRT_UTILS_H__
+
+#include <set>
+#include <map>
+#include <string>
+#include <vector>
+#include <cassert>
+#include <iostream>
+#include <fstream>
+
+#include "NvInfer.h"
+#include "NvInferPlugin.h"
+
+#define UNUSED(expr) (void)(expr)
+// NOTE(review): the expansion is not fully parenthesized — uses such as
+// DIVUP(a, b) * c or x / DIVUP(a, b) associate unexpectedly.
+// Prefer (((n) + (d) - 1) / (d)).
+#define DIVUP(n, d) ((n) + (d)-1) / (d)
+
+// String / file / dims helpers used by the yolo engine builder.
+std::string trim(std::string s);
+float clamp(const float val, const float minVal, const float maxVal);
+bool fileExists(const std::string fileName, bool verbose = true);
+std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType);
+std::string dimsToString(const nvinfer1::Dims d);
+void displayDimType(const nvinfer1::Dims d);
+int getNumChannels(nvinfer1::ITensor* t);
+uint64_t get3DTensorVolume(nvinfer1::Dims inputDims);
+
+// Helper functions to create yolo engine
+nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
+                                nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network);
+nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
+                                   std::vector<float>& weights,
+                                   std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+                                   int& inputChannels, nvinfer1::ITensor* input,
+                                   nvinfer1::INetworkDefinition* network);
+nvinfer1::ILayer* netAddConvBNActive(int layerIdx, std::map<std::string, std::string>& block,
+                                    std::vector<float>& weights,
+                                    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+                                    int& inputChannels, nvinfer1::ITensor* input,
+                                    nvinfer1::INetworkDefinition* network);
+nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block,
+                                 std::vector<float>& weights,
+                                 std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels,
+                                 nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network);
+void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
+                    std::string layerOutput, std::string weightPtr);
+
+#endif

+ 167 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/yolo.h

@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _YOLO_H_
+#define _YOLO_H_
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+#include <memory>
+
+#include "NvInfer.h"
+#include "trt_utils.h"
+#include "yololayer.h"
+#include "mish.h"
+
+// Status codes mirroring DeepStream's NvDsInfer result codes; returned by
+// IModelParser::parseModel and the yolo network builder.
+typedef enum {
+    /** NvDsInferContext operation succeeded. */
+    NVDSINFER_SUCCESS = 0,
+    /** Failed to configure the NvDsInferContext instance possibly due to an
+     *  erroneous initialization property. */
+    NVDSINFER_CONFIG_FAILED,
+    /** Custom Library interface implementation failed. */
+    NVDSINFER_CUSTOM_LIB_FAILED,
+    /** Invalid parameters were supplied. */
+    NVDSINFER_INVALID_PARAMS,
+    /** Output parsing failed. */
+    NVDSINFER_OUTPUT_PARSING_FAILED,
+    /** CUDA error was encountered. */
+    NVDSINFER_CUDA_ERROR,
+    /** TensorRT interface failed. */
+    NVDSINFER_TENSORRT_ERROR,
+    /** Resource error was encountered. */
+    NVDSINFER_RESOURCE_ERROR,
+    /** TRT-IS error was encountered. */
+    NVDSINFER_TRTIS_ERROR,
+    /** Unknown error was encountered. */
+    NVDSINFER_UNKNOWN_ERROR
+} NvDsInferStatus;
+
+// Abstract interface for custom model parsers that populate a TensorRT
+// network definition (implemented below by class Yolo).
+class IModelParser
+{
+public:
+    IModelParser() = default;
+    /**
+     * Destructor, make sure all external resource would be released here. */
+    virtual ~IModelParser() = default;
+
+    /**
+     * Function interface for parsing custom model and building tensorrt
+     * network.
+     *
+     * @param[in, out] network NvDsInfer will create the @a network and
+     *                 implementation can setup this network layer by layer.
+     * @return NvDsInferStatus indicating if model parsing was successful.
+     */
+    virtual NvDsInferStatus parseModel(
+        nvinfer1::INetworkDefinition& network) = 0;
+
+    /**
+     * Function interface to check if parser can support full-dimensions.
+     */
+    virtual bool hasFullDimsSupported() const = 0;
+
+    /**
+     * Function interface to get the new model name which is to be used for
+     * constructing the serialized engine file path.
+     */
+    virtual const char* getModelName() const = 0;
+};
+
+
+/**
+ * Holds all the file paths required to build a network.
+ */
+struct NetworkInfo
+{
+    std::string networkType;     // e.g. "yolov4-turnstile"; also used to name the .engine file
+    std::string configFilePath;  // darknet .cfg path
+    std::string wtsFilePath;     // darknet .weights path
+    std::string deviceType;      // e.g. "kGPU"
+    std::string inputBlobName;   // name of the network's input binding (e.g. "data")
+    uint maxbatchSize;           // presumably the builder's max batch size — TODO confirm
+};
+
+/**
+ * Holds information about an output tensor of the yolo network.
+ */
+struct TensorInfo
+{
+    std::string blobName;         // name of the output binding
+    uint stride{0};               // detection stride of this head — TODO confirm
+    uint gridSize{0};             // feature-map grid size (square assumed) — TODO confirm
+    uint numClasses{0};           // classes predicted by this head
+    uint numBBoxes{0};            // boxes (anchors) per grid cell — TODO confirm
+    uint64_t volume{0};           // total element count of the output tensor
+    std::vector<uint> masks;      // anchor-mask indices from the cfg
+    std::vector<float> anchors;   // anchor dimensions from the cfg
+    int bindingIndex{-1};         // TensorRT binding index; -1 until resolved
+    float* hostBuffer{nullptr};   // host-side buffer for this output
+};
+
+// Darknet-cfg based YOLO model parser: reads the cfg/weights files named in
+// NetworkInfo and builds the corresponding TensorRT network / engine.
+class Yolo : public IModelParser {
+public:
+    Yolo(const NetworkInfo& networkInfo);
+    ~Yolo() override;
+    // This parser works with implicit-batch (non full-dims) networks only.
+    bool hasFullDimsSupported() const override { return false; }
+    // Prefer the cfg path as the model name (used for the engine file path).
+    const char* getModelName() const override {
+        return m_ConfigFilePath.empty() ? m_NetworkType.c_str()
+                                        : m_ConfigFilePath.c_str();
+    }
+    NvDsInferStatus parseModel(nvinfer1::INetworkDefinition& network) override;
+
+    // Builds and returns a CUDA engine using the given builder.
+    nvinfer1::ICudaEngine *createEngine (nvinfer1::IBuilder* builder);
+
+protected:
+    const std::string m_NetworkType;
+    const std::string m_ConfigFilePath;
+    const std::string m_WtsFilePath;
+    const std::string m_DeviceType;
+    const std::string m_InputBlobName;
+    const std::string m_OutputBlobName;
+    std::vector<TensorInfo> m_OutputTensors;
+    // Parsed cfg: one key/value map per [section] block.
+    std::vector<std::map<std::string, std::string>> m_ConfigBlocks;
+    uint m_InputH;
+    uint m_InputW;
+    uint m_InputC;
+    uint64_t m_InputSize;
+
+    uint m_MaxBatchSize;
+
+    // TRT specific members
+    std::vector<nvinfer1::Weights> m_TrtWeights;
+    std::vector<nvinfer1::ITensor*> m_YoloTensor;
+
+    std::vector<YoloKernel> m_YoloKernel;
+
+
+private:
+    NvDsInferStatus buildYoloNetwork(
+        std::vector<float>& weights, nvinfer1::INetworkDefinition& network);
+    std::vector<std::map<std::string, std::string>> parseConfigFile(
+        const std::string cfgFilePath);
+    void parseConfigBlocks();
+    void destroyNetworkUtils();
+};
+
+#endif // _YOLO_H_

+ 56 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/yolodetect.h

@@ -0,0 +1,56 @@
+#ifndef YOLODETECT_H
+#define YOLODETECT_H
+
+#include "opencv2/opencv.hpp"
+#include "NvInfer.h"
+#include "NvInferRuntime.h"
+#include "cuda_runtime_api.h"
+
+#include "logging.h"
+#include "yolo.h"
+#include "trt_utils.h"
+#include "yololayer.h"
+#include "mish.h"
+
+using namespace nvinfer1;
+REGISTER_TENSORRT_PLUGIN(MishPluginCreator);
+REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
+
+
+// Convenience wrapper around a YOLOv4 TensorRT engine: engine load/save,
+// raw inference, and image-level detection with NMS post-processing.
+class YoloDetect
+{
+public:
+    YoloDetect(NetworkInfo &networkInfo, std::string &modelname):
+        m_networkInfo(networkInfo),m_modelname(modelname)
+    {
+
+    }
+    // Obtains an execution context for the engine named by m_modelname.
+    bool loadModel(IExecutionContext*& context);
+    // Raw inference: input/output are host buffers sized for batch_size.
+    void doInference(IExecutionContext& context,float* input, float* output, int batch_size);
+    // Single-image and batched overloads: preprocess, infer, then NMS with
+    // the given confidence/IoU thresholds.
+    bool process(IExecutionContext& context, cv::Mat &image, std::vector<Detection> &detect_result,float ignore_thresh=0.4,float nms_thresh = 0.4);
+    bool process(IExecutionContext& context, uint max_batch_size,
+                 std::vector<cv::Mat> &image_vec,
+                 std::vector<std::vector<Detection>> &detect_results,
+                 float ignore_thresh=0.4,float nms_thresh = 0.4);
+    // Maps a network-space bbox back to pixel coordinates of img.
+    cv::Rect get_rect(cv::Mat& img, float bbox[4],int input_w,int input_h);
+private:
+    bool saveEngine();
+    ICudaEngine* loadEngine(IRuntime& runtime);
+    cv::Mat preprocess_img(cv::Mat& img,int input_w,int input_h);
+    float iou(float lbox[4], float rbox[4]);
+    static bool cmp(Detection& a, Detection& b);
+    void nms(std::vector<Detection>& res, float *output, float ignore_thresh=0.4,float nms_thresh = 0.4);
+
+public:
+    int m_input_h;      // network input height — presumably read from the engine's input binding
+    int m_input_w;      // network input width
+    int m_output_size;  // element count of the output binding
+
+private:
+    NetworkInfo m_networkInfo;
+    std::string m_modelname;
+    Logger gLogger;
+
+};
+
+#endif // YOLODETECT_H

+ 126 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/include/yololayer.h

@@ -0,0 +1,126 @@
+#ifndef _YOLO_LAYER_H
+#define _YOLO_LAYER_H
+
+#include <assert.h>
+#include <cmath>
+#include <string.h>
+#include <cublas_v2.h>
+#include "NvInfer.h"
+#include "Utils.h"
+#include <iostream>
+#include "NvInferPlugin.h"
+
+// Parameters of one yolo output head, parsed from the darknet cfg.
+struct YoloKernel
+{
+    int width;
+    int height;
+    int everyYoloAnchors;
+    float anchors[10];   // anchor values for one yolo head (== 3*2); the array may be sized larger, which is harmless
+};
+
+// One decoded detection. Laid out as 7 consecutive floats so it can be
+// memcpy'd directly from the network's flat output buffer.
+struct alignas(float) Detection{
+    //x y w h
+    float bbox[4];
+    float det_confidence;
+    float class_id;
+    float class_confidence;
+};
+
+namespace nvinfer1
+{
+    // TensorRT plugin implementing the YOLO decode layer: converts the raw
+    // head outputs into Detection records (linear FP32 only).
+    class YoloLayerPlugin: public IPluginV2IOExt
+    {
+        public:
+            YoloLayerPlugin(const PluginFieldCollection& fc);
+            // Deserialization ctor: restores plugin state from a byte stream.
+            YoloLayerPlugin(const void* data, size_t length);
+
+            ~YoloLayerPlugin();
+
+            int getNbOutputs() const override
+            {
+                return 1;
+            }
+
+            Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override;
+
+            int initialize() override;
+
+            virtual void terminate() override {};
+
+            virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;}
+
+            virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override;
+
+            virtual size_t getSerializationSize() const override;
+
+            virtual void serialize(void* buffer) const override;
+
+            // Accept only linear-layout FP32 tensors.
+            bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override {
+                return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
+            }
+
+            const char* getPluginType() const override;
+
+            const char* getPluginVersion() const override;
+
+            void destroy() override;
+
+            IPluginV2IOExt* clone() const override;
+
+            void setPluginNamespace(const char* pluginNamespace) override;
+
+            const char* getPluginNamespace() const override;
+
+            DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
+
+            bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override;
+
+            bool canBroadcastInputAcrossBatch(int inputIndex) const override;
+
+            void attachToContext(
+                    cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override;
+
+            void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override;
+
+            void detachFromContext() override;
+
+        private:
+            void forwardGpu(const float *const * inputs,float * output, cudaStream_t stream,int batchSize = 1);
+            int mClassCount;        // number of object classes, read from the cfg file
+            int mInput_w;           // network input width, taken from the cfg
+            int mInput_h;           // input height; the upsample layers require width == height (TODO: generalize)
+            int mNumYoloLayers;     // number of yolo output layers, derived from the cfg (no manual setting needed)
+            std::vector<YoloKernel> mYoloKernel;
+
+            float mIgnore_thresh = 0.4;     // confidence threshold, tunable
+            int max_output_box = 1000;      // maximum number of output boxes
+            int mThreadCount = 256;         // CUDA threads per block for the kernel launch
+            const char* mPluginNamespace;   // namespace of this plugin
+
+    };
+    // IPluginCreator implementation: overrides the factory virtuals so
+    // TensorRT can create and deserialize the yolo layer plugin.
+    class YoloPluginCreator : public IPluginCreator
+    {
+        public:
+            YoloPluginCreator();
+
+            ~YoloPluginCreator() override = default;
+            const char* getPluginName() const override;
+            const char* getPluginVersion() const override;
+            const PluginFieldCollection* getFieldNames() override;
+            // Creates the plugin; called while the network is being built.
+            IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override;
+            // Deserialization; called when a saved TRT engine is loaded, responsible for parsing the plugin state.
+            IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;
+            void setPluginNamespace(const char* libNamespace) override{
+                mNamespace = libNamespace;
+            }
+            const char* getPluginNamespace() const override{
+                return mNamespace.c_str();
+            }
+        private:
+            std::string mNamespace;
+    };
+};
+
+#endif 

+ 270 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/main.cpp

@@ -0,0 +1,270 @@
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <vector>
+#include <chrono>
+#include <string>
+#include "NvInfer.h"
+#include "NvInferRuntime.h"
+#include "cuda_runtime_api.h"
+
+#include <cmath>
+
+#include "logging.h"
+#include "yolo.h"
+#include "trt_utils.h"
+#include "yololayer.h"
+#include "mish.h"
+
+#include "opencv2/opencv.hpp"
+
+using namespace nvinfer1;
+
+Logger gLogger;
+REGISTER_TENSORRT_PLUGIN(MishPluginCreator);
+REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
+
+// Letterbox-resizes `img` to input_w x input_h: scales by the limiting
+// dimension, centers the result, and pads the borders with gray (128,128,128).
+cv::Mat preprocess_img(cv::Mat& img,int input_w,int input_h) {
+    int w, h, x, y;
+    float r_w = input_w / (img.cols*1.0);
+    float r_h = input_h / (img.rows*1.0);
+    if (r_h > r_w) {
+        // Width-limited: fill the width, pad top/bottom.
+        w = input_w;
+        h = r_w * img.rows;
+        x = 0;
+        y = (input_h - h) / 2;
+    } else {
+        // Height-limited: fill the height, pad left/right.
+        w = r_h* img.cols;
+        h = input_h;
+        x = (input_w - w) / 2;
+        y = 0;
+    }
+    cv::Mat re(h, w, CV_8UC3);
+    cv::resize(img, re, re.size(), 0, 0, cv::INTER_CUBIC);
+    cv::Mat out(input_h, input_w, CV_8UC3, cv::Scalar(128, 128, 128));
+    re.copyTo(out(cv::Rect(x, y, re.cols, re.rows)));
+    return out;
+}
+
+// Maps a bbox (center-x, center-y, w, h in network-input coordinates) back to
+// a pixel rect of the original image, undoing the letterbox offset and scale.
+// NOTE(review): l/r/t/b are ints, so coordinates are truncated *before* the
+// division by the scale factor, losing sub-pixel precision.
+cv::Rect get_rect(cv::Mat& img, float bbox[4],int input_w,int input_h) {
+    int l, r, t, b;
+    float r_w = input_w / (img.cols * 1.0);
+    float r_h = input_h / (img.rows * 1.0);
+    if (r_h > r_w) {
+        l = bbox[0] - bbox[2]/2.f;
+        r = bbox[0] + bbox[2]/2.f;
+        t = bbox[1] - bbox[3]/2.f - (input_h - r_w * img.rows) / 2;
+        b = bbox[1] + bbox[3]/2.f - (input_h - r_w * img.rows) / 2;
+        l = l / r_w;
+        r = r / r_w;
+        t = t / r_w;
+        b = b / r_w;
+    } else {
+        l = bbox[0] - bbox[2]/2.f - (input_w - r_h * img.cols) / 2;
+        r = bbox[0] + bbox[2]/2.f - (input_w - r_h * img.cols) / 2;
+        t = bbox[1] - bbox[3]/2.f;
+        b = bbox[1] + bbox[3]/2.f;
+        l = l / r_h;
+        r = r / r_h;
+        t = t / r_h;
+        b = b / r_h;
+    }
+    return cv::Rect(l, t, r-l, b-t);
+}
+
+// Intersection-over-union of two center-format boxes (cx, cy, w, h).
+// Returns 0 when the boxes do not overlap.
+float iou(float lbox[4], float rbox[4]) {
+    float interBox[] = {
+        std::max(lbox[0] - lbox[2]/2.f , rbox[0] - rbox[2]/2.f), //left
+        std::min(lbox[0] + lbox[2]/2.f , rbox[0] + rbox[2]/2.f), //right
+        std::max(lbox[1] - lbox[3]/2.f , rbox[1] - rbox[3]/2.f), //top
+        std::min(lbox[1] + lbox[3]/2.f , rbox[1] + rbox[3]/2.f), //bottom
+    };
+
+    if(interBox[2] > interBox[3] || interBox[0] > interBox[1])
+        return 0.0f;
+
+    float interBoxS =(interBox[1]-interBox[0])*(interBox[3]-interBox[2]);
+    return interBoxS/(lbox[2]*lbox[3] + rbox[2]*rbox[3] -interBoxS);
+}
+
+// Sort predicate: higher detection confidence first.
+bool cmp(Detection& a, Detection& b) {
+    return a.det_confidence > b.det_confidence;
+}
+
+// Greedy per-class non-maximum suppression over the raw network output.
+// `output` layout: output[0] = candidate count, then 7 floats per box
+// (bbox[4], det_confidence, class_id, class_confidence) matching Detection.
+// Survivors are appended to `res`.
+void nms(std::vector<Detection>& res, float *output, float ignore_thresh=0.4,float nms_thresh = 0.4) {
+    std::map<float, std::vector<Detection>> m;   // class_id -> candidates above threshold
+//    std::cout << "output[0] "<< output[0]<<std::endl;
+    for (int i = 0; i < output[0] && i < 1000; i++) {
+        if (output[1 + 7 * i + 4] <= ignore_thresh) continue;
+        Detection det;
+        memcpy(&det, &output[1 + 7 * i], 7 * sizeof(float));
+        if (m.count(det.class_id) == 0) m.emplace(det.class_id, std::vector<Detection>());
+        m[det.class_id].push_back(det);
+    }
+    for (auto it = m.begin(); it != m.end(); it++) {
+        auto& dets = it->second;
+        std::sort(dets.begin(), dets.end(), cmp);
+        // NOTE(review): loop index `m` shadows the map `m` above; rename one.
+        for (size_t m = 0; m < dets.size(); ++m) {
+            auto& item = dets[m];
+            res.push_back(item);
+            // Drop every lower-confidence box overlapping the kept one.
+            for (size_t n = m + 1; n < dets.size(); ++n) {
+                if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
+                    dets.erase(dets.begin()+n);
+                    --n;
+                }
+            }
+        }
+    }
+}
+
+// Entry point. "-s" builds the TensorRT engine from cfg/weights and
+// serializes it to <networkType>.engine; "-d" deserializes that engine and
+// runs detection on a hard-coded video, drawing results until ESC.
+// NOTE(review): CUDA API calls below are unchecked (no cudaError_t handling).
+int main(int argc,char* argv[])
+{
+    cudaSetDevice(0);
+    char *trtModelStream{nullptr};
+    size_t size{0};
+
+    NetworkInfo networkInfo;
+
+    networkInfo.networkType     = "yolov4-turnstile";
+    networkInfo.configFilePath  = "../data/yolov4-turnstile.cfg";
+    networkInfo.wtsFilePath     = "../data/yolov4-turnstile.weights";
+    networkInfo.deviceType      = "kGPU";
+    networkInfo.inputBlobName   = "data";
+
+    std::string modelname = networkInfo.networkType + ".engine";
+
+    // NOTE(review): builder is never destroyed on either path (resource leak).
+    IBuilder* builder = createInferBuilder(gLogger);
+    if (argc == 2 && std::string(argv[1]) == "-s") {
+        IHostMemory* modelStream{nullptr};
+        Yolo yolo(networkInfo);
+        ICudaEngine *cudaEngine = yolo.createEngine (builder);
+        modelStream = cudaEngine->serialize();
+        assert(modelStream != nullptr);
+        std::ofstream p(modelname, std::ios::binary);
+        if (!p) {
+            std::cerr << "could not open plan output file" << std::endl;
+            return -1;
+        }
+        p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
+        modelStream->destroy();
+        return 0;
+    } else if (argc == 2 && std::string(argv[1]) == "-d") {
+        // NOTE(review): if the engine file is missing, trtModelStream stays
+        // null and size 0, and deserializeCudaEngine below is fed null/0.
+        std::ifstream file(modelname, std::ios::binary);
+        if (file.good()) {
+            file.seekg(0, file.end);
+            size = file.tellg();
+            file.seekg(0, file.beg);
+            trtModelStream = new char[size];
+            assert(trtModelStream);
+            file.read(trtModelStream, size);
+            file.close();
+        }
+    }else {
+        std::cerr << "arguments not right!" << std::endl;
+        std::cerr << "./yolov3 -s  // serialize model to plan file" << std::endl;
+        std::cerr << "./yolov3 -d  // deserialize plan file and run inference" << std::endl;
+        return -1;
+    }
+
+    IRuntime* runtime = createInferRuntime(gLogger);
+    assert(runtime != nullptr);
+    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
+    assert(engine != nullptr);
+    IExecutionContext* context = engine->createExecutionContext();
+    assert(context != nullptr);
+    delete[] trtModelStream;
+
+    int numbindings=engine->getNbBindings();
+    std::cout<< "getNbBindings: " << numbindings<<std::endl;
+
+    const char* layername = engine->getBindingName(1);
+    std::cout<< "getBindingName:1 " << layername<<std::endl;
+    Dims out = engine->getBindingDimensions(1);
+    std::cout<< "out dims: " << out.d[0]<<" "<<out.d[1]<<" "<<out.d[2]<<" "<<out.d[3]<<std::endl;
+
+    // NOTE(review): label says "out dims" but these are the input binding dims.
+    Dims in = engine->getBindingDimensions(0);
+    std::cout<< "out dims: " << in.d[0]<<" "<<in.d[1]<<" "<<in.d[2]<<" "<<in.d[3]<<std::endl;
+
+    int input_h =  in.d[1];
+    int input_w =  in.d[2];
+    int OUTPUT_SIZE = out.d[0];
+
+    void* buffers[2];
+    int batchSize = 1;
+
+    cudaMalloc(&buffers[0], batchSize * 3 * input_h * input_w * sizeof(float));
+    cudaMalloc(&buffers[1], batchSize * OUTPUT_SIZE * sizeof(float));
+
+    // Create stream
+    cudaStream_t stream;
+    cudaStreamCreate(&stream);
+
+    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
+    cv::VideoCapture cap("../20201231144029.avi");
+    cv::Mat img;
+    cv::Mat pr_img;
+
+    bool detect = false;
+
+    // NOTE(review): runtime-sized stack arrays (VLAs) — a compiler extension,
+    // not standard C++; prefer std::vector<float>.
+    float data[3 * input_h * input_w];
+    float prob[OUTPUT_SIZE];
+
+    std::cout<<"start detect"<<std::endl;
+
+    while (true){
+        // Skips only the very first loop iteration; afterwards detect stays true.
+        if(!detect){detect=true; continue;}
+        // NOTE(review): no img.empty() check — reaching the end of the video
+        // will crash preprocess_img.
+        cap>>img;
+        cv::Mat pr_img = preprocess_img(img,input_w,input_h);
+        // Convert letterboxed BGR HWC uint8 to normalized RGB CHW float.
+        for (int i = 0; i < input_h * input_w; i++) {
+            data[i] = pr_img.at<cv::Vec3b>(i)[2] / 255.0;
+            data[i + input_h * input_w] = pr_img.at<cv::Vec3b>(i)[1] / 255.0;
+            data[i + 2 * input_h * input_w] = pr_img.at<cv::Vec3b>(i)[0] / 255.0;
+        }
+
+//        // Run inference
+        auto start = std::chrono::system_clock::now();
+
+        cudaMemcpyAsync(buffers[0], data, batchSize * 3 * input_w * input_h * sizeof(float), cudaMemcpyHostToDevice, stream);
+        context->enqueue(batchSize, buffers, stream, nullptr);
+        cudaMemcpyAsync(prob, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream);
+        cudaStreamSynchronize(stream);
+
+        auto end = std::chrono::system_clock::now();
+        std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
+
+        std::vector<Detection> res;
+        nms(res, prob);
+
+        // Draw surviving detections with their class id.
+        for (size_t j = 0; j < res.size(); j++) {
+            float *p = (float*)&res[j];
+            cv::Rect r = get_rect(img, res[j].bbox,input_w,input_h);
+            cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
+            //std::string text = std::to_string((int)res[j].class_id) + " "+
+                    //std::to_string((float)res[j].det_confidence)+" "+
+                    //std::to_string((float)res[j].class_confidence);
+
+            std::string text = std::to_string((int)res[j].class_id);
+            cv::putText(img, text, cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
+        }
+        cv::imshow("_", img);
+        if(cv::waitKey(1)==27){break;}
+    }
+
+    // Release stream and buffers
+    cudaStreamDestroy(stream);
+    cudaFree(buffers[0]);
+    cudaFree(buffers[1]);
+
+    // Destroy the engine
+    context->destroy();
+    engine->destroy();
+    runtime->destroy();
+}
+
+
+
+
+
+
+

+ 196 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/mish.cu

@@ -0,0 +1,196 @@
+#include <cmath>
+#include <stdio.h>
+#include <cassert>
+#include <iostream>
+#include "mish.h"
+
+namespace nvinfer1
+{
+    MishPlugin::MishPlugin()
+    {
+    }
+
+    MishPlugin::~MishPlugin()
+    {
+    }
+
+    // create the plugin at runtime from a byte stream
+    // Deserialization ctor: the serialized payload is a single int (input_size_).
+    MishPlugin::MishPlugin(const void* data, size_t length)
+    {
+        assert(length == sizeof(input_size_));
+        input_size_ = *reinterpret_cast<const int*>(data);
+    }
+
+    // Writes the only piece of state, input_size_, into `buffer`
+    // (buffer size is given by getSerializationSize()).
+    void MishPlugin::serialize(void* buffer) const
+    {
+        *reinterpret_cast<int*>(buffer) = input_size_;
+    }
+
+    size_t MishPlugin::getSerializationSize() const
+    {  
+        return sizeof(input_size_);
+    }
+
+    int MishPlugin::initialize()
+    { 
+        return 0;
+    }
+
+    // Output shape equals the input shape (elementwise activation).
+    Dims MishPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
+    {
+        assert(nbInputDims == 1);
+        assert(index == 0);
+        // Side effect: caches the per-batch-item element count for enqueue().
+        input_size_ = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2];
+        // Output dimensions
+        return Dims3(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]);
+    }
+
+    // Set plugin namespace
+    void MishPlugin::setPluginNamespace(const char* pluginNamespace)
+    {
+        mPluginNamespace = pluginNamespace;
+    }
+
+    const char* MishPlugin::getPluginNamespace() const
+    {
+        return mPluginNamespace;
+    }
+
+    // Return the DataType of the plugin output at the requested index
+    DataType MishPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
+    {
+        return DataType::kFLOAT;
+    }
+
+    // Return true if output tensor is broadcast across a batch.
+    bool MishPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
+    {
+        return false;
+    }
+
+    // Return true if plugin can use input that is broadcast across batch without replication.
+    bool MishPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
+    {
+        return false;
+    }
+
+    void MishPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
+    {
+    }
+
+    // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
+    void MishPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
+    {
+    }
+
+    // Detach the plugin object from its execution context.
+    void MishPlugin::detachFromContext() {}
+
+    const char* MishPlugin::getPluginType() const
+    {
+        return "Mish_TRT";
+    }
+
+    const char* MishPlugin::getPluginVersion() const
+    {
+        return "1";
+    }
+
+    void MishPlugin::destroy()
+    {
+        delete this;
+    }
+
+    // Clone the plugin
+    IPluginV2IOExt* MishPlugin::clone() const
+    {
+        MishPlugin *p = new MishPlugin();
+        p->input_size_ = input_size_;
+        p->setPluginNamespace(mPluginNamespace);
+        return p;
+    }
+
+    // Device building blocks for mish: tanh via exp, and a clamped softplus
+    // that avoids expf overflow/underflow outside [-threshold, threshold].
+    __device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);}
+
+    __device__ float softplus_kernel(float x, float threshold = 20) {
+        if (x > threshold) return x;                // too large
+        else if (x < -threshold) return expf(x);    // too small
+        return logf(expf(x) + 1);
+    }
+
+    // Elementwise mish: output[i] = input[i] * tanh(softplus(input[i])).
+    // 1-D launch, one element per thread, guarded against the grid tail.
+    __global__ void mish_kernel(const float *input, float *output, int num_elem) {
+
+        int idx = threadIdx.x + blockDim.x * blockIdx.x;
+        if (idx >= num_elem) return;
+
+        //float t = exp(input[idx]);
+        //if (input[idx] > 20.0) {
+        //    t *= t;
+        //    output[idx] = (t - 1.0) / (t + 1.0);
+        //} else {
+        //    float tt = t * t;
+        //    output[idx] = (tt + 2.0 * t) / (tt + 2.0 * t + 2.0);
+        //}
+        //output[idx] *= input[idx];
+        output[idx] = input[idx] * tanh_activate_kernel(softplus_kernel(input[idx]));
+    }
+
+    // Launches mish_kernel over input_size_ * batchSize elements.
+    // NOTE(review): `stream` is accepted but never passed to the launch, so
+    // the kernel runs on the default stream instead of the caller's stream —
+    // should be mish_kernel<<<grid_size, block_size, 0, stream>>>(...).
+    void MishPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) {
+        int block_size = thread_count_;
+        int grid_size = (input_size_ * batchSize + block_size - 1) / block_size;
+        mish_kernel<<<grid_size, block_size>>>(inputs[0], output, input_size_ * batchSize);
+    }
+
+    // TensorRT entry point for execution; delegates to forwardGpu.
+    // NOTE(review): no cudaGetLastError() check after the kernel launch.
+    int MishPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
+    {
+        //assert(batchSize == 1);
+        //GPU
+        //CUDA_CHECK(cudaStreamSynchronize(stream));
+        forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize);
+        return 0;
+    }
+
+    PluginFieldCollection MishPluginCreator::mFC{};
+    std::vector<PluginField> MishPluginCreator::mPluginAttributes;
+
+    MishPluginCreator::MishPluginCreator()
+    {
+        mPluginAttributes.clear();
+
+        mFC.nbFields = mPluginAttributes.size();
+        mFC.fields = mPluginAttributes.data();
+    }
+
+    const char* MishPluginCreator::getPluginName() const
+    {
+            return "Mish_TRT";
+    }
+
+    const char* MishPluginCreator::getPluginVersion() const
+    {
+            return "1";
+    }
+
+    const PluginFieldCollection* MishPluginCreator::getFieldNames()
+    {
+            return &mFC;
+    }
+
+    IPluginV2IOExt* MishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
+    {
+        MishPlugin* obj = new MishPlugin();
+        obj->setPluginNamespace(mNamespace.c_str());
+        return obj;
+    }
+
+    IPluginV2IOExt* MishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
+    {
+        // This object will be deleted when the network is destroyed, which will
+        // call MishPlugin::destroy()
+        MishPlugin* obj = new MishPlugin(serialData, serialLength);
+        obj->setPluginNamespace(mNamespace.c_str());
+        return obj;
+    }
+
+}
+

+ 473 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/trt_utils.cpp

@@ -0,0 +1,473 @@
+/*
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "trt_utils.h"
+#include <experimental/filesystem>
+#include <fstream>
+#include <iomanip>
+#include <functional>
+#include <algorithm>
+#include <math.h>
+#include "NvInferPlugin.h"
+
+// Erase leading whitespace from `s` in place.
+static void leftTrim(std::string& s)
+{
+    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
+}
+
+// Erase trailing whitespace from `s` in place.
+static void rightTrim(std::string& s)
+{
+    s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end());
+}
+
+// Return a copy of `s` with both leading and trailing whitespace removed.
+std::string trim(std::string s)
+{
+    leftTrim(s);
+    rightTrim(s);
+    return s;
+}
+
+// Clamp `val` into the inclusive range [minVal, maxVal].
+// Precondition (asserted): minVal <= maxVal.
+float clamp(const float val, const float minVal, const float maxVal)
+{
+    assert(minVal <= maxVal);
+    return std::min(maxVal, std::max(minVal, val));
+}
+
+// Returns true when `fileName` exists on disk; optionally logs a message when
+// it does not (`verbose`).
+bool fileExists(const std::string fileName, bool verbose)
+{
+    if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName)))
+    {
+        if (verbose) std::cout << "File does not exist : " << fileName << std::endl;
+        return false;
+    }
+    return true;
+}
+
+// Read a darknet .weights file into a flat vector of 32-bit floats.
+// The binary header is skipped first: 4 int32 values for "yolov2", 5 for the
+// yolov3/yolov4 family; any other network type aborts. Everything after the
+// header is consumed 4 bytes at a time until EOF.
+std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
+{
+    assert(fileExists(weightsFilePath));
+    std::cout << "Loading pre-trained weights..." << std::endl;
+    std::ifstream file(weightsFilePath, std::ios_base::binary);
+    assert(file.good());
+    std::string line;
+
+    if (networkType == "yolov2")
+    {
+        // Remove 4 int32 bytes of data from the stream belonging to the header
+        file.ignore(4 * 4);
+    }
+    else if ((networkType == "yolov3") || (networkType == "yolov3-tiny")
+             || (networkType == "yolov4") || (networkType == "yolov4-tiny") || (networkType == "yolov4-turnstile"))
+    {
+        // Remove 5 int32 bytes of data from the stream belonging to the header
+        file.ignore(4 * 5);
+    }
+    else
+    {
+        std::cout << "Invalid network type" << std::endl;
+        assert(0);
+    }
+
+    std::vector<float> weights;
+    char floatWeight[4];
+    while (!file.eof())
+    {
+        file.read(floatWeight, 4);
+        assert(file.gcount() == 4);
+        // NOTE(review): reinterpreting the raw byte buffer as float assumes the
+        // host endianness matches the file's (little-endian darknet weights);
+        // a memcpy into a float would avoid the strict-aliasing concern.
+        weights.push_back(*reinterpret_cast<float*>(floatWeight));
+        if (file.peek() == std::istream::traits_type::eof()) break;
+    }
+    std::cout << "Loading weights of " << networkType << " complete!"
+              << std::endl;
+    std::cout << "Total Number of weights read : " << weights.size() << std::endl;
+    return weights;
+}
+
+// Format a Dims object as "  d0 x  d1 x  d2" (each value right-aligned in a
+// 4-character field). Requires at least one dimension.
+std::string dimsToString(const nvinfer1::Dims d)
+{
+    std::stringstream s;
+    assert(d.nbDims >= 1);
+    for (int i = 0; i < d.nbDims - 1; ++i)
+    {
+        s << std::setw(4) << d.d[i] << " x";
+    }
+    s << std::setw(4) << d.d[d.nbDims - 1];
+
+    return s.str();
+}
+
+// Debug helper: print the number of dimensions and the legacy DimensionType
+// tag of each dimension to stdout.
+void displayDimType(const nvinfer1::Dims d)
+{
+    std::cout << "(" << d.nbDims << ") ";
+    for (int i = 0; i < d.nbDims; ++i)
+    {
+        switch (d.type[i])
+        {
+        case nvinfer1::DimensionType::kSPATIAL: std::cout << "kSPATIAL "; break;
+        case nvinfer1::DimensionType::kCHANNEL: std::cout << "kCHANNEL "; break;
+        case nvinfer1::DimensionType::kINDEX: std::cout << "kINDEX "; break;
+        case nvinfer1::DimensionType::kSEQUENCE: std::cout << "kSEQUENCE "; break;
+        }
+    }
+    std::cout << std::endl;
+}
+
+// Channel count of a CHW tensor (dimension 0). Asserts the tensor is 3-D.
+int getNumChannels(nvinfer1::ITensor* t)
+{
+    nvinfer1::Dims d = t->getDimensions();
+    assert(d.nbDims == 3);
+
+    return d.d[0];
+}
+
+// Total element count of a 3-D (CHW) tensor.
+// BUG FIX: the product was previously computed entirely in (signed) int and
+// only then converted to uint64_t, so large tensors could overflow before the
+// widening; cast the first factor so the whole product is done in 64 bits.
+uint64_t get3DTensorVolume(nvinfer1::Dims inputDims)
+{
+    assert(inputDims.nbDims == 3);
+    return static_cast<uint64_t>(inputDims.d[0]) * inputDims.d[1] * inputDims.d[2];
+}
+
+// Add a darknet "maxpool" block to the TRT network: kMAX pooling with the
+// configured kernel size and stride, using kSAME_UPPER padding so the output
+// spatial size matches darknet's behaviour. Returns the pooling layer.
+nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
+                                nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "maxpool");
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    int size = std::stoi(block.at("size"));
+    int stride = std::stoi(block.at("stride"));
+
+    nvinfer1::IPoolingLayer* pool
+        = network->addPooling(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size});
+    assert(pool);
+    std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
+    pool->setStride(nvinfer1::DimsHW{stride, stride});
+    pool->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
+    pool->setName(maxpoolLayerName.c_str());
+
+    return pool;
+}
+
+// Add a plain convolution (linear activation, no batch-norm) for one darknet
+// "convolutional" block. Darknet weight layout: conv biases first, then the
+// conv kernel weights. `weightPtr` is advanced past everything consumed and
+// the allocated TRT weight buffers are recorded in `trtWeights` so the caller
+// can free them after engine build.
+nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
+                                   std::vector<float>& weights,
+                                   std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+                                   int& inputChannels, nvinfer1::ITensor* input,
+                                   nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "convolutional");
+    assert(block.find("batch_normalize") == block.end());
+    assert(block.at("activation") == "linear");
+    assert(block.find("filters") != block.end());
+    assert(block.find("pad") != block.end());
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    int filters = std::stoi(block.at("filters"));
+    int padding = std::stoi(block.at("pad"));
+    int kernelSize = std::stoi(block.at("size"));
+    int stride = std::stoi(block.at("stride"));
+    int pad;
+    // darknet pad=1 means "same" padding of (k-1)/2
+    if (padding)
+        pad = (kernelSize - 1) / 2;
+    else
+        pad = 0;
+    // load the convolution layer bias
+    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, filters};
+    float* val = new float[filters];
+    for (int i = 0; i < filters; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    convBias.values = val;
+    trtWeights.push_back(convBias);
+    // load the convolutional layer weights
+    int size = filters * inputChannels * kernelSize * kernelSize;
+    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
+    val = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    convWt.values = val;
+    trtWeights.push_back(convWt);
+    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
+        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
+    assert(conv != nullptr);
+    std::string convLayerName = "conv_" + std::to_string(layerIdx);
+    conv->setName(convLayerName.c_str());
+    conv->setStride(nvinfer1::DimsHW{stride, stride});
+    conv->setPadding(nvinfer1::DimsHW{pad, pad});
+
+    return conv;
+}
+
+// Add a convolution + batch-norm + activation stack for one darknet
+// "convolutional" block with batch_normalize=1. Darknet weight layout for BN
+// conv layers: bn biases, bn weights, bn running mean, bn running var, then
+// the conv kernel weights (no conv bias). BN is folded into a TRT IScaleLayer
+// (shift/scale/power). Supported activations: "leaky" and "mish".
+// `weightPtr` is advanced past everything consumed; allocated TRT weight
+// buffers are recorded in `trtWeights` for later cleanup by the caller.
+nvinfer1::ILayer* netAddConvBNActive(int layerIdx, std::map<std::string, std::string>& block,
+                                    std::vector<float>& weights,
+                                    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+                                    int& inputChannels, nvinfer1::ITensor* input,
+                                    nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "convolutional");
+    assert(block.find("batch_normalize") != block.end());
+    assert(block.at("batch_normalize") == "1");
+//    assert(block.at("activation") == "leaky");
+    assert(block.find("filters") != block.end());
+    assert(block.find("pad") != block.end());
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    bool batchNormalize, bias;
+    if (block.find("batch_normalize") != block.end())
+    {
+        batchNormalize = (block.at("batch_normalize") == "1");
+        bias = false;
+    }
+    else
+    {
+        batchNormalize = false;
+        bias = true;
+    }
+    // all conv_bn_leaky layers assume bias is false
+    assert(batchNormalize == true && bias == false);
+    UNUSED(batchNormalize);
+    UNUSED(bias);
+
+    int filters = std::stoi(block.at("filters"));
+    int padding = std::stoi(block.at("pad"));
+    int kernelSize = std::stoi(block.at("size"));
+    int stride = std::stoi(block.at("stride"));
+    int pad;
+    // darknet pad=1 means "same" padding of (k-1)/2
+    if (padding)
+        pad = (kernelSize - 1) / 2;
+    else
+        pad = 0;
+
+    /***** CONVOLUTION LAYER *****/
+    /*****************************/
+    // batch norm weights are before the conv layer
+    // load BN biases (bn_biases)
+    std::vector<float> bnBiases;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnBiases.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN weights
+    std::vector<float> bnWeights;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnWeights.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_mean
+    std::vector<float> bnRunningMean;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnRunningMean.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_var (stored as sqrt(var + eps) for the scale math below)
+    std::vector<float> bnRunningVar;
+    for (int i = 0; i < filters; ++i)
+    {
+        // 1e-05 for numerical stability
+        bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5));
+        weightPtr++;
+    }
+    // load Conv layer weights (GKCRS)
+    int size = filters * inputChannels * kernelSize * kernelSize;
+    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
+    float* val = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    convWt.values = val;
+    trtWeights.push_back(convWt);
+    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0};
+    trtWeights.push_back(convBias);
+    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
+        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
+    assert(conv != nullptr);
+    std::string convLayerName = "conv_" + std::to_string(layerIdx);
+    conv->setName(convLayerName.c_str());
+    conv->setStride(nvinfer1::DimsHW{stride, stride});
+    conv->setPadding(nvinfer1::DimsHW{pad, pad});
+
+    /***** BATCHNORM LAYER *****/
+    /***************************/
+    size = filters;
+    // create the weights: y = (x * scale + shift) ^ power, per channel
+    nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
+    float* shiftWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        shiftWt[i]
+            = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
+    }
+    shift.values = shiftWt;
+    float* scaleWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
+    }
+    scale.values = scaleWt;
+    float* powerWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        powerWt[i] = 1.0;
+    }
+    power.values = powerWt;
+    trtWeights.push_back(shift);
+    trtWeights.push_back(scale);
+    trtWeights.push_back(power);
+    // Add the batch norm layers
+    nvinfer1::IScaleLayer* bn = network->addScale(
+        *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
+    assert(bn != nullptr);
+    std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
+    bn->setName(bnLayerName.c_str());
+    /***** ACTIVATION LAYER *****/
+    /****************************/
+    if(block.at("activation") == "leaky"){
+        nvinfer1::ITensor* bnOutput = bn->getOutput(0);
+        nvinfer1::IActivationLayer* leaky = network->addActivation(
+                    *bnOutput, nvinfer1::ActivationType::kLEAKY_RELU);
+        // BUG FIX: assert before dereferencing (setAlpha was previously called
+        // on the pointer before the null check).
+        assert(leaky != nullptr);
+        leaky->setAlpha(0.1);
+        std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
+        leaky->setName(leakyLayerName.c_str());
+        return leaky;
+    }else if(block.at("activation") == "mish")
+    {
+        auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1");
+        const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames();
+        nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("mish" + std::to_string(layerIdx)).c_str(), pluginData);
+        nvinfer1::ITensor* inputTensors[] = {bn->getOutput(0)};
+        auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj);
+        assert(mish != nullptr);
+        return mish;
+    }
+
+    // BUG FIX: control previously fell off the end of this non-void function
+    // for any other activation string (undefined behaviour). Fail loudly.
+    std::cout << "Unsupported activation type --> \"" << block.at("activation") << "\"" << std::endl;
+    assert(0);
+    return nullptr;
+}
+
+// Add a darknet "upsample" block implemented as two constant matrix
+// multiplies: preMul (stride*h x h) replicates rows, postMul (w x stride*w)
+// replicates columns, so the output is the input nearest-neighbour upsampled
+// by `stride` in both spatial dims. Requires a square 3-D input (h == w).
+// The constant weight buffers are recorded in `trtWeights` for later cleanup.
+nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block,
+                                 std::vector<float>& weights,
+                                 std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels,
+                                 nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "upsample");
+    nvinfer1::Dims inpDims = input->getDimensions();
+    assert(inpDims.nbDims == 3);
+    assert(inpDims.d[1] == inpDims.d[2]);
+    int h = inpDims.d[1];
+    int w = inpDims.d[2];
+    int stride = std::stoi(block.at("stride"));
+    // add pre multiply matrix as a constant
+    nvinfer1::Dims preDims{3,
+                           {1, stride * h, w},
+                           {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL,
+                            nvinfer1::DimensionType::kSPATIAL}};
+    int size = stride * h * w;
+    nvinfer1::Weights preMul{nvinfer1::DataType::kFLOAT, nullptr, size};
+    float* preWt = new float[size];
+    /* (2*h * w)
+    [ [1, 0, ..., 0],
+      [1, 0, ..., 0],
+      [0, 1, ..., 0],
+      [0, 1, ..., 0],
+      ...,
+      ...,
+      [0, 0, ..., 1],
+      [0, 0, ..., 1] ]
+    */
+    for (int i = 0, idx = 0; i < h; ++i)
+    {
+        for (int s = 0; s < stride; ++s)
+        {
+            for (int j = 0; j < w; ++j, ++idx)
+            {
+                preWt[idx] = (i == j) ? 1.0 : 0.0;
+            }
+        }
+    }
+    preMul.values = preWt;
+    trtWeights.push_back(preMul);
+    nvinfer1::IConstantLayer* preM = network->addConstant(preDims, preMul);
+    assert(preM != nullptr);
+    std::string preLayerName = "preMul_" + std::to_string(layerIdx);
+    preM->setName(preLayerName.c_str());
+    // add post multiply matrix as a constant
+    nvinfer1::Dims postDims{3,
+                            {1, h, stride * w},
+                            {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL,
+                             nvinfer1::DimensionType::kSPATIAL}};
+    size = stride * h * w;
+    nvinfer1::Weights postMul{nvinfer1::DataType::kFLOAT, nullptr, size};
+    float* postWt = new float[size];
+    /* (h * 2*w)
+    [ [1, 1, 0, 0, ..., 0, 0],
+      [0, 0, 1, 1, ..., 0, 0],
+      ...,
+      ...,
+      [0, 0, 0, 0, ..., 1, 1] ]
+    */
+    for (int i = 0, idx = 0; i < h; ++i)
+    {
+        for (int j = 0; j < stride * w; ++j, ++idx)
+        {
+            postWt[idx] = (j / stride == i) ? 1.0 : 0.0;
+        }
+    }
+    postMul.values = postWt;
+    trtWeights.push_back(postMul);
+    nvinfer1::IConstantLayer* post_m = network->addConstant(postDims, postMul);
+    assert(post_m != nullptr);
+    std::string postLayerName = "postMul_" + std::to_string(layerIdx);
+    post_m->setName(postLayerName.c_str());
+    // add matrix multiply layers for upsampling: out = preM * input * postM
+    nvinfer1::IMatrixMultiplyLayer* mm1
+        = network->addMatrixMultiply(*preM->getOutput(0), nvinfer1::MatrixOperation::kNONE, *input,
+                                     nvinfer1::MatrixOperation::kNONE);
+    assert(mm1 != nullptr);
+    std::string mm1LayerName = "mm1_" + std::to_string(layerIdx);
+    mm1->setName(mm1LayerName.c_str());
+    nvinfer1::IMatrixMultiplyLayer* mm2
+        = network->addMatrixMultiply(*mm1->getOutput(0), nvinfer1::MatrixOperation::kNONE,
+                                     *post_m->getOutput(0), nvinfer1::MatrixOperation::kNONE);
+    assert(mm2 != nullptr);
+    std::string mm2LayerName = "mm2_" + std::to_string(layerIdx);
+    mm2->setName(mm2LayerName.c_str());
+    return mm2;
+}
+
+// Print one aligned row of the layer summary table
+// (index | layer name | input volume | output volume | weight pointer).
+void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
+                    std::string layerOutput, std::string weightPtr)
+{
+    std::cout << std::setw(6) << std::left << layerIndex << std::setw(15) << std::left << layerName;
+    std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left
+              << layerOutput;
+    std::cout << std::setw(6) << std::left << weightPtr << std::endl;
+}

+ 510 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/yolo.cpp

@@ -0,0 +1,510 @@
+
+#include "yolo.h"
+
+#include <fstream>
+#include <iomanip>
+#include <iterator>
+
+using namespace nvinfer1;
+
+REGISTER_TENSORRT_PLUGIN(MishPluginCreator);
+REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);
+
+// Construct a Yolo builder from the network description. Input dimensions
+// start at 0 and are filled in later when the .cfg file is parsed.
+Yolo::Yolo(const NetworkInfo& networkInfo)
+    : m_NetworkType(networkInfo.networkType),           // yolov3
+      m_ConfigFilePath(networkInfo.configFilePath),     // yolov3.cfg
+      m_WtsFilePath(networkInfo.wtsFilePath),           // yolov3.weights
+      m_DeviceType(networkInfo.deviceType),             // kDLA, kGPU
+      m_InputBlobName(networkInfo.inputBlobName),       // data
+      m_InputH(0),
+      m_InputW(0),
+      m_InputC(0),
+      m_InputSize(0),
+      m_MaxBatchSize(networkInfo.maxbatchSize)
+{}
+
+// Release parsed config blocks and any TRT weight buffers still owned.
+Yolo::~Yolo()
+{
+    destroyNetworkUtils();
+}
+
+// Build a TensorRT engine for the configured network.
+// Creates a network definition, populates it via parseModel(), then builds
+// with FP16 enabled and the configured max batch size. Returns nullptr on
+// parse or build failure; the network definition is destroyed in all paths.
+nvinfer1::ICudaEngine *Yolo::createEngine (nvinfer1::IBuilder* builder)
+{
+    assert (builder);
+
+//    std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
+//    std::vector<nvinfer1::Weights> trtWeights;
+
+    nvinfer1::INetworkDefinition *network = builder->createNetwork();
+    if (parseModel(*network) != NVDSINFER_SUCCESS) {
+        network->destroy();
+        return nullptr;
+    }
+
+    // Build the engine
+    std::cout << "Building the TensorRT Engine..." << std::endl;
+
+    ///////////////////////////////////////
+    builder->setMaxBatchSize(m_MaxBatchSize);
+    ///////////////////////////////////////
+
+    builder->setFp16Mode(true);
+
+    nvinfer1::ICudaEngine * engine = builder->buildCudaEngine(*network);
+    if (engine) {
+        std::cout << "Building complete!" << std::endl;
+    } else {
+        std::cerr << "Building engine failed!" << std::endl;
+    }
+
+    // destroy
+    network->destroy();
+    return engine;
+}
+
+// Parse the darknet .cfg and .weights files and populate `network`.
+// Resets any previous parse state first, then delegates layer construction
+// to buildYoloNetwork(). Returns the builder status.
+NvDsInferStatus Yolo::parseModel(nvinfer1::INetworkDefinition& network) {
+    destroyNetworkUtils();
+
+    m_ConfigBlocks = parseConfigFile(m_ConfigFilePath);
+    parseConfigBlocks();
+
+    std::vector<float> weights = loadWeights(m_WtsFilePath, m_NetworkType);
+    // build yolo network
+    std::cout << "Building Yolo network..." << std::endl;
+    NvDsInferStatus status = buildYoloNetwork(weights, network);
+
+    if (status == NVDSINFER_SUCCESS) {
+        std::cout << "Building yolo network complete!" << std::endl;
+    } else {
+        std::cerr << "Building yolo network failed!" << std::endl;
+    }
+
+    return status;
+}
+
+NvDsInferStatus Yolo::buildYoloNetwork(
+    std::vector<float>& weights, nvinfer1::INetworkDefinition& network) {
+
+    // 清理yolo层
+    m_YoloKernel.clear();
+
+    int weightPtr = 0;
+    int channels = m_InputC;
+
+    nvinfer1::ITensor* data =
+        network.addInput(m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
+            nvinfer1::DimsCHW{static_cast<int>(m_InputC),
+                static_cast<int>(m_InputH), static_cast<int>(m_InputW)});
+    assert(data != nullptr && data->getDimensions().nbDims > 0);
+
+    nvinfer1::ITensor* previous = data;
+    std::vector<nvinfer1::ITensor*> tensorOutputs;
+    uint outputTensorCount = 0;
+
+    // build the network using the network API
+    for (uint i = 0; i < m_ConfigBlocks.size(); ++i) {
+        // check if num. of channels is correct
+        assert(getNumChannels(previous) == channels);
+        std::string layerIndex = "(" + std::to_string(tensorOutputs.size()) + ")";
+
+        if (m_ConfigBlocks.at(i).at("type") == "net") {
+            printLayerInfo("", "layer", "     inp_size", "     out_size", "weightPtr");
+        } else if (m_ConfigBlocks.at(i).at("type") == "convolutional") {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out;
+            std::string layerType;
+            // check if batch_norm enabled
+            if (m_ConfigBlocks.at(i).find("batch_normalize") != m_ConfigBlocks.at(i).end()) {
+
+                out = netAddConvBNActive(i, m_ConfigBlocks.at(i), weights,
+                                         m_TrtWeights, weightPtr, channels, previous, &network);
+                layerType = "conv-bn-Active";
+            }else{
+                out = netAddConvLinear(i, m_ConfigBlocks.at(i), weights,
+                    m_TrtWeights, weightPtr, channels, previous, &network);
+                layerType = "conv-linear";
+            }
+            previous = out->getOutput(0);
+            assert(previous != nullptr);
+            channels = getNumChannels(previous);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
+        } else if (m_ConfigBlocks.at(i).at("type") == "shortcut") {
+            assert(m_ConfigBlocks.at(i).at("activation") == "linear");
+            assert(m_ConfigBlocks.at(i).find("from") !=
+                   m_ConfigBlocks.at(i).end());
+            int from = stoi(m_ConfigBlocks.at(i).at("from"));
+
+            std::string inputVol = dimsToString(previous->getDimensions());
+            // check if indexes are correct
+            assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
+            assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
+            assert(i + from - 1 < i - 2);
+            nvinfer1::IElementWiseLayer* ew = network.addElementWise(
+                *tensorOutputs[i - 2], *tensorOutputs[i + from - 1],
+                nvinfer1::ElementWiseOperation::kSUM);
+            assert(ew != nullptr);
+            std::string ewLayerName = "shortcut_" + std::to_string(i);
+            ew->setName(ewLayerName.c_str());
+            previous = ew->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(ew->getOutput(0));
+            printLayerInfo(layerIndex, "skip", inputVol, outputVol, "    -");
+        } else if (m_ConfigBlocks.at(i).at("type") == "yolo") {
+            nvinfer1::Dims prevTensorDims = previous->getDimensions();
+            assert(prevTensorDims.d[1] == prevTensorDims.d[2]);
+            TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount);
+            curYoloTensor.gridSize = prevTensorDims.d[1];
+            curYoloTensor.stride = m_InputW / curYoloTensor.gridSize;
+            m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.gridSize
+                * curYoloTensor.gridSize
+                * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses));
+            std::string layerName = "yolo_" + std::to_string(i);
+            curYoloTensor.blobName = layerName;
+
+            // 添加yolo层
+            m_YoloTensor.push_back(previous);
+            tensorOutputs.push_back(previous);
+
+            // 调整 yolo层的信息
+            Dims inputdims = previous->getDimensions();
+            YoloKernel tmpYolokernel;
+            tmpYolokernel.height= inputdims.d[1];
+            tmpYolokernel.width= inputdims.d[2];
+            // 添加yolo anchors
+            int masksize = m_OutputTensors.at(outputTensorCount).masks.size();
+            tmpYolokernel.everyYoloAnchors = masksize;
+
+            for(int i=0;i<masksize;i++)
+            {
+                int index = (int)m_OutputTensors.at(outputTensorCount).masks[i] * 2;
+                tmpYolokernel.anchors[2*i] = m_OutputTensors.at(outputTensorCount).anchors[index];
+                tmpYolokernel.anchors[2*i+1] = m_OutputTensors.at(outputTensorCount).anchors[index+1];
+            }
+
+            // 全局
+            m_YoloKernel.push_back(tmpYolokernel);
+
+            std::string inputVol = dimsToString(inputdims);
+            printLayerInfo(layerIndex, "yolo", inputVol, inputVol, std::to_string(weightPtr));
+
+            ++outputTensorCount;
+        } else if (m_ConfigBlocks.at(i).at("type") == "region") {
+            nvinfer1::Dims prevTensorDims = previous->getDimensions();
+            assert(prevTensorDims.d[1] == prevTensorDims.d[2]);
+            TensorInfo& curRegionTensor = m_OutputTensors.at(outputTensorCount);
+            curRegionTensor.gridSize = prevTensorDims.d[1];
+            curRegionTensor.stride = m_InputW / curRegionTensor.gridSize;
+            m_OutputTensors.at(outputTensorCount).volume = curRegionTensor.gridSize
+                * curRegionTensor.gridSize
+                * (curRegionTensor.numBBoxes * (5 + curRegionTensor.numClasses));
+            std::string layerName = "region_" + std::to_string(i);
+            curRegionTensor.blobName = layerName;
+            nvinfer1::plugin::RegionParameters RegionParameters{
+                static_cast<int>(curRegionTensor.numBBoxes), 4,
+                static_cast<int>(curRegionTensor.numClasses), nullptr};
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::IPluginV2* regionPlugin
+                = createRegionPlugin(RegionParameters);
+            assert(regionPlugin != nullptr);
+            nvinfer1::IPluginV2Layer* region =
+                network.addPluginV2(&previous, 1, *regionPlugin);
+            assert(region != nullptr);
+            region->setName(layerName.c_str());
+            previous = region->getOutput(0);
+            assert(previous != nullptr);
+            previous->setName(layerName.c_str());
+            std::string outputVol = dimsToString(previous->getDimensions());
+            network.markOutput(*previous);
+            channels = getNumChannels(previous);
+            tensorOutputs.push_back(region->getOutput(0));
+            printLayerInfo(layerIndex, "region", inputVol, outputVol, std::to_string(weightPtr));
+            std::cout << "Anchors are being converted to network input resolution i.e. Anchors x "
+                      << curRegionTensor.stride << " (stride)" << std::endl;
+            for (auto& anchor : curRegionTensor.anchors) anchor *= curRegionTensor.stride;
+            ++outputTensorCount;
+        } else if (m_ConfigBlocks.at(i).at("type") == "reorg") {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::IPluginV2* reorgPlugin = createReorgPlugin(2);
+            assert(reorgPlugin != nullptr);
+            nvinfer1::IPluginV2Layer* reorg =
+                network.addPluginV2(&previous, 1, *reorgPlugin);
+            assert(reorg != nullptr);
+
+            std::string layerName = "reorg_" + std::to_string(i);
+            reorg->setName(layerName.c_str());
+            previous = reorg->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            channels = getNumChannels(previous);
+            tensorOutputs.push_back(reorg->getOutput(0));
+            printLayerInfo(layerIndex, "reorg", inputVol, outputVol, std::to_string(weightPtr));
+        }
+        // route layers (single or concat)
+        else if (m_ConfigBlocks.at(i).at("type") == "route") {
+            std::string strLayers = m_ConfigBlocks.at(i).at("layers");
+            std::vector<int> idxLayers;
+            size_t lastPos = 0, pos = 0;
+            while ((pos = strLayers.find(',', lastPos)) != std::string::npos) {
+                int vL = std::stoi(trim(strLayers.substr(lastPos, pos - lastPos)));
+                idxLayers.push_back (vL);
+                lastPos = pos + 1;
+            }
+            if (lastPos < strLayers.length()) {
+                std::string lastV = trim(strLayers.substr(lastPos));
+                if (!lastV.empty()) {
+                    idxLayers.push_back (std::stoi(lastV));
+                }
+            }
+            assert (!idxLayers.empty());
+            std::vector<nvinfer1::ITensor*> concatInputs;
+            for (int idxLayer : idxLayers) {
+                if (idxLayer < 0) {
+                    idxLayer = tensorOutputs.size() + idxLayer;
+                }
+                assert (idxLayer >= 0 && idxLayer < (int)tensorOutputs.size());
+                concatInputs.push_back (tensorOutputs[idxLayer]);
+            }
+            nvinfer1::IConcatenationLayer* concat;
+            if(m_ConfigBlocks.at(i).find("groups") != m_ConfigBlocks.at(i).end())
+            {
+                assert(m_ConfigBlocks.at(i).find("group_id") != m_ConfigBlocks.at(i).end());
+                int gorups =  std::stoi(m_ConfigBlocks.at(i).at("groups"));
+                int group_id = std::stoi(m_ConfigBlocks.at(i).at("group_id"));
+                std::vector<nvinfer1::ITensor*> group_concatInputs;
+                for(auto concatInput : concatInputs)
+                {
+                    Dims out_shape = concatInput->getDimensions();
+                    ISliceLayer* tmp= network.addSlice(*concatInput,Dims3{out_shape.d[0]/2,0,0},Dims3{out_shape.d[0]/2,out_shape.d[1],out_shape.d[2]},Dims3{1,1,1});
+                    group_concatInputs.push_back(tmp->getOutput(0));
+                }
+                concat=network.addConcatenation(group_concatInputs.data(), group_concatInputs.size());
+            }else {
+                concat=network.addConcatenation(concatInputs.data(), concatInputs.size());
+            }
+
+            assert(concat != nullptr);
+            std::string concatLayerName = "route_" + std::to_string(i - 1);
+            concat->setName(concatLayerName.c_str());
+            // concatenate along the channel dimension
+            concat->setAxis(0);
+            previous = concat->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            // set the output volume depth
+            channels
+                = getNumChannels(previous);
+            tensorOutputs.push_back(concat->getOutput(0));
+            printLayerInfo(layerIndex, "route", "        -", outputVol,
+                           std::to_string(weightPtr));
+        } else if (m_ConfigBlocks.at(i).at("type") == "upsample") {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out = netAddUpsample(i - 1, m_ConfigBlocks[i],
+                weights, m_TrtWeights, channels, previous, &network);
+            previous = out->getOutput(0);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, "upsample", inputVol, outputVol, "    -");
+        } else if (m_ConfigBlocks.at(i).at("type") == "maxpool") {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out =
+                netAddMaxpool(i, m_ConfigBlocks.at(i), previous, &network);
+            previous = out->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr));
+        }
+        else
+        {
+            std::cout << "Unsupported layer type --> \""
+                      << m_ConfigBlocks.at(i).at("type") << "\"" << std::endl;
+            assert(0);
+        }
+    }
+
+    auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1");
+    assert(m_YoloKernel.size() == outputTensorCount);
+
+    // plugin filed 数量
+    int numyololayers = m_YoloKernel.size();
+
+    // 假定每个yolo输出层class相等
+    int numclass = m_OutputTensors[0].numClasses;
+    int input_w = m_InputW;
+    int input_h = m_InputH;
+
+    std::vector<PluginField> mPluginAttributes1 = {
+        PluginField("numclass", &numclass, PluginFieldType::kINT32, 1),
+        PluginField("input_w", &input_w, PluginFieldType::kINT32, 1),
+        PluginField("input_h", &input_h, PluginFieldType::kINT32, 1),
+        PluginField("numyololayers", &numyololayers, PluginFieldType::kINT32, 1),
+        PluginField("m_YoloKernel", &m_YoloKernel, PluginFieldType::kUNKNOWN, numyololayers),
+    };
+    PluginFieldCollection mFC1;
+    mFC1.nbFields = mPluginAttributes1.size();
+    mFC1.fields = mPluginAttributes1.data();
+    IPluginV2 * yoloplugin = creator->createPlugin(creator->getPluginName(), &mFC1);
+
+    ITensor** inputTensors_yolo = new ITensor*;
+    for (int i = 0; i<m_YoloTensor.size();i++)
+    {
+        inputTensors_yolo[i] = m_YoloTensor[i];
+    }
+
+    auto yolo = network.addPluginV2(inputTensors_yolo, 3, *yoloplugin);
+
+    previous = yolo->getOutput(0);
+    assert(previous != nullptr);
+    previous->setName("prob");
+    std::string outputVol = dimsToString(previous->getDimensions());
+    network.markOutput(*previous);
+
+    if ((int)weights.size() != weightPtr)
+    {
+        std::cout << "Number of unused weights left : " << (int)weights.size() - weightPtr << std::endl;
+        assert(0);
+    }
+
+    std::cout << "Output yolo blob names :" << std::endl;
+    for (auto& tensor : m_OutputTensors) {
+        std::cout << tensor.blobName << std::endl;
+    }
+
+    int nbLayers = network.getNbLayers();
+    std::cout << "Total number of yolo layers: " << nbLayers << std::endl;
+
+    return NVDSINFER_SUCCESS;
+}
+
+// Parses a darknet-style .cfg file into a list of key/value blocks.
+// Each "[section]" starts a new block whose "type" key holds the section name.
+// Returns the blocks in file order.
+std::vector<std::map<std::string, std::string>>
+Yolo::parseConfigFile (const std::string cfgFilePath)
+{
+    assert(fileExists(cfgFilePath));
+    std::ifstream file(cfgFilePath);
+    assert(file.good());
+    std::string line;
+    std::vector<std::map<std::string, std::string>> blocks;
+    std::map<std::string, std::string> block;
+
+    while (getline(file, line))
+    {
+        // Strip a trailing '\r' (cfg files saved with CRLF endings), then trim
+        // BEFORE the empty/'#' checks: the original trimmed afterwards, so a
+        // whitespace-only line reached line.front() on an empty string (UB)
+        // and indented comment lines were not skipped.
+        if (!line.empty() && line.back() == '\r') line.pop_back();
+        line = trim(line);
+        if (line.empty()) continue;
+        if (line.front() == '#') continue;
+        if (line.front() == '[')
+        {
+            // New section: flush the previous block, if any.
+            if (!block.empty())
+            {
+                blocks.push_back(block);
+                block.clear();
+            }
+            std::string key = "type";
+            std::string value = trim(line.substr(1, line.size() - 2));
+            block.insert(std::pair<std::string, std::string>(key, value));
+        }
+        else
+        {
+            // "key = value" line; skip malformed lines that have no '='.
+            std::size_t cpos = line.find('=');
+            if (cpos == std::string::npos) continue;
+            std::string key = trim(line.substr(0, cpos));
+            std::string value = trim(line.substr(cpos + 1));
+            block.insert(std::pair<std::string, std::string>(key, value));
+        }
+    }
+    // Flush the final block (guard avoids an empty block for an empty file).
+    if (!block.empty()) blocks.push_back(block);
+    return blocks;
+}
+
+// Walks the parsed cfg blocks and caches network-level parameters
+// (input H/W/C/size) plus one TensorInfo per region/yolo output layer.
+void Yolo::parseConfigBlocks()
+{
+    for (auto block : m_ConfigBlocks) {
+        if (block.at("type") == "net")
+        {
+            // [net] section: input geometry for the whole network.
+            assert((block.find("height") != block.end())
+                   && "Missing 'height' param in network cfg");
+            assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg");
+            assert((block.find("channels") != block.end())
+                   && "Missing 'channels' param in network cfg");
+
+            m_InputH = std::stoul(block.at("height"));
+            m_InputW = std::stoul(block.at("width"));
+            m_InputC = std::stoul(block.at("channels"));
+//            assert(m_InputW == m_InputH);
+            m_InputSize = m_InputC * m_InputH * m_InputW;
+        }
+        else if ((block.at("type") == "region") || (block.at("type") == "yolo"))
+        {
+            assert((block.find("num") != block.end())
+                   && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str());
+            assert((block.find("classes") != block.end())
+                   && std::string("Missing 'classes' param in " + block.at("type") + " layer")
+                          .c_str());
+            assert((block.find("anchors") != block.end())
+                   && std::string("Missing 'anchors' param in " + block.at("type") + " layer")
+                          .c_str());
+
+            TensorInfo outputTensor;
+            // Parse the comma-separated anchor list into floats.
+            std::string anchorString = block.at("anchors");
+            while (!anchorString.empty())
+            {
+                int npos = anchorString.find_first_of(',');
+                if (npos != -1)
+                {
+                    float anchor = std::stof(trim(anchorString.substr(0, npos)));
+                    outputTensor.anchors.push_back(anchor);
+                    anchorString.erase(0, npos + 1);
+                }
+                else
+                {
+                    // Last anchor (no trailing comma).
+                    float anchor = std::stof(trim(anchorString));
+                    outputTensor.anchors.push_back(anchor);
+                    break;
+                }
+            }
+
+            // yolov3/v4 family: each output layer selects its anchors through
+            // the comma-separated 'mask' index list.
+            if ((m_NetworkType == "yolov3") || (m_NetworkType == "yolov3-tiny") || (m_NetworkType == "yolov4-tiny") ||
+                    (m_NetworkType == "yolov4") || (m_NetworkType == "yolov4-turnstile"))
+            {
+                assert((block.find("mask") != block.end())
+                       && std::string("Missing 'mask' param in " + block.at("type") + " layer")
+                              .c_str());
+
+                std::string maskString = block.at("mask");
+                while (!maskString.empty())
+                {
+                    int npos = maskString.find_first_of(',');
+                    if (npos != -1)
+                    {
+                        uint mask = std::stoul(trim(maskString.substr(0, npos)));
+                        outputTensor.masks.push_back(mask);
+                        maskString.erase(0, npos + 1);
+                    }
+                    else
+                    {
+                        uint mask = std::stoul(trim(maskString));
+                        outputTensor.masks.push_back(mask);
+                        break;
+                    }
+                }
+            }
+
+            // Boxes per grid cell: the mask size when present, else 'num'.
+            outputTensor.numBBoxes = outputTensor.masks.size() > 0
+                ? outputTensor.masks.size()
+                : std::stoul(trim(block.at("num")));
+            outputTensor.numClasses = std::stoul(block.at("classes"));
+            m_OutputTensors.push_back(outputTensor);
+        }
+    }
+}
+
+// Releases every host-side weight buffer previously handed to TensorRT.
+void Yolo::destroyNetworkUtils() {
+    for (auto& trtWeight : m_TrtWeights) {
+        if (trtWeight.count > 0)
+            free(const_cast<void*>(trtWeight.values));
+    }
+    m_TrtWeights.clear();
+}
+

+ 274 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/yolodetect.cpp

@@ -0,0 +1,274 @@
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <vector>
+#include <chrono>
+#include <string>
+#include <cmath>
+
+#include "yolodetect.h"
+
+//yolo.weights-->yolo.engine
+// Builds the TensorRT engine from the darknet weights, serializes it and
+// writes the plan to m_modelname. Returns false on any failure.
+bool YoloDetect::saveEngine()
+{
+    IBuilder* builder = createInferBuilder(gLogger);
+    Yolo yolo(m_networkInfo);
+    ICudaEngine *cudaEngine = yolo.createEngine (builder);
+    if (cudaEngine == nullptr) {
+        std::cerr << "engine creation failed" << std::endl;
+        builder->destroy();        // the original leaked the builder on every path
+        return false;
+    }
+    IHostMemory* modelStream = cudaEngine->serialize();
+    assert(modelStream != nullptr);
+    std::ofstream p(m_modelname, std::ios::binary);
+    if (!p) {
+        std::cerr << "could not open plan output file" << std::endl;
+        modelStream->destroy();
+        cudaEngine->destroy();
+        builder->destroy();
+        return false;
+    }
+    p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
+    // Release everything we created; the serialized plan now lives on disk
+    // and will be reloaded through loadEngine().
+    modelStream->destroy();
+    cudaEngine->destroy();
+    builder->destroy();
+    return true;
+}
+
+//deserializeCudaEngine
+// Reads the serialized plan file m_modelname and deserializes it into an
+// ICudaEngine. Returns nullptr when the plan file is missing/unreadable.
+ICudaEngine* YoloDetect::loadEngine(IRuntime& runtime)
+{
+    std::ifstream planFile(m_modelname, std::ios::binary);
+    if (!planFile.good()) {
+        std::cout<<m_modelname<<" not found!"<<std::endl;
+        return nullptr;
+    }
+    // Determine the blob size, then read it in one shot.
+    planFile.seekg(0, planFile.end);
+    const size_t planSize = planFile.tellg();
+    planFile.seekg(0, planFile.beg);
+    std::vector<char> planBlob(planSize);
+    planFile.read(planBlob.data(), planSize);
+    planFile.close();
+    return runtime.deserializeCudaEngine(planBlob.data(), planSize);
+}
+
+//load engine
+// Loads (or, on the first run, builds) the TensorRT engine, creates an
+// execution context in `context`, and caches the input/output binding dims.
+// Returns false when no engine/context could be obtained.
+bool YoloDetect::loadModel(IExecutionContext*& context)
+{
+    std::cout<<"[API] Load engine from "<< m_modelname <<std::endl;
+    IRuntime* runtime = createInferRuntime(gLogger);
+    assert(runtime != nullptr);
+    ICudaEngine* engine = loadEngine(*runtime);
+    if(engine == nullptr)
+    {
+        std::cout<<"[API] Build engine to "<< m_modelname <<std::endl;
+        if(!saveEngine())              // build failed: nothing to reload
+        {
+            runtime->destroy();
+            return false;
+        }
+        std::cout << "[API] Build engine done!"<<std::endl;
+        std::cout<<"[API] Reload engine from "<< m_modelname <<std::endl;
+        engine = loadEngine(*runtime);
+    }
+    if(engine == nullptr)              // still no engine: the original crashed here
+    {
+        runtime->destroy();
+        return false;
+    }
+    // NOTE(review): the runtime is destroyed while the engine is still in use;
+    // newer TensorRT releases require the runtime to outlive the engine — verify
+    // against the TRT version this builds with.
+    runtime->destroy();
+    context = engine->createExecutionContext();
+    if(context == nullptr)
+        return false;
+    std::cout << "[API] Load engine done!"<<std::endl;
+
+    int numbindings=engine->getNbBindings();
+    std::cout<< "getNbBindings: " << numbindings<<std::endl;
+
+    const char* layername = engine->getBindingName(1);
+    std::cout<< "getBindingName:1 " << layername<<std::endl;
+
+    Dims in = engine->getBindingDimensions(0);
+    std::cout<< "in dims: " << in.d[0]<<" "<<in.d[1]<<" "<<in.d[2]<<" "<<in.d[3]<<std::endl;
+    Dims out = engine->getBindingDimensions(1);
+    std::cout<< "out dims: " << out.d[0]<<" "<<out.d[1]<<" "<<out.d[2]<<" "<<out.d[3]<<std::endl;
+
+    // Cache geometry for preprocessing and output parsing (CHW assumed).
+    m_input_h =  in.d[1];
+    m_input_w =  in.d[2];
+    m_output_size = out.d[0];
+    return true;
+}
+
+// Runs one synchronous inference pass: H2D copy, enqueue, D2H copy.
+// input:  batch_size * 3 * m_input_h * m_input_w floats (planar CHW)
+// output: batch_size * m_output_size floats
+// NOTE: device buffers are allocated per call; cache them as members if this
+// ever becomes a hot path.
+void YoloDetect::doInference(IExecutionContext& context,float* input, float* output, int batch_size)
+{
+    void* buffers[2] = {nullptr, nullptr};
+    const size_t inBytes  = (size_t)batch_size * 3 * m_input_h * m_input_w * sizeof(float);
+    const size_t outBytes = (size_t)batch_size * m_output_size * sizeof(float);
+    // The original ignored every CUDA return code; a failed allocation then
+    // surfaced later as a mysterious enqueue failure.
+    if (cudaMalloc(&buffers[0], inBytes) != cudaSuccess ||
+        cudaMalloc(&buffers[1], outBytes) != cudaSuccess) {
+        std::cerr << "cudaMalloc failed in doInference" << std::endl;
+        cudaFree(buffers[0]);   // cudaFree(nullptr) is a safe no-op
+        cudaFree(buffers[1]);
+        return;
+    }
+    // Create stream
+    cudaStream_t stream;
+    cudaStreamCreate(&stream);
+    cudaMemcpyAsync(buffers[0], input, inBytes, cudaMemcpyHostToDevice, stream);
+    context.enqueue(batch_size, buffers, stream, nullptr);
+    cudaMemcpyAsync(output, buffers[1], outBytes, cudaMemcpyDeviceToHost, stream);
+    cudaStreamSynchronize(stream);
+    cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+        std::cerr << "CUDA error in doInference: " << cudaGetErrorString(err) << std::endl;
+    // Release stream and buffers
+    cudaStreamDestroy(stream);
+    cudaFree(buffers[0]);
+    cudaFree(buffers[1]);
+}
+// Resizes the frame directly to the network input size.
+// No letterboxing: aspect ratio is NOT preserved; get_rect() applies the
+// inverse of this same plain-resize mapping when decoding boxes.
+cv::Mat YoloDetect::preprocess_img(cv::Mat& img,int input_w,int input_h) {
+    cv::Mat resized;
+    cv::resize(img, resized, cv::Size(input_w, input_h), 0, 0, cv::INTER_CUBIC);
+    return resized;
+}
+
+// Maps a network-space box (cx, cy, w, h in `bbox`) back to a cv::Rect in
+// the original image, undoing the plain resize done in preprocess_img().
+cv::Rect YoloDetect::get_rect(cv::Mat& img, float bbox[4],int input_w,int input_h) {
+    const float scale_w = input_w / (img.cols * 1.0);
+    const float scale_h = input_h / (img.rows * 1.0);
+
+    // Center/size -> corners, still in network coordinates (truncated to int,
+    // matching the original arithmetic).
+    int left   = bbox[0] - bbox[2] / 2.f;
+    int right  = bbox[0] + bbox[2] / 2.f;
+    int top    = bbox[1] - bbox[3] / 2.f;
+    int bottom = bbox[1] + bbox[3] / 2.f;
+
+    // Scale corners back into image coordinates.
+    left   = left   / scale_w;
+    right  = right  / scale_w;
+    top    = top    / scale_h;
+    bottom = bottom / scale_h;
+
+    return cv::Rect(left, top, right - left, bottom - top);
+}
+
+// Intersection-over-union of two boxes given as (cx, cy, w, h).
+// Returns 0 when the boxes do not overlap.
+float YoloDetect::iou(float lbox[4], float rbox[4]) {
+    const float left   = std::max(lbox[0] - lbox[2]/2.f, rbox[0] - rbox[2]/2.f);
+    const float right  = std::min(lbox[0] + lbox[2]/2.f, rbox[0] + rbox[2]/2.f);
+    const float top    = std::max(lbox[1] - lbox[3]/2.f, rbox[1] - rbox[3]/2.f);
+    const float bottom = std::min(lbox[1] + lbox[3]/2.f, rbox[1] + rbox[3]/2.f);
+
+    if (top > bottom || left > right)
+        return 0.0f;
+
+    const float interArea = (right - left) * (bottom - top);
+    return interArea / (lbox[2]*lbox[3] + rbox[2]*rbox[3] - interArea);
+}
+
+// Sort predicate for NMS: higher detection confidence first (descending).
+bool YoloDetect::cmp(Detection& a, Detection& b) {
+    return a.det_confidence > b.det_confidence;
+}
+
+// Greedy per-class non-maximum suppression over the raw network output.
+// `output` layout: output[0] = number of detections, followed by packed
+// Detection records of 7 floats each (bbox[4], det_conf, class_id, class_conf).
+// Surviving detections are appended to `res`.
+void YoloDetect::nms(std::vector<Detection>& res, float *output, float ignore_thresh,float nms_thresh) {
+    // Bucket detections by class id. (Renamed from `m`: the original shadowed
+    // this map with the inner loop counter `m`, which is an accident waiting
+    // to happen.)
+    std::map<float, std::vector<Detection>> per_class;
+
+    for (int i = 0; i < output[0] && i < 1000; i++) {
+        if (output[1 + 7 * i + 4] <= ignore_thresh) continue;
+        Detection det;
+        memcpy(&det, &output[1 + 7 * i], 7 * sizeof(float));
+        if (per_class.count(det.class_id) == 0) per_class.emplace(det.class_id, std::vector<Detection>());
+        per_class[det.class_id].push_back(det);
+    }
+    for (auto it = per_class.begin(); it != per_class.end(); it++) {
+        auto& dets = it->second;
+        std::sort(dets.begin(), dets.end(), cmp);
+        // Keep the best remaining box, drop every later box overlapping it
+        // more than nms_thresh.
+        for (size_t best = 0; best < dets.size(); ++best) {
+            auto& item = dets[best];
+            res.push_back(item);
+            for (size_t n = best + 1; n < dets.size(); ++n) {
+                if (iou(item.bbox, dets[n].bbox) > nms_thresh) {
+                    dets.erase(dets.begin()+n);
+                    --n;
+                }
+            }
+        }
+    }
+}
+// Detects objects in a single image; survivors of NMS are appended to
+// detect_results. Returns true when at least one detection remains.
+bool YoloDetect::process(IExecutionContext& context, cv::Mat &image, std::vector<Detection> &detect_results,
+                         float ignore_thresh,float nms_thresh)
+{
+    // Heap buffers instead of the original runtime-sized stack arrays: VLAs
+    // are non-standard C++ and 3*H*W floats can easily overflow the stack.
+    std::vector<float> data(3 * m_input_h * m_input_w);
+    std::vector<float> prob(m_output_size);
+    cv::Mat pr_img = preprocess_img(image,m_input_w,m_input_h);
+    // Interleaved BGR uchar -> planar RGB float in [0,1].
+    for (int i = 0; i < m_input_h * m_input_w; i++) {
+        data[i] = pr_img.at<cv::Vec3b>(i)[2] / 255.0;
+        data[i + m_input_h * m_input_w] = pr_img.at<cv::Vec3b>(i)[1] / 255.0;
+        data[i + 2 * m_input_h * m_input_w] = pr_img.at<cv::Vec3b>(i)[0] / 255.0;
+    }
+    int batch_size = 1;
+    doInference(context,data.data(),prob.data(),batch_size);
+    nms(detect_results, prob.data(),ignore_thresh,nms_thresh);
+    return !detect_results.empty();
+}
+
+//batchsize=2
+// Batched variant: preprocess max_batch_size images, run one inference pass,
+// then NMS each image's slice of the output. Returns true when any image
+// produced at least one detection.
+bool YoloDetect::process(IExecutionContext& context, uint max_batch_size,
+                         std::vector<cv::Mat> &image_vec,
+                         std::vector<std::vector<Detection>> &detect_results,
+                         float ignore_thresh,float nms_thresh)
+{
+    // Heap buffers instead of runtime-sized stack arrays (non-standard VLAs,
+    // stack-overflow risk for large inputs or batch sizes).
+    std::vector<float> data((size_t)max_batch_size * 3 * m_input_h * m_input_w);
+    std::vector<float> prob((size_t)max_batch_size * m_output_size);
+    for(uint idx=0;idx<max_batch_size;idx++)
+    {
+        cv::Mat pr_img =  preprocess_img(image_vec[idx],m_input_w,m_input_h);
+        // Interleaved BGR uchar -> planar RGB float in [0,1], one 3-plane
+        // slice per batch item.
+        for (int i = 0; i < m_input_h * m_input_w; i++) {
+            data[i + (idx * 3 + 0) * m_input_h * m_input_w] = pr_img.at<cv::Vec3b>(i)[2] / 255.0;
+            data[i + (idx * 3 + 1) * m_input_h * m_input_w] = pr_img.at<cv::Vec3b>(i)[1] / 255.0;
+            data[i + (idx * 3 + 2) * m_input_h * m_input_w] = pr_img.at<cv::Vec3b>(i)[0] / 255.0;
+        }
+
+    }
+    doInference(context,data.data(),prob.data(),(int)max_batch_size);
+    bool flag = false;
+    for(uint idx=0;idx<max_batch_size;idx++)
+    {
+        nms(detect_results[idx],(prob.data()+idx*m_output_size),ignore_thresh,nms_thresh);
+        if(detect_results[idx].size()>0) flag = true;
+    }
+    return flag;
+}
+
+
+
+
+

+ 129 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/yololayer.cpp

@@ -0,0 +1,129 @@
+#include "yololayer.h"
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace YoloLayer;
+using namespace nvinfer1;
+
+// Default construction: the yolo layer descriptions come from the
+// compile-time constants yolo1/yolo2/yolo3 and CLASS_NUM (see yololayer.h).
+YoloLayerPlugin::YoloLayerPlugin()
+{
+    mClassCount = CLASS_NUM;
+    mYoloKernel.clear();
+    mYoloKernel.push_back(yolo1);
+    mYoloKernel.push_back(yolo2);
+    mYoloKernel.push_back(yolo3);
+
+    mKernelCount = mYoloKernel.size();
+}
+
+// Nothing to release: all state is owned by value members.
+YoloLayerPlugin::~YoloLayerPlugin()
+{
+}
+
+// create the plugin at runtime from a byte stream (engine deserialization).
+// Field order must match serialize() exactly.
+YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
+{
+
+    const char *d = reinterpret_cast<const char *>(data), *a = d;
+    read(d, mClassCount);
+    read(d, mThreadCount);
+    read(d, mKernelCount);
+    mYoloKernel.resize(mKernelCount);
+    auto kernelSize = mKernelCount*sizeof(YoloKernel);
+    memcpy(mYoloKernel.data(),d,kernelSize);
+    d += kernelSize;
+
+    // The read cursor must land exactly at the end of the serialized blob.
+    assert(d == a + length);
+}
+
+// Writes plugin state into `buffer` in the same order the deserializing
+// constructor reads it back.
+void YoloLayerPlugin::serialize(void* buffer) const
+{
+    // Fixed: the original logged "start getSerializationSize" here,
+    // a copy-paste of the message from the method below.
+    std::cout<<"start serialize"<<std::endl;
+    char* d = static_cast<char*>(buffer), *a = d;
+    write(d, mClassCount);
+    write(d, mThreadCount);
+    write(d, mKernelCount);
+    auto kernelSize = mKernelCount*sizeof(YoloKernel);
+    memcpy(d,mYoloKernel.data(),kernelSize);
+    d += kernelSize;
+
+    assert(d == a + getSerializationSize());
+}
+
+// Number of bytes serialize() will write.
+size_t YoloLayerPlugin::getSerializationSize() const
+{
+    std::cout<<"start getSerializationSize"<<std::endl;
+    return sizeof(mClassCount) + sizeof(mThreadCount) + sizeof(mKernelCount)  + sizeof(YoloLayer::YoloKernel) * mYoloKernel.size();
+}
+
+// No device-side resources to set up.
+int YoloLayerPlugin::initialize()
+{
+    return 0;
+}
+
+// Single output: one count slot followed by MAX_OUTPUT_BBOX_COUNT packed
+// Detection records, flattened into the channel dimension.
+Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
+{
+    //output the result to channel
+    int totalsize = MAX_OUTPUT_BBOX_COUNT * sizeof(Detection) / sizeof(float);
+
+    return Dims3(totalsize + 1, 1, 1);
+}
+
+// Set plugin namespace
+void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace)
+{
+    mPluginNamespace = pluginNamespace;
+}
+
+const char* YoloLayerPlugin::getPluginNamespace() const
+{
+    return mPluginNamespace;
+}
+
+// Only FP32 NCHW is implemented by the CUDA kernel.
+bool YoloLayerPlugin::supportsFormat (
+    nvinfer1::DataType type, nvinfer1::PluginFormat format) const {
+    return (type == nvinfer1::DataType::kFLOAT &&
+            format == nvinfer1::PluginFormat::kNCHW);
+}
+
+// Sanity-checks the binding configuration (expects exactly 3 yolo inputs).
+void YoloLayerPlugin::configureWithFormat (
+    const nvinfer1::Dims* inputDims, int nbInputs,
+    const nvinfer1::Dims* outputDims, int nbOutputs,
+    nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize)
+{
+    assert(nbInputs == 3);
+    assert (format == nvinfer1::PluginFormat::kNCHW);
+    assert(inputDims != nullptr);
+}
+
+// Type/version pair must match what YoloPluginCreator registers.
+const char* YoloLayerPlugin::getPluginType() const
+{
+    return "YoloLayer_TRT";
+}
+
+const char* YoloLayerPlugin::getPluginVersion() const
+{
+    return "1";
+}
+
+void YoloLayerPlugin::destroy()
+{
+    delete this;
+}
+
+// Clone the plugin, preserving the current state.
+IPluginV2* YoloLayerPlugin::clone() const
+{
+    // Copy-construct instead of default-construct: a default-constructed
+    // clone silently rebuilds its kernels/class count from the compile-time
+    // constants, discarding whatever state this instance was deserialized
+    // or configured with.
+    return new YoloLayerPlugin(*this);
+}
+
+// Standard plugin entry point: decode all yolo heads into outputs[0].
+int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
+{
+    forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize, mYoloKernel, mThreadCount);
+    return 0;
+}
+
+REGISTER_TENSORRT_PLUGIN(YoloPluginCreator);

+ 271 - 0
src/detection/detection_yolov4_shigong_monocular/yolov4Tensorrt/src/yololayer.cu

@@ -0,0 +1,271 @@
+#include "yololayer.h"
+
+namespace nvinfer1
+{
+    // Build-network phase: constructs the plugin from the PluginFieldCollection
+    // assembled by the network builder (numclass / input_w / input_h /
+    // numyololayers / m_YoloKernel).
+    YoloLayerPlugin::YoloLayerPlugin(const PluginFieldCollection& fc)
+    {
+        // Initialize so a missing "m_YoloKernel" field is caught below instead
+        // of dereferencing an indeterminate pointer (original left this
+        // uninitialized).
+        void* tmpvoid = nullptr;
+        const PluginField* fields = fc.fields;
+        for (int i = 0; i < fc.nbFields; ++i)
+        {
+            const char* attrName = fields[i].name;
+            if (!strcmp(attrName, "numclass")){
+                mClassCount = *(static_cast<const int*>(fields[i].data));
+            }
+            else if (!strcmp(attrName, "input_w")){
+                mInput_w= *(static_cast<const int*>(fields[i].data));
+            }else if(!strcmp(attrName, "input_h")){
+                mInput_h = *(static_cast<const int*>(fields[i].data));
+            }else if(!strcmp(attrName, "numyololayers")){
+                mNumYoloLayers = *(static_cast<const int*>(fields[i].data));
+            }else if(!strcmp(attrName, "m_YoloKernel")){
+                assert(fields[i].type == PluginFieldType::kUNKNOWN);
+                tmpvoid = const_cast<void*>(fields[i].data);
+            }
+        }
+        // Copy the yolo output-layer descriptions. NOTE(review): this smuggles
+        // a std::vector through a void* PluginField, which only works because
+        // the creator and this constructor live in the same binary.
+        assert(tmpvoid != nullptr && "m_YoloKernel plugin field missing");
+        mYoloKernel = *(std::vector<YoloKernel> *)tmpvoid;
+        std::cout<<"mYoloKernel.size()"<<mYoloKernel.size()<<std::endl;
+    }
+    
+    // All state is held in value members; nothing to release.
+    YoloLayerPlugin::~YoloLayerPlugin()
+    {}
+    // Create the plugin at runtime from a byte stream: the deserialization
+    // entry point used when an engine is loaded. Field order must match
+    // serialize() exactly.
+    YoloLayerPlugin::YoloLayerPlugin(const void* data, size_t length)
+    {
+        using namespace Tn;
+        const char *d = reinterpret_cast<const char *>(data), *a = d;
+        read(d, mClassCount);
+        read(d, mThreadCount);
+        read(d, mNumYoloLayers);
+        read(d, mInput_h);
+        read(d, mInput_w);
+        mYoloKernel.resize(mNumYoloLayers);
+        auto kernelSize = mNumYoloLayers*sizeof(YoloKernel);
+        memcpy(mYoloKernel.data(),d,kernelSize);
+        d += kernelSize;
+        // Read cursor must land exactly at the end of the serialized blob.
+        assert(d == a + length);
+    }
+    // Serialize the plugin: writes every parameter the plugin needs into the
+    // engine blob, in the order the deserializing constructor reads it.
+    void YoloLayerPlugin::serialize(void* buffer) const
+    {
+        using namespace Tn;
+        char* d = static_cast<char*>(buffer), *a = d;
+        write(d, mClassCount);
+        write(d, mThreadCount);
+        write(d, mNumYoloLayers);
+        write(d, mInput_h);
+        write(d, mInput_w);
+        auto kernelSize = mNumYoloLayers*sizeof(YoloKernel);
+        memcpy(d,mYoloKernel.data(),kernelSize);
+        d += kernelSize;
+        assert(d == a + getSerializationSize());
+    }
+    // Serialization phase: the exact number of bytes serialize() will write.
+    size_t YoloLayerPlugin::getSerializationSize() const
+    {  
+        int size  = sizeof(mInput_w) +sizeof(mInput_h)+
+                sizeof(mClassCount) + sizeof(mThreadCount) +
+                sizeof(mNumYoloLayers)  + sizeof(YoloKernel) * mYoloKernel.size();
+        return size;
+    }
+
+    // No device-side resources to set up.
+    int YoloLayerPlugin::initialize()
+    { 
+        return 0;
+    }
+    
+    // Single output tensor: one count slot followed by max_output_box packed
+    // Detection records, flattened into the channel dimension.
+    Dims YoloLayerPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
+    {
+        //output the result to channel
+        int totalsize = max_output_box * sizeof(Detection) / sizeof(float);
+        return Dims3(totalsize + 1, 1, 1);
+    }
+
+    // Set plugin namespace
+    void YoloLayerPlugin::setPluginNamespace(const char* pluginNamespace)
+    {
+        mPluginNamespace = pluginNamespace;
+    }
+
+    const char* YoloLayerPlugin::getPluginNamespace() const
+    {
+        return mPluginNamespace;
+    }
+
+    // Return the DataType of the plugin output at the requested index
+    // (always FP32 — matches the decode kernel).
+    DataType YoloLayerPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const
+    {
+        return DataType::kFLOAT;
+    }
+
+    // Return true if output tensor is broadcast across a batch.
+    bool YoloLayerPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const
+    {
+        return false;
+    }
+
+    // Return true if plugin can use input that is broadcast across batch without replication.
+    bool YoloLayerPlugin::canBroadcastInputAcrossBatch(int inputIndex) const
+    {
+        return false;
+    }
+
+    // No binding-dependent configuration is needed by this plugin.
+    void YoloLayerPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
+    {
+    }
+
+    // Attach the plugin object to an execution context and grant the plugin the access to some context resource.
+    void YoloLayerPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)
+    {
+    }
+
+    // Detach the plugin object from its execution context.
+    void YoloLayerPlugin::detachFromContext() {}
+
+    // Type/version pair must match what YoloPluginCreator registers.
+    const char* YoloLayerPlugin::getPluginType() const
+    {
+        return "YoloLayer_TRT";
+    }
+
+    const char* YoloLayerPlugin::getPluginVersion() const
+    {
+        return "1";
+    }
+
+    void YoloLayerPlugin::destroy()
+    {
+        delete this;
+    }
+
+    // Clone the plugin: copy-constructs so all deserialized/configured state
+    // (kernels, class count, input size) is preserved in the copy.
+    IPluginV2IOExt* YoloLayerPlugin::clone() const
+    {
+
+        YoloLayerPlugin *p = new YoloLayerPlugin(*this);
+        p->setPluginNamespace(mPluginNamespace);
+        return p;
+    }
+    // Device-side sigmoid helper.
+    __device__ float Logist(float data){ return 1.0f / (1.0f + __expf(-data)); };
+    // Decode kernel: one thread per (batch, grid cell); each thread iterates
+    // over the layer's anchors, appending detections above ignore_thresh to
+    // the per-image output slice via an atomic counter in output[0].
+    __global__ void CalDetection(const float *input, float *output,int noElements, 
+            int yoloWidth,int yoloHeight,const float* anchors,int classes,
+                                 int outputElem,int input_w,int input_h,
+                                 float ignore_thresh,int every_yolo_anchors,int max_out_put_bbox_count) {
+ 
+        int idx = threadIdx.x + blockDim.x * blockIdx.x;
+        if (idx >= noElements) return;
+
+        int total_grid = yoloWidth * yoloHeight;
+        int bnIdx = idx / total_grid;
+        idx = idx - total_grid*bnIdx;
+        int info_len_i = 5 + classes;
+        const float* curInput = input + bnIdx * (info_len_i * total_grid * every_yolo_anchors);
+
+        // Fixed: the original hard-coded `k < 3` here even though the actual
+        // per-layer anchor count is passed in as every_yolo_anchors, breaking
+        // any config whose yolo layers do not use exactly 3 anchors.
+        for (int k = 0; k < every_yolo_anchors; ++k) {
+            int class_id = 0;
+            float max_cls_prob = 0.0;
+            // Best class score for this anchor at this cell.
+            for (int i = 5; i < info_len_i; ++i) {
+                float p = Logist(curInput[idx + k * info_len_i * total_grid + i * total_grid]);
+                if (p > max_cls_prob) {
+                    max_cls_prob = p;
+                    class_id = i - 5;
+                }
+            }
+            float box_prob = Logist(curInput[idx + k * info_len_i * total_grid + 4 * total_grid]);
+            if (max_cls_prob*box_prob < ignore_thresh) continue;
+
+            // Reserve a slot in this image's output slice.
+            float *res_count = output + bnIdx*outputElem;
+            int count = (int)atomicAdd(res_count, 1);
+            if (count >= max_out_put_bbox_count) return;
+            char* data = (char * )res_count + sizeof(float) + count*sizeof(Detection);
+            Detection* det =  (Detection*)(data);
+
+            int row = idx / yoloWidth;
+            int col = idx % yoloWidth;
+
+            //Location: decode center/size back into network input coordinates
+            det->bbox[0] = (col + Logist(curInput[idx + k * info_len_i * total_grid + 0 * total_grid]))* input_w/ yoloWidth;
+            det->bbox[1] = (row + Logist(curInput[idx + k * info_len_i * total_grid + 1 * total_grid]))* input_h/ yoloHeight;
+            det->bbox[2] = __expf(curInput[idx + k * info_len_i * total_grid + 2 * total_grid]) * anchors[2*k];
+            det->bbox[3] = __expf(curInput[idx + k * info_len_i * total_grid + 3 * total_grid]) * anchors[2*k + 1];
+            det->det_confidence = box_prob;
+            det->class_id = class_id;
+            det->class_confidence = max_cls_prob;
+        }
+    }
+
+    // Launches the decode kernel once per yolo output layer.
+    // `output` holds batchSize slices of outputElem floats each
+    // (count slot + packed Detection records).
+    void YoloLayerPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize)
+    {
+        // Floats per batch item in the output tensor.
+        int outputElem = 1 + max_output_box * sizeof(Detection) / sizeof(float);
+        // Zero each image's detection counter (first float of its slice),
+        // on the caller's stream so ordering with the kernels is preserved.
+        for(int idx = 0 ; idx < batchSize; ++idx) {
+            CUDA_CHECK(cudaMemsetAsync(output + idx*outputElem, 0, sizeof(float), stream));
+        }
+        int numElem = 0;
+        void* devAnchor = nullptr;
+        for (unsigned int i = 0;i< mYoloKernel.size();++i)
+        {
+            // Parameters of this yolo layer.
+            const auto& yolo = mYoloKernel[i];
+            numElem = yolo.width*yolo.height*batchSize;
+            if (numElem < mThreadCount)
+                mThreadCount = numElem;
+            int every_yolo_anchor_num = yolo.everyYoloAnchors;
+            size_t AnchorLen = sizeof(float)* yolo.everyYoloAnchors*2;
+            CUDA_CHECK(cudaMalloc(&devAnchor,AnchorLen));
+            CUDA_CHECK(cudaMemcpy(devAnchor, yolo.anchors, AnchorLen, cudaMemcpyHostToDevice));
+            // Launch on the caller's stream (the original launched on the
+            // default stream) and pass mInput_h for input_h — the original
+            // passed mInput_w twice, mis-decoding boxes on non-square inputs.
+            CalDetection<<< (yolo.width*yolo.height*batchSize + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>
+                (inputs[i],output, numElem, yolo.width, yolo.height,
+                 (float *)devAnchor, mClassCount ,outputElem,mInput_w, mInput_h,
+                 mIgnore_thresh,every_yolo_anchor_num,max_output_box);
+            CUDA_CHECK(cudaGetLastError());
+            // The anchors must stay alive until the kernel has finished: the
+            // original called cudaFree(devAnchor) BEFORE the launch, so the
+            // kernel read freed device memory. Synchronize, then free.
+            CUDA_CHECK(cudaStreamSynchronize(stream));
+            CUDA_CHECK(cudaFree(devAnchor));
+            devAnchor = nullptr;
+        }
+    }
+
+    // Standard plugin entry point: decode all yolo heads into outputs[0]
+    // on the supplied stream.
+    int YoloLayerPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream)
+    {
+        forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, batchSize);
+        return 0;
+    }
+
+    YoloPluginCreator::YoloPluginCreator()
+    {
+    }
+
+    // Name/version must match YoloLayerPlugin::getPluginType()/getPluginVersion().
+    const char* YoloPluginCreator::getPluginName() const
+    {
+            return "YoloLayer_TRT";
+    }
+
+    const char* YoloPluginCreator::getPluginVersion() const
+    {
+            return "1";
+    }
+
+    // NOTE(review): returns nullptr instead of describing the accepted fields;
+    // callers that introspect the creator's fields will see none.
+    const PluginFieldCollection* YoloPluginCreator::getFieldNames()
+    {
+            return 0;
+    }
+
+    // Build-time creation from a PluginFieldCollection (see the
+    // field-collection constructor for the expected fields).
+    IPluginV2IOExt* YoloPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
+    {
+        YoloLayerPlugin* obj = new YoloLayerPlugin(*fc);
+        obj->setPluginNamespace(mNamespace.c_str());
+        return obj;
+    }
+
+    IPluginV2IOExt* YoloPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
+    {
+        // This object will be deleted when the network is destroyed
+        YoloLayerPlugin* obj = new YoloLayerPlugin(serialData, serialLength);
+        obj->setPluginNamespace(mNamespace.c_str());
+        return obj;
+    }
+}