simple_profiler.cpp 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. // Copyright 2023 TIER IV, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  #include <tensorrt_common/simple_profiler.hpp>
  #include <algorithm>
  #include <iomanip>
  #include <string>
  #include <utility>
  #include <vector>
  16. namespace tensorrt_common
  17. {
  18. SimpleProfiler::SimpleProfiler(std::string name, const std::vector<SimpleProfiler> & src_profilers)
  19. : m_name(name)
  20. {
  21. m_index = 0;
  22. for (const auto & src_profiler : src_profilers) {
  23. for (const auto & rec : src_profiler.m_profile) {
  24. auto it = m_profile.find(rec.first);
  25. if (it == m_profile.end()) {
  26. m_profile.insert(rec);
  27. } else {
  28. it->second.time += rec.second.time;
  29. it->second.count += rec.second.count;
  30. }
  31. }
  32. }
  33. }
  34. void SimpleProfiler::reportLayerTime(const char * layerName, float ms) noexcept
  35. {
  36. m_profile[layerName].count++;
  37. m_profile[layerName].time += ms;
  38. if (m_profile[layerName].min_time == -1.0) {
  39. m_profile[layerName].min_time = ms;
  40. m_profile[layerName].index = m_index;
  41. m_index++;
  42. } else if (m_profile[layerName].min_time > ms) {
  43. m_profile[layerName].min_time = ms;
  44. }
  45. }
  46. void SimpleProfiler::setProfDict(nvinfer1::ILayer * layer) noexcept
  47. {
  48. std::string name = layer->getName();
  49. m_layer_dict[name];
  50. m_layer_dict[name].type = layer->getType();
  51. if (layer->getType() == nvinfer1::LayerType::kCONVOLUTION) {
  52. nvinfer1::IConvolutionLayer * conv = (nvinfer1::IConvolutionLayer *)layer;
  53. nvinfer1::ITensor * in = layer->getInput(0);
  54. nvinfer1::Dims dim_in = in->getDimensions();
  55. nvinfer1::ITensor * out = layer->getOutput(0);
  56. nvinfer1::Dims dim_out = out->getDimensions();
  57. nvinfer1::Dims k_dims = conv->getKernelSizeNd();
  58. nvinfer1::Dims s_dims = conv->getStrideNd();
  59. int groups = conv->getNbGroups();
  60. int stride = s_dims.d[0];
  61. int kernel = k_dims.d[0];
  62. m_layer_dict[name].in_c = dim_in.d[1];
  63. m_layer_dict[name].out_c = dim_out.d[1];
  64. m_layer_dict[name].w = dim_in.d[3];
  65. m_layer_dict[name].h = dim_in.d[2];
  66. m_layer_dict[name].k = kernel;
  67. ;
  68. m_layer_dict[name].stride = stride;
  69. m_layer_dict[name].groups = groups;
  70. }
  71. }
  72. std::ostream & operator<<(std::ostream & out, const SimpleProfiler & value)
  73. {
  74. out << "========== " << value.m_name << " profile ==========" << std::endl;
  75. float totalTime = 0;
  76. std::string layerNameStr = "Operation";
  77. int maxLayerNameLength = static_cast<int>(layerNameStr.size());
  78. for (const auto & elem : value.m_profile) {
  79. totalTime += elem.second.time;
  80. maxLayerNameLength = std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
  81. }
  82. auto old_settings = out.flags();
  83. auto old_precision = out.precision();
  84. // Output header
  85. {
  86. out << "index, " << std::setw(12);
  87. out << std::setw(maxLayerNameLength) << layerNameStr << " ";
  88. out << std::setw(12) << "Runtime"
  89. << "%,"
  90. << " ";
  91. out << std::setw(12) << "Invocations"
  92. << " , ";
  93. out << std::setw(12) << "Runtime[ms]"
  94. << " , ";
  95. out << std::setw(12) << "Avg Runtime[ms]"
  96. << " ,";
  97. out << std::setw(12) << "Min Runtime[ms]" << std::endl;
  98. }
  99. int index = value.m_index;
  100. for (int i = 0; i < index; i++) {
  101. for (const auto & elem : value.m_profile) {
  102. if (elem.second.index == i) {
  103. out << i << ", ";
  104. out << std::setw(maxLayerNameLength) << elem.first << ",";
  105. out << std::setw(12) << std::fixed << std::setprecision(1)
  106. << (elem.second.time * 100.0F / totalTime) << "%"
  107. << ",";
  108. out << std::setw(12) << elem.second.count << ",";
  109. out << std::setw(12) << std::fixed << std::setprecision(2) << elem.second.time << ", ";
  110. out << std::setw(12) << std::fixed << std::setprecision(2)
  111. << elem.second.time / elem.second.count << ", ";
  112. out << std::setw(12) << std::fixed << std::setprecision(2) << elem.second.min_time
  113. << std::endl;
  114. }
  115. }
  116. }
  117. out.flags(old_settings);
  118. out.precision(old_precision);
  119. out << "========== " << value.m_name << " total runtime = " << totalTime
  120. << " ms ==========" << std::endl;
  121. return out;
  122. }
  123. } // namespace tensorrt_common