gaotingquan 1 год назад
Родитель
Commit
a12f265f8f
Изменено файлов: 2 (добавлено строк: 37, удалено строк: 45)
  1. 33 33
      docs/module_usage/instructions/benchmark.md
  2. 4 12
      paddlex/inference/utils/benchmark.py

+ 33 - 33
docs/module_usage/instructions/benchmark.md

@@ -26,40 +26,40 @@ python main.py \
 在开启 Benchmark 后,将自动打印 benchmark 指标:
 
 ```
-+-------------------+-------------+------------------------+
-|     Component     | Call Counts | Avg Time Per Call (ms) |
-+-------------------+-------------+------------------------+
-|      ReadCmp      |     1000    |      19.22814894       |
-|       Resize      |     1000    |       2.52388239       |
-|     Normalize     |     1000    |       1.33547258       |
-|     ToCHWImage    |     1000    |       0.00310326       |
-| ImageDetPredictor |     1000    |       6.83180261       |
-|   DetPostProcess  |     1000    |       0.03265357       |
-+-------------------+-------------+------------------------+
-+-------------+------------------+----------------------------+
-|    Stage    | Num of Instances | Avg Time Per Instance (ms) |
-+-------------+------------------+----------------------------+
-|  PreProcess |       1000       |        23.09060717         |
-|  Inference  |       1000       |         6.83180261         |
-| PostProcess |       1000       |         0.03265357         |
-|   End2End   |       1000       |        30.48534989         |
-+-------------+------------------+----------------------------+
++-------------------+-----------------+------+---------------+
+|       Stage       | Total Time (ms) | Nums | Avg Time (ms) |
++-------------------+-----------------+------+---------------+
+|      ReadCmp      |   49.95107651   |  10  |   4.99510765  |
+|       Resize      |    8.48054886   |  10  |   0.84805489  |
+|     Normalize     |   23.08964729   |  10  |   2.30896473  |
+|     ToCHWImage    |    0.02717972   |  10  |   0.00271797  |
+| ImageDetPredictor |   75.94108582   |  10  |   7.59410858  |
+|   DetPostProcess  |    0.26535988   |  10  |   0.02653599  |
++-------------------+-----------------+------+---------------+
++-------------+-----------------+------+---------------+
+|    Stage    | Total Time (ms) | Nums | Avg Time (ms) |
++-------------+-----------------+------+---------------+
+|  PreProcess |   81.54845238   |  10  |   8.15484524  |
+|  Inference  |   75.94108582   |  10  |   7.59410858  |
+| PostProcess |    0.26535988   |  10  |   0.02653599  |
+|   End2End   |   161.07797623  |  10  |  16.10779762  |
+|    WarmUp   |  5496.41847610  |  5   | 1099.28369522 |
++-------------+-----------------+------+---------------+
 ```
 
-在 Benchmark 结果中,会统计该模型全部组件(`Component`)的平均执行耗时(`Avg Time Per Call`,单位为“毫秒”)和调用次数(`Call Counts`),以及按预处理(`PreProcess`)、模型推理(`Inference`)、后处理(`PostProcess`)和端到端(`End2End`)汇总得到的单样本平均耗时(`Avg Time Per Instance`,单位为“毫秒”),同时,保存相关指标会到本地 `./benchmark.txt` 文件中:
+在 Benchmark 结果中,会统计该模型全部组件(`Component`)的总耗时(`Total Time`,单位为“毫秒”)、调用次数(`Nums`)、单次调用平均耗时(`Avg Time`,单位为“毫秒”),以及按预热(`WarmUp`)、预处理(`PreProcess`)、模型推理(`Inference`)、后处理(`PostProcess`)和端到端(`End2End`)进行划分的耗时统计,包括每个阶段的总耗时(`Total Time`,单位为“毫秒”)、样本数(`Nums`)和单样本平均执行耗时(`Avg Time`,单位为“毫秒”),同时,相关指标会保存到本地 `./benchmark.csv` 文件中(注意:下方示例 CSV 中的耗时数值与上表相差 1000 倍,即实际以“秒”为单位,尽管表头标注为 `ms`):
 
-```
-Component, Call Counts, Avg Time Per Call (ms)
-ReadCmp, 1000, 19.329239845275878906
-Resize, 1000, 2.562829017639160156
-Normalize, 1000, 1.369090795516967773
-ToCHWImage, 1000, 0.003165960311889648
-ImageDetPredictor, 1000, 7.323185205459594727
-DetPostProcess, 1000, 0.033131122589111328
-****************************************************************************************************
-Stage, Num of Instances, Avg Time Per Instance (ms)
-PreProcess, 1000, 23.264325618743896484
-Inference, 1000, 7.323185205459594727
-PostProcess, 1000, 0.033131122589111328
-End2End, 1000, 31.181738615036010742
+```csv
+Stage,Total Time (ms),Nums,Avg Time (ms)
+ReadCmp,0.04995107650756836,10,0.004995107650756836
+Resize,0.008480548858642578,10,0.0008480548858642578
+Normalize,0.02308964729309082,10,0.002308964729309082
+ToCHWImage,2.7179718017578125e-05,10,2.7179718017578126e-06
+ImageDetPredictor,0.07594108581542969,10,0.007594108581542969
+DetPostProcess,0.00026535987854003906,10,2.6535987854003906e-05
+PreProcess,0.08154845237731934,10,0.008154845237731934
+Inference,0.07594108581542969,10,0.007594108581542969
+PostProcess,0.00026535987854003906,10,2.6535987854003906e-05
+End2End,0.16107797622680664,10,0.016107797622680664
+WarmUp,5.496418476104736,5,1.0992836952209473
 ```

+ 4 - 12
paddlex/inference/utils/benchmark.py

@@ -103,9 +103,8 @@ class Benchmark:
         self._e2e_elapse = time.time() - self._e2e_tic
         detail, summary = self.gather(e2e_num)
 
-        table = PrettyTable(
-            ["Component", "Total Time (ms)", "Call Counts", "Avg Time Per Call (ms)"]
-        )
+        table_head = ["Stage", "Total Time (ms)", "Nums", "Avg Time (ms)"]
+        table = PrettyTable(table_head)
         table.add_rows(
             [
                 (name, f"{total * 1000:.8f}", cnts, f"{avg * 1000:.8f}")
@@ -114,14 +113,7 @@ class Benchmark:
         )
         logging.info(table)
 
-        table = PrettyTable(
-            [
-                "Stage",
-                "Total Time (ms)",
-                "Num of Instances",
-                "Avg Time Per Instance (ms)",
-            ]
-        )
+        table = PrettyTable(table_head)
         table.add_rows(
             [
                 (name, f"{total * 1000:.8f}", cnts, f"{avg * 1000:.8f}")
@@ -131,7 +123,7 @@ class Benchmark:
         logging.info(table)
 
         if INFER_BENCHMARK_OUTPUT:
-            csv_data = [["Stage", "Total Time", "Num", "Avg Time"]]
+            csv_data = [table_head]
             csv_data.extend(detail)
             csv_data.extend(summary)
             with open("benchmark.csv", "w", newline="") as file: