
benchmark: support end-to-end elapsed time

gaotingquan 1 year ago
parent
commit
27225ed673

+ 32 - 26
docs/module_usage/instructions/benchmark.md

@@ -26,34 +26,40 @@ python main.py \
 After Benchmark is enabled, the benchmark metrics are printed automatically:
 
 ```
-+-------------------+--------+------------------+
-|     Component     | Counts | Average Time(ms) |
-+-------------------+--------+------------------+
-|      ReadCmp      |   10   |    7.86035061    |
-|       Resize      |   10   |    1.38545036    |
-|     Normalize     |   10   |    3.77433300    |
-|     ToCHWImage    |   10   |    0.00545979    |
-| ImageDetPredictor |   10   |   14.97282982    |
-|   DetPostProcess  |   10   |    0.06134510    |
-|  ***************  | ****** | ***************  |
-|     PreProcess    |   \    |   13.02559376    |
-|     Inference     |   \    |   14.97282982    |
-|    PostProcess    |   \    |    0.06134510    |
-+-------------------+--------+------------------+
++-------------------+-------------+------------------------+
+|     Component     | Call Counts | Avg Time Per Call (ms) |
++-------------------+-------------+------------------------+
+|      ReadCmp      |     1000    |      19.22814894       |
+|       Resize      |     1000    |       2.52388239       |
+|     Normalize     |     1000    |       1.33547258       |
+|     ToCHWImage    |     1000    |       0.00310326       |
+| ImageDetPredictor |     1000    |       6.83180261       |
+|   DetPostProcess  |     1000    |       0.03265357       |
++-------------------+-------------+------------------------+
++-------------+------------------+----------------------------+
+|    Stage    | Num of Instances | Avg Time Per Instance (ms) |
++-------------+------------------+----------------------------+
+|  PreProcess |       1000       |        23.09060717         |
+|  Inference  |       1000       |         6.83180261         |
+| PostProcess |       1000       |         0.03265357         |
+|   End2End   |       1000       |        30.48534989         |
++-------------+------------------+----------------------------+
 ```
 
-In the Benchmark results, the average execution time (`Average Time`, in milliseconds) and the call counts (`Counts`) of every component (`Component`) of the model are reported, together with the aggregated execution time of preprocessing (`PreProcess`), model inference (`Inference`), and postprocessing (`PostProcess`); the metrics are also saved to the local file `./benchmark.txt`:
+In the Benchmark results, the average execution time per call (`Avg Time Per Call`, in milliseconds) and the call counts (`Call Counts`) of every component (`Component`) of the model are reported, together with the per-instance average time (`Avg Time Per Instance`, in milliseconds) aggregated for preprocessing (`PreProcess`), model inference (`Inference`), postprocessing (`PostProcess`), and end-to-end (`End2End`); the metrics are also saved to the local file `./benchmark.txt`:
 
 ```
-Component, Counts, Average Time(ms)
-ReadCmp, 10, 7.860350608825682706
-Resize, 10, 1.385450363159179688
-Normalize, 10, 3.774333000183105469
-ToCHWImage, 10, 0.005459785461425781
-ImageDetPredictor, 10, 14.972829818725585938
-DetPostProcess, 10, 0.061345100402832031
-***************, ***, ***************
-PreProcess, \, 13.025593757629394531
-Inference, \, 14.972829818725585938
-PostProcess, \, 0.061345100402832031
+Component, Call Counts, Avg Time Per Call (ms)
+ReadCmp, 1000, 19.329239845275878906
+Resize, 1000, 2.562829017639160156
+Normalize, 1000, 1.369090795516967773
+ToCHWImage, 1000, 0.003165960311889648
+ImageDetPredictor, 1000, 7.323185205459594727
+DetPostProcess, 1000, 0.033131122589111328
+****************************************************************************************************
+Stage, Num of Instances, Avg Time Per Instance (ms)
+PreProcess, 1000, 23.264325618743896484
+Inference, 1000, 7.323185205459594727
+PostProcess, 1000, 0.033131122589111328
+End2End, 1000, 31.181738615036010742
 ```
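
The saved `./benchmark.txt` now holds two CSV sections separated by a line of asterisks, as shown above. A minimal parsing sketch, assuming exactly the layout written by `Benchmark.collect()` below (the helper `parse_benchmark_txt` is illustrative, not part of this commit):

```python
from pathlib import Path

def parse_benchmark_txt(path="./benchmark.txt"):
    # The two sections are separated by a line of 100 asterisks.
    component_sec, stage_sec = Path(path).read_text().split("*" * 100)

    def parse(section):
        lines = [l for l in section.strip().splitlines() if l.strip()]
        header = [c.strip() for c in lines[0].split(",")]
        rows = []
        for line in lines[1:]:
            name, count, avg_ms = [c.strip() for c in line.split(",")]
            rows.append({header[0]: name, header[1]: int(count), header[2]: float(avg_ms)})
        return rows

    return parse(component_sec), parse(stage_sec)

components, stages = parse_benchmark_txt()
end2end = next(r for r in stages if r["Stage"] == "End2End")
print(f"End2End: {end2end['Avg Time Per Instance (ms)']:.4f} ms/instance")
```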

+ 12 - 7
paddlex/inference/components/transforms/image/common.py

@@ -19,7 +19,11 @@ from copy import deepcopy
 import numpy as np
 import cv2
 
-from .....utils.flags import INFER_BENCHMARK, INFER_BENCHMARK_DATA_SIZE
+from .....utils.flags import (
+    INFER_BENCHMARK,
+    INFER_BENCHMARK_ITER,
+    INFER_BENCHMARK_DATA_SIZE,
+)
 from .....utils.cache import CACHE_DIR, temp_file_manager
 from ....utils.io import ImageReader, ImageWriter, PDFReader
 from ...base import BaseComponent
@@ -107,12 +111,13 @@ class ReadImage(_BaseRead):
 
         if INFER_BENCHMARK and img is None:
             size = int(INFER_BENCHMARK_DATA_SIZE)
-            yield [
-                process_ndarray(
-                    np.random.randint(0, 256, (size, size, 3), dtype=np.uint8)
-                )
-                for _ in range(self.batch_size)
-            ]
+            for _ in range(INFER_BENCHMARK_ITER):
+                yield [
+                    process_ndarray(
+                        np.random.randint(0, 256, (size, size, 3), dtype=np.uint8)
+                    )
+                    for _ in range(self.batch_size)
+                ]
 
         elif isinstance(img, np.ndarray):
             yield [process_ndarray(img)]
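
With this change, the synthetic-input branch of `ReadImage` yields `INFER_BENCHMARK_ITER` batches of random images instead of a single batch, so iteration now lives in the data source rather than in the predictor. A self-contained sketch of the same generator pattern (`fake_batches` and its parameters are illustrative names):

```python
import numpy as np

def fake_batches(iters, batch_size, size):
    """Yield `iters` batches of random HWC uint8 images, mimicking the
    benchmark branch of ReadImage above."""
    for _ in range(iters):
        yield [
            np.random.randint(0, 256, (size, size, 3), dtype=np.uint8)
            for _ in range(batch_size)
        ]

# Consuming 1000 batches drives the 1000-call counts seen in the tables above.
for batch in fake_batches(iters=1000, batch_size=1, size=224):
    pass  # feed `batch` into the pipeline
```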

+ 7 - 9
paddlex/inference/models/base/basic_predictor.py

@@ -19,7 +19,6 @@ from ....utils.subclass_register import AutoRegisterABCMetaClass
 from ....utils.flags import (
     INFER_BENCHMARK,
     INFER_BENCHMARK_WARMUP,
-    INFER_BENCHMARK_ITER,
 )
 from ....utils import logging
 from ...components.base import BaseComponent, ComponentsEngine
@@ -53,16 +52,15 @@ class BasicPredictor(
             self.benchmark = Benchmark(self.components)
 
     def __call__(self, input, **kwargs):
+        self.set_predictor(**kwargs)
         if self.benchmark:
-            for _ in range(INFER_BENCHMARK_WARMUP):
-                list(super().__call__(None))
+            if INFER_BENCHMARK_WARMUP > 0:
+                output = super().__call__(input)
+                for _ in range(INFER_BENCHMARK_WARMUP):
+                    next(output)
             self.benchmark.reset()
-            if input is None:
-                for _ in range(INFER_BENCHMARK_ITER):
-                    list(super().__call__(input))
-            else:
-                list(super().__call__(input))
-            self.benchmark.collect()
+            output = list(super().__call__(input))
+            self.benchmark.collect(len(output))
         else:
             yield from super().__call__(input)
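
The rewritten `__call__` warms up by consuming the first `INFER_BENCHMARK_WARMUP` results from a throwaway generator, resets the timers, then drains a fresh pass and reports its length to `Benchmark.collect()`. A standalone sketch of this warmup-then-measure pattern (`predict` and `timed_run` are illustrative stand-ins, not PaddleX API):

```python
import time

def predict(inputs):
    """Stand-in for the lazy prediction generator."""
    for x in inputs:
        time.sleep(0.001)  # pretend work
        yield x * 2

def timed_run(inputs, warmup=3):
    if warmup > 0:
        output = predict(inputs)
        for _ in range(warmup):      # consume a few results to warm caches
            next(output)
    tic = time.time()                # analogous to Benchmark.reset()
    results = list(predict(inputs))  # fresh, fully timed pass
    elapse = time.time() - tic       # analogous to Benchmark.collect(len(results))
    print(f"{len(results)} instances, {elapse / len(results) * 1000:.3f} ms each")
    return results

timed_run(range(100))
```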
 

+ 35 - 20
paddlex/inference/utils/benchmark.py

@@ -25,13 +25,16 @@ from ...utils import logging
 class Benchmark:
     def __init__(self, components):
         self._components = components
+        self._e2e_tic = None
+        self._e2e_elapse = None
 
     def reset(self):
         for name in self._components:
             cmp = self._components[name]
             cmp.timer.reset()
+        self._e2e_tic = time.time()
 
-    def gather(self):
+    def gather(self, e2e_num):
         # lazy import for avoiding circular import
         from ..components.paddle_predictor import BasePaddlePredictor
 
@@ -42,38 +45,50 @@ class Benchmark:
             cmp = self._components[name]
             times = cmp.timer.logs
             counts = len(times)
-            avg = np.mean(times) * 1000
+            avg = np.mean(times)
+            total = np.sum(times)
             detail.append((name, counts, avg))
             if isinstance(cmp, BasePaddlePredictor):
-                summary["inference"] += avg
+                summary["inference"] += total
                 op_tag = "postprocess"
             else:
-                summary[op_tag] += avg
+                summary[op_tag] += total
+
+        summary = [
+            ("PreProcess", e2e_num, summary["preprocess"] / e2e_num),
+            ("Inference", e2e_num, summary["inference"] / e2e_num),
+            ("PostProcess", e2e_num, summary["postprocess"] / e2e_num),
+            ("End2End", e2e_num, self._e2e_elapse / e2e_num),
+        ]
         return detail, summary
 
-    def collect(self):
-        detail, summary = self.gather()
-        table = PrettyTable(["Component", "Counts", "Average Time(ms)"])
-        table.add_rows([(name, cnts, f"{avg:.8f}") for name, cnts, avg in detail])
-        table.add_row(("***************", "******", "***************"))
-        table.add_row(("PreProcess", "\\", f"{summary['preprocess']:.8f}"))
-        table.add_row(("Inference", "\\", f"{summary['inference']:.8f}"))
-        table.add_row(("PostProcess", "\\", f"{summary['postprocess']:.8f}"))
+    def collect(self, e2e_num):
+        self._e2e_elapse = time.time() - self._e2e_tic
+        detail, summary = self.gather(e2e_num)
+
+        table = PrettyTable(["Component", "Call Counts", "Avg Time Per Call (ms)"])
+        table.add_rows(
+            [(name, cnts, f"{avg * 1000:.8f}") for name, cnts, avg in detail]
+        )
+        logging.info(table)
+
+        table = PrettyTable(["Stage", "Num of Instances", "Avg Time Per Instance (ms)"])
+        table.add_rows(
+            [(name, cnts, f"{avg * 1000:.8f}") for name, cnts, avg in summary]
+        )
         logging.info(table)
 
         if INFER_BENCHMARK_OUTPUT:
-            str_ = "Component, Counts, Average Time(ms)\n"
+            str_ = "Component, Call Counts, Avg Time Per Call (ms)\n"
             str_ += "\n".join(
-                [f"{name}, {cnts}, {avg:.18f}" for name, cnts, avg in detail]
+                [f"{name}, {cnts}, {avg * 1000:.18f}" for name, cnts, avg in detail]
             )
-            str_ += "\n***************, ***, ***************\n"
+            str_ += "\n" + "*" * 100 + "\n"
+            str_ += "Stage, Num of Instances, Avg Time Per Instance (ms)\n"
             str_ += "\n".join(
-                [
-                    f"PreProcess, \, {summary['preprocess']:.18f}",
-                    f"Inference, \, {summary['inference']:.18f}",
-                    f"PostProcess, \, {summary['postprocess']:.18f}",
-                ]
+                [f"{name}, {cnts}, {avg * 1000:.18f}" for name, cnts, avg in summary]
             )
+
             with open(INFER_BENCHMARK_OUTPUT, "w") as f:
                 f.write(str_)
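
The arithmetic behind the two new tables: each component's `Avg Time Per Call` is the mean of its call log, while each stage's `Avg Time Per Instance` is the *sum* of its components' logs divided by the number of processed instances, with `End2End` taken from a wall clock around the whole timed run. A condensed sketch of that computation (the `logs` data and stage grouping are illustrative):

```python
import numpy as np
from prettytable import PrettyTable

# Illustrative per-call timing logs in seconds, grouped by stage.
logs = {
    "PreProcess": {"ReadCmp": [0.019] * 1000, "Resize": [0.0025] * 1000},
    "Inference": {"ImageDetPredictor": [0.0068] * 1000},
    "PostProcess": {"DetPostProcess": [0.00003] * 1000},
}
e2e_num = 1000     # instances processed in the timed run
e2e_elapse = 30.5  # seconds, measured around the whole run

table = PrettyTable(["Component", "Call Counts", "Avg Time Per Call (ms)"])
for comps in logs.values():
    for name, times in comps.items():
        table.add_row([name, len(times), f"{np.mean(times) * 1000:.8f}"])
print(table)

table = PrettyTable(["Stage", "Num of Instances", "Avg Time Per Instance (ms)"])
for stage, comps in logs.items():
    total = sum(np.sum(t) for t in comps.values())  # total seconds in this stage
    table.add_row([stage, e2e_num, f"{total / e2e_num * 1000:.8f}"])
table.add_row(["End2End", e2e_num, f"{e2e_elapse / e2e_num * 1000:.8f}"])
print(table)
```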