trt_config.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from collections import defaultdict
  15. from ...utils.flags import USE_PIR_TRT
  16. class LazyLoadDict(dict):
  17. def __init__(self, *args, **kwargs):
  18. self._initialized = False
  19. super().__init__(*args, **kwargs)
  20. def _initialize(self):
  21. if not self._initialized:
  22. self.update(self._load())
  23. self._initialized = True
  24. def __getitem__(self, key):
  25. self._initialize()
  26. return super().__getitem__(key)
  27. def __contains__(self, key):
  28. self._initialize()
  29. return super().__contains__(key)
  30. def _load(self):
  31. raise NotImplementedError
  32. class OLD_IR_TRT_PRECISION_MAP_CLASS(LazyLoadDict):
  33. def _load(self):
  34. from paddle.inference import PrecisionType
  35. return {
  36. "trt_int8": PrecisionType.Int8,
  37. "trt_fp32": PrecisionType.Float32,
  38. "trt_fp16": PrecisionType.Half,
  39. }
  40. class PIR_TRT_PRECISION_MAP_CLASS(LazyLoadDict):
  41. def _load(self):
  42. from paddle.tensorrt.export import PrecisionMode
  43. return {
  44. "trt_int8": PrecisionMode.INT8,
  45. "trt_fp32": PrecisionMode.FP32,
  46. "trt_fp16": PrecisionMode.FP16,
  47. }
############ old ir trt ############
# Module-level instance of the old-IR precision map. It starts empty and is
# populated lazily on first lookup, so paddle.inference is not imported when
# this module is loaded.
OLD_IR_TRT_PRECISION_MAP = OLD_IR_TRT_PRECISION_MAP_CLASS()
  50. OLD_IR_TRT_CFG_DEFAULT_SETTING = {
  51. "workspace_size": 1 << 30,
  52. "max_batch_size": 32,
  53. "min_subgraph_size": 3,
  54. "use_static": True,
  55. "use_calib_mode": False,
  56. }
  57. OLD_IR_TRT_CFG_SETTING = {
  58. "SegFormer-B0": {
  59. "enable_tensorrt_engine": {
  60. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  61. "workspace_size": 1 << 32,
  62. }
  63. },
  64. "SegFormer-B1": {
  65. "enable_tensorrt_engine": {
  66. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  67. "workspace_size": 1 << 32,
  68. }
  69. },
  70. "SegFormer-B2": {
  71. "enable_tensorrt_engine": {
  72. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  73. "workspace_size": 1 << 32,
  74. }
  75. },
  76. "SegFormer-B3": {
  77. "enable_tensorrt_engine": {
  78. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  79. "workspace_size": 1 << 32,
  80. }
  81. },
  82. "SegFormer-B4": {
  83. "enable_tensorrt_engine": {
  84. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  85. "workspace_size": 1 << 32,
  86. }
  87. },
  88. "SegFormer-B5": {
  89. "enable_tensorrt_engine": {
  90. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  91. "workspace_size": 1 << 32,
  92. }
  93. },
  94. "SLANeXt_wired": {
  95. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  96. # the exp_disable_tensorrt_ops() func don't support to be pass argument by keyword
  97. # therefore, using list instead of dict
  98. "exp_disable_tensorrt_ops": [
  99. [
  100. "linear_0.tmp_0",
  101. "linear_4.tmp_0",
  102. "linear_12.tmp_0",
  103. "linear_16.tmp_0",
  104. "linear_24.tmp_0",
  105. "linear_28.tmp_0",
  106. "linear_36.tmp_0",
  107. "linear_40.tmp_0",
  108. ]
  109. ],
  110. },
  111. "SLANeXt_wireless": {
  112. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  113. "exp_disable_tensorrt_ops": [
  114. [
  115. "linear_0.tmp_0",
  116. "linear_4.tmp_0",
  117. "linear_12.tmp_0",
  118. "linear_16.tmp_0",
  119. "linear_24.tmp_0",
  120. "linear_28.tmp_0",
  121. "linear_36.tmp_0",
  122. "linear_40.tmp_0",
  123. ]
  124. ],
  125. },
  126. "PP-YOLOE_seg-S": {
  127. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  128. "exp_disable_tensorrt_ops": [
  129. ["bilinear_interp_v2_1.tmp_0", "bilinear_interp_v2_1.tmp_0_slice_0"]
  130. ],
  131. },
  132. "TiDE": {
  133. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  134. "exp_disable_tensorrt_ops": [
  135. [
  136. "reshape2_3.tmp_0",
  137. "reshape2_2.tmp_0",
  138. "reshape2_1.tmp_0",
  139. "reshape2_0.tmp_0",
  140. ]
  141. ],
  142. },
  143. "Nonstationary": {
  144. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  145. "exp_disable_tensorrt_ops": [
  146. [
  147. "reshape2_13.tmp_0",
  148. ]
  149. ],
  150. },
  151. "ch_SVTRv2_rec": {
  152. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  153. "exp_disable_tensorrt_ops": [
  154. [
  155. "reshape2_3.tmp_0",
  156. "reshape2_5.tmp_0",
  157. "reshape2_7.tmp_0",
  158. "reshape2_9.tmp_0",
  159. "reshape2_11.tmp_0",
  160. "reshape2_13.tmp_0",
  161. "reshape2_15.tmp_0",
  162. "reshape2_17.tmp_0",
  163. "reshape2_19.tmp_0",
  164. "reshape2_28.tmp_0",
  165. "reshape2_42.tmp_0",
  166. "reshape2_47.tmp_0",
  167. "layer_norm_15.tmp_2",
  168. "layer_norm_13.tmp_2",
  169. ]
  170. ],
  171. },
  172. "PP-YOLOE_plus_SOD-largesize-L": {
  173. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  174. "exp_disable_tensorrt_ops": [
  175. [
  176. "conv2d",
  177. "fused_conv2d_add_act",
  178. "swish",
  179. "reduce_mean",
  180. "softmax",
  181. "layer_norm",
  182. "gelu",
  183. ]
  184. ],
  185. },
  186. }
  187. DISABLE_TRT_HALF_OPS_CONFIG = {
  188. "ConvNeXt_tiny": {"layer_norm"},
  189. "ConvNeXt_small": {"layer_norm"},
  190. "ConvNeXt_base_224": {"layer_norm"},
  191. "ConvNeXt_large_224": {"layer_norm"},
  192. "ConvNeXt_base_384": {"layer_norm"},
  193. "ConvNeXt_large_384": {"layer_norm"},
  194. "PP-HGNetV2-B3": {"softmax"},
  195. "MobileNetV1_x0_5": {"fused_conv2d_add_act"},
  196. "SeaFormer_small": {"fused_conv2d_add_act"},
  197. "SeaFormer_tiny": {"fused_conv2d_add_act"},
  198. "PP-OCRv4_mobile_seal_det": {"fused_conv2d_add_act", "softmax", "conv2d", "multiply"},
  199. "PicoDet_LCNet_x2_5_face": {"fused_conv2d_add_act", "softmax", "elementwise_mul", "matrix_multiply"},
  200. "PP-YOLOE_plus_SOD-S": {"fused_conv2d_add_act", "softmax", "conv2d", "elementwise_mul", "matrix_multiply"},
  201. "BlazeFace-FPN-SSH": {"fused_conv2d_add_act"},
  202. "PP-YOLOE_plus-S_face": {"fused_conv2d_add_act", "conv2d", "multiply"},
  203. "PP-ShiTuV2_det": {"conv2d", "depthwise_conv2d", "fused_conv2d_add_act", "matrix_multiply"},
  204. "RT-DETR-H_layout_3cls": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add" ,"elementwise_div", "matrix_multiply", "layer_norm"},
  205. "DETR-R50": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add", "elementwise_div", "matrix_multiply", "layer_norm"},
  206. "RT-DETR-R50": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add", "elementwise_div", "matrix_multiply", "layer_norm"},
  207. "YOLOX-M": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add", "scale"},
  208. "YOLOv3-MobileNetV3": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add", "depthwise_conv2d", "elementwise_div"},
  209. "PP-OCRv4_server_det": {"fused_conv2d_add_act", "conv2d"},
  210. }
############ pir trt ############
# Module-level instance of the PIR precision map. It starts empty and is
# populated lazily on first lookup, so paddle.tensorrt.export is not imported
# when this module is loaded.
PIR_TRT_PRECISION_MAP = PIR_TRT_PRECISION_MAP_CLASS()
  213. PIR_TRT_CFG_SETTING = {
  214. "PP-YOLOE_plus_SOD-largesize-L": {"workspace_size": 1 << 32},
  215. "SLANeXt_wired": {"disable_ops": ["pd_op.slice"]},
  216. "SLANeXt_wireless": {"disable_ops": ["pd_op.slice"]},
  217. "DETR-R50": {"optimization_level": 4, "workspace_size": 1 << 32},
  218. "SegFormer-B0": {"optimization_level": 4, "workspace_size": 1 << 32},
  219. "SegFormer-B1": {"optimization_level": 4, "workspace_size": 1 << 32},
  220. "SegFormer-B2": {"optimization_level": 4, "workspace_size": 1 << 32},
  221. "SegFormer-B3": {"optimization_level": 4, "workspace_size": 1 << 32},
  222. "SegFormer-B4": {"optimization_level": 4, "workspace_size": 1 << 32},
  223. "SegFormer-B5": {"optimization_level": 4, "workspace_size": 1 << 32},
  224. "LaTeX_OCR_rec": {"disable_ops": ["pd_op.slice", "pd_op.reshape"]},
  225. "PP-YOLOE_seg-S": {"disable_ops": ["pd_op.slice", "pd_op.bilinear_interp"]},
  226. "PP-YOLOE_plus_SOD-largesize-L": {"disable_ops": ["pd_op.conv2d", "pd_op.fused_conv2d_add_act", "pd_op.swish", "pd_op.mean", "pd_op.softmax", "pd_op.layer_norm", "pd_op.gelu"]},
  227. "PP-FormulaNet-L": {
  228. "disable_ops": ["pd_op.full_with_tensor"],
  229. "workspace_size": 2 << 32,
  230. },
  231. "PP-FormulaNet-S": {
  232. "disable_ops": ["pd_op.full_with_tensor"],
  233. "workspace_size": 1 << 32,
  234. },
  235. "ConvNeXt_tiny": {"ops_run_float": {"pd_op.layer_norm"}},
  236. "ConvNeXt_small": {"ops_run_float": {"pd_op.layer_norm"}},
  237. "ConvNeXt_base_224": {"ops_run_float": {"pd_op.layer_norm"}},
  238. "ConvNeXt_base_384": {"ops_run_float": {"pd_op.layer_norm"}},
  239. "ConvNeXt_large_224": {"ops_run_float": {"pd_op.layer_norm"}},
  240. "ConvNeXt_large_384": {"ops_run_float": {"pd_op.layer_norm"}},
  241. "PP-HGNetV2-B3": {"ops_run_float": {"pd_op.softmax"}},
  242. "BlazeFace-FPN-SSH": {"ops_run_float": {"pd_op.fused_conv2d_add_act"}},
  243. "PP-OCRv4_mobile_seal_det": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.softmax", "pd_op.multiply", "pd_op.conv2d"}},
  244. "PP-YOLOE_plus_SOD-S": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.softmax", "pd_op.conv2d", "pd_op.multiply", "pd_op.matmul"}},
  245. "PicoDet_LCNet_x2_5_face": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.softmax", "pd_op.conv2d", "pd_op.multiply", "pd_op.matmul"}},
  246. "PP-YOLOE_plus-S_face": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.multiply", "pd_op.conv2d"}},
  247. "PP-ShiTuV2_det": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.depthwise_conv2d", "pd_op.conv2d"}},
  248. "RT-DETR-H_layout_3cls": {"ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.depthwise_conv2d", "pd_op.fused_conv2d_add_act", "pd_op.batch_norm_"}},
  249. "DETR-R50": {"ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
  250. "RT-DETR-R50": {"ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
  251. "YOLOX-M": {"ops_run_float": {"pd_op.multiply", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
  252. "YOLOv3-MobileNetV3": {"ops_run_float": {"pd_op.depthwise_conv2d", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
  253. "PP-OCRv4_server_det": {"ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
  254. "PP-YOLOE_seg-S": {"ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act", "pd_op.conv2d_transpose", "pd_op.matmul"}},
  255. }
  256. if USE_PIR_TRT:
  257. TRT_PRECISION_MAP = PIR_TRT_PRECISION_MAP
  258. TRT_CFG_SETTING = defaultdict(dict, PIR_TRT_CFG_SETTING)
  259. else:
  260. TRT_PRECISION_MAP = OLD_IR_TRT_PRECISION_MAP
  261. TRT_CFG_SETTING = defaultdict(
  262. lambda: {"enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING},
  263. OLD_IR_TRT_CFG_SETTING,
  264. )