trt_config.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from collections import defaultdict
  15. from ...utils.flags import USE_PIR_TRT
  16. class LazyLoadDict(dict):
  17. def __init__(self, *args, **kwargs):
  18. self._initialized = False
  19. super().__init__(*args, **kwargs)
  20. def _initialize(self):
  21. if not self._initialized:
  22. self.update(self._load())
  23. self._initialized = True
  24. def __getitem__(self, key):
  25. self._initialize()
  26. return super().__getitem__(key)
  27. def __contains__(self, key):
  28. self._initialize()
  29. return super().__contains__(key)
  30. def _load(self):
  31. raise NotImplementedError
  32. class OLD_IR_TRT_PRECISION_MAP_CLASS(LazyLoadDict):
  33. def _load(self):
  34. from paddle.inference import PrecisionType
  35. return {
  36. "trt_int8": PrecisionType.Int8,
  37. "trt_fp32": PrecisionType.Float32,
  38. "trt_fp16": PrecisionType.Half,
  39. }
  40. class PIR_TRT_PRECISION_MAP_CLASS(LazyLoadDict):
  41. def _load(self):
  42. from paddle.tensorrt.export import PrecisionMode
  43. return {
  44. "trt_int8": PrecisionMode.INT8,
  45. "trt_fp32": PrecisionMode.FP32,
  46. "trt_fp16": PrecisionMode.FP16,
  47. }
############ old ir trt ############
# Module-level lazy lookup table. Constructing it here does not run
# OLD_IR_TRT_PRECISION_MAP_CLASS._load(); the entries (and the paddle import
# inside _load) are deferred until the table is first looked up.
OLD_IR_TRT_PRECISION_MAP = OLD_IR_TRT_PRECISION_MAP_CLASS()
  50. OLD_IR_TRT_CFG_DEFAULT_SETTING = {
  51. "workspace_size": 1 << 30,
  52. "max_batch_size": 32,
  53. "min_subgraph_size": 3,
  54. "use_static": True,
  55. "use_calib_mode": False,
  56. }
  57. OLD_IR_TRT_CFG_SETTING = {
  58. "SegFormer-B0": {
  59. "enable_tensorrt_engine": {
  60. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  61. "workspace_size": 1 << 32,
  62. }
  63. },
  64. "SegFormer-B1": {
  65. "enable_tensorrt_engine": {
  66. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  67. "workspace_size": 1 << 32,
  68. }
  69. },
  70. "SegFormer-B2": {
  71. "enable_tensorrt_engine": {
  72. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  73. "workspace_size": 1 << 32,
  74. }
  75. },
  76. "SegFormer-B3": {
  77. "enable_tensorrt_engine": {
  78. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  79. "workspace_size": 1 << 32,
  80. }
  81. },
  82. "SegFormer-B4": {
  83. "enable_tensorrt_engine": {
  84. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  85. "workspace_size": 1 << 32,
  86. }
  87. },
  88. "SegFormer-B5": {
  89. "enable_tensorrt_engine": {
  90. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  91. "workspace_size": 1 << 32,
  92. }
  93. },
  94. "SLANeXt_wired": {
  95. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  96. # the exp_disable_tensorrt_ops() func don't support to be pass argument by keyword
  97. # therefore, using list instead of dict
  98. "exp_disable_tensorrt_ops": [
  99. [
  100. "linear_0.tmp_0",
  101. "linear_4.tmp_0",
  102. "linear_12.tmp_0",
  103. "linear_16.tmp_0",
  104. "linear_24.tmp_0",
  105. "linear_28.tmp_0",
  106. "linear_36.tmp_0",
  107. "linear_40.tmp_0",
  108. ]
  109. ],
  110. },
  111. "SLANeXt_wireless": {
  112. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  113. "exp_disable_tensorrt_ops": [
  114. [
  115. "linear_0.tmp_0",
  116. "linear_4.tmp_0",
  117. "linear_12.tmp_0",
  118. "linear_16.tmp_0",
  119. "linear_24.tmp_0",
  120. "linear_28.tmp_0",
  121. "linear_36.tmp_0",
  122. "linear_40.tmp_0",
  123. ]
  124. ],
  125. },
  126. "PP-YOLOE_seg-S": {
  127. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  128. "exp_disable_tensorrt_ops": [
  129. ["bilinear_interp_v2_1.tmp_0", "bilinear_interp_v2_1.tmp_0_slice_0"]
  130. ],
  131. },
  132. "TiDE": {
  133. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  134. "exp_disable_tensorrt_ops": [
  135. [
  136. "reshape2_3.tmp_0",
  137. "reshape2_2.tmp_0",
  138. "reshape2_1.tmp_0",
  139. "reshape2_0.tmp_0",
  140. ]
  141. ],
  142. },
  143. "Nonstationary": {
  144. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  145. "exp_disable_tensorrt_ops": [
  146. [
  147. "reshape2_13.tmp_0",
  148. ]
  149. ],
  150. },
  151. "ch_SVTRv2_rec": {
  152. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  153. "exp_disable_tensorrt_ops": [
  154. [
  155. "reshape2_3.tmp_0",
  156. "reshape2_5.tmp_0",
  157. "reshape2_7.tmp_0",
  158. "reshape2_9.tmp_0",
  159. "reshape2_11.tmp_0",
  160. "reshape2_13.tmp_0",
  161. "reshape2_15.tmp_0",
  162. "reshape2_17.tmp_0",
  163. "reshape2_19.tmp_0",
  164. "reshape2_28.tmp_0",
  165. "reshape2_42.tmp_0",
  166. "reshape2_47.tmp_0",
  167. "layer_norm_15.tmp_2",
  168. "layer_norm_13.tmp_2",
  169. ]
  170. ],
  171. },
  172. }
  173. DISABLE_TRT_HALF_OPS_CONFIG = {
  174. "ConvNeXt_tiny": {"layer_norm"},
  175. "ConvNeXt_small": {"layer_norm"},
  176. "ConvNeXt_base_224": {"layer_norm"},
  177. "ConvNeXt_large_224": {"layer_norm"},
  178. "ConvNeXt_base_384": {"layer_norm"},
  179. "ConvNeXt_large_384": {"layer_norm"},
  180. "PP-HGNetV2-B3": {"softmax"},
  181. "MobileNetV1_x0_5": {"fused_conv2d_add_act"},
  182. "SeaFormer_small": {"fused_conv2d_add_act"},
  183. "SeaFormer_tiny": {"fused_conv2d_add_act"},
  184. "PP-OCRv4_mobile_seal_det": {"fused_conv2d_add_act", "softmax"},
  185. "BlazeFace-FPN-SSH": {"fused_conv2d_add_act"},
  186. }
############ pir trt ############
# Module-level lazy lookup table. Constructing it here does not run
# PIR_TRT_PRECISION_MAP_CLASS._load(); the entries (and the paddle import
# inside _load) are deferred until the table is first looked up.
PIR_TRT_PRECISION_MAP = PIR_TRT_PRECISION_MAP_CLASS()
  189. PIR_TRT_CFG_SETTING = {
  190. "PP-YOLOE_plus_SOD-largesize-L": {"workspace_size": 1 << 32},
  191. "SLANeXt_wired": {"disable_ops": ["pd_op.slice"]},
  192. "SLANeXt_wireless": {"disable_ops": ["pd_op.slice"]},
  193. "DETR-R50": {"optimization_level": 4, "workspace_size": 1 << 32},
  194. "SegFormer-B0": {"optimization_level": 4, "workspace_size": 1 << 32},
  195. "SegFormer-B1": {"optimization_level": 4, "workspace_size": 1 << 32},
  196. "SegFormer-B2": {"optimization_level": 4, "workspace_size": 1 << 32},
  197. "SegFormer-B3": {"optimization_level": 4, "workspace_size": 1 << 32},
  198. "SegFormer-B4": {"optimization_level": 4, "workspace_size": 1 << 32},
  199. "SegFormer-B5": {"optimization_level": 4, "workspace_size": 1 << 32},
  200. "LaTeX_OCR_rec": {"disable_ops": ["pd_op.slice"]},
  201. "PP-YOLOE_seg-S": {"disable_ops": ["pd_op.slice", "pd_op.bilinear_interp"]},
  202. "PP-FormulaNet-L": {
  203. "disable_ops": ["pd_op.full_with_tensor"],
  204. "workspace_size": 2 << 32,
  205. },
  206. "PP-FormulaNet-S": {
  207. "disable_ops": ["pd_op.full_with_tensor"],
  208. "workspace_size": 1 << 32,
  209. },
  210. "ConvNeXt_tiny": {"ops_run_float": {"pd_op.layer_norm"}},
  211. "ConvNeXt_small": {"ops_run_float": {"pd_op.layer_norm"}},
  212. "ConvNeXt_base_224": {"ops_run_float": {"pd_op.layer_norm"}},
  213. "ConvNeXt_base_384": {"ops_run_float": {"pd_op.layer_norm"}},
  214. "ConvNeXt_large_224": {"ops_run_float": {"pd_op.layer_norm"}},
  215. "ConvNeXt_large_384": {"ops_run_float": {"pd_op.layer_norm"}},
  216. "PP-HGNetV2-B3": {"ops_run_float": {"pd_op.softmax"}},
  217. "BlazeFace-FPN-SSH": {"ops_run_float": {"pd_op.fused_conv2d_add_act"}},
  218. }
  219. if USE_PIR_TRT:
  220. TRT_PRECISION_MAP = PIR_TRT_PRECISION_MAP
  221. TRT_CFG_SETTING = defaultdict(dict, PIR_TRT_CFG_SETTING)
  222. else:
  223. TRT_PRECISION_MAP = OLD_IR_TRT_PRECISION_MAP
  224. TRT_CFG_SETTING = defaultdict(
  225. lambda: {"enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING},
  226. OLD_IR_TRT_CFG_SETTING,
  227. )