trt_config.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from collections import defaultdict
  15. from ...utils.flags import USE_PIR_TRT
  16. class LazyLoadDict(dict):
  17. def __init__(self, *args, **kwargs):
  18. self._initialized = False
  19. super().__init__(*args, **kwargs)
  20. def _initialize(self):
  21. if not self._initialized:
  22. self.update(self._load())
  23. self._initialized = True
  24. def __getitem__(self, key):
  25. self._initialize()
  26. return super().__getitem__(key)
  27. def __contains__(self, key):
  28. self._initialize()
  29. return super().__contains__(key)
  30. def _load(self):
  31. raise NotImplementedError
  32. class OLD_IR_TRT_PRECISION_MAP_CLASS(LazyLoadDict):
  33. def _load(self):
  34. from paddle.inference import PrecisionType
  35. return {
  36. "trt_int8": PrecisionType.Int8,
  37. "trt_fp32": PrecisionType.Float32,
  38. "trt_fp16": PrecisionType.Half,
  39. }
  40. class PIR_TRT_PRECISION_MAP_CLASS(LazyLoadDict):
  41. def _load(self):
  42. from paddle.tensorrt.export import PrecisionMode
  43. return {
  44. "trt_int8": PrecisionMode.INT8,
  45. "trt_fp32": PrecisionMode.FP32,
  46. "trt_fp16": PrecisionMode.FP16,
  47. }
############ old ir trt ############
# Run-mode name -> paddle.inference.PrecisionType. Constructing the map does
# not import paddle; the import is deferred until the map is first queried.
OLD_IR_TRT_PRECISION_MAP = OLD_IR_TRT_PRECISION_MAP_CLASS()
  50. OLD_IR_TRT_CFG_DEFAULT_SETTING = {
  51. "workspace_size": 1 << 30,
  52. "max_batch_size": 32,
  53. "min_subgraph_size": 3,
  54. "use_static": True,
  55. "use_calib_mode": False,
  56. }
  57. OLD_IR_TRT_CFG_SETTING = {
  58. "SegFormer-B0": {
  59. "enable_tensorrt_engine": {
  60. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  61. "workspace_size": 1 << 32,
  62. }
  63. },
  64. "SegFormer-B1": {
  65. "enable_tensorrt_engine": {
  66. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  67. "workspace_size": 1 << 32,
  68. }
  69. },
  70. "SegFormer-B2": {
  71. "enable_tensorrt_engine": {
  72. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  73. "workspace_size": 1 << 32,
  74. }
  75. },
  76. "SegFormer-B3": {
  77. "enable_tensorrt_engine": {
  78. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  79. "workspace_size": 1 << 32,
  80. }
  81. },
  82. "SegFormer-B4": {
  83. "enable_tensorrt_engine": {
  84. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  85. "workspace_size": 1 << 32,
  86. }
  87. },
  88. "SegFormer-B5": {
  89. "enable_tensorrt_engine": {
  90. **OLD_IR_TRT_CFG_DEFAULT_SETTING,
  91. "workspace_size": 1 << 32,
  92. }
  93. },
  94. "SLANeXt_wired": {
  95. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  96. # the exp_disable_tensorrt_ops() func don't support to be pass argument by keyword
  97. # therefore, using list instead of dict
  98. "exp_disable_tensorrt_ops": [
  99. [
  100. "linear_0.tmp_0",
  101. "linear_4.tmp_0",
  102. "linear_12.tmp_0",
  103. "linear_16.tmp_0",
  104. "linear_24.tmp_0",
  105. "linear_28.tmp_0",
  106. "linear_36.tmp_0",
  107. "linear_40.tmp_0",
  108. ]
  109. ],
  110. },
  111. "SLANeXt_wireless": {
  112. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  113. "exp_disable_tensorrt_ops": [
  114. [
  115. "linear_0.tmp_0",
  116. "linear_4.tmp_0",
  117. "linear_12.tmp_0",
  118. "linear_16.tmp_0",
  119. "linear_24.tmp_0",
  120. "linear_28.tmp_0",
  121. "linear_36.tmp_0",
  122. "linear_40.tmp_0",
  123. ]
  124. ],
  125. },
  126. "PP-YOLOE_seg-S": {
  127. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  128. "exp_disable_tensorrt_ops": [
  129. ["bilinear_interp_v2_1.tmp_0", "bilinear_interp_v2_1.tmp_0_slice_0"]
  130. ],
  131. },
  132. "TiDE": {
  133. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  134. "exp_disable_tensorrt_ops": [
  135. [
  136. "reshape2_3.tmp_0",
  137. "reshape2_2.tmp_0",
  138. "reshape2_1.tmp_0",
  139. "reshape2_0.tmp_0",
  140. ]
  141. ],
  142. },
  143. "Nonstationary": {
  144. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  145. "exp_disable_tensorrt_ops": [
  146. [
  147. "reshape2_13.tmp_0",
  148. ]
  149. ],
  150. },
  151. "ch_SVTRv2_rec": {
  152. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  153. "exp_disable_tensorrt_ops": [
  154. [
  155. "reshape2_3.tmp_0",
  156. "reshape2_5.tmp_0",
  157. "reshape2_7.tmp_0",
  158. "reshape2_9.tmp_0",
  159. "reshape2_11.tmp_0",
  160. "reshape2_13.tmp_0",
  161. "reshape2_15.tmp_0",
  162. "reshape2_17.tmp_0",
  163. "reshape2_19.tmp_0",
  164. "reshape2_28.tmp_0",
  165. "reshape2_42.tmp_0",
  166. "reshape2_47.tmp_0",
  167. "layer_norm_15.tmp_2",
  168. "layer_norm_13.tmp_2",
  169. ]
  170. ],
  171. },
  172. "PP-YOLOE_plus_SOD-largesize-L": {
  173. "enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING,
  174. "exp_disable_tensorrt_ops": [
  175. [
  176. "conv2d",
  177. "fused_conv2d_add_act",
  178. "swish",
  179. "reduce_mean",
  180. "softmax",
  181. "layer_norm",
  182. "gelu",
  183. ]
  184. ],
  185. },
  186. }
  187. DISABLE_TRT_HALF_OPS_CONFIG = {
  188. "ConvNeXt_tiny": {"layer_norm"},
  189. "ConvNeXt_small": {"layer_norm"},
  190. "ConvNeXt_base_224": {"layer_norm"},
  191. "ConvNeXt_large_224": {"layer_norm"},
  192. "ConvNeXt_base_384": {"layer_norm"},
  193. "ConvNeXt_large_384": {"layer_norm"},
  194. "PP-HGNetV2-B3": {"softmax"},
  195. "MobileNetV1_x0_5": {"fused_conv2d_add_act"},
  196. "SeaFormer_small": {"fused_conv2d_add_act"},
  197. "SeaFormer_tiny": {"fused_conv2d_add_act"},
  198. "PP-OCRv4_mobile_seal_det": {
  199. "fused_conv2d_add_act",
  200. "softmax",
  201. "conv2d",
  202. "multiply",
  203. },
  204. "PicoDet_LCNet_x2_5_face": {
  205. "fused_conv2d_add_act",
  206. "softmax",
  207. "elementwise_mul",
  208. "matrix_multiply",
  209. },
  210. "PP-YOLOE_plus_SOD-S": {
  211. "fused_conv2d_add_act",
  212. "softmax",
  213. "conv2d",
  214. "elementwise_mul",
  215. "matrix_multiply",
  216. },
  217. "BlazeFace-FPN-SSH": {"fused_conv2d_add_act"},
  218. "PP-YOLOE_plus-S_face": {"fused_conv2d_add_act", "conv2d", "multiply"},
  219. "PP-ShiTuV2_det": {
  220. "conv2d",
  221. "depthwise_conv2d",
  222. "fused_conv2d_add_act",
  223. "matrix_multiply",
  224. },
  225. "RT-DETR-H_layout_3cls": {
  226. "fused_conv2d_add_act",
  227. "elementwise_mul",
  228. "elementwise_add",
  229. "elementwise_div",
  230. "matrix_multiply",
  231. "layer_norm",
  232. },
  233. "DETR-R50": {
  234. "fused_conv2d_add_act",
  235. "elementwise_mul",
  236. "elementwise_add",
  237. "elementwise_div",
  238. "matrix_multiply",
  239. "layer_norm",
  240. },
  241. "RT-DETR-R50": {
  242. "fused_conv2d_add_act",
  243. "elementwise_mul",
  244. "elementwise_add",
  245. "elementwise_div",
  246. "matrix_multiply",
  247. "layer_norm",
  248. },
  249. "YOLOX-M": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add", "scale"},
  250. "YOLOv3-MobileNetV3": {
  251. "fused_conv2d_add_act",
  252. "elementwise_mul",
  253. "elementwise_add",
  254. "depthwise_conv2d",
  255. "elementwise_div",
  256. },
  257. "PP-OCRv4_server_det": {"fused_conv2d_add_act", "conv2d"},
  258. }
############ pir trt ############
# Run-mode name -> paddle.tensorrt.export.PrecisionMode. Constructing the map
# does not import paddle; the import is deferred until the map is first queried.
PIR_TRT_PRECISION_MAP = PIR_TRT_PRECISION_MAP_CLASS()
  261. PIR_TRT_CFG_SETTING = {
  262. "PP-YOLOE_plus_SOD-largesize-L": {
  263. "workspace_size": 1 << 32,
  264. "disable_ops": [
  265. "pd_op.conv2d",
  266. "pd_op.fused_conv2d_add_act",
  267. "pd_op.swish",
  268. "pd_op.mean",
  269. "pd_op.softmax",
  270. "pd_op.layer_norm",
  271. "pd_op.gelu",
  272. ],
  273. },
  274. "SLANeXt_wired": {"disable_ops": ["pd_op.slice"]},
  275. "SLANeXt_wireless": {"disable_ops": ["pd_op.slice"]},
  276. "DETR-R50": {
  277. "optimization_level": 4,
  278. "workspace_size": 1 << 32,
  279. "ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"},
  280. },
  281. "SegFormer-B0": {"optimization_level": 4, "workspace_size": 1 << 32},
  282. "SegFormer-B1": {"optimization_level": 4, "workspace_size": 1 << 32},
  283. "SegFormer-B2": {"optimization_level": 4, "workspace_size": 1 << 32},
  284. "SegFormer-B3": {"optimization_level": 4, "workspace_size": 1 << 32},
  285. "SegFormer-B4": {"optimization_level": 4, "workspace_size": 1 << 32},
  286. "SegFormer-B5": {"optimization_level": 4, "workspace_size": 1 << 32},
  287. "LaTeX_OCR_rec": {"disable_ops": ["pd_op.slice", "pd_op.reshape"]},
  288. "PP-YOLOE_seg-S": {
  289. "disable_ops": ["pd_op.slice", "pd_op.bilinear_interp"],
  290. "ops_run_float": {
  291. "pd_op.conv2d",
  292. "pd_op.fused_conv2d_add_act",
  293. "pd_op.conv2d_transpose",
  294. "pd_op.matmul",
  295. },
  296. },
  297. "PP-FormulaNet-L": {
  298. "disable_ops": ["pd_op.full_with_tensor"],
  299. "workspace_size": 2 << 32,
  300. },
  301. "PP-FormulaNet-S": {
  302. "disable_ops": ["pd_op.full_with_tensor"],
  303. "workspace_size": 1 << 32,
  304. },
  305. "ConvNeXt_tiny": {"ops_run_float": {"pd_op.layer_norm"}},
  306. "ConvNeXt_small": {"ops_run_float": {"pd_op.layer_norm"}},
  307. "ConvNeXt_base_224": {"ops_run_float": {"pd_op.layer_norm"}},
  308. "ConvNeXt_base_384": {"ops_run_float": {"pd_op.layer_norm"}},
  309. "ConvNeXt_large_224": {"ops_run_float": {"pd_op.layer_norm"}},
  310. "ConvNeXt_large_384": {"ops_run_float": {"pd_op.layer_norm"}},
  311. "PP-HGNetV2-B3": {"ops_run_float": {"pd_op.softmax"}},
  312. "BlazeFace-FPN-SSH": {"ops_run_float": {"pd_op.fused_conv2d_add_act"}},
  313. "PP-OCRv4_mobile_seal_det": {
  314. "ops_run_float": {
  315. "pd_op.fused_conv2d_add_act",
  316. "pd_op.softmax",
  317. "pd_op.multiply",
  318. "pd_op.conv2d",
  319. }
  320. },
  321. "PP-YOLOE_plus_SOD-S": {
  322. "ops_run_float": {
  323. "pd_op.fused_conv2d_add_act",
  324. "pd_op.softmax",
  325. "pd_op.conv2d",
  326. "pd_op.multiply",
  327. "pd_op.matmul",
  328. }
  329. },
  330. "PicoDet_LCNet_x2_5_face": {
  331. "ops_run_float": {
  332. "pd_op.fused_conv2d_add_act",
  333. "pd_op.softmax",
  334. "pd_op.conv2d",
  335. "pd_op.multiply",
  336. "pd_op.matmul",
  337. }
  338. },
  339. "PP-YOLOE_plus-S_face": {
  340. "ops_run_float": {
  341. "pd_op.fused_conv2d_add_act",
  342. "pd_op.multiply",
  343. "pd_op.conv2d",
  344. }
  345. },
  346. "PP-ShiTuV2_det": {
  347. "ops_run_float": {
  348. "pd_op.fused_conv2d_add_act",
  349. "pd_op.depthwise_conv2d",
  350. "pd_op.conv2d",
  351. }
  352. },
  353. "RT-DETR-H_layout_3cls": {
  354. "ops_run_float": {
  355. "pd_op.matmul",
  356. "pd_op.conv2d",
  357. "pd_op.depthwise_conv2d",
  358. "pd_op.fused_conv2d_add_act",
  359. "pd_op.batch_norm_",
  360. }
  361. },
  362. "RT-DETR-R50": {
  363. "ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}
  364. },
  365. "YOLOX-M": {
  366. "ops_run_float": {
  367. "pd_op.multiply",
  368. "pd_op.conv2d",
  369. "pd_op.fused_conv2d_add_act",
  370. }
  371. },
  372. "YOLOv3-MobileNetV3": {
  373. "ops_run_float": {
  374. "pd_op.depthwise_conv2d",
  375. "pd_op.conv2d",
  376. "pd_op.fused_conv2d_add_act",
  377. }
  378. },
  379. "PP-OCRv4_server_det": {
  380. "ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act"}
  381. },
  382. "PP-OCRv4_server_seal_det": {
  383. "ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act"}
  384. },
  385. "PP-YOLOE_plus-M": {
  386. "ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act"}
  387. },
  388. }
  389. if USE_PIR_TRT:
  390. TRT_PRECISION_MAP = PIR_TRT_PRECISION_MAP
  391. TRT_CFG_SETTING = defaultdict(dict, PIR_TRT_CFG_SETTING)
  392. else:
  393. TRT_PRECISION_MAP = OLD_IR_TRT_PRECISION_MAP
  394. TRT_CFG_SETTING = defaultdict(
  395. lambda: {"enable_tensorrt_engine": OLD_IR_TRT_CFG_DEFAULT_SETTING},
  396. OLD_IR_TRT_CFG_SETTING,
  397. )