فهرست منبع

fix trt config (#3829)

* fix trt config for DETR-R50 and PP-YOLOE_seg-S & styling code

* fix trt config

1. fix PP-YOLOE_plus_SOD-largesize-L;
2. add PP-OCRv4_server_seal_det, PP-YOLOE_plus-M.
Tingquan Gao 7 ماه پیش
والد
کامیت
4c7548eaf2
1 فایلهای تغییر یافته به همراه 158 افزوده شده و 25 حذف شده
  1. 158 25
      paddlex/inference/utils/trt_config.py

+ 158 - 25
paddlex/inference/utils/trt_config.py

@@ -214,28 +214,91 @@ DISABLE_TRT_HALF_OPS_CONFIG = {
     "MobileNetV1_x0_5": {"fused_conv2d_add_act"},
     "SeaFormer_small": {"fused_conv2d_add_act"},
     "SeaFormer_tiny": {"fused_conv2d_add_act"},
-    "PP-OCRv4_mobile_seal_det": {"fused_conv2d_add_act", "softmax", "conv2d", "multiply"},
-    "PicoDet_LCNet_x2_5_face": {"fused_conv2d_add_act", "softmax", "elementwise_mul", "matrix_multiply"},
-    "PP-YOLOE_plus_SOD-S": {"fused_conv2d_add_act", "softmax", "conv2d", "elementwise_mul", "matrix_multiply"},
+    "PP-OCRv4_mobile_seal_det": {
+        "fused_conv2d_add_act",
+        "softmax",
+        "conv2d",
+        "multiply",
+    },
+    "PicoDet_LCNet_x2_5_face": {
+        "fused_conv2d_add_act",
+        "softmax",
+        "elementwise_mul",
+        "matrix_multiply",
+    },
+    "PP-YOLOE_plus_SOD-S": {
+        "fused_conv2d_add_act",
+        "softmax",
+        "conv2d",
+        "elementwise_mul",
+        "matrix_multiply",
+    },
     "BlazeFace-FPN-SSH": {"fused_conv2d_add_act"},
     "PP-YOLOE_plus-S_face": {"fused_conv2d_add_act", "conv2d", "multiply"},
-    "PP-ShiTuV2_det": {"conv2d", "depthwise_conv2d", "fused_conv2d_add_act", "matrix_multiply"},
-    "RT-DETR-H_layout_3cls": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add" ,"elementwise_div", "matrix_multiply", "layer_norm"},
-    "DETR-R50": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add", "elementwise_div", "matrix_multiply", "layer_norm"},
-    "RT-DETR-R50": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add", "elementwise_div", "matrix_multiply", "layer_norm"},
+    "PP-ShiTuV2_det": {
+        "conv2d",
+        "depthwise_conv2d",
+        "fused_conv2d_add_act",
+        "matrix_multiply",
+    },
+    "RT-DETR-H_layout_3cls": {
+        "fused_conv2d_add_act",
+        "elementwise_mul",
+        "elementwise_add",
+        "elementwise_div",
+        "matrix_multiply",
+        "layer_norm",
+    },
+    "DETR-R50": {
+        "fused_conv2d_add_act",
+        "elementwise_mul",
+        "elementwise_add",
+        "elementwise_div",
+        "matrix_multiply",
+        "layer_norm",
+    },
+    "RT-DETR-R50": {
+        "fused_conv2d_add_act",
+        "elementwise_mul",
+        "elementwise_add",
+        "elementwise_div",
+        "matrix_multiply",
+        "layer_norm",
+    },
     "YOLOX-M": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add", "scale"},
-    "YOLOv3-MobileNetV3": {"fused_conv2d_add_act", "elementwise_mul", "elementwise_add", "depthwise_conv2d", "elementwise_div"},
-    "PP-OCRv4_server_det":  {"fused_conv2d_add_act", "conv2d"},
+    "YOLOv3-MobileNetV3": {
+        "fused_conv2d_add_act",
+        "elementwise_mul",
+        "elementwise_add",
+        "depthwise_conv2d",
+        "elementwise_div",
+    },
+    "PP-OCRv4_server_det": {"fused_conv2d_add_act", "conv2d"},
 }
 
 ############ pir trt ############
 PIR_TRT_PRECISION_MAP = PIR_TRT_PRECISION_MAP_CLASS()
 
 PIR_TRT_CFG_SETTING = {
-    "PP-YOLOE_plus_SOD-largesize-L": {"workspace_size": 1 << 32},
+    "PP-YOLOE_plus_SOD-largesize-L": {
+        "workspace_size": 1 << 32,
+        "disable_ops": [
+            "pd_op.conv2d",
+            "pd_op.fused_conv2d_add_act",
+            "pd_op.swish",
+            "pd_op.mean",
+            "pd_op.softmax",
+            "pd_op.layer_norm",
+            "pd_op.gelu",
+        ],
+    },
     "SLANeXt_wired": {"disable_ops": ["pd_op.slice"]},
     "SLANeXt_wireless": {"disable_ops": ["pd_op.slice"]},
-    "DETR-R50": {"optimization_level": 4, "workspace_size": 1 << 32},
+    "DETR-R50": {
+        "optimization_level": 4,
+        "workspace_size": 1 << 32,
+        "ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"},
+    },
     "SegFormer-B0": {"optimization_level": 4, "workspace_size": 1 << 32},
     "SegFormer-B1": {"optimization_level": 4, "workspace_size": 1 << 32},
     "SegFormer-B2": {"optimization_level": 4, "workspace_size": 1 << 32},
@@ -243,8 +306,15 @@ PIR_TRT_CFG_SETTING = {
     "SegFormer-B4": {"optimization_level": 4, "workspace_size": 1 << 32},
     "SegFormer-B5": {"optimization_level": 4, "workspace_size": 1 << 32},
     "LaTeX_OCR_rec": {"disable_ops": ["pd_op.slice", "pd_op.reshape"]},
-    "PP-YOLOE_seg-S": {"disable_ops": ["pd_op.slice", "pd_op.bilinear_interp"]},
-    "PP-YOLOE_plus_SOD-largesize-L": {"disable_ops": ["pd_op.conv2d", "pd_op.fused_conv2d_add_act", "pd_op.swish", "pd_op.mean", "pd_op.softmax", "pd_op.layer_norm", "pd_op.gelu"]},
+    "PP-YOLOE_seg-S": {
+        "disable_ops": ["pd_op.slice", "pd_op.bilinear_interp"],
+        "ops_run_float": {
+            "pd_op.conv2d",
+            "pd_op.fused_conv2d_add_act",
+            "pd_op.conv2d_transpose",
+            "pd_op.matmul",
+        },
+    },
     "PP-FormulaNet-L": {
         "disable_ops": ["pd_op.full_with_tensor"],
         "workspace_size": 2 << 32,
@@ -261,18 +331,81 @@ PIR_TRT_CFG_SETTING = {
     "ConvNeXt_large_384": {"ops_run_float": {"pd_op.layer_norm"}},
     "PP-HGNetV2-B3": {"ops_run_float": {"pd_op.softmax"}},
     "BlazeFace-FPN-SSH": {"ops_run_float": {"pd_op.fused_conv2d_add_act"}},
-    "PP-OCRv4_mobile_seal_det": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.softmax", "pd_op.multiply", "pd_op.conv2d"}},
-    "PP-YOLOE_plus_SOD-S": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.softmax", "pd_op.conv2d", "pd_op.multiply", "pd_op.matmul"}},
-    "PicoDet_LCNet_x2_5_face": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.softmax", "pd_op.conv2d", "pd_op.multiply", "pd_op.matmul"}},
-    "PP-YOLOE_plus-S_face": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.multiply", "pd_op.conv2d"}},
-    "PP-ShiTuV2_det": {"ops_run_float": {"pd_op.fused_conv2d_add_act", "pd_op.depthwise_conv2d", "pd_op.conv2d"}},
-    "RT-DETR-H_layout_3cls": {"ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.depthwise_conv2d", "pd_op.fused_conv2d_add_act", "pd_op.batch_norm_"}},
-    "DETR-R50": {"ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
-    "RT-DETR-R50": {"ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
-    "YOLOX-M": {"ops_run_float": {"pd_op.multiply", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
-    "YOLOv3-MobileNetV3": {"ops_run_float": {"pd_op.depthwise_conv2d", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
-    "PP-OCRv4_server_det":  {"ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act"}},
-    "PP-YOLOE_seg-S": {"ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act", "pd_op.conv2d_transpose", "pd_op.matmul"}},
+    "PP-OCRv4_mobile_seal_det": {
+        "ops_run_float": {
+            "pd_op.fused_conv2d_add_act",
+            "pd_op.softmax",
+            "pd_op.multiply",
+            "pd_op.conv2d",
+        }
+    },
+    "PP-YOLOE_plus_SOD-S": {
+        "ops_run_float": {
+            "pd_op.fused_conv2d_add_act",
+            "pd_op.softmax",
+            "pd_op.conv2d",
+            "pd_op.multiply",
+            "pd_op.matmul",
+        }
+    },
+    "PicoDet_LCNet_x2_5_face": {
+        "ops_run_float": {
+            "pd_op.fused_conv2d_add_act",
+            "pd_op.softmax",
+            "pd_op.conv2d",
+            "pd_op.multiply",
+            "pd_op.matmul",
+        }
+    },
+    "PP-YOLOE_plus-S_face": {
+        "ops_run_float": {
+            "pd_op.fused_conv2d_add_act",
+            "pd_op.multiply",
+            "pd_op.conv2d",
+        }
+    },
+    "PP-ShiTuV2_det": {
+        "ops_run_float": {
+            "pd_op.fused_conv2d_add_act",
+            "pd_op.depthwise_conv2d",
+            "pd_op.conv2d",
+        }
+    },
+    "RT-DETR-H_layout_3cls": {
+        "ops_run_float": {
+            "pd_op.matmul",
+            "pd_op.conv2d",
+            "pd_op.depthwise_conv2d",
+            "pd_op.fused_conv2d_add_act",
+            "pd_op.batch_norm_",
+        }
+    },
+    "RT-DETR-R50": {
+        "ops_run_float": {"pd_op.matmul", "pd_op.conv2d", "pd_op.fused_conv2d_add_act"}
+    },
+    "YOLOX-M": {
+        "ops_run_float": {
+            "pd_op.multiply",
+            "pd_op.conv2d",
+            "pd_op.fused_conv2d_add_act",
+        }
+    },
+    "YOLOv3-MobileNetV3": {
+        "ops_run_float": {
+            "pd_op.depthwise_conv2d",
+            "pd_op.conv2d",
+            "pd_op.fused_conv2d_add_act",
+        }
+    },
+    "PP-OCRv4_server_det": {
+        "ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act"}
+    },
+    "PP-OCRv4_server_seal_det": {
+        "ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act"}
+    },
+    "PP-YOLOE_plus-M": {
+        "ops_run_float": {"pd_op.conv2d", "pd_op.fused_conv2d_add_act"}
+    },
 }