Ver código fonte

feat: 更新自动检测工具类型规则,优化优先级和条件配置

zhch158_admin 1 mês atrás
pai
commit
99d8611d0b
1 arquivos alterados com 38 adições e 9 exclusões
  1. 38 9
      config.yaml

+ 38 - 9
config.yaml

@@ -98,18 +98,47 @@ ocr:
       rotation:
         coordinates_are_pre_rotated: false
   
-  # 自动检测工具类型的规则
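+  # Rules for auto-detecting the tool type, listed from highest to lowest intended priority. NOTE(review): the numeric `priority` values below (4, 2, 1, 3) do not follow this order - confirm which one the rule evaluator uses.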
   auto_detection:
     enabled: true
     rules:
-      - field_exists: "table_res_list"
-        tool_type: "table_recognition_v2"
-      - field_exists: "parsing_res_list"
-        tool_type: "ppstructv3"
-      - field_exists: "page_idx"  # 新增:检测MinerU特征
-        tool_type: "mineru"
-      - field_exists: "category_field"
-        tool_type: "dots_ocr"
+      # Table Recognition V2 - highest priority
+      - tool_type: "table_recognition_v2"
+        conditions:  # presumably every listed condition must hold (AND) - TODO confirm against the rule evaluator
+          - type: "field_exists"
+            field: "table_res_list"
+          - type: "field_not_exists"
+            field: "parsing_res_list"
+        priority: 4
+      
+      # PPStructV3 - second priority
+      - tool_type: "ppstructv3"
+        conditions:
+          - type: "field_exists"
+            field: "parsing_res_list"
+          - type: "field_exists"
+            field: "doc_preprocessor_res"
+        priority: 2
+      
+      # MinerU - third priority
+      - tool_type: "mineru"
+        conditions:
+          - type: "field_exists"
+            field: "page_idx"
+          - type: "field_exists"
+            field: "type"
+          - type: "json_structure"
+            structure: "array"
+        priority: 1
+      
+      # Dots OCR - lowest priority (default). NOTE(review): priority 3 below outranks PPStructV3 (2) and MinerU (1) if a larger number means higher priority - confirm whether this comment or the value is intended.
+      - tool_type: "dots_ocr"
+        conditions:
+          - type: "json_structure"
+            structure: "array"
+          - type: "field_exists"
+            field: "category"
+        priority: 3
 
 data_sources:
   - name: "A用户_单元格扫描流水"