|
@@ -98,18 +98,47 @@ ocr:
|
|
|
rotation:
|
|
rotation:
|
|
|
coordinates_are_pre_rotated: false
|
|
coordinates_are_pre_rotated: false
|
|
|
|
|
|
|
|
- # Rules for auto-detecting the tool type
|
|
|
|
|
|
|
+ # Rules for auto-detecting the tool type (listed from highest to lowest priority). NOTE(review): the numeric priority values of the rules below (4, 2, 1, 3) are not monotonic in this order - confirm which direction the loader sorts.
|
|
|
auto_detection:
|
|
auto_detection:
|
|
|
enabled: true
|
|
enabled: true
|
|
|
rules:
|
|
rules:
|
|
|
- - field_exists: "table_res_list"  # rule format on the removed (-) side of this hunk: one field_exists key paired with one tool_type
|
|
|
|
|
- tool_type: "table_recognition_v2"
|
|
|
|
|
- - field_exists: "parsing_res_list"
|
|
|
|
|
- tool_type: "ppstructv3"
|
|
|
|
|
- - field_exists: "page_idx" # added: detects MinerU's characteristic field
|
|
|
|
|
- tool_type: "mineru"
|
|
|
|
|
- - field_exists: "category_field"
|
|
|
|
|
- tool_type: "dots_ocr"
|
|
|
|
|
|
|
+ # Table Recognition V2 - highest priority
|
|
|
|
|
+ - tool_type: "table_recognition_v2"
|
|
|
|
|
+ conditions:  # presumably every listed condition must hold (AND) - confirm against the loader
|
|
|
|
|
+ - type: "field_exists"
|
|
|
|
|
+ field: "table_res_list"
|
|
|
|
|
+ - type: "field_not_exists"
|
|
|
|
|
+ field: "parsing_res_list"
|
|
|
|
|
+ priority: 4  # NOTE(review): 4 is the largest value among the rules (4, 2, 1, 3); confirm that a larger number means higher priority
|
|
|
|
|
+
|
|
|
|
|
+ # PPStructV3 - second priority
|
|
|
|
|
+ - tool_type: "ppstructv3"
|
|
|
|
|
+ conditions:  # presumably every listed condition must hold (AND) - confirm against the loader
|
|
|
|
|
+ - type: "field_exists"
|
|
|
|
|
+ field: "parsing_res_list"
|
|
|
|
|
+ - type: "field_exists"
|
|
|
|
|
+ field: "doc_preprocessor_res"
|
|
|
|
|
+ priority: 2  # NOTE(review): labelled second, yet dots_ocr has 3 and mineru has 1 - confirm the intended ordering
|
|
|
|
|
+
|
|
|
|
|
+ # MinerU - third priority
|
|
|
|
|
+ - tool_type: "mineru"
|
|
|
|
|
+ conditions:  # presumably every listed condition must hold (AND) - confirm against the loader
|
|
|
|
|
+ - type: "field_exists"
|
|
|
|
|
+ field: "page_idx"
|
|
|
|
|
+ - type: "field_exists"
|
|
|
|
|
+ field: "type"
|
|
|
|
|
+ - type: "json_structure"
|
|
|
|
|
+ structure: "array"
|
|
|
|
|
+ priority: 1  # NOTE(review): labelled third, yet 1 is the smallest value among the rules (4, 2, 1, 3) - confirm the intended ordering
|
|
|
|
|
+
|
|
|
|
|
+ # Dots OCR - lowest priority (default)
|
|
|
|
|
+ - tool_type: "dots_ocr"
|
|
|
|
|
+ conditions:  # presumably every listed condition must hold (AND) - confirm against the loader
|
|
|
|
|
+ - type: "json_structure"
|
|
|
|
|
+ structure: "array"
|
|
|
|
|
+ - type: "field_exists"
|
|
|
|
|
+ field: "category"
|
|
|
|
|
+ priority: 3  # NOTE(review): labelled lowest/default, yet 3 lies between 4 and 2 among the rules (4, 2, 1, 3) - confirm the intended ordering
|
|
|
|
|
|
|
|
data_sources:
|
|
data_sources:
|
|
|
- name: "A用户_单元格扫描流水"
|
|
- name: "A用户_单元格扫描流水"
|