Browse Source

新增MinerU OCR工具配置及数据源，支持多用户扫描结果处理

zhch158_admin 1 month ago
parent
commit
e6484d53be
1 changed file with 48 additions and 1 deletion
  1. 48 1
      config.yaml

+ 48 - 1
config.yaml

@@ -81,6 +81,21 @@ ocr:
       confidence_field: "confidence"
       confidence_field: "confidence"
       rotation:
       rotation:
         coordinates_are_pre_rotated: true
         coordinates_are_pre_rotated: true
+    
+    mineru:
+      name: "MinerU"
+      description: "MinerU OCR"
+      json_structure: "array"  # JSON为数组格式
+      text_field: "text"
+      bbox_field: "bbox"
+      category_field: "type"
+      confidence_field: "confidence"
+      # 表格相关字段
+      table_body_field: "table_body"
+      img_path_field: "img_path"
+      # 旋转处理配置
+      rotation:
+        coordinates_are_pre_rotated: false
   
   
   # 自动检测工具类型的规则
   # 自动检测工具类型的规则
   auto_detection:
   auto_detection:
@@ -90,7 +105,9 @@ ocr:
         tool_type: "table_recognition_v2"
         tool_type: "table_recognition_v2"
       - field_exists: "parsing_res_list"
       - field_exists: "parsing_res_list"
         tool_type: "ppstructv3"
         tool_type: "ppstructv3"
-      - json_is_array: true  # 如果JSON是数组,判断为dots_ocr
+      - field_exists: "page_idx"  # 新增:检测MinerU特征
+        tool_type: "mineru"
+      - field_exists: "category_field"
         tool_type: "dots_ocr"
         tool_type: "dots_ocr"
 
 
 data_sources:
 data_sources:
@@ -112,6 +129,12 @@ data_sources:
     src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results/A用户_单元格扫描流水"
     src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/data_PPStructureV3_Results/A用户_单元格扫描流水"
     description: "A用户使用Table Recognition V2的单元格扫描结果"
     description: "A用户使用Table Recognition V2的单元格扫描结果"
         
         
+  - name: "A用户_单元格扫描流水"
+    ocr_tool: "mineru"
+    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/mineru-vlm-2.5.3_Results"
+    src_img_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水.img"
+    description: "A用户使用Mineru的单元格扫描结果"
+
   - name: "A用户_单元格图片合成"
   - name: "A用户_单元格图片合成"
     ocr_tool: "dots_ocr"
     ocr_tool: "dots_ocr"
     ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格图片合成/data_DotsOCR_Results"
     ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/A用户_单元格图片合成/data_DotsOCR_Results"
@@ -136,6 +159,12 @@ data_sources:
     src_img_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/data_PPStructureV3_Results/B用户_扫描流水"
     src_img_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/data_PPStructureV3_Results/B用户_扫描流水"
     description: "B用户使用PPStructV3的扫描结果"
     description: "B用户使用PPStructV3的扫描结果"
 
 
+  - name: "B用户_扫描流水"
+    ocr_tool: "mineru"
+    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/mineru-vlm-2.5.3_Results"
+    src_img_dir: "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水.img"
+    description: "B用户使用Mineru的扫描结果"
+
   - name: "B用户_图片合成流水"
   - name: "B用户_图片合成流水"
     ocr_tool: "dots_ocr"
     ocr_tool: "dots_ocr"
     ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/B用户_图片合成流水/data_DotsOCR_Results"
     ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/B用户_图片合成流水/data_DotsOCR_Results"
@@ -154,12 +183,24 @@ data_sources:
     src_img_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_PPStructureV3_Results/德_内蒙古银行照"
     src_img_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/data_PPStructureV3_Results/德_内蒙古银行照"
     description: "德_内蒙古银行照使用PPStructV3的图片合成结果"
     description: "德_内蒙古银行照使用PPStructV3的图片合成结果"
 
 
+  - name: "德_内蒙古银行照"
+    ocr_tool: "mineru"
+    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/mineru-vlm-2.5.3_Results"
+    src_img_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/mineru-vlm-2.5.3_Results/德_内蒙古银行照"
+    description: "德_内蒙古银行照使用Mineru的图片合成结果"
+
   - name: "对公_招商银行图"
   - name: "对公_招商银行图"
     ocr_tool: "ppstructv3"
     ocr_tool: "ppstructv3"
     ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results"
     ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results"
     src_img_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results/对公_招商银行图"
     src_img_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/data_PPStructureV3_Results/对公_招商银行图"
     description: "对公_招商银行图使用PPStructV3的图片合成结果"
     description: "对公_招商银行图使用PPStructV3的图片合成结果"
 
 
+  - name: "对公_招商银行图"
+    ocr_tool: "mineru"
+    ocr_out_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/mineru-vlm-2.5.3_Results"
+    src_img_dir: "/Users/zhch158/workspace/data/流水分析/对公_招商银行图/mineru-vlm-2.5.3_Results/对公_招商银行图"
+    description: "对公_招商银行图使用Mineru的图片合成结果"
+
   - name: "至远彩色_2023年报"
   - name: "至远彩色_2023年报"
     ocr_tool: "dots_ocr"
     ocr_tool: "dots_ocr"
     ocr_out_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results"
     ocr_out_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results"
@@ -172,6 +213,12 @@ data_sources:
     src_img_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results/2023年度报告母公司"
     src_img_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results/2023年度报告母公司"
     description: "至远彩色使用PPStructV3的2023年报"
     description: "至远彩色使用PPStructV3的2023年报"
 
 
+  - name: "至远彩色_2023年报"
+    ocr_tool: "mineru"
+    ocr_out_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/mineru-vlm-2.5.3_Results"
+    src_img_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/mineru-vlm-2.5.3_Results/2023年度报告母公司"
+    description: "至远彩色使用mineru的2023年报"
+
 # 预校验结果文件路径
 # 预校验结果文件路径
 pre_validation:
 pre_validation:
   out_dir: "./output/pre_validation/"
   out_dir: "./output/pre_validation/"