Browse Source

feat: 添加 DotsOCR (带 cell bbox) 工具配置,支持结果目录和描述

zhch158_admin 1 week ago
parent
commit
810f8e84a7

+ 7 - 0
config/A用户_单元格扫描流水.yaml

@@ -45,4 +45,11 @@ document:
       image_dir: "dotsocr_vllm_results/{{name}}"
       description: "Dots OCR 图片合成结果"
       enabled: true
+  
+    # DotsOCR (带 cell bbox):Dots OCR 结果叠加 PaddleOCR 单元格坐标
+    - tool: "mineru"
+      result_dir: "dotsocr_vllm_results_cell_bbox"
+      image_dir: "dotsocr_vllm_results/{{name}}"
+      description: "Dots OCR + PaddleOCR 坐标"
+      enabled: true
   

+ 7 - 0
config/B用户_扫描流水.yaml

@@ -44,4 +44,11 @@ document:
       result_dir: "dotsocr_vllm_results"
       image_dir: "dotsocr_vllm_results/{{name}}"
       description: "Dots OCR 图片合成结果"
+      enabled: true
+  
+    # DotsOCR (带 cell bbox):Dots OCR 结果叠加 PaddleOCR 单元格坐标
+    - tool: "mineru"
+      result_dir: "dotsocr_vllm_results_cell_bbox"
+      image_dir: "dotsocr_vllm_results/{{name}}"
+      description: "Dots OCR + PaddleOCR 坐标"
       enabled: true

+ 7 - 0
config/对公_招商银行图.yaml

@@ -44,4 +44,11 @@ document:
       result_dir: "dotsocr_vllm_results"
       image_dir: "dotsocr_vllm_results/{{name}}"
       description: "Dots OCR 图片合成结果"
+      enabled: true
+  
+    # DotsOCR (带 cell bbox):Dots OCR 结果叠加 PaddleOCR 单元格坐标
+    - tool: "mineru"
+      result_dir: "dotsocr_vllm_results_cell_bbox"
+      image_dir: "dotsocr_vllm_results/{{name}}"
+      description: "Dots OCR + PaddleOCR 坐标"
       enabled: true

+ 7 - 0
config/德_内蒙古银行照.yaml

@@ -45,4 +45,11 @@ document:
       result_dir: "dotsocr_vllm_results"
       image_dir: "dotsocr_vllm_results/{{name}}"
       description: "Dots OCR 图片合成结果"
+      enabled: true
+  
+    # DotsOCR (带 cell bbox):Dots OCR 结果叠加 PaddleOCR 单元格坐标
+    - tool: "mineru"
+      result_dir: "dotsocr_vllm_results_cell_bbox"
+      image_dir: "dotsocr_vllm_results/{{name}}"
+      description: "Dots OCR + PaddleOCR 坐标"
       enabled: true

+ 9 - 2
config/至远彩色_2023年报.yaml

@@ -1,6 +1,6 @@
 document:
-  name: "至远彩色_2023年报"
-  base_dir: "/Users/zhch158/workspace/data/流水分析/至远彩色_2023年报"
+  name: "2023年告母公司"
+  base_dir: "/Users/zhch158/workspace/data/流水分析/2023年告母公司"
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
@@ -44,4 +44,11 @@ document:
       result_dir: "dotsocr_vllm_results"
       image_dir: "dotsocr_vllm_results/{{name}}"
       description: "Dots OCR 图片合成结果"
+      enabled: true
+  
+    # DotsOCR (带 cell bbox):Dots OCR 结果叠加 PaddleOCR 单元格坐标
+    - tool: "mineru"
+      result_dir: "dotsocr_vllm_results_cell_bbox"
+      image_dir: "dotsocr_vllm_results/{{name}}"
+      description: "Dots OCR + PaddleOCR 坐标"
       enabled: true