3 Комити d5620857e3 ... f4ab823488

Аутор SHA1 Порука Датум
  zhch158_admin f4ab823488 feat(新增韩_中国银行图数据源): 添加韩_中国银行图.yaml至数据源列表 пре 2 недеља
  zhch158_admin 7f063346c2 feat(新增韩_中国银行图配置): 添加韩_中国银行图.yaml以支持银行流水OCR处理 пре 2 недеља
  zhch158_admin 1c3ee22a05 feat(新增银行交易流水): 添加韩_中国银行图.pdf以支持银行流水OCR处理 пре 2 недеља

+ 1 - 0
ocr_tools/ocr_batch/pdf_list.txt

@@ -15,4 +15,5 @@ B用户_扫描流水.pdf,bank_statement
 湛_平安银行图.pdf,bank_statement
 张_微信图.pdf,bank_statement
 朱_中信银行图.pdf,bank_statement
+韩_中国银行图.pdf,bank_statement
 

+ 1 - 0
ocr_validator/config/global.yaml

@@ -160,3 +160,4 @@ data_sources:
   - 张_微信图.yaml
   - 付_工商银行943825图.yaml
   - 许_民生银行图.yaml
+  - 韩_中国银行图.yaml

+ 76 - 0
ocr_validator/config/韩_中国银行图.yaml

@@ -0,0 +1,76 @@
+# 文档: 韩_中国银行图
+document:
+  name: "韩_中国银行图"
+  base_dir: "/Users/zhch158/workspace/data/流水分析/韩_中国银行图"  # NOTE(review): absolute path under one user's home directory — confirm this is intended
+  
+  # OCR tools used for this document and their result directories; each entry sets tool, result_dir, image_dir, description and enabled
+  ocr_results:
+    # YUSYS-OCR framework v4.0 (GLM-OCR), registered under tool "mineru"
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"  # presumably {{name}} is substituted with document.name by the consumer — TODO confirm
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
+    # YUSYS-OCR framework v3.0, registered under tool "mineru"
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v3"
+      image_dir: "bank_statement_yusys_v3/{{name}}"
+      description: "YUSYS-OCR框架 v3.0"
+      enabled: true
+
+    # YUSYS-OCR framework, local variant (GLM-OCR), registered under tool "mineru"
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_local"
+      image_dir: "bank_statement_yusys_local/{{name}}"
+      description: "YUSYS-OCR框架(local) GLM-OCR"
+      enabled: true
+
+    # MinerU (results in mineru_vllm_results)
+    - tool: "mineru"
+      result_dir: "mineru_vllm_results"
+      image_dir: "mineru_vllm_results/{{name}}"
+      description: "MinerU 图片合成结果"
+      enabled: true
+    
+    # MinerU with cell bboxes; image_dir points at the plain mineru_vllm_results directory
+    - tool: "mineru"
+      result_dir: "mineru_vllm_results_cell_bbox"
+      image_dir: "mineru_vllm_results/{{name}}"
+      description: "MinerU + PaddleOCR 坐标"
+      enabled: true
+   
+    # PaddleOCR-VL (results in paddleocr_vl_results)
+    - tool: "paddleocr_vl"
+      result_dir: "paddleocr_vl_results"
+      image_dir: "paddleocr_vl_results/{{name}}"
+      description: "PaddleOCR VLM 图片合成结果"
+      enabled: true
+    
+    # PaddleOCR-VL with cell bboxes; image_dir points at the plain paddleocr_vl_results directory
+    - tool: "mineru"  # same format as MinerU
+      result_dir: "paddleocr_vl_results_cell_bbox"
+      image_dir: "paddleocr_vl_results/{{name}}"
+      description: "PaddleOCR VLM + PaddleOCR 坐标"
+      enabled: true
+    
+    # DotsOCR (results in dotsocr_vllm_results)
+    - tool: "dots_ocr"
+      result_dir: "dotsocr_vllm_results"
+      image_dir: "dotsocr_vllm_results/{{name}}"
+      description: "Dots OCR 图片合成结果"
+      enabled: true
+  
+    # DotsOCR with cell bboxes; tool is "mineru" and image_dir points at the plain dotsocr_vllm_results directory
+    - tool: "mineru"
+      result_dir: "dotsocr_vllm_results_cell_bbox"
+      image_dir: "dotsocr_vllm_results/{{name}}"
+      description: "Dots OCR + PaddleOCR 坐标"
+      enabled: true
+
+    # PPStructV3 (results in ppstructurev3_client_results)
+    - tool: "ppstructv3"
+      result_dir: "ppstructurev3_client_results"
+      image_dir: "ppstructurev3_client_results/{{name}}"
+      description: "PPStructV3 图片合成结果"
+      enabled: true