Forráskód Böngészése

并添加多个银行文档配置

zhch158_admin 2 hete
szülő
commit
90c5adad77

+ 68 - 0
ocr_validator/config/乔_建设银行图.yaml

@@ -0,0 +1,68 @@
+document:  # settings for a single document; this file lives under ocr_validator/config/
+  name: "乔_建设银行图"  # same as this config's file name and the last segment of base_dir
+  base_dir: "/Users/zhch158/workspace/data/流水分析/乔_建设银行图"  # NOTE(review): absolute path inside one user's home directory; will not resolve on other machines
+  
+  # Key improvement: declare the OCR tools used for this document and their result directories
+  ocr_results:
+    # bank_statement_yusys_v3 (the only entry not commented out in this file)
+    - tool: "mineru"  # presumably selects the result format/parser; confirm against the loader
+      result_dir: "bank_statement_yusys_v3"  # presumably resolved relative to base_dir; TODO confirm
+      image_dir: "bank_statement_yusys_v3/{{name}}"  # {{name}} is presumably expanded to document.name by the loader; TODO confirm
+      description: "YUSYS-OCR框架 v3.0"
+      enabled: true
+
+    # # bank_statement_yusys_v2
+    # - tool: "mineru"
+    #   result_dir: "bank_statement_yusys_v2"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "YUSYS统一OCR框架"
+    #   enabled: true
+
+    #  # MinerU
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU 图片合成结果"
+    #   enabled: true
+    
+    # # MinerU (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results_cell_bbox"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU + PaddleOCR 坐标"
+    #   enabled: true
+   
+    # # PaddleOCR-VL
+    # - tool: "paddleocr_vl"
+    #   result_dir: "paddleocr_vl_results"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM 图片合成结果"
+    #   enabled: true
+    
+    # # PaddleOCR-VL (with cell bbox)
+    # - tool: "mineru"  # same format as MinerU
+    #   result_dir: "paddleocr_vl_results_cell_bbox"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM + PaddleOCR 坐标"
+    #   enabled: true
+    
+    # # DotsOCR
+    # - tool: "dots_ocr"
+    #   result_dir: "dotsocr_vllm_results"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR 图片合成结果"
+    #   enabled: true
+  
+    # # DotsOCR (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "dotsocr_vllm_results_cell_bbox"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR + PaddleOCR 坐标"
+    #   enabled: true
+
+    # # PPStructV3
+    # - tool: "ppstructv3"
+    #   result_dir: "ppstructurev3_client_results"
+    #   image_dir: "ppstructurev3_client_results/{{name}}"
+    #   description: "PPStructV3 图片合成结果"
+    #   enabled: true

+ 68 - 0
ocr_validator/config/付_工商银行943825图.yaml

@@ -0,0 +1,68 @@
+document:  # settings for a single document; this file lives under ocr_validator/config/
+  name: "付_工商银行943825图"  # same as this config's file name and the last segment of base_dir
+  base_dir: "/Users/zhch158/workspace/data/流水分析/付_工商银行943825图"  # NOTE(review): absolute path inside one user's home directory; will not resolve on other machines
+  
+  # Key improvement: declare the OCR tools used for this document and their result directories
+  ocr_results:
+    # bank_statement_yusys_v3 (first of the two enabled entries)
+    - tool: "mineru"  # presumably selects the result format/parser; confirm against the loader
+      result_dir: "bank_statement_yusys_v3"  # presumably resolved relative to base_dir; TODO confirm
+      image_dir: "bank_statement_yusys_v3/{{name}}"  # {{name}} is presumably expanded to document.name by the loader; TODO confirm
+      description: "YUSYS-OCR框架 v3.0"
+      enabled: true
+
+    # bank_statement_mineru_vl (second enabled entry; same tool value, different directories)
+    - tool: "mineru"
+      result_dir: "bank_statement_mineru_vl"
+      image_dir: "bank_statement_mineru_vl/{{name}}"
+      description: "YUSYS统一OCR框架mineru_vl"
+      enabled: true
+
+    #  # MinerU
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU 图片合成结果"
+    #   enabled: true
+    
+    # # MinerU (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results_cell_bbox"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU + PaddleOCR 坐标"
+    #   enabled: true
+   
+    # # PaddleOCR-VL
+    # - tool: "paddleocr_vl"
+    #   result_dir: "paddleocr_vl_results"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM 图片合成结果"
+    #   enabled: true
+    
+    # # PaddleOCR-VL (with cell bbox)
+    # - tool: "mineru"  # same format as MinerU
+    #   result_dir: "paddleocr_vl_results_cell_bbox"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM + PaddleOCR 坐标"
+    #   enabled: true
+    
+    # # DotsOCR
+    # - tool: "dots_ocr"
+    #   result_dir: "dotsocr_vllm_results"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR 图片合成结果"
+    #   enabled: true
+  
+    # # DotsOCR (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "dotsocr_vllm_results_cell_bbox"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR + PaddleOCR 坐标"
+    #   enabled: true
+
+    # # PPStructV3
+    # - tool: "ppstructv3"
+    #   result_dir: "ppstructurev3_client_results"
+    #   image_dir: "ppstructurev3_client_results/{{name}}"
+    #   description: "PPStructV3 图片合成结果"
+    #   enabled: true

+ 68 - 0
ocr_validator/config/张_微信图.yaml

@@ -0,0 +1,68 @@
+document:  # settings for a single document; this file lives under ocr_validator/config/
+  name: "张_微信图"  # same as this config's file name and the last segment of base_dir
+  base_dir: "/Users/zhch158/workspace/data/流水分析/张_微信图"  # NOTE(review): absolute path inside one user's home directory; will not resolve on other machines
+  
+  # Key improvement: declare the OCR tools used for this document and their result directories
+  ocr_results:
+    # bank_statement_yusys_v3 (first of the two enabled entries)
+    - tool: "mineru"  # presumably selects the result format/parser; confirm against the loader
+      result_dir: "bank_statement_yusys_v3"  # presumably resolved relative to base_dir; TODO confirm
+      image_dir: "bank_statement_yusys_v3/{{name}}"  # {{name}} is presumably expanded to document.name by the loader; TODO confirm
+      description: "YUSYS-OCR框架 v3.0"
+      enabled: true
+
+    # bank_statement_mineru_vl (second enabled entry; same tool value, different directories)
+    - tool: "mineru"
+      result_dir: "bank_statement_mineru_vl"
+      image_dir: "bank_statement_mineru_vl/{{name}}"
+      description: "YUSYS统一OCR框架mineru_vl"
+      enabled: true
+
+    #  # MinerU
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU 图片合成结果"
+    #   enabled: true
+    
+    # # MinerU (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results_cell_bbox"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU + PaddleOCR 坐标"
+    #   enabled: true
+   
+    # # PaddleOCR-VL
+    # - tool: "paddleocr_vl"
+    #   result_dir: "paddleocr_vl_results"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM 图片合成结果"
+    #   enabled: true
+    
+    # # PaddleOCR-VL (with cell bbox)
+    # - tool: "mineru"  # same format as MinerU
+    #   result_dir: "paddleocr_vl_results_cell_bbox"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM + PaddleOCR 坐标"
+    #   enabled: true
+    
+    # # DotsOCR
+    # - tool: "dots_ocr"
+    #   result_dir: "dotsocr_vllm_results"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR 图片合成结果"
+    #   enabled: true
+  
+    # # DotsOCR (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "dotsocr_vllm_results_cell_bbox"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR + PaddleOCR 坐标"
+    #   enabled: true
+
+    # # PPStructV3
+    # - tool: "ppstructv3"
+    #   result_dir: "ppstructurev3_client_results"
+    #   image_dir: "ppstructurev3_client_results/{{name}}"
+    #   description: "PPStructV3 图片合成结果"
+    #   enabled: true

+ 68 - 0
ocr_validator/config/朱_中信银行图.yaml

@@ -0,0 +1,68 @@
+document:  # settings for a single document; this file lives under ocr_validator/config/
+  name: "朱_中信银行图"  # same as this config's file name and the last segment of base_dir
+  base_dir: "/Users/zhch158/workspace/data/流水分析/朱_中信银行图"  # NOTE(review): absolute path inside one user's home directory; will not resolve on other machines
+  
+  # Key improvement: declare the OCR tools used for this document and their result directories
+  ocr_results:
+    # bank_statement_yusys_v3 (the only entry not commented out in this file)
+    - tool: "mineru"  # presumably selects the result format/parser; confirm against the loader
+      result_dir: "bank_statement_yusys_v3"  # presumably resolved relative to base_dir; TODO confirm
+      image_dir: "bank_statement_yusys_v3/{{name}}"  # {{name}} is presumably expanded to document.name by the loader; TODO confirm
+      description: "YUSYS-OCR框架 v3.0"
+      enabled: true
+
+    # # bank_statement_yusys_v2
+    # - tool: "mineru"
+    #   result_dir: "bank_statement_yusys_v2"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "YUSYS统一OCR框架"
+    #   enabled: true
+
+    #  # MinerU
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU 图片合成结果"
+    #   enabled: true
+    
+    # # MinerU (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results_cell_bbox"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU + PaddleOCR 坐标"
+    #   enabled: true
+   
+    # # PaddleOCR-VL
+    # - tool: "paddleocr_vl"
+    #   result_dir: "paddleocr_vl_results"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM 图片合成结果"
+    #   enabled: true
+    
+    # # PaddleOCR-VL (with cell bbox)
+    # - tool: "mineru"  # same format as MinerU
+    #   result_dir: "paddleocr_vl_results_cell_bbox"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM + PaddleOCR 坐标"
+    #   enabled: true
+    
+    # # DotsOCR
+    # - tool: "dots_ocr"
+    #   result_dir: "dotsocr_vllm_results"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR 图片合成结果"
+    #   enabled: true
+  
+    # # DotsOCR (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "dotsocr_vllm_results_cell_bbox"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR + PaddleOCR 坐标"
+    #   enabled: true
+
+    # # PPStructV3
+    # - tool: "ppstructv3"
+    #   result_dir: "ppstructurev3_client_results"
+    #   image_dir: "ppstructurev3_client_results/{{name}}"
+    #   description: "PPStructV3 图片合成结果"
+    #   enabled: true

+ 68 - 0
ocr_validator/config/湛_平安银行图.yaml

@@ -0,0 +1,68 @@
+document:  # settings for a single document; this file lives under ocr_validator/config/
+  name: "湛_平安银行图"  # same as this config's file name and the last segment of base_dir
+  base_dir: "/Users/zhch158/workspace/data/流水分析/湛_平安银行图"  # NOTE(review): absolute path inside one user's home directory; will not resolve on other machines
+  
+  # Key improvement: declare the OCR tools used for this document and their result directories
+  ocr_results:
+    # bank_statement_yusys_v3 (the only entry not commented out in this file)
+    - tool: "mineru"  # presumably selects the result format/parser; confirm against the loader
+      result_dir: "bank_statement_yusys_v3"  # presumably resolved relative to base_dir; TODO confirm
+      image_dir: "bank_statement_yusys_v3/{{name}}"  # {{name}} is presumably expanded to document.name by the loader; TODO confirm
+      description: "YUSYS-OCR框架 v3.0"
+      enabled: true
+
+    # # bank_statement_yusys_v2
+    # - tool: "mineru"
+    #   result_dir: "bank_statement_yusys_v2"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "YUSYS统一OCR框架"
+    #   enabled: true
+
+    #  # MinerU
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU 图片合成结果"
+    #   enabled: true
+    
+    # # MinerU (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results_cell_bbox"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU + PaddleOCR 坐标"
+    #   enabled: true
+   
+    # # PaddleOCR-VL
+    # - tool: "paddleocr_vl"
+    #   result_dir: "paddleocr_vl_results"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM 图片合成结果"
+    #   enabled: true
+    
+    # # PaddleOCR-VL (with cell bbox)
+    # - tool: "mineru"  # same format as MinerU
+    #   result_dir: "paddleocr_vl_results_cell_bbox"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM + PaddleOCR 坐标"
+    #   enabled: true
+    
+    # # DotsOCR
+    # - tool: "dots_ocr"
+    #   result_dir: "dotsocr_vllm_results"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR 图片合成结果"
+    #   enabled: true
+  
+    # # DotsOCR (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "dotsocr_vllm_results_cell_bbox"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR + PaddleOCR 坐标"
+    #   enabled: true
+
+    # # PPStructV3
+    # - tool: "ppstructv3"
+    #   result_dir: "ppstructurev3_client_results"
+    #   image_dir: "ppstructurev3_client_results/{{name}}"
+    #   description: "PPStructV3 图片合成结果"
+    #   enabled: true

+ 68 - 0
ocr_validator/config/许_民生银行图.yaml

@@ -0,0 +1,68 @@
+document:  # settings for a single document; this file lives under ocr_validator/config/
+  name: "许_民生银行图"  # same as this config's file name and the last segment of base_dir
+  base_dir: "/Users/zhch158/workspace/data/流水分析/许_民生银行图"  # NOTE(review): absolute path inside one user's home directory; will not resolve on other machines
+  
+  # Key improvement: declare the OCR tools used for this document and their result directories
+  ocr_results:
+    # bank_statement_yusys_v3 (first of the two enabled entries)
+    - tool: "mineru"  # presumably selects the result format/parser; confirm against the loader
+      result_dir: "bank_statement_yusys_v3"  # presumably resolved relative to base_dir; TODO confirm
+      image_dir: "bank_statement_yusys_v3/{{name}}"  # {{name}} is presumably expanded to document.name by the loader; TODO confirm
+      description: "YUSYS-OCR框架 v3.0"
+      enabled: true
+
+    # bank_statement_mineru_vl (second enabled entry; same tool value, different directories)
+    - tool: "mineru"
+      result_dir: "bank_statement_mineru_vl"
+      image_dir: "bank_statement_mineru_vl/{{name}}"
+      description: "YUSYS统一OCR框架mineru_vl"
+      enabled: true
+
+    #  # MinerU
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU 图片合成结果"
+    #   enabled: true
+    
+    # # MinerU (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "mineru_vllm_results_cell_bbox"
+    #   image_dir: "mineru_vllm_results/{{name}}"
+    #   description: "MinerU + PaddleOCR 坐标"
+    #   enabled: true
+   
+    # # PaddleOCR-VL
+    # - tool: "paddleocr_vl"
+    #   result_dir: "paddleocr_vl_results"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM 图片合成结果"
+    #   enabled: true
+    
+    # # PaddleOCR-VL (with cell bbox)
+    # - tool: "mineru"  # same format as MinerU
+    #   result_dir: "paddleocr_vl_results_cell_bbox"
+    #   image_dir: "paddleocr_vl_results/{{name}}"
+    #   description: "PaddleOCR VLM + PaddleOCR 坐标"
+    #   enabled: true
+    
+    # # DotsOCR
+    # - tool: "dots_ocr"
+    #   result_dir: "dotsocr_vllm_results"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR 图片合成结果"
+    #   enabled: true
+  
+    # # DotsOCR (with cell bbox)
+    # - tool: "mineru"
+    #   result_dir: "dotsocr_vllm_results_cell_bbox"
+    #   image_dir: "dotsocr_vllm_results/{{name}}"
+    #   description: "Dots OCR + PaddleOCR 坐标"
+    #   enabled: true
+
+    # # PPStructV3
+    # - tool: "ppstructv3"
+    #   result_dir: "ppstructurev3_client_results"
+    #   image_dir: "ppstructurev3_client_results/{{name}}"
+    #   description: "PPStructV3 图片合成结果"
+    #   enabled: true