Procházet zdrojové kódy

feat: 新增银行交易流水场景配置,支持多种输入格式和输出选项

zhch158_admin před 2 týdny
rodič
revize
6f9eb6afc4

+ 6 - 3
zhch/universal_doc_parser/config/bank_statement_enhanced.yaml → zhch/universal_doc_parser/config/bank_statement_mineru_vl.yaml

@@ -16,9 +16,12 @@ preprocessor:
     enabled: false
 
 layout_detection:
-  module: "paddle"
-  model_name: "RT-DETR-H_layout_17cls"
-  model_dir: /Users/zhch158/workspace/repository.git/PaddleX/zhch/unified_pytorch_models/Layout/RT-DETR-H_layout_17cls.onnx  # 使用默认路径,或指定: "./Layout/RT-DETR-H_layout_17cls.onnx"
+  # module: "paddle"
+  # model_name: "RT-DETR-H_layout_17cls"
+  # model_dir: /Users/zhch158/workspace/repository.git/PaddleX/zhch/unified_pytorch_models/Layout/RT-DETR-H_layout_17cls.onnx  # 使用默认路径,或指定: "./Layout/RT-DETR-H_layout_17cls.onnx"
+  module: "mineru"
+  model_name: "layout"
+  model_dir: null  # 使用默认路径
   device: "cpu"
   # batch_size: 4
   # conf: 0.1

+ 86 - 0
zhch/universal_doc_parser/config/bank_statement_paddle_vl.yaml

@@ -0,0 +1,86 @@
+# Bank transaction statement scene configuration (enhanced edition, PaddleOCR-VL recognition variant)
+scene_name: "bank_statement"  # identifier of the scene this file configures
+description: "银行交易流水、对账单等场景"  # in English: bank transaction records, account statements and similar scenes
+
+input:
+  supported_formats: [".pdf", ".png", ".jpg"]  # accepted input file extensions
+  dpi: 200  # NOTE(review): presumably the rendering resolution for PDF pages — confirm with the loader
+
+preprocessor:
+  module: "mineru"
+  orientation_classifier:
+    enabled: true
+    model_name: "paddle_orientation_classification"  # NOTE(review): a "paddle_*" model under module "mineru" — confirm the pairing is intended
+    model_dir: null  # use the default path
+  unwarping:
+    enabled: false  # unwarping step is switched off for this scene
+
+layout_detection:
+  # module: "paddle"
+  # model_name: "RT-DETR-H_layout_17cls"
+  # model_dir: /Users/zhch158/workspace/repository.git/PaddleX/zhch/unified_pytorch_models/Layout/RT-DETR-H_layout_17cls.onnx  # use the default path, or specify: "./Layout/RT-DETR-H_layout_17cls.onnx"
+  module: "mineru"  # active layout backend; the Paddle RT-DETR settings above are kept commented out
+  model_name: "layout"
+  model_dir: null  # use the default path
+  device: "cpu"
+  # batch_size: 4
+  # conf: 0.1
+  # iou: 0.45
+
+vl_recognition:
+  module: "paddle"
+  backend: "http-client"
+  model_name: "PaddleOCR-VL-0.9B"
+  server_url: "http://10.192.72.11:8110"  # NOTE(review): hard-coded private-network address — confirm it is reachable in every deployment
+  max_image_size: 4096  # maximum image size
+  resize_mode: 'max'    # resize mode ('max' keeps the aspect ratio, 'fixed' uses a fixed size)
+  device: "cpu"
+  batch_size: 1
+  model_params:
+    max_concurrency: 10
+    http_timeout: 600  # NOTE(review): unit is not stated here, presumably seconds — confirm
+  
+  # scene-specific settings
+  table_recognition:
+    return_cells_coordinate: true
+    bank_statement_mode: true
+    
+ocr_recognition:
+  module: "mineru" 
+  language: "ch"  # NOTE(review): presumably Chinese — confirm the language code the OCR module expects
+  det_threshold: 0.3  # NOTE(review): presumably the text-detection score threshold — confirm
+  unclip_ratio: 1.8  # NOTE(review): presumably the expansion ratio applied to detected text boxes — confirm
+  batch_size: 8
+  device: "cpu"
+
+output:  # every output and drawing option below is switched on
+  save_json: true
+  save_markdown: true
+  save_html: true
+  save_layout_image: true
+  save_ocr_image: true
+  draw_type_label: true
+  draw_bbox_number: true
+  
+# Scene-specific configuration
+scene_config:
+  bank_statement:
+    table_structure: "single_column_list"
+    merged_cells: false
+    expected_columns: ["日期", "摘要", "收入", "支出", "余额"]  # in English: date, summary, income, expenditure, balance
+    amount_validation: true
+    date_validation: true
+    
+  processing_rules:
+    # table processing rules
+    table_rules:  # each entry is a one-key mapping
+      - detect_table_type: ["wired", "wireless"]  # NOTE(review): presumably bordered vs. borderless tables — confirm
+      - extract_header_automatically: true
+      - validate_amount_format: true
+      - merge_continuation_rows: true
+      
+    # OCR post-processing rules
+    ocr_rules:  # each entry is a one-key mapping
+      - filter_low_confidence: 0.7  # NOTE(review): presumably drops results scoring below 0.7 — confirm
+      - merge_adjacent_text: true
+      - number_format_normalization: true