---
# Bank transaction statement scene configuration, v2.
# Supports the full pipeline: PDF classification -> orientation detection ->
# layout detection -> parallel OCR/VLM processing -> coordinate matching.
#
# NOTE(review): this file was restored from a copy whose line breaks and
# indentation had been collapsed. Keys and values are unchanged; the nesting
# depth of a few keys is inferred and marked with NOTE(review) below --
# confirm against the code that loads this file.

scene_name: "bank_statement"
description: "银行交易流水、对账单等场景 - 增强版"

# ============================================================
# Input
# ============================================================
input:
  supported_formats: [".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff"]
  dpi: 200  # DPI used when converting PDF pages to images

# ============================================================
# Preprocessing (orientation detection)
# ============================================================
preprocessor:
  module: "mineru"
  orientation_classifier:
    # Turned on automatically for scanned documents and skipped
    # automatically for digital PDFs.
    enabled: true
    model_name: "paddle_orientation_classification"
    model_dir: null  # use the default path
  unwarping:
    enabled: false  # image rectification (optional)

# ============================================================
# Layout detection
# ============================================================
layout_detection:
  module: "mineru"
  model_name: "layout"
  # Use the default path; doclayout_yolo_docstructbench_imgsz1280_2501.pt
  # is downloaded automatically.
  model_dir: null
  device: "cpu"  # options: "cpu", "cuda", "mps"
  # NOTE(review): the three commented-out keys below are assumed to be
  # layout_detection settings -- confirm before enabling them.
  # batch_size: 4
  # conf: 0.25
  # iou: 0.45

# ============================================================
# VL recognition (tables, formulas)
# ============================================================
vl_recognition:
  # Options: "mineru" (MinerU VLM) or "paddle" (PaddleOCR-VL)
  module: "mineru"

  # Backend
  backend: "http-client"  # options: "http-client", "vllm-engine", "transformers"
  # MinerU VLM service address.
  # NOTE(review): hard-coded host and port -- presumably needs overriding
  # per environment; confirm.
  server_url: "http://10.192.72.11:8121"

  # Image size limit (avoids exceeding the sequence length limit)
  max_image_size: 4096
  resize_mode: 'max'  # 'max' keeps the aspect ratio, 'fixed' uses a fixed size

  device: "cpu"
  batch_size: 1
  # NOTE(review): both keys are assumed to belong to model_params -- confirm.
  model_params:
    max_concurrency: 10
    http_timeout: 600

  # Table-recognition-specific settings
  # NOTE(review): assumed to be nested under vl_recognition -- confirm.
  table_recognition:
    return_cells_coordinate: true  # return cell coordinates
    bank_statement_mode: true  # bank-statement optimization mode

# ============================================================
# OCR recognition (text detection + recognition)
# ============================================================
ocr_recognition:
  module: "mineru"
  language: "ch"  # language: ch, ch_lite, en, japan, etc.
  det_threshold: 0.3  # detection threshold
  unclip_ratio: 1.8  # text box expansion ratio
  batch_size: 8
  device: "cpu"

# ============================================================
# Output
# ============================================================
output:
  # Basic output
  save_json: true  # save middle.json (MinerU standard format)
  save_markdown: true  # save the Markdown file
  save_html: true  # save table HTML files

  # Debug output (enabled with the --debug command-line flag)
  save_layout_image: false  # save the layout visualization image
  save_ocr_image: false  # save the OCR visualization image
  draw_type_label: true  # draw type labels on visualization images
  draw_bbox_number: true  # draw index numbers on visualization images

  # Enhanced output
  save_enhanced_json: true  # save the enhanced JSON (includes cell coordinates)
  coordinate_precision: 2  # coordinate precision (decimal places)

# ============================================================
# Scene-specific configuration
# ============================================================
scene_config:
  bank_statement:
    # Table structure characteristics
    table_structure: "single_column_list"  # single-column list form
    merged_cells: false  # no merged cells

    # Expected column names (used for validation)
    expected_columns: ["日期", "摘要", "收入", "支出", "余额"]

    # Validation rules
    amount_validation: true  # amount format validation
    date_validation: true  # date format validation
    balance_validation: true  # balance consistency validation

  # NOTE(review): processing_rules is assumed to be a sibling of
  # bank_statement under scene_config; it may instead belong inside
  # bank_statement or at the top level -- confirm.
  processing_rules:
    # Table processing rules
    table_rules:
      - detect_table_type: ["wired", "wireless"]  # detect wired/wireless tables
      - extract_header_automatically: true  # extract the header automatically
      - validate_amount_format: true  # validate the amount format
      - merge_continuation_rows: true  # merge continuation rows

    # OCR post-processing rules
    ocr_rules:
      - filter_low_confidence: 0.7  # filter out low-confidence results
      - merge_adjacent_text: true  # merge adjacent text
      - number_format_normalization: true  # normalize number formats

# ============================================================
# Cross-page table merging
# ============================================================
cross_page_merge:
  enabled: true

  # Conditions for deciding whether a table continues across pages
  conditions:
    - table_at_page_bottom: true  # table is at the bottom of the page
    - table_at_page_top: true  # table on the next page is at the top
    - similar_column_count: true  # column counts are similar
    # Header match (cross-page tables usually have no repeated header)
    - header_match: false