|
|
@@ -0,0 +1,178 @@
|
|
|
+# Bank transaction statement scene configuration - version V4
|
|
|
+# Pipeline V3 logic: wired (ruled) tables use MinerU UNet; wireless (borderless) tables and seals use the GLM-OCR VLM
|
|
|
+scene_name: "bank_statement_yusys_v4" # identifier of this scene configuration
|
|
|
+
|
|
|
+description: "银行流水V4: PP-DocLayoutV3 layout + PaddleOCR + MinerU UNet(有线表格)+ GLM-OCR VLM(无线表格/seal)" # human-readable summary of the pipeline; runtime string, intentionally left untranslated
|
|
|
+
|
|
|
+input: # input document settings
|
|
|
+ supported_formats: [".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff"] # file extensions listed as supported
|
|
|
+ dpi: 200 # NOTE(review): presumably the rasterization DPI for PDF pages - confirm against the loader
|
|
|
+
|
|
|
+preprocessor: # preprocessing stage settings
|
|
|
+ module: "mineru" # backend module name selected for this stage
|
|
|
+ orientation_classifier: # orientation classifier sub-section (NOTE(review): indentation is collapsed in this view - confirm nesting against the original file)
|
|
|
+ enabled: true # orientation classification is switched on
|
|
|
+ model_name: "paddle_orientation_classification"
|
|
|
+ model_dir: null # use the default path
|
|
|
+ unwarping: # unwarping sub-section
|
|
|
+ enabled: false # unwarping is switched off
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Layout detection configuration - uses PP-DocLayoutV3
|
|
|
+# ============================================================
|
|
|
+layout_detection:
|
|
|
+ module: "paddle" # backend module name selected for this stage
|
|
|
+ model_name: "PP-DocLayoutV3"
|
|
|
+ model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors" # NOTE(review): looks like a model hub repository id rather than a local directory - confirm how the loader resolves it
|
|
|
+ device: "cpu"
|
|
|
+ conf: 0.3 # NOTE(review): presumably the detection confidence threshold - confirm
|
|
|
+ num_threads: 4
|
|
|
+ batch_size: 1
|
|
|
+
|
|
|
+ # Post-processing configuration
|
|
|
+ post_process:
|
|
|
+ # Convert large-area text blocks into tables (post-processing)
|
|
|
+ convert_large_text_to_table: true # whether this conversion is enabled
|
|
|
+ min_text_area_ratio: 0.25 # minimum area ratio (25%)
|
|
|
+ min_text_width_ratio: 0.4 # minimum width ratio (40%)
|
|
|
+ min_text_height_ratio: 0.3 # minimum height ratio (30%)
|
|
|
+
|
|
|
+ # Debug visualization configuration
|
|
|
+ debug_options:
|
|
|
+ enabled: true # whether debug visualization output is turned on
|
|
|
+ output_dir: null # debug output directory; null means nothing is written. NOTE(review): combined with enabled: true this appears to produce no output - confirm intent
|
|
|
+ prefix: "" # filename prefix for saved files (e.g. set to the page number)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# OCR recognition configuration
|
|
|
+# ============================================================
|
|
|
+ocr_recognition:
|
|
|
+ module: "mineru" # backend module name selected for this stage
|
|
|
+ language: "ch" # NOTE(review): presumably selects the Chinese recognition model - confirm
|
|
|
+ det_threshold: 0.5 # NOTE(review): presumably the text-detection score threshold - confirm
|
|
|
+ unclip_ratio: 1.5 # NOTE(review): presumably the expansion ratio applied to detected text boxes - confirm
|
|
|
+ enable_merge_det_boxes: false # merging of detection boxes is switched off
|
|
|
+ batch_size: 8
|
|
|
+ device: "cpu"
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Table classification configuration (automatically distinguishes wired vs. wireless tables)
|
|
|
+# ============================================================
|
|
|
+table_classification:
|
|
|
+ enabled: true # enable automatic table classification
|
|
|
+ module: "paddle" # classification model: paddle (MinerU PaddleTableClsModel)
|
|
|
+ confidence_threshold: 0.5 # classification confidence threshold
|
|
|
+ batch_size: 16 # batch size
|
|
|
+
|
|
|
+ # Debug visualization configuration
|
|
|
+ debug_options:
|
|
|
+ enabled: true # whether debug visualization output is turned on
|
|
|
+ output_dir: null # debug output directory; null means nothing is written. NOTE(review): combined with enabled: true this appears to produce no output - confirm intent
|
|
|
+ save_table_lines: true # save the table-line visualization (UNet horizontal/vertical line overlay)
|
|
|
+ image_format: "png" # visualization image format: png/jpg
|
|
|
+ prefix: "" # filename prefix for saved files (e.g. set to the page number / table index)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Dedicated configuration for wired-table recognition (MinerU UNet)
|
|
|
+# ============================================================
|
|
|
+table_recognition_wired:
|
|
|
+ use_wired_unet: true
|
|
|
+ upscale_ratio: 3.333 # NOTE(review): presumably the image upscaling factor applied before line detection - confirm
|
|
|
+ need_ocr: true
|
|
|
+ row_threshold: 10 # NOTE(review): unit not stated here (pixels?) - confirm
|
|
|
+ col_threshold: 15 # NOTE(review): unit not stated here (pixels?) - confirm
|
|
|
+ ocr_conf_threshold: 0.9 # per-cell OCR confidence threshold
|
|
|
+ cell_crop_margin: 2
|
|
|
+ use_custom_postprocess: true # whether to use the custom post-processing (enabled by default)
|
|
|
+
|
|
|
+ # Whether to enable deskew (skew correction)
|
|
|
+ enable_deskew: true
|
|
|
+
|
|
|
+ # New: enable multi-source cell fusion
|
|
|
+ use_cell_fusion: true
|
|
|
+
|
|
|
+ # Fusion engine configuration
|
|
|
+ cell_fusion:
|
|
|
+ # RT-DETR model path (required)
|
|
|
+ rtdetr_model_path: "/Users/zhch158/models/pytorch_models/Table/RT-DETR-L_wired_table_cell_det.onnx" # NOTE(review): absolute path under one user's home directory - not portable to other machines; confirm how deployments override it
|
|
|
+
|
|
|
+ # Fusion weights
|
|
|
+ unet_weight: 0.6 # UNet weight (stronger on structure)
|
|
|
+ rtdetr_weight: 0.4 # RT-DETR weight (more robust)
|
|
|
+
|
|
|
+ # Threshold configuration
|
|
|
+ iou_merge_threshold: 0.7 # high-IoU merge threshold (weighted average when IoU > 0.7)
|
|
|
+ iou_nms_threshold: 0.5 # NMS de-duplication threshold
|
|
|
+ rtdetr_conf_threshold: 0.5 # RT-DETR confidence threshold
|
|
|
+
|
|
|
+ # Feature switches
|
|
|
+ enable_ocr_compensation: true # enable OCR edge compensation
|
|
|
+
|
|
|
+ # Debug visualization configuration
|
|
|
+ debug_options:
|
|
|
+ enabled: true # whether debug visualization output is turned on
|
|
|
+ output_dir: null # debug output directory; null means nothing is written. NOTE(review): combined with enabled: true this appears to produce no output - confirm intent
|
|
|
+ save_table_lines: true # save the table-line visualization (UNet horizontal/vertical line overlay)
|
|
|
+ save_connected_components: true # save the cell image extracted from connected components
|
|
|
+ save_grid_structure: true # save the logical grid structure (row/col/rowspan/colspan)
|
|
|
+ save_text_overlay: true # save the text-fill overlay image
|
|
|
+ image_format: "png" # visualization image format: png/jpg
|
|
|
+ prefix: "" # filename prefix for saved files (e.g. set to the page number / table index)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# VL recognition configuration - uses GLM-OCR (wireless tables + seal recognition)
|
|
|
+# ============================================================
|
|
|
+vl_recognition:
|
|
|
+ module: "glmocr" # backend module name selected for this stage
|
|
|
+ api_url: "http://10.192.72.11:20036/v1/chat/completions" # NOTE(review): hard-coded private-network address over plain http - environment-specific; confirm how other environments override it
|
|
|
+ api_key: null # optional; fill in if required
|
|
|
+ model: "glm-ocr"
|
|
|
+ max_image_size: 3500 # maximum image size recommended for GLM-OCR
|
|
|
+ resize_mode: 'max' # resize mode: 'max' keeps the aspect ratio, 'fixed' uses a fixed size
|
|
|
+ verify_ssl: false # NOTE(review): TLS certificate verification is disabled; api_url above is http so this is currently moot, but confirm before switching to https
|
|
|
+
|
|
|
+ # Task prompt mapping - a different prompt is used for each task
|
|
|
+ task_prompt_mapping:
|
|
|
+ text: "Text Recognition:"
|
|
|
+ table: "Table Recognition:"
|
|
|
+ formula: "Formula Recognition:"
|
|
|
+ seal: "Seal Recognition:" # dedicated prompt for seal recognition
|
|
|
+
|
|
|
+ # Model parameters
|
|
|
+ model_params:
|
|
|
+ connection_pool_size: 128 # HTTP connection pool size (should be >= max_workers)
|
|
|
+ http_timeout: 300 # HTTP request timeout (seconds)
|
|
|
+ connect_timeout: 30 # connection timeout (seconds)
|
|
|
+ retry_max_attempts: 2 # maximum number of retries
|
|
|
+ retry_backoff_base_seconds: 0.5
|
|
|
+ retry_backoff_max_seconds: 8.0
|
|
|
+ retry_jitter_ratio: 0.2
|
|
|
+ retry_status_codes: [429, 500, 502, 503, 504] # HTTP status codes listed for retry
|
|
|
+ max_tokens: 4096
|
|
|
+ temperature: 0.8 # NOTE(review): fairly high sampling temperature for an OCR task; may make transcriptions non-deterministic - confirm this is intended
|
|
|
+ top_p: 0.9
|
|
|
+ top_k: 50
|
|
|
+ repetition_penalty: 1.1
|
|
|
+
|
|
|
+ # Scene-specific configuration
|
|
|
+ table_recognition:
|
|
|
+ return_cells_coordinate: false # GLM-OCR does not directly return cell coordinates
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Output configuration
|
|
|
+# ============================================================
|
|
|
+output:
|
|
|
+ create_subdir: false
|
|
|
+ save_pdf_images: true
|
|
|
+ save_json: true
|
|
|
+ save_page_json: true
|
|
|
+ save_markdown: true
|
|
|
+ save_page_markdown: true
|
|
|
+ save_html: true
|
|
|
+ save_layout_image: true
|
|
|
+ save_ocr_image: true
|
|
|
+ draw_type_label: true
|
|
|
+ draw_bbox_number: true
|
|
|
+ save_enhanced_json: true
|
|
|
+ normalize_numbers: true # NOTE(review): presumably normalizes numeric text in the results - confirm exact behaviour
|
|
|
+ debug_mode: true # NOTE(review): debug mode is on in this scene config - confirm whether this is intended outside development
|