|
|
@@ -0,0 +1,113 @@
|
|
|
+# Bank transaction statement scene configuration - GLM-OCR version
|
|
|
+scene_name: "bank_statement_glm"  # presumably the key used to select this scene config - TODO confirm against the loader
|
|
|
+description: "银行交易流水、对账单等场景(使用 GLM-OCR 进行 VL 识别)"
|
|
|
+
|
|
|
+input:
|
|
|
+ supported_formats: [".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff"]
|
|
|
+ dpi: 200 # DPI used when converting PDF pages to images
|
|
|
+
|
|
|
+preprocessor:
|
|
|
+ module: "mineru"
|
|
|
+ orientation_classifier:
|
|
|
+ enabled: true
|
|
|
+ model_name: "paddle_orientation_classification"
|
|
|
+ model_dir: null # use the default path
|
|
|
+ unwarping:
|
|
|
+ enabled: false
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Layout detection configuration - using PP-DocLayoutV3
|
|
|
+# ============================================================
|
|
|
+layout_detection:
|
|
|
+ module: "paddle"
|
|
|
+ model_name: "PP-DocLayoutV3"
|
|
|
+ model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors"
|
|
|
+ device: "cpu"
|
|
|
+ conf: 0.3
|
|
|
+ num_threads: 4
|
|
|
+ batch_size: 1
|
|
|
+
|
|
|
+ # Post-processing configuration
|
|
|
+ post_process:
|
|
|
+ # Convert large-area text blocks into tables (post-processing)
|
|
|
+ convert_large_text_to_table: true # whether this conversion is enabled
|
|
|
+ min_text_area_ratio: 0.25 # minimum area ratio (25%)
|
|
|
+ min_text_width_ratio: 0.4 # minimum width ratio (40%)
|
|
|
+ min_text_height_ratio: 0.3 # minimum height ratio (30%)
|
|
|
+
|
|
|
+ # Debug visualization configuration
|
|
|
+ debug_options:
|
|
|
+ enabled: true # whether to enable debug visualization output
|
|
|
+ output_dir: null # debug output directory; null means nothing is written. NOTE(review): enabled is true above but no directory is set - confirm intended
|
|
|
+ prefix: "" # filename prefix for saved files (e.g. set to the page number)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# VL recognition configuration - using GLM-OCR
|
|
|
+# ============================================================
|
|
|
+vl_recognition:
|
|
|
+ module: "glmocr"
|
|
|
+ api_url: "http://10.192.72.11:20036/v1/chat/completions"  # NOTE(review): hard-coded private-network endpoint - confirm it is overridden per environment
|
|
|
+ api_key: null # optional; fill in if required
|
|
|
+ model: "glm-ocr"
|
|
|
+ max_image_size: 3500 # maximum image size recommended for GLM-OCR
|
|
|
+ resize_mode: 'max' # resize mode: 'max' keeps the aspect ratio, 'fixed' uses a fixed size
|
|
|
+ verify_ssl: false  # NOTE(review): certificate verification is off and api_url above is plain http - confirm intended
|
|
|
+
|
|
|
+ # Task prompt mapping - a different prompt is used for each task type
|
|
|
+ task_prompt_mapping:
|
|
|
+ text: "Text Recognition:"
|
|
|
+ table: "Table Recognition:"
|
|
|
+ formula: "Formula Recognition:"
|
|
|
+ seal: "Seal Recognition:" # dedicated prompt for seal (stamp) recognition
|
|
|
+
|
|
|
+ # Model parameters
|
|
|
+ model_params:
|
|
|
+ connection_pool_size: 128 # HTTP connection pool size (should be >= max_workers)
|
|
|
+ http_timeout: 300 # HTTP request timeout (seconds)
|
|
|
+ connect_timeout: 30 # connection timeout (seconds)
|
|
|
+ retry_max_attempts: 2 # maximum number of retry attempts
|
|
|
+ retry_backoff_base_seconds: 0.5
|
|
|
+ retry_backoff_max_seconds: 8.0
|
|
|
+ retry_jitter_ratio: 0.2
|
|
|
+ retry_status_codes: [429, 500, 502, 503, 504]
|
|
|
+ max_tokens: 4096
|
|
|
+ temperature: 0.8  # NOTE(review): looks high if deterministic OCR output is wanted - confirm against the model's recommended settings
|
|
|
+ top_p: 0.9
|
|
|
+ top_k: 50
|
|
|
+ repetition_penalty: 1.1
|
|
|
+
|
|
|
+ # Scene-specific configuration
|
|
|
+ table_recognition:
|
|
|
+ return_cells_coordinate: false # GLM-OCR does not directly return cell coordinates
|
|
|
+ bank_statement_mode: true
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# OCR recognition configuration
|
|
|
+# ============================================================
|
|
|
+ocr_recognition:
|
|
|
+ module: "mineru"
|
|
|
+ language: "ch"  # presumably Chinese - TODO confirm against the OCR module's language codes
|
|
|
+ det_threshold: 0.6
|
|
|
+ unclip_ratio: 1.5
|
|
|
+ enable_merge_det_boxes: false
|
|
|
+ batch_size: 8
|
|
|
+ device: "cpu"
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# Output configuration
|
|
|
+# ============================================================
|
|
|
+output:
|
|
|
+ create_subdir: false
|
|
|
+ save_pdf_images: true
|
|
|
+ save_json: true
|
|
|
+ save_page_json: true
|
|
|
+ save_markdown: true
|
|
|
+ save_page_markdown: true
|
|
|
+ save_html: true
|
|
|
+ save_layout_image: true
|
|
|
+ save_ocr_image: true
|
|
|
+ draw_type_label: true
|
|
|
+ draw_bbox_number: true
|
|
|
+ save_enhanced_json: true
|
|
|
+ normalize_numbers: true
|
|
|
+ debug_mode: true  # NOTE(review): debug mode is on in this scene config - confirm this is intended outside development
|