Browse Source

feat: 添加布局后处理和表格分类配置,优化表格识别逻辑

zhch158_admin 1 day ago
parent
commit
a4b4be0968
1 changed file with 42 additions and 0 deletions
  1. 42 0
      ocr_tools/universal_doc_parser/config/bank_statement_wired_unet.yaml

+ 42 - 0
ocr_tools/universal_doc_parser/config/bank_statement_wired_unet.yaml

@@ -20,6 +20,16 @@ layout_detection:
   conf: 0.3
   num_threads: 4
 
+# ============================================================
+# Layout后处理配置
+# ============================================================
+layout:
+  # 将大面积文本块转换为表格(后处理)
+  convert_large_text_to_table: true  # 是否启用
+  min_text_area_ratio: 0.25         # 最小面积占比(25%)
+  min_text_width_ratio: 0.4         # 最小宽度占比(40%)
+  min_text_height_ratio: 0.3        # 最小高度占比(30%)
+
 ocr_recognition:
   module: "mineru"
   language: "ch"
@@ -29,6 +39,14 @@ ocr_recognition:
   batch_size: 8
   device: "cpu"
 
+# 表格分类配置(自动区分有线/无线表格)
+# 启用后将自动调用分类模型,根据结果选择合适的表格识别器
+table_classification:
+  enabled: true               # 是否启用自动表格分类(当前已启用;设为 false 时使用手动配置)
+  module: "paddle"            # 分类模型:paddle(MinerU PaddleTableClsModel)
+  confidence_threshold: 0.5   # 分类置信度阈值
+  batch_size: 16              # 批处理大小
+
 # 有线表格识别专用配置
 table_recognition_wired:
   use_wired_unet: true
@@ -55,6 +73,30 @@ table_recognition_wired:
     image_format: "png"          # 可视化图片格式:png/jpg
     prefix: ""                  # 保存文件名前缀(如设置为页码/表格序号)
 
+# VLM 表格识别配置(当分类为 'wireless' 时使用)
+vl_recognition:
+  # 可选: "mineru" (MinerU VLM) 或 "paddle" (PaddleOCR-VL)
+  module: "paddle"
+  
+  # 后端配置
+  backend: "http-client"  # 可选: "http-client", "vllm-engine", "transformers"
+  server_url: "http://10.192.72.11:20016"  # PaddleOCR-VL 服务地址
+  
+  # 图片尺寸限制(避免序列长度超限)
+  max_image_size: 4096
+  resize_mode: 'max'  # 'max' 保持宽高比, 'fixed' 固定尺寸
+  
+  device: "cpu"
+  batch_size: 1
+  
+  model_params:
+    max_concurrency: 10
+    http_timeout: 600
+  
+  # 表格识别特定配置
+  table_recognition:
+    return_cells_coordinate: true  # 返回单元格坐标
+
 output:
   create_subdir: false
   save_pdf_images: true