1 bulan lalu · e72a51154f
--- a/ocr_tools/universal_doc_parser/config/bank_statement_glm_vl.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_glm_vl.yaml
@@ -79,6 +79,22 @@ layout_detection:
 
															     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
														
 
															     min_text_height_ratio: 0.3        # 最小高度占比（30%）
														
 
															+  # 印章补充检测：使用 PP-DocLayoutV3 补充 docling 无法识别的印章区域
														
 
															+  seal_supplement:
														
 
															+    enabled: true                # 启用 seal 补充检测
														
 
															+    replace_existing: false      # false=增量合并; true=完全替换主结果中已有 seal
														
 
															+    replace_overlapping_image: true   # seal 与 image_body/image 等框的 IoU 较高（阈值见 replace_iou_threshold）时，将其替换为 seal（而非丢弃）
														
 
															+    replace_iou_threshold: 0.7        # 触发替换的最小 IoU
														
 
															+    duplicate_iou_threshold: 0.3      # 未替换时，与任意框 IoU 超此值视为重复 seal
														
 
															+    # seal_detector 使用的模型配置，默认复用 paddle_ppdoclayoutv3 的配置
														
 
															+    model_config:
														
 
															+      module: "paddle"
														
 
															+      model_name: "PP-DocLayoutV3"
														
 
															+      model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors"
														
 
															+      device: "cpu"
														
 
															+      conf: 0.3
														
 
															+      num_threads: 4
														
 
															+
														
 
															   # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
														
 
															   debug_options:
														
 
															     enabled: false              # 由命令行 --debug / --debug-layout 控制
														
--- a/ocr_tools/universal_doc_parser/config/bank_statement_glm_vl_local.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_glm_vl_local.yaml
@@ -82,6 +82,22 @@ layout_detection:
 
															     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
														
 
															     min_text_height_ratio: 0.3        # 最小高度占比（30%）
														
 
															+  # 印章补充检测：使用 PP-DocLayoutV3 补充 docling 无法识别的印章区域
														
 
															+  seal_supplement:
														
 
															+    enabled: true                # 启用 seal 补充检测
														
 
															+    replace_existing: false      # false=增量合并; true=完全替换主结果中已有 seal
														
 
															+    replace_overlapping_image: true   # seal 与 image_body/image 等框的 IoU 较高（阈值见 replace_iou_threshold）时，将其替换为 seal（而非丢弃）
														
 
															+    replace_iou_threshold: 0.7        # 触发替换的最小 IoU
														
 
															+    duplicate_iou_threshold: 0.3      # 未替换时，与任意框 IoU 超此值视为重复 seal
														
 
															+    # seal_detector 使用的模型配置，默认复用 paddle_ppdoclayoutv3 的配置
														
 
															+    model_config:
														
 
															+      module: "paddle"
														
 
															+      model_name: "PP-DocLayoutV3"
														
 
															+      model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors"
														
 
															+      device: "cpu"
														
 
															+      conf: 0.3
														
 
															+      num_threads: 4
														
 
															+
														
 
															   # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
														
 
															   debug_options:
														
 
															     enabled: false              # 由命令行 --debug / --debug-layout 控制
														
--- a/ocr_tools/universal_doc_parser/config/bank_statement_yusys_local.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_yusys_local.yaml
@@ -3,7 +3,7 @@
 
															 # llama-server -hf ggml-org/GLM-OCR-GGUF
														
 
															 scene_name: "bank_statement_yusys_local"
														
 
															-description: "银行流水V4: PP-DocLayoutV3 layout + PaddleOCR + MinerU UNet（有线表格）+ GLM-OCR VLM（无线表格/seal）"
														
 
															+description: "银行流水V4: Docling-layout-old layout + PaddleOCR + MinerU UNet（有线表格）+ GLM-OCR VLM（无线表格）+ PP-DocLayoutV3 seal补充检测 + MinerU seal专用OCR"
														
 
															 input:
														
 
															   supported_formats: [".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff"]
														
@@ -82,6 +82,22 @@ layout_detection:
 
															     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
														
 
															     min_text_height_ratio: 0.3        # 最小高度占比（30%）
														
 
															+  # 印章补充检测：使用 PP-DocLayoutV3 补充 docling 无法识别的印章区域
														
 
															+  seal_supplement:
														
 
															+    enabled: true                # 启用 seal 补充检测
														
 
															+    replace_existing: false      # false=增量合并; true=完全替换主结果中已有 seal
														
 
															+    replace_overlapping_image: true   # seal 与 image_body/image 等框的 IoU 较高（阈值见 replace_iou_threshold）时，将其替换为 seal（而非丢弃）
														
 
															+    replace_iou_threshold: 0.7        # 触发替换的最小 IoU
														
 
															+    duplicate_iou_threshold: 0.3      # 未替换时，与任意框 IoU 超此值视为重复 seal
														
 
															+    # seal_detector 使用的模型配置，默认复用 paddle_ppdoclayoutv3 的配置
														
 
															+    model_config:
														
 
															+      module: "paddle"
														
 
															+      model_name: "PP-DocLayoutV3"
														
 
															+      model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors"
														
 
															+      device: "cpu"
														
 
															+      conf: 0.3
														
 
															+      num_threads: 4
														
 
															+
														
 
															   # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
														
 
															   debug_options:
														
 
															     enabled: false              # 由命令行 --debug / --debug-layout 控制
														
@@ -244,6 +260,13 @@ vl_recognition:
 
															   table_recognition:
														
 
															 # ============================================================
														
 
															+# 印章 OCR 识别配置 - 基于 MinerU PytorchPaddleOCR(lang="seal")
														
 
															+# ============================================================
														
 
															+seal_recognition:
														
 
															+  enabled: true                # 启用印章专用 OCR，关闭则回退 VLM 识别
														
 
															+  module: "mineru"             # 使用 MinerU 印章 OCR 模型
														
 
															+
														
 
															+# ============================================================
														
 
															 # 输出配置
														
 
															 # ============================================================
														
 
															 output:
														
--- a/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml
@@ -81,6 +81,22 @@ layout_detection:
 
															     min_text_width_ratio: 0.4         # 最小宽度占比（40%）
														
 
															     min_text_height_ratio: 0.3        # 最小高度占比（30%）
														
 
															+  # 印章补充检测：使用 PP-DocLayoutV3 补充 docling 无法识别的印章区域
														
 
															+  seal_supplement:
														
 
															+    enabled: true                # 启用 seal 补充检测
														
 
															+    replace_existing: false      # false=增量合并; true=完全替换主结果中已有 seal
														
 
															+    replace_overlapping_image: true   # seal 与 image_body/image 等框的 IoU 较高（阈值见 replace_iou_threshold）时，将其替换为 seal（而非丢弃）
														
 
															+    replace_iou_threshold: 0.7        # 触发替换的最小 IoU
														
 
															+    duplicate_iou_threshold: 0.3      # 未替换时，与任意框 IoU 超此值视为重复 seal
														
 
															+    # seal_detector 使用的模型配置，默认复用 paddle_ppdoclayoutv3 的配置
														
 
															+    model_config:
														
 
															+      module: "paddle"
														
 
															+      model_name: "PP-DocLayoutV3"
														
 
															+      model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors"
														
 
															+      device: "cpu"
														
 
															+      conf: 0.3
														
 
															+      num_threads: 4
														
 
															+
														
 
															   # Debug 可视化（底图为 inference_image，与 Layout 检测输入一致）
														
 
															   debug_options:
														
 
															     enabled: false              # 由命令行 --debug / --debug-layout 控制
														
@@ -243,6 +259,13 @@ vl_recognition:
 
															   table_recognition:
														
 
															 # ============================================================
														
 
															+# 印章 OCR 识别配置 - 基于 MinerU PytorchPaddleOCR(lang="seal")
														
 
															+# ============================================================
														
 
															+seal_recognition:
														
 
															+  enabled: true                # 启用印章专用 OCR，关闭则回退 VLM 识别
														
 
															+  module: "mineru"             # 使用 MinerU 印章 OCR 模型
														
 
															+
														
 
															+# ============================================================
														
 
															 # 输出配置
														
 
															 # ============================================================
														
 
															 output: