11 Commits 0e20f6612e ... abf5932769

Auteur SHA1 Message Date
  zhch158_admin abf5932769 fix(markdown_generator): 移除印章置信度信息以简化输出格式 il y a 1 semaine
  zhch158_admin 4ede25dc86 feat(process_single_input): 添加场景名称设置功能并更新场景参数 il y a 1 semaine
  zhch158_admin 87c5b916fb feat(pipeline_manager): 添加场景名称设置功能,并同步到布局路由器 il y a 1 semaine
  zhch158_admin 3ab44b691b feat(layout_detection): 更新布局检测配置,支持智能路由器场景选择和多模型配置 il y a 1 semaine
  zhch158_admin 10872b84e9 feat(pdf_processing): 添加场景参数支持,优化 PDF 处理任务配置 il y a 1 semaine
  zhch158_admin 59f2fdd74c feat(glmocr_vl_adapter): 添加连通性测试以验证与 GLM-OCR API 的连接 il y a 1 semaine
  zhch158_admin 08cfd7cd25 fix(element_processors): 优化表格识别中的 VL 识别器异常处理逻辑 il y a 1 semaine
  zhch158_admin d2258858b5 feat(paddle_table_classifier): 优化表格线检测,使用自适应阈值和线段过滤 il y a 1 semaine
  zhch158_admin 73f20ff9e2 feat(mineru_wired_table): 添加 OCR 文本容差计算,优化网格结构恢复 il y a 1 semaine
  zhch158_admin 93977737f5 feat(grid_recovery): 增加 OCR 文本容差参数,优化网格结构恢复算法 il y a 1 semaine
  zhch158_admin ce673e8fc6 feat(cell_fusion): 增强单元格融合逻辑,支持UNet过度合并拆分,添加新配置参数 il y a 1 semaine

+ 77 - 26
ocr_tools/ocr_batch/batch_process_pdf.py

@@ -14,7 +14,7 @@ import json
 import yaml
 from pathlib import Path
 from datetime import datetime
-from typing import List, Dict, Optional, Any
+from typing import List, Dict, Optional, Any, Tuple
 from dataclasses import dataclass, field
 import logging
 from tqdm import tqdm
@@ -34,11 +34,19 @@ class ProcessorConfig:
     extra_args: List[str] = field(default_factory=list)
     output_subdir: str = "results"
     log_subdir: str = "logs"  # 🎯 新增:日志子目录
+    scene_arg: Optional[str] = None  # 场景参数名(如 --scene)
     venv: Optional[str] = None
     description: str = ""
 
 
 @dataclass
+class PDFTask:
+    """PDF 处理任务"""
+    path: Path
+    scene: Optional[str] = None
+
+
+@dataclass
 class ProcessResult:
     """处理结果"""
     pdf_file: str
@@ -165,6 +173,7 @@ class ConfigManager:
             extra_args=proc_config.get('extra_args', []),
             output_subdir=proc_config.get('output_subdir', processor_name + '_results'),
             log_subdir=proc_config.get('log_subdir', f'logs/{processor_name}'),  # 🎯 新增
+            scene_arg=proc_config.get('scene_arg'),
             venv=proc_config.get('venv'),
             description=proc_config.get('description', '')
         )
@@ -188,9 +197,9 @@ class PDFFileFinder:
     def __init__(self, base_dir: str):
         self.base_dir = Path(base_dir)
     
-    def from_file_list(self, list_file: str) -> List[Path]:
+    def from_file_list(self, list_file: str) -> List[PDFTask]:
         """从文件列表读取"""
-        pdf_files = []
+        pdf_files: List[PDFTask] = []
         
         with open(list_file, 'r', encoding='utf-8') as f:
             for line in f:
@@ -199,27 +208,39 @@ class PDFFileFinder:
                 if not line or line.startswith('#'):
                     continue
                 
+                file_part, scene = self._parse_list_line(line)
                 # 构建完整路径
-                pdf_path = self._resolve_path(line)
+                pdf_path = self._resolve_path(file_part)
                 if pdf_path:
-                    pdf_files.append(pdf_path)
+                    pdf_files.append(PDFTask(path=pdf_path, scene=scene))
         
         return pdf_files
     
-    def from_list(self, pdf_list: List[str]) -> List[Path]:
+    def from_list(self, pdf_list: List[str]) -> List[PDFTask]:
         """从列表读取"""
-        pdf_files = []
+        pdf_files: List[PDFTask] = []
         
         for pdf in pdf_list:
-            pdf_path = self._resolve_path(pdf.strip())
+            file_part, scene = self._parse_list_line(pdf.strip())
+            pdf_path = self._resolve_path(file_part)
             if pdf_path:
-                pdf_files.append(pdf_path)
+                pdf_files.append(PDFTask(path=pdf_path, scene=scene))
         
         return pdf_files
     
-    def find_all(self) -> List[Path]:
+    def find_all(self) -> List[PDFTask]:
         """查找基础目录下所有 PDF"""
-        return sorted(self.base_dir.rglob('*.pdf'))
+        return [PDFTask(path=path) for path in sorted(self.base_dir.rglob('*.pdf'))]
+
+    def _parse_list_line(self, line: str) -> Tuple[str, Optional[str]]:
+        """解析列表行(支持  文件<TAB>场景  或  文件,场景)"""
+        for sep in ["\t", ","]:
+            if sep in line:
+                file_part, scene_part = line.split(sep, 1)
+                file_part = file_part.strip()
+                scene_part = scene_part.strip()
+                return file_part, scene_part or None
+        return line.strip(), None
     
     def _resolve_path(self, path_str: str) -> Optional[Path]:
         """解析路径"""
@@ -263,13 +284,15 @@ class PDFBatchProcessor:
         processor_config: ProcessorConfig,
         output_subdir: Optional[str] = None,
         log_base_dir: Optional[str] = None,  # 🎯 新增:日志基础目录
-        dry_run: bool = False
+        dry_run: bool = False,
+        default_scene: Optional[str] = None
     ):
         self.processor_config = processor_config
         # 如果指定了output_subdir,使用指定的;否则使用处理器配置中的
         self.output_subdir = output_subdir or processor_config.output_subdir
         self.log_base_dir = Path(log_base_dir) if log_base_dir else Path('logs')  # 🎯 新增
         self.dry_run = dry_run
+        self.default_scene = default_scene
         
         # 设置日志
         self.logger = self._setup_logger()
@@ -320,7 +343,7 @@ class PDFBatchProcessor:
         
         return log_file
     
-    def process_files(self, pdf_files: List[Path]) -> Dict[str, Any]:
+    def process_files(self, pdf_files: List[PDFTask]) -> Dict[str, Any]:
         """批量处理文件"""
         self.logger.info(f"开始处理 {len(pdf_files)} 个文件")
         self.logger.info(f"处理器: {self.processor_config.description}")
@@ -335,8 +358,8 @@ class PDFBatchProcessor:
         
         # 使用进度条
         with tqdm(total=len(pdf_files), desc="处理进度", unit="file") as pbar:
-            for pdf_file in pdf_files:
-                result = self._process_single_file(pdf_file)
+            for task in pdf_files:
+                result = self._process_single_file(task)
                 self.results.append(result)
                 pbar.update(1)
                 
@@ -355,9 +378,12 @@ class PDFBatchProcessor:
         
         return stats
     
-    def _process_single_file(self, pdf_file: Path) -> ProcessResult:
+    def _process_single_file(self, task: PDFTask) -> ProcessResult:
         """🎯 处理单个文件(支持日志重定向)"""
-        self.logger.info(f"处理: {pdf_file}")
+        pdf_file = task.path
+        scene = task.scene or self.default_scene
+        scene_info = f" (scene: {scene})" if scene else ""
+        self.logger.info(f"处理: {pdf_file}{scene_info}")
         
         # 检查文件是否存在
         if not pdf_file.exists():
@@ -376,7 +402,7 @@ class PDFBatchProcessor:
         log_file = self._get_log_file_path(pdf_file)
         
         # 构建命令
-        cmd = self._build_command(pdf_file, output_dir)
+        cmd = self._build_command(pdf_file, output_dir, scene)
         
         self.logger.debug(f"执行命令: {cmd if isinstance(cmd, str) else ' '.join(cmd)}")
         self.logger.info(f"日志输出: {log_file}")
@@ -398,7 +424,7 @@ class PDFBatchProcessor:
                 # 写入日志头
                 log_f.write(f"{'='*80}\n")
                 log_f.write(f"处理器: {self.processor_config.description}\n")
-                log_f.write(f"PDF 文件: {pdf_file}\n")
+                log_f.write(f"PDF 文件: {pdf_file}{scene_info}\n")
                 log_f.write(f"输出目录: {output_dir}\n")
                 log_f.write(f"开始时间: {datetime.now()}\n")
                 log_f.write(f"{'='*80}\n\n")
@@ -486,7 +512,7 @@ class PDFBatchProcessor:
                 log_file=str(log_file)
             )
     
-    def _build_command(self, pdf_file: Path, output_dir: Path):
+    def _build_command(self, pdf_file: Path, output_dir: Path, scene: Optional[str]):
         """构建执行命令
         
         Returns:
@@ -503,6 +529,13 @@ class PDFBatchProcessor:
         
         # 添加额外参数
         base_cmd.extend(self.processor_config.extra_args)
+
+        # 添加场景参数(如果配置了scene_arg)
+        if scene:
+            if self.processor_config.scene_arg:
+                base_cmd.extend([self.processor_config.scene_arg, scene])
+            else:
+                self.logger.warning("⚠️ 场景已提供但未配置scene_arg,已忽略场景参数")
         
         # 如果配置了虚拟环境,构建 shell 命令
         if self.processor_config.venv:
@@ -690,6 +723,17 @@ def create_parser() -> argparse.ArgumentParser:
         nargs='+',
         help='PDF 文件列表 (空格分隔)'
     )
+
+    # 场景参数
+    parser.add_argument(
+        '--scene',
+        help='默认场景名称(文件列表未提供场景时使用)'
+    )
+    parser.add_argument(
+        '--scene-arg',
+        default='--scene',
+        help='场景参数名称 (默认: --scene)'
+    )
     
     # 额外参数
     parser.add_argument(
@@ -772,8 +816,12 @@ def main():
             script=args.script,
             extra_args=args.extra_args.split() if args.extra_args else [],
             output_subdir=args.output_subdir or 'manual_results',
+            scene_arg=args.scene_arg,
             venv=args.venv
         )
+        # 如果用户指定了scene但手动配置未提供scene_arg,提示该场景参数将被忽略
+        if args.scene and not processor_config.scene_arg:
+            print("⚠️  已指定场景但未配置scene_arg,忽略场景参数")
     else:
         parser.error("必须指定 -p 或 -s 参数")
     
@@ -806,18 +854,20 @@ def main():
         return 1
     
     # 显示找到的文件
-    valid_file_paths = [f.as_posix() for f in pdf_files if f.exists()]
+    valid_file_paths = [f"{t.path.as_posix()}\t{t.scene}" if t.scene else t.path.as_posix()
+                        for t in pdf_files if t.path.exists()]
     if valid_file_paths:
         print("\n".join(valid_file_paths))    
 
     # 验证文件
-    valid_files = [f for f in pdf_files if f.exists()]
-    invalid_files = [f for f in pdf_files if not f.exists()]
+    valid_files = [t for t in pdf_files if t.path.exists()]
+    invalid_files = [t for t in pdf_files if not t.path.exists()]
     
     if invalid_files:
         print(f"\n⚠️  警告: {len(invalid_files)} 个文件不存在:")
-        for f in invalid_files[:5]:
-            print(f"  - {f}")
+        for t in invalid_files[:5]:
+            scene_suffix = f" (scene: {t.scene})" if t.scene else ""
+            print(f"  - {t.path}{scene_suffix}")
         if len(invalid_files) > 5:
             print(f"  ... 还有 {len(invalid_files) - 5} 个")
     
@@ -834,7 +884,8 @@ def main():
         processor_config=processor_config,
         output_subdir=args.output_subdir,
         log_base_dir=log_base_dir,  # 🎯 传递日志目录
-        dry_run=args.dry_run
+        dry_run=args.dry_run,
+        default_scene=args.scene
     )
     
     stats = processor.process_files(valid_files)

+ 17 - 16
ocr_tools/ocr_batch/pdf_list.txt

@@ -1,17 +1,18 @@
-德_内蒙古银行照.pdf
-对公_招商银行图.pdf
-A用户_单元格扫描流水.pdf
-B用户_扫描流水.pdf
-康强_北京农村商业银行.pdf
-施博深.pdf
-山西云集科技有限公司.pdf
-2023年度报告母公司.pdf
-提取自赤峰黄金2023年报.pdf
-许_民生银行图.pdf
-方_广发银行图.pdf
-付_工商银行943825图.pdf
-乔_建设银行图.pdf
-湛_平安银行图.pdf
-张_微信图.pdf
-朱_中信银行图.pdf
+# 格式:文件名<TAB>场景 或 文件名,场景(场景取值:bank_statement / financial_report)
+德_内蒙古银行照.pdf,bank_statement
+对公_招商银行图.pdf,bank_statement
+A用户_单元格扫描流水.pdf,bank_statement
+B用户_扫描流水.pdf,bank_statement
+康强_北京农村商业银行.pdf,bank_statement
+施博深.pdf,bank_statement
+山西云集科技有限公司.pdf,bank_statement
+2023年度报告母公司.pdf,financial_report
+提取自赤峰黄金2023年报.pdf,financial_report
+许_民生银行图.pdf,bank_statement
+方_广发银行图.pdf,bank_statement
+付_工商银行943825图.pdf,bank_statement
+乔_建设银行图.pdf,bank_statement
+湛_平安银行图.pdf,bank_statement
+张_微信图.pdf,bank_statement
+朱_中信银行图.pdf,bank_statement
 

+ 3 - 0
ocr_tools/ocr_batch/processor_configs.yaml

@@ -12,6 +12,7 @@ processors:
     script: "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/main_v2.py"
     input_arg: "--input"
     output_arg: "--output_dir"
+    scene_arg: "--scene"
     extra_args:
       - "--config=/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml"
       - "--pages=1-35"
@@ -27,6 +28,7 @@ processors:
     script: "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/main_v2.py"
     input_arg: "--input"
     output_arg: "--output_dir"
+    scene_arg: "--scene"
     extra_args:
       - "--config=/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v3.yaml"
       # - "--config=/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v2.yaml"
@@ -45,6 +47,7 @@ processors:
     script: "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/main_v2.py"
     input_arg: "--input"
     output_arg: "--output_dir"
+    scene_arg: "--scene"
     extra_args:
       - "--config=/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/config/bank_statement_mineru_vl.yaml"
       # - "--config=/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v2.yaml"

+ 30 - 8
ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml

@@ -18,16 +18,38 @@ preprocessor:
     enabled: false
 
 # ============================================================
-# Layout 检测配置 - 使用 PP-DocLayoutV3
+# Layout 检测配置 - 智能路由器(按场景直接选择模型)
 # ============================================================
 layout_detection:
-  module: "paddle"
-  model_name: "PP-DocLayoutV3"
-  model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors"
-  device: "cpu"
-  conf: 0.3
-  num_threads: 4
-  batch_size: 1
+  module: "smart_router"
+  strategy: "scene"  # 按场景直接选择模型,不走ocr_eval
+
+  # 场景策略:指定场景直接选用的布局模型
+  scene_strategy:
+    bank_statement:
+      model: "docling"
+    financial_report:
+      model: "paddle_ppdoclayoutv3"
+  default_model: "docling"
+
+  # 配置多个模型
+  models:
+    docling:
+      module: "docling"
+      model_name: "docling-layout-old"
+      model_dir: "ds4sd/docling-layout-old"
+      device: "cpu"
+      conf: 0.3
+      num_threads: 4
+
+    paddle_ppdoclayoutv3:
+      module: "paddle"
+      model_name: "PP-DocLayoutV3"
+      model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors"
+      device: "cpu"
+      conf: 0.3
+      num_threads: 4
+      batch_size: 1
   
   # 后处理配置
   post_process:

+ 6 - 6
ocr_tools/universal_doc_parser/core/element_processors.py

@@ -495,13 +495,13 @@ class ElementProcessors:
         
         # VLM 识别获取表格结构HTML(懒加载)
         table_html = ""
-        try:
-            vl_recognizer = self._ensure_vl_recognizer()
-            if vl_recognizer is None:
-                logger.error("❌ VL recognizer not available for table recognition")
-                # return self._create_empty_table_result(layout_item, bbox, table_angle, ocr_source)
-                raise RuntimeError("VL recognizer not available")
+        vl_recognizer = self._ensure_vl_recognizer()
+        if vl_recognizer is None:
+            logger.error("❌ VL recognizer not available for table recognition")
+            # return self._create_empty_table_result(layout_item, bbox, table_angle, ocr_source)
+            raise RuntimeError("VL recognizer not available")
             
+        try:
             vl_result = vl_recognizer.recognize_table(
                 cropped_table,
                 return_cells_coordinate=True

+ 40 - 1
ocr_tools/universal_doc_parser/core/layout_model_router.py

@@ -30,12 +30,15 @@ class SmartLayoutRouter(BaseLayoutDetector):
     
     def __init__(self, config: Dict[str, Any]):
         super().__init__(config)
-        self.strategy = config.get('strategy', 'ocr_eval')  # ocr_eval, auto
+        self.strategy = config.get('strategy', 'ocr_eval')  # ocr_eval, auto, scene
         self.models = {}
         self.model_configs = config.get('models', {})
         self.fallback_config = config.get('fallback_model', None)
         self.evaluator = OCRBasedLayoutEvaluator()
         self.ocr_recognizer = None  # 用于在ocr_eval策略中获取OCR结果
+        self.scene_name = config.get('scene_name', None)
+        self.scene_strategy = config.get('scene_strategy', {})
+        self.default_model = config.get('default_model', None)
         # 调试模式支持
         self.debug_mode = config.get('debug_mode', False)
         self.output_dir = config.get('output_dir', None)
@@ -90,6 +93,10 @@ class SmartLayoutRouter(BaseLayoutDetector):
     def set_ocr_recognizer(self, ocr_recognizer):
         """设置OCR识别器(用于ocr_eval策略)"""
         self.ocr_recognizer = ocr_recognizer
+
+    def set_scene_name(self, scene_name: Optional[str]):
+        """设置场景名称(用于scene策略)"""
+        self.scene_name = scene_name
     
     def _detect_raw(
         self, 
@@ -137,8 +144,40 @@ class SmartLayoutRouter(BaseLayoutDetector):
             return self._ocr_eval_detect(image, ocr_spans)
         elif self.strategy == 'auto':
             return self._auto_select_detect(image)
+        elif self.strategy == 'scene':
+            return self._scene_select_detect(image)
         else:
             raise ValueError(f"Unknown strategy: {self.strategy}")
+
+    def _scene_select_detect(
+        self,
+        image: Union[np.ndarray, Image.Image]
+    ) -> List[Dict[str, Any]]:
+        """
+        场景策略:根据scene_strategy直接选择模型
+
+        注意:不执行ocr_eval,直接使用选定模型
+        """
+        selected_model = None
+        if self.scene_name:
+            scene_rule = self.scene_strategy.get(self.scene_name)
+            if isinstance(scene_rule, str):
+                selected_model = scene_rule
+            elif isinstance(scene_rule, dict):
+                selected_model = scene_rule.get('model')
+
+        if not selected_model:
+            selected_model = self.default_model
+
+        if not selected_model and self.models:
+            selected_model = next(iter(self.models.keys()))
+
+        if selected_model not in self.models:
+            logger.warning(f"⚠️ Scene strategy model not available: {selected_model}, using first model")
+            selected_model = next(iter(self.models.keys()))
+
+        logger.info(f"🎯 Scene strategy selected model: {selected_model} (scene: {self.scene_name})")
+        return self.models[selected_model].detect(image)
     
     def _ocr_eval_detect(
         self, 

+ 12 - 0
ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py

@@ -117,6 +117,15 @@ class EnhancedDocPipeline:
         self._init_element_processors()
         
         logger.info(f"✅ Pipeline initialized for scene: {self.scene_name}")
+
+    def set_scene_name(self, scene_name: Optional[str]):
+        """设置场景名称,并同步到布局路由器"""
+        if not scene_name:
+            return
+        self.scene_name = scene_name
+        if hasattr(self.layout_detector, 'set_scene_name'):
+            self.layout_detector.set_scene_name(scene_name)
+        logger.info(f"🔄 Scene updated in pipeline: {scene_name}")
     
     def _ensure_vl_recognizer(self):
         """懒加载 VL 识别器(仅在需要时初始化,且只初始化一次)"""
@@ -155,6 +164,9 @@ class EnhancedDocPipeline:
             self.layout_detector = ModelFactory.create_layout_detector(
                 self.config['layout_detection']
             )
+
+            if hasattr(self.layout_detector, 'set_scene_name'):
+                self.layout_detector.set_scene_name(self.scene_name)
             
             # 如果是智能路由器且使用ocr_eval策略,需要设置OCR识别器
             if hasattr(self.layout_detector, 'set_ocr_recognizer'):

+ 13 - 8
ocr_tools/universal_doc_parser/main_v2.py

@@ -179,6 +179,8 @@ def process_single_input(
         try:
             if scene:
                 pipeline.scene_name = scene
+                if hasattr(pipeline, 'set_scene_name'):
+                    pipeline.set_scene_name(scene)
                 logger.info(f"🔄 Scene overridden to: {scene}")
             
             logger.info(f"🚀 开始处理: {input_path}")
@@ -349,6 +351,7 @@ def main():
     )
     parser.add_argument(
         "--scene", "-s",
+        required=True,
         choices=["bank_statement", "financial_report"],
         help="场景类型(覆盖配置文件设置)"
     )
@@ -436,10 +439,10 @@ if __name__ == "__main__":
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003.png",
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003.png",
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003_270_skew(-0.4).png",
-            "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
+            # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
             # "output_dir": "./output/2023年度报告母公司/bank_statement_yusys_v3",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v3",
-            "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_glm_vl",
+            # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_glm_vl",
 
             # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v2",
@@ -458,8 +461,9 @@ if __name__ == "__main__":
 
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/提取自赤峰黄金2023年报.pdf",
             # "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
-            # "input": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报.pdf",
-            # "output_dir": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
+            "input": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报.pdf",
+            "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v4",
+            # "output_dir": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报/bank_statement_yusys_v4",
 
             # "input": "/Users/zhch158/workspace/data/流水分析/施博深.pdf",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/施博深/bank_statement_yusys_v3",
@@ -475,7 +479,7 @@ if __name__ == "__main__":
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/山西云集科技有限公司/bank_statement_yusys_v3",
 
             # 配置文件
-            "config": "./config/bank_statement_glm_vl.yaml",
+            "config": "./config/bank_statement_yusys_v4.yaml",
             # "config": "./config/bank_statement_yusys_v3.yaml",
             # "config": "./config/bank_statement_smart_router.yaml",
             # "config": "./config/bank_statement_mineru_vl.yaml",
@@ -483,10 +487,11 @@ if __name__ == "__main__":
             # "config": "./config/bank_statement_paddle_vl.yaml",
             
             # 场景
-            "scene": "bank_statement",
+            # "scene": "bank_statement",
+            "scene": "financial_report",
             
             # 页面范围(可选)
-            "pages": "3-7",  # 只处理前1页
+            "pages": "11",  # 只处理前1页
             # "pages": "1-3,5,7-10",  # 处理指定页面
             # "pages": "83-109",  # 处理指定页面
 
@@ -499,7 +504,7 @@ if __name__ == "__main__":
             "log_level": "DEBUG",
 
             # 日志文件
-            "log_file": "./output/logs/bank_statement_glm_vl/process.log",
+            "log_file": "./output/logs/bank_statement_yusys_v4/process.log",
         }
         
         # 构造参数

+ 10 - 0
ocr_tools/universal_doc_parser/models/adapters/glmocr_vl_adapter.py

@@ -121,6 +121,16 @@ class GLMOCRVLRecognizer(BaseVLRecognizer):
                     'Authorization': f'Bearer {self.api_key}'
                 })
             
+            # 初始化时对 GLM-OCR API 做连通性测试,尽早暴露网络/鉴权问题
+            try:
+                test_response = self.session.get(self.api_url, timeout=(self.connect_timeout, self.http_timeout), verify=self.verify_ssl)
+                if test_response.status_code == 200:
+                    logger.debug(f"Successfully connected to GLM-OCR API at {self.api_url}")
+                else:
+                    logger.warning(f"Received unexpected status code {test_response.status_code} from GLM-OCR API: {test_response.text}")
+            except requests.exceptions.RequestException as e:
+                logger.error(f"Failed to connect to GLM-OCR API at {self.api_url}: {e}")
+                raise
             logger.success(f"✅ GLM-OCR VL recognizer initialized: {self.api_url}")
             
         except Exception as e:

+ 18 - 1
ocr_tools/universal_doc_parser/models/adapters/mineru_wired_table.py

@@ -378,6 +378,19 @@ class MinerUWiredTableRecognizer:
 
             # Step 2: 使用连通域法提取单元格 (替换了原来的投影法)
             debug_prefix = f"{dbg.prefix}_grid" if dbg.prefix else "grid"
+
+            # 计算 OCR 文本容差:取最小行高的 50%,无有效 OCR 时回退为 10.0
+            ocr_heights = []
+            for ocr in ocr_boxes or []:
+                bbox = ocr.get("bbox", [])
+                if len(bbox) >= 4:
+                    height = bbox[3] - bbox[1]
+                    if height > 0:
+                        ocr_heights.append(height)
+            if ocr_heights:
+                ocr_text_pixel_tolerance = min(ocr_heights) * 0.5
+            else:
+                ocr_text_pixel_tolerance = 10.0
             
             # 传入原图的实际尺寸和裁剪padding
             bboxes = self.grid_recovery.compute_cells_from_lines(
@@ -402,6 +415,7 @@ class MinerUWiredTableRecognizer:
                         table_image=table_image,
                         unet_cells=bboxes,
                         ocr_boxes=ocr_boxes or [],
+                        ocr_text_pixel_tolerance=ocr_text_pixel_tolerance,
                         pdf_type=pdf_type,
                         debug_dir=debug_dir,
                         debug_prefix=debug_prefix
@@ -425,7 +439,10 @@ class MinerUWiredTableRecognizer:
 
             # Step 3: 重建网格结构 (计算 row, col, rowspan, colspan)
             # OCR补偿已在Step 2中完成,这里仅做网格重建
-            merged_cells = self.grid_recovery.recover_grid_structure(bboxes)
+            merged_cells = self.grid_recovery.recover_grid_structure(
+                bboxes,
+                ocr_text_pixel_tolerance=ocr_text_pixel_tolerance
+            )
             
             # Step 3.5: 可视化逻辑结构 (新增)
             if self.debug_utils.debug_is_on("save_grid_structure", dbg):

+ 36 - 6
ocr_tools/universal_doc_parser/models/adapters/paddle_table_classifier.py

@@ -198,20 +198,50 @@ class PaddleTableClassifier(BaseAdapter):
         else:
             gray = img_array
         
-        # 二值化
-        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+        # 二值化:自适应阈值更适合浅色表格线
+        binary = cv2.adaptiveThreshold(
+            gray,
+            255,
+            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY_INV,
+            25,
+            10
+        )
         
         h, w = binary.shape
         
         # 检测横线
-        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(20, w//30), 1))
+        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(20, w // 30), 1))
         horizontal_mask = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal_kernel)
-        horizontal_lines = cv2.findContours(horizontal_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
+        horizontal_contours = cv2.findContours(horizontal_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
         
         # 检测竖线
-        vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(20, h//30)))
+        vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(20, h // 30)))
         vertical_mask = cv2.morphologyEx(binary, cv2.MORPH_OPEN, vertical_kernel)
-        vertical_lines = cv2.findContours(vertical_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
+        vertical_contours = cv2.findContours(vertical_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
+
+        # 线段长度/长宽比过滤,降低文字竖画误检
+        def filter_lines(contours, orientation):
+            filtered = []
+            for cnt in contours:
+                x, y, cw, ch = cv2.boundingRect(cnt)
+                if cw <= 0 or ch <= 0:
+                    continue
+                if orientation == "h":
+                    if cw < w * 0.15:
+                        continue
+                    if cw / max(ch, 1) < 5.0:
+                        continue
+                else:
+                    if ch < h * 0.15:
+                        continue
+                    if ch / max(cw, 1) < 5.0:
+                        continue
+                filtered.append(cnt)
+            return filtered
+
+        horizontal_lines = filter_lines(horizontal_contours, "h")
+        vertical_lines = filter_lines(vertical_contours, "v")
         
         # 调试可视画
         # 使用传入的 debug_options (包含了可能的 override)

+ 141 - 36
ocr_tools/universal_doc_parser/models/adapters/wired_table/cell_fusion.py

@@ -57,11 +57,22 @@ class CellFusionEngine:
         self.rtdetr_conf_threshold = self.config.get('rtdetr_conf_threshold', 0.5)
         self.enable_ocr_compensation = self.config.get('enable_ocr_compensation', True)
         self.enable_boundary_noise_filter = self.config.get('enable_boundary_noise_filter', True)
+        self.unet_split_min_count = self.config.get('unet_split_min_count', 2)
+        self.rtdetr_split_cover_threshold = self.config.get('rtdetr_split_cover_threshold', 0.5)
+        self.unet_split_cover_threshold = self.config.get('unet_split_cover_threshold', 0.5)
+        self.unet_split_rtdetr_score_threshold = self.config.get(
+            'unet_split_rtdetr_score_threshold',
+            self.rtdetr_conf_threshold
+        )
         
         logger.info(f"🔧 CellFusionEngine initialized: "
-                   f"unet_w={self.unet_weight}, rtdetr_w={self.rtdetr_weight}, "
-                   f"iou_merge={self.iou_merge_threshold}, ocr_comp={self.enable_ocr_compensation}, "
-                   f"boundary_filter={self.enable_boundary_noise_filter}")
+               f"unet_w={self.unet_weight}, rtdetr_w={self.rtdetr_weight}, "
+               f"iou_merge={self.iou_merge_threshold}, ocr_comp={self.enable_ocr_compensation}, "
+               f"boundary_filter={self.enable_boundary_noise_filter}, "
+               f"unet_split_min={self.unet_split_min_count}, "
+               f"unet_split_cover={self.unet_split_cover_threshold}, "
+               f"unet_split_score={self.unet_split_rtdetr_score_threshold}, "
+               f"rtdetr_split_cover={self.rtdetr_split_cover_threshold}")
     
     def should_use_rtdetr(
         self,
@@ -99,6 +110,7 @@ class CellFusionEngine:
         table_image: np.ndarray,
         unet_cells: List[List[float]],
         ocr_boxes: List[Dict[str, Any]],
+        ocr_text_pixel_tolerance: float = 10.0,
         pdf_type: str = 'ocr',
         debug_dir: Optional[str] = None,
         debug_prefix: str = "fusion"
@@ -110,6 +122,7 @@ class CellFusionEngine:
             table_image: 表格图像(原图坐标系)
             unet_cells: UNet检测的单元格列表 [[x1,y1,x2,y2], ...](原图坐标系)
             ocr_boxes: OCR结果列表
+            ocr_text_pixel_tolerance: OCR文本容差(原图坐标系,默认10.0)
             pdf_type: PDF类型 ('txt' 或 'ocr')
             debug_dir: 调试输出目录(可选)
             debug_prefix: 调试文件前缀
@@ -126,7 +139,7 @@ class CellFusionEngine:
             max(unet_cells, key=lambda box: box[2])[2], \
             max(unet_cells, key=lambda box: box[3])[3]
         ] if unet_cells else [0,0,0,0]
-        
+
         # 决策:是否使用 RT-DETR
         use_rtdetr = self.should_use_rtdetr(pdf_type, len(unet_cells), (w, h))
         
@@ -165,8 +178,8 @@ class CellFusionEngine:
                 table_image,
                 conf_threshold=self.rtdetr_conf_threshold
             )
-            # rtdetr_result从上到下,从左到右排序
-            rtdetr_results.sort(key=lambda x: (x['bbox'][1], x['bbox'][0]))
+            # rtdetr_result从上到下、从左到右排序:y坐标按10像素容差取整分行,行内按x坐标取整排序
+            rtdetr_results = sorted(rtdetr_results, key=lambda x: (round(x['bbox'][1] / 10), round(x['bbox'][0])))
             rtdetr_cells = [res['bbox'] for res in rtdetr_results]
             rtdetr_scores = [res['score'] for res in rtdetr_results]
             fusion_stats['rtdetr_count'] = len(rtdetr_cells)
@@ -179,7 +192,7 @@ class CellFusionEngine:
         
         # Phase 2: 智能融合
         # 使用稳健边界估计(避免单个超大单元格撑开边界)
-        table_bbox = self._estimate_robust_table_bbox(rtdetr_cells)
+        table_bbox = self._estimate_robust_table_bbox(rtdetr_cells, ocr_text_pixel_tolerance)
         
         # 将所有单元格的边界限制在表格边界内
         # rtdetr_cells = self._clip_cells_to_bbox(rtdetr_cells, table_bbox)
@@ -190,6 +203,7 @@ class CellFusionEngine:
         fusion_stats['merged_count'] = merge_stats['merged']
         fusion_stats['merged_cells_count'] = merge_stats['merged_cells']
         fusion_stats['added_count'] = merge_stats['added']
+        fusion_stats['split_count'] = merge_stats.get('split', 0)
         
         # Phase 3: NMS 去重
         fused_cells, suppressed = self._nms_filter(fused_cells, self.iou_nms_threshold)
@@ -199,7 +213,8 @@ class CellFusionEngine:
         # Phase 4: 边界噪声过滤(过滤掉边界的 unet_only 噪声单元格)
         if self.enable_boundary_noise_filter:
             fused_cells, cell_labels, noise_filtered = self._filter_boundary_noise(
-                fused_cells, cell_labels, ocr_boxes, table_bbox
+                fused_cells, cell_labels, ocr_boxes, table_bbox,
+                boundary_tolerance=ocr_text_pixel_tolerance
             )
             fusion_stats['noise_filtered_count'] = noise_filtered
         else:
@@ -220,7 +235,7 @@ class CellFusionEngine:
         logger.info(
             f"📊 Fusion (UNet+RT-DETR): UNet={len(unet_cells)}, RT-DETR={len(rtdetr_cells)}, "
             f"1:1Merged={merge_stats['merged']}, MergedCells={merge_stats['merged_cells']}, "
-            f"Added={merge_stats['added']}, NoiseFiltered={noise_filtered}, "
+            f"Split={merge_stats.get('split', 0)}, Added={merge_stats['added']}, NoiseFiltered={noise_filtered}, "
             f"OCRCompensated={fusion_stats.get('ocr_compensated_count', 0)}, Final={len(fused_cells)}"
         )
         
@@ -243,13 +258,14 @@ class CellFusionEngine:
         """
         融合 UNet 和 RT-DETR 检测结果(增强版:支持合并单元格检测)
         
-        融合规则:
-        1. 检测RT-DETR的合并单元格(一对多匹配,基于包含关系)
-           - 判断RT-DETR单元格包含多少个UNet单元格
-           - 使用中心点+包含率判断(而非IoU)
-        2. UNet + RT-DETR 高IoU (>threshold) → 加权平均合并(一对一)
-        3. RT-DETR 独有 + 高置信度 (>0.7) → 补充
-        4. UNet 独有 → 保留
+        融合规则:
+        1. 检测RT-DETR的合并单元格(一对多匹配,基于包含关系)
+            - 判断RT-DETR单元格包含多少个UNet单元格
+            - 使用中心点+包含率判断(而非IoU)
+        2. 检测UNet过度合并(一个UNet包含多个RT-DETR)并拆分
+        3. UNet + RT-DETR 高IoU (>threshold) → 加权平均合并(一对一)
+        4. RT-DETR 独有 + 高置信度 (>0.7) → 补充
+        5. UNet 独有 → 保留
         
         包含关系判断逻辑:
         - UNet单元格的中心点在RT-DETR内
@@ -267,14 +283,14 @@ class CellFusionEngine:
             (fused_cells, stats, cell_labels)
             - fused_cells: 融合后的单元格
             - stats: {'merged': int, 'added': int, 'merged_cells': int}
-            - cell_labels: 每个单元格的来源标签列表 ['merged_span', 'merged_1to1', 'unet_only', 'rtdetr_only', 'new']
+            - cell_labels: 每个单元格的来源标签列表 ['merged_span', 'merged_1to1', 'unet_only', 'rtdetr_only', 'split_rtdetr', 'new']
         """
         
         fused_cells = []
         cell_labels = []  # 记录每个单元格的来源标签
         unet_matched = [False] * len(unet_cells)
         rtdetr_matched = [False] * len(rtdetr_cells)
-        stats = {'merged': 0, 'added': 0, 'merged_cells': 0}
+        stats = {'merged': 0, 'added': 0, 'merged_cells': 0, 'split': 0}
         
         # Step 1: 检测RT-DETR的合并单元格(一对多匹配)
         # 遍历RT-DETR单元格,查找被包含的多个UNet单元格
@@ -332,7 +348,7 @@ class CellFusionEngine:
                     coverage = min(total_unet_area / rtdetr_area, 1.0) if rtdetr_area > 0 else 0
                     
                     # 如果覆盖率>50%,说明这是一个真实的合并单元格
-                    if coverage > 0.5:
+                    if coverage > self.rtdetr_split_cover_threshold:
                         # 认定为合并单元格,取bounding与RT-DETR的最大范围, 且不能超过table_bbox范围
                         fused_cell = [
                             min(bounding_x1, rtdetr_cell[0]),
@@ -342,9 +358,9 @@ class CellFusionEngine:
                         ]
                         # x限制在table_bbox范围内
                         fused_cell[0] = max(fused_cell[0], table_bbox[0])
-                        # fused_cell[1] = max(fused_cell[1], table_bbox[1])
+                        fused_cell[1] = max(fused_cell[1], table_bbox[1])
                         fused_cell[2] = min(fused_cell[2], table_bbox[2])
-                        # fused_cell[3] = min(fused_cell[3], table_bbox[3])
+                        fused_cell[3] = min(fused_cell[3], table_bbox[3])
                         fused_cells.append(fused_cell)
                         cell_labels.append('merged_span')  # 标记为合并单元格
                         rtdetr_matched[rt_idx] = True
@@ -357,6 +373,80 @@ class CellFusionEngine:
                             f"(coverage={coverage:.2f}, score={rtdetr_scores[rt_idx]:.2f})"
                         )
         
+        # Step 1.5: 检测UNet过度合并(一个UNet包含多个RT-DETR)并拆分
+        for u_idx, unet_cell in enumerate(unet_cells):
+            if unet_matched[u_idx]:
+                continue
+
+            unet_area = self._calc_bbox_area(unet_cell)
+            if unet_area <= 0:
+                continue
+
+            contained_rtdetr = []
+            contained_intersects = []
+
+            for rt_idx, rtdetr_cell in enumerate(rtdetr_cells):
+                if rtdetr_matched[rt_idx]:
+                    continue
+                if rtdetr_scores[rt_idx] < self.unet_split_rtdetr_score_threshold:
+                    continue
+
+                rt_cx = (rtdetr_cell[0] + rtdetr_cell[2]) / 2
+                rt_cy = (rtdetr_cell[1] + rtdetr_cell[3]) / 2
+                if not (unet_cell[0] <= rt_cx <= unet_cell[2] and
+                        unet_cell[1] <= rt_cy <= unet_cell[3]):
+                    continue
+
+                intersect_x1 = max(unet_cell[0], rtdetr_cell[0])
+                intersect_y1 = max(unet_cell[1], rtdetr_cell[1])
+                intersect_x2 = min(unet_cell[2], rtdetr_cell[2])
+                intersect_y2 = min(unet_cell[3], rtdetr_cell[3])
+                if intersect_x2 <= intersect_x1 or intersect_y2 <= intersect_y1:
+                    continue
+
+                intersect_area = (intersect_x2 - intersect_x1) * (intersect_y2 - intersect_y1)
+                rtdetr_area = self._calc_bbox_area(rtdetr_cell)
+                contain_ratio = intersect_area / rtdetr_area if rtdetr_area > 0 else 0
+                if contain_ratio > 0.5:
+                    contained_rtdetr.append(rt_idx)
+                    contained_intersects.append(intersect_area)
+
+            if len(contained_rtdetr) >= self.unet_split_min_count:
+                # 计算总包含率:使用所有被包含RT-DETR单元格的外接矩形面积 vs UNet面积
+                # 与RT-DETR合并逻辑保持一致,避免相邻框重复/间隙导致覆盖率失真
+                rt_indices = contained_rtdetr
+                bounding_x1 = min(rtdetr_cells[i][0] for i in rt_indices)
+                bounding_y1 = min(rtdetr_cells[i][1] for i in rt_indices)
+                bounding_x2 = max(rtdetr_cells[i][2] for i in rt_indices)
+                bounding_y2 = max(rtdetr_cells[i][3] for i in rt_indices)
+                total_rtdetr_area = (bounding_x2 - bounding_x1) * (bounding_y2 - bounding_y1)
+                coverage = min(total_rtdetr_area / unet_area, 1.0)
+                if coverage >= self.unet_split_cover_threshold:
+                    # 认定为合并单元格,取bounding与RT-DETR的最大范围, 且不能超过table_bbox范围
+                    split_cell = [
+                        min(bounding_x1, unet_cell[0]),
+                        min(bounding_y1, unet_cell[1]),
+                        max(bounding_x2, unet_cell[2]),
+                        max(bounding_y2, unet_cell[3])
+                    ]
+                    split_cell = [
+                        max(split_cell[0], table_bbox[0]),
+                        max(split_cell[1], table_bbox[1]),
+                        min(split_cell[2], table_bbox[2]),
+                        min(split_cell[3], table_bbox[3])
+                    ]
+                    fused_cells.append(split_cell)
+                    cell_labels.append('split_rtdetr')
+                    for rt_idx in contained_rtdetr:
+                        rtdetr_matched[rt_idx] = True
+
+                    unet_matched[u_idx] = True
+                    stats['split'] += len(contained_rtdetr)
+                    logger.debug(
+                        f"🧩 UNet过度合并拆分: UNet[{u_idx}] -> {len(contained_rtdetr)} RT-DETR "
+                        f"(coverage={coverage:.2f})"
+                    )
+
         # Step 2: 一对一匹配(处理剩余的单元格)
         for u_idx, unet_cell in enumerate(unet_cells):
             if unet_matched[u_idx]:
@@ -401,9 +491,9 @@ class CellFusionEngine:
             if not rtdetr_matched[idx] and score > 0.7:
                 # rtdetr_cell不能超出table_bbox范围, x方向分别限制
                 rtdetr_cell[0] = max(rtdetr_cell[0], table_bbox[0])
-                # rtdetr_cell[1] = max(rtdetr_cell[1], table_bbox[1])
+                rtdetr_cell[1] = max(rtdetr_cell[1], table_bbox[1])
                 rtdetr_cell[2] = min(rtdetr_cell[2], table_bbox[2])
-                # rtdetr_cell[3] = min(rtdetr_cell[3], table_bbox[3])
+                rtdetr_cell[3] = min(rtdetr_cell[3], table_bbox[3])
                 fused_cells.append(rtdetr_cell)
                 cell_labels.append('rtdetr_only')  # 标记为RT-DETR独有
                 stats['added'] += 1
@@ -418,18 +508,16 @@ class CellFusionEngine:
         """
         稳健的表格边界估计
         
-        使用聚类方法找到"主流"的左右边界,避免单个超大单元格撑开边界。
+        使用聚类方法找到"主流"的边界,避免单个超大单元格撑开边界。
         
         算法:
-        1. 收集所有单元格的左边界x1和右边界x2
-        2. 对x1聚类,选择支持度最高的聚类中心作为表格左边界
-        3. 对x2聚类,选择支持度最高的聚类中心作为表格右边界
-        4. y方向使用简单的min/max(行高变化大,不适合聚类)
+        1. 收集所有单元格的边界
+        2. 聚类,选择支持度最高的聚类中心作为表格边界
+        3. 通过容差向内调整边界,过滤掉过于宽松的边界(可能包含噪声单元格)
         
         Args:
             rtdetr_cells: RT-DETR单元格列表
             cluster_tolerance: 聚类容差(像素)
-            
         Returns:
             table_bbox: [x1, y1, x2, y2]
         """
@@ -448,11 +536,13 @@ class CellFusionEngine:
         # 对x2聚类,找主流右边界
         robust_x2 = self._find_dominant_boundary(x2_coords, cluster_tolerance, mode='max')
         # y方向直接取极值
-        robust_y1 = min(y1_coords)
-        robust_y2 = max(y2_coords)
+        robust_y1 = self._find_dominant_boundary(y1_coords, cluster_tolerance, mode='min')
+        robust_y2 = self._find_dominant_boundary(y2_coords, cluster_tolerance, mode='max')
         
         logger.debug(f"📐 稳健边界估计: x=[{robust_x1:.1f}, {robust_x2:.1f}], "
-                    f"原始x范围=[{min(x1_coords):.1f}, {max(x2_coords):.1f}]")
+                    f"原始x范围=[{min(x1_coords):.1f}, {max(x2_coords):.1f}]"
+                    f" | y=[{robust_y1:.1f}, {robust_y2:.1f}], "
+                    f"原始y范围=[{min(y1_coords):.1f}, {max(y2_coords):.1f}]")
         
         return [robust_x1, robust_y1, robust_x2, robust_y2]
     
@@ -624,7 +714,8 @@ class CellFusionEngine:
         cells: List[List[float]],
         cell_labels: List[str],
         ocr_boxes: List[Dict[str, Any]],
-        rtdetr_bbox: List[float]
+        rtdetr_bbox: List[float],
+        boundary_tolerance: float = 0.0
     ) -> Tuple[List[List[float]], List[str], int]:
         """
         过滤边界噪声单元格
@@ -639,6 +730,7 @@ class CellFusionEngine:
             cell_labels: 单元格标签列表
             ocr_boxes: OCR结果列表
             rtdetr_bbox: RT-DETR单元格的边界框 [x1, y1, x2, y2]
+            boundary_tolerance: 边界判定容忍范围(像素,原图坐标系)
         Returns:
             (filtered_cells, filtered_labels, filtered_count)
         """
@@ -646,6 +738,8 @@ class CellFusionEngine:
         filtered_labels = []
         filtered_count = 0
         
+        tol = max(0.0, boundary_tolerance)
+
         for cell, label in zip(cells, cell_labels):
             # # 只过滤 unet_only 标记的单元格
             # if label != 'unet_only':
@@ -655,9 +749,9 @@ class CellFusionEngine:
             
             x1, y1, x2, y2 = cell
             
-            # 检查是否在边界
-            is_left_boundary = x1 <= rtdetr_bbox[0]
-            is_right_boundary = x2 >= rtdetr_bbox[2]
+            # 检查是否在边界(加入容忍范围,避免贴边被误判)
+            is_left_boundary = x1 <= (rtdetr_bbox[0] - tol)
+            is_right_boundary = x2 >= (rtdetr_bbox[2] + tol)
             is_on_boundary = is_left_boundary or is_right_boundary
             
             if not is_on_boundary:
@@ -906,6 +1000,7 @@ class CellFusionEngine:
             merged_cells_1to1 = []  # 1:1融合单元格(黄色)
             merged_cells_span = []  # 合并单元格(品红色,RT-DETR检测的跨格单元格)
             new_cells = []  # 新增单元格(紫色)
+            split_cells = []  # UNet拆分得到的RT-DETR单元格(青色)
             ocr_compensated = []  # OCR补偿单元格(橙色)
             
             for fused_cell, label in zip(fused_cells, cell_labels):
@@ -919,6 +1014,8 @@ class CellFusionEngine:
                     merged_cells_span.append(fused_cell)
                 elif label == 'new':
                     new_cells.append(fused_cell)
+                elif label == 'split_rtdetr':
+                    split_cells.append(fused_cell)
                 elif label == 'ocr_compensated':
                     ocr_compensated.append(fused_cell)
             
@@ -942,6 +1039,10 @@ class CellFusionEngine:
             for cell in new_cells:
                 x1, y1, x2, y2 = [int(v) for v in cell]
                 cv2.rectangle(img3, (x1, y1), (x2, y2), (128, 0, 128), 2)  # 紫色 - 新增
+
+            for cell in split_cells:
+                x1, y1, x2, y2 = [int(v) for v in cell]
+                cv2.rectangle(img3, (x1, y1), (x2, y2), (255, 255, 0), 3)  # 青色 - UNet拆分
             
             for cell in ocr_compensated:
                 x1, y1, x2, y2 = [int(v) for v in cell]
@@ -967,6 +1068,10 @@ class CellFusionEngine:
                 legend_y += 30
                 cv2.putText(img3, f"Purple: New ({len(new_cells)})", (10, legend_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (128, 0, 128), 2)
+            if split_cells:
+                legend_y += 30
+                cv2.putText(img3, f"Cyan: Split ({len(split_cells)})", (10, legend_y),
+                           cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
             if ocr_compensated:
                 legend_y += 30
                 cv2.putText(img3, f"Orange: OCR Compensated ({len(ocr_compensated)})", (10, legend_y),

+ 8 - 3
ocr_tools/universal_doc_parser/models/adapters/wired_table/grid_recovery.py

@@ -498,7 +498,10 @@ class GridRecovery:
     
     
     @staticmethod
-    def recover_grid_structure(bboxes: List[List[float]]) -> List[Dict]:
+    def recover_grid_structure(
+        bboxes: List[List[float]],
+        ocr_text_pixel_tolerance: float = 0.0
+    ) -> List[Dict]:
         """
         从散乱的单元格 bbox 恢复表格的行列结构 (row, col, rowspan, colspan)
         重构版:基于投影网格线 (Projected Grid Lines) 的算法
@@ -506,6 +509,7 @@ class GridRecovery:
         
         Args:
             bboxes: 单元格bbox列表
+            ocr_text_pixel_tolerance: OCR文本容差(原图坐标系)
             
         Returns:
             结构化单元格列表,包含 row, col, rowspan, colspan
@@ -519,14 +523,15 @@ class GridRecovery:
             y_coords.append(b[1])
             y_coords.append(b[3])
         
-        row_dividers= GridRecovery.find_grid_lines(y_coords, tolerance=5, min_support=1)
+        tolerance = max(5.0, min(float(ocr_text_pixel_tolerance), 20.0))
+        row_dividers = GridRecovery.find_grid_lines(y_coords, tolerance=tolerance, min_support=1)
         
         # 2. 识别列分割线 (X轴)
         x_coords = []
         for b in bboxes:
             x_coords.append(b[0])
             x_coords.append(b[2])
-        col_dividers= GridRecovery.find_grid_lines(x_coords, tolerance=5, min_support=1)
+        col_dividers = GridRecovery.find_grid_lines(x_coords, tolerance=tolerance, min_support=1)
         
         # 3. 构建网格结构
         structured_cells = []

+ 2 - 1
ocr_utils/markdown_generator.py

@@ -381,7 +381,8 @@ pages: {len(results.get('pages', []))}
                 text = content.get('text', '') if isinstance(content, dict) else str(content)
                 if text:
                     confidence = content.get('confidence', 0.0) if isinstance(content, dict) else 0.0
-                    md_lines.append(f"🔖 **[印章]** {text} _(置信度: {confidence:.2f})_")
+                    # md_lines.append(f"🔖 **[印章]** {text} _(置信度: {confidence:.2f})_")
+                    md_lines.append(f"🔖 **[印章]** {text}")
                     md_lines.append("")
             
             elif elem_type == 'discarded':