6 달 전 · b433d1211c
--- a/ocr_tools/paddle_common/__init__.py
+++ b/ocr_tools/paddle_common/__init__.py
@@ -0,0 +1,9 @@
 
				+"""
			
 
				+PaddleX 共享核心模块
			
 
				+
			
 
				+提供 PaddleOCR-VL 和 PP-StructureV3 工具共享的核心处理逻辑
			
 
				+"""
			
 
				+
			
 
				+__version__ = "1.0.0"
			
 
				+__author__ = "zhch158"
			
 
				+
			
--- a/ocr_tools/paddle_common/adapters/__init__.py
+++ b/ocr_tools/paddle_common/adapters/__init__.py
@@ -0,0 +1,28 @@
 
				+"""
			
 
				+适配器包初始化 - 支持自动激活
			
 
				+"""
			
 
				+from .table_recognition_adapter import (
			
 
				+    apply_table_recognition_adapter,
			
 
				+    restore_original_function,
			
 
				+    enhanced_predict_single_table_recognition_res
			
 
				+)
			
 
				+
			
 
				+from .doc_preprocessor_adapter import (
			
 
				+    apply_enhanced_doc_preprocessor,
			
 
				+    restore_paddlex_doc_preprocessor,
			
 
				+    DocPreprocessorAdapter,
			
 
				+    EnhancedDocPreprocessor,
			
 
				+)
			
 
				+
			
 
				+__all__ = [
			
 
				+    # 表格识别适配器
			
 
				+    'apply_table_recognition_adapter',
			
 
				+    'restore_original_function',
			
 
				+    'enhanced_predict_single_table_recognition_res',
			
 
				+
			
 
				+    # 文档预处理适配器
			
 
				+    'apply_enhanced_doc_preprocessor',
			
 
				+    'restore_paddlex_doc_preprocessor',
			
 
				+    'DocPreprocessorAdapter',
			
 
				+    'EnhancedDocPreprocessor',
			
 
				+]
			
--- a/ocr_tools/paddle_common/adapters/doc_preprocessor_adapter.py
+++ b/ocr_tools/paddle_common/adapters/doc_preprocessor_adapter.py
@@ -0,0 +1,472 @@
 
				+"""
			
 
				+文档预处理适配器
			
 
				+使用 MinerU 的方向判断算法，但保留 PaddleX 的模型
			
 
				+"""
			
 
				+
			
 
				+import sys
			
 
				+from pathlib import Path
			
 
				+from typing import Any, Dict, List, Optional, Union, Tuple
			
 
				+import numpy as np
			
 
				+import cv2
			
 
				+
			
 
				+from paddlex.inference.pipelines.doc_preprocessor.result import DocPreprocessorResult
			
 
				+from paddlex.inference.common.reader import ReadImage
			
 
				+from paddlex.inference.common.batch_sampler import ImageBatchSampler
			
 
				+from paddlex.inference.pipelines.components import rotate_image
			
 
				+
			
 
				+
			
 
				+class EnhancedDocPreprocessor:
			
 
				+    """
			
 
				+    增强版文档预处理器
			
 
				+    核心思路：采用 MinerU 的两阶段方向判断算法
			
 
				+    1. 快速过滤：宽高比判断（纵向图片才需要方向分类）
			
 
				+    2. OCR 引导：检测文本框，判断是否有大量垂直文本
			
 
				+    3. 精确分类：仅对疑似旋转的图片调用分类模型
			
 
				+    """
			
 
				+    
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        doc_ori_classify_model,
			
 
				+        doc_unwarping_model,
			
 
				+        ocr_det_model=None,  # 🎯 OCR 检测模型（可选）
			
 
				+        device: str = "cpu",
			
 
				+        use_doc_orientation_classify: bool = True,
			
 
				+        use_doc_unwarping: bool = False,
			
 
				+        batch_size: int = 1,
			
 
				+    ):
			
 
				+        """
			
 
				+        Args:
			
 
				+            doc_ori_classify_model: PaddleX 的方向分类模型
			
 
				+            doc_unwarping_model: PaddleX 的文档矫正模型
			
 
				+            ocr_det_model: OCR 文本检测模型（用于判断是否需要旋转，可选）
			
 
				+            device: 设备类型（cpu/gpu）
			
 
				+            use_doc_orientation_classify: 是否使用方向分类
			
 
				+            use_doc_unwarping: 是否使用文档矫正
			
 
				+            batch_size: 批处理大小
			
 
				+        """
			
 
				+        self.doc_ori_classify_model = doc_ori_classify_model
			
 
				+        self.doc_unwarping_model = doc_unwarping_model
			
 
				+        self.device = device
			
 
				+        self.use_doc_orientation_classify = use_doc_orientation_classify
			
 
				+        self.use_doc_unwarping = use_doc_unwarping
			
 
				+        self.batch_size = batch_size
			
 
				+        
			
 
				+        self.img_reader = ReadImage(format="BGR")
			
 
				+        self.batch_sampler = ImageBatchSampler(batch_size=batch_size)
			
 
				+        
			
 
				+        # 🎯 MinerU 算法参数
			
 
				+        self.portrait_threshold = 1.2  # 宽高比阈值
			
 
				+        self.vertical_ratio_threshold = 0.28  # 垂直文本框比例阈值
			
 
				+        self.min_vertical_count = 3  # 最少垂直文本框数量
			
 
				+        
			
 
				+        # 🎯 初始化 OCR 检测模型（只初始化一次）
			
 
				+        self.ocr_det_model = ocr_det_model
			
 
				+        if self.ocr_det_model is None:
			
 
				+            self._initialize_ocr_det_model()
			
 
				+        
			
 
				+        print(f"📐 Enhanced DocPreprocessor initialized")
			
 
				+        print(f"   - Device: {self.device}")
			
 
				+        print(f"   - Portrait threshold: {self.portrait_threshold}")
			
 
				+        print(f"   - Vertical ratio threshold: {self.vertical_ratio_threshold}")
			
 
				+        print(f"   - Min vertical count: {self.min_vertical_count}")
			
 
				+        print(f"   - OCR detection model: {'✅ Available' if self.ocr_det_model else '❌ Not available'}")
			
 
				+    
			
 
				+    def _initialize_ocr_det_model(self):
			
 
				+        """初始化 OCR 检测模型（只执行一次）"""
			
 
				+        try:
			
 
				+            from paddlex import create_model
			
 
				+            
			
 
				+            print("🔧 Initializing OCR detection model...")
			
 
				+            self.ocr_det_model = create_model(
			
 
				+                'PP-OCRv5_server_det',
			
 
				+                device=self.device
			
 
				+            )
			
 
				+            print("✅ OCR detection model initialized successfully")
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"⚠️  Failed to initialize OCR detection model: {e}")
			
 
				+            print("   Will skip OCR-guided filtering")
			
 
				+            self.ocr_det_model = None
			
 
				+    
			
 
				+    def _is_portrait_image(self, image: np.ndarray) -> bool:
			
 
				+        """判断是否为纵向图片"""
			
 
				+        img_height, img_width = image.shape[:2]
			
 
				+        aspect_ratio = img_height / img_width if img_width > 0 else 1.0
			
 
				+        is_portrait = aspect_ratio > self.portrait_threshold
			
 
				+        print(f"   📏 Image size: {img_width}x{img_height}, aspect_ratio: {aspect_ratio:.2f}, is_portrait: {is_portrait}")
			
 
				+        return is_portrait
			
 
				+    
			
 
				+    def _detect_vertical_text_boxes(self, image: np.ndarray) -> Tuple[int, int]:
			
 
				+        """
			
 
				+        检测图片中的垂直文本框
			
 
				+        
			
 
				+        Returns:
			
 
				+            (vertical_count, total_count): 垂直文本框数量和总数量
			
 
				+        """
			
 
				+        if self.ocr_det_model is None:
			
 
				+            print("   ⚠️  OCR detection model not available")
			
 
				+            return 0, 0
			
 
				+        
			
 
				+        try:
			
 
				+            # 🎯 调用 OCR 检测模型
			
 
				+            det_results = list(self.ocr_det_model([image]))
			
 
				+            if not det_results or len(det_results) == 0:
			
 
				+                print("   ℹ️  No OCR detection results")
			
 
				+                return 0, 0
			
 
				+            
			
 
				+            det_result = det_results[0]
			
 
				+            
			
 
				+            # 🎯 从检测结果中提取文本框
			
 
				+            # PaddleX 的检测结果格式: {"dt_polys": [...], ...}
			
 
				+            boxes = None
			
 
				+            if isinstance(det_result, dict):
			
 
				+                boxes = det_result.get('dt_polys', None)
			
 
				+            elif isinstance(det_result, np.ndarray):
			
 
				+                boxes = det_result
			
 
				+            
			
 
				+            if boxes is None or len(boxes) == 0:
			
 
				+                print("   ℹ️  No text boxes detected")
			
 
				+                return 0, 0
			
 
				+            
			
 
				+            # 🎯 统计垂直文本框
			
 
				+            vertical_count = 0
			
 
				+            total_count = len(boxes)
			
 
				+            
			
 
				+            # 🎯 处理 numpy 数组格式: shape=(N, 4, 2)
			
 
				+            if isinstance(boxes, np.ndarray) and len(boxes.shape) == 3 and boxes.shape[1] == 4 and boxes.shape[2] == 2:
			
 
				+                # 格式: (N, 4, 2) - 每个框有4个点，每个点有(x,y)坐标
			
 
				+                for box in boxes:
			
 
				+                    # box: shape=(4, 2) - [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
			
 
				+                    p1, p2, p3, p4 = box
			
 
				+                    
			
 
				+                    # 计算宽高
			
 
				+                    width = abs(float(p2[0] - p1[0]))  # x2 - x1
			
 
				+                    height = abs(float(p3[1] - p2[1]))  # y3 - y2
			
 
				+                    
			
 
				+                    if height == 0:
			
 
				+                        continue
			
 
				+                    
			
 
				+                    aspect_ratio = width / height
			
 
				+                    
			
 
				+                    # 🎯 MinerU 的判断标准：宽高比 < 0.8 为垂直文本
			
 
				+                    if aspect_ratio < 0.8:
			
 
				+                        vertical_count += 1
			
 
				+            
			
 
				+            print(f"   📊 OCR detection: {vertical_count}/{total_count} vertical boxes ({vertical_count/total_count:.1%} vertical)")
			
 
				+            return vertical_count, total_count
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"   ⚠️  OCR detection failed: {e}")
			
 
				+            import traceback
			
 
				+            traceback.print_exc()
			
 
				+            return 0, 0
			
 
				+    
			
 
				+    def _should_classify_orientation(self, image: np.ndarray) -> bool:
			
 
				+        """
			
 
				+        判断是否需要进行方向分类
			
 
				+        参考 MinerU 的两阶段判断逻辑
			
 
				+        
			
 
				+        Returns:
			
 
				+            True: 需要分类
			
 
				+            False: 跳过分类（直接使用原图）
			
 
				+        """
			
 
				+        print("🔍 Checking if orientation classification is needed...")
			
 
				+        
			
 
				+        # 🎯 阶段 1: 快速过滤 - 宽高比检查
			
 
				+        if not self._is_portrait_image(image):
			
 
				+            print("   ⏭️  Skipped: Image is landscape")
			
 
				+            return False
			
 
				+        
			
 
				+        # 🎯 阶段 2: OCR 引导判断 - 检测垂直文本框
			
 
				+        vertical_count, total_count = self._detect_vertical_text_boxes(image)
			
 
				+        
			
 
				+        if total_count == 0:
			
 
				+            print("   ⏭️  Skipped: No text detected")
			
 
				+            return False
			
 
				+        
			
 
				+        # 🎯 MinerU 的判断标准：
			
 
				+        # 垂直文本框比例 >= 28% 且数量 >= 3，才认为可能需要旋转
			
 
				+        vertical_ratio = vertical_count / total_count
			
 
				+        is_rotated = (
			
 
				+            vertical_ratio >= self.vertical_ratio_threshold and 
			
 
				+            vertical_count >= self.min_vertical_count
			
 
				+        )
			
 
				+        
			
 
				+        print(f"   📈 Vertical ratio: {vertical_ratio:.1%} (threshold: {self.vertical_ratio_threshold:.1%})")
			
 
				+        print(f"   📊 Vertical count: {vertical_count} (min: {self.min_vertical_count})")
			
 
				+        print(f"   🎯 Need classification: {is_rotated}")
			
 
				+        
			
 
				+        return is_rotated
			
 
				+    
			
 
				+    def _predict_orientation(self, image: np.ndarray) -> int:
			
 
				+        """
			
 
				+        预测图像方向
			
 
				+        
			
 
				+        Args:
			
 
				+            image: BGR 格式的图像
			
 
				+            
			
 
				+        Returns:
			
 
				+            旋转角度 (0, 90, 180, 270)
			
 
				+        """
			
 
				+        if not self.use_doc_orientation_classify or self.doc_ori_classify_model is None:
			
 
				+            return 0
			
 
				+        
			
 
				+        try:
			
 
				+            # 调用 PaddleX 的分类模型
			
 
				+            preds = list(self.doc_ori_classify_model([image]))
			
 
				+            if preds and len(preds) > 0:
			
 
				+                pred = preds[0]
			
 
				+                angle = int(pred["label_names"][0])
			
 
				+                print(f"   🔄 Orientation classification result: {angle}°")
			
 
				+                return angle
			
 
				+            return 0
			
 
				+        except Exception as e:
			
 
				+            print(f"   ⚠️  Orientation prediction failed: {e}")
			
 
				+            return 0
			
 
				+    
			
 
				+    def predict(
			
 
				+        self,
			
 
				+        input: Union[str, List[str], np.ndarray, List[np.ndarray]],
			
 
				+        use_doc_orientation_classify: Optional[bool] = None,
			
 
				+        use_doc_unwarping: Optional[bool] = None,
			
 
				+    ):
			
 
				+        """
			
 
				+        预测文档预处理结果
			
 
				+        
			
 
				+        Args:
			
 
				+            input: 输入图像路径、数组或列表
			
 
				+            use_doc_orientation_classify: 是否使用方向分类
			
 
				+            use_doc_unwarping: 是否使用文档矫正
			
 
				+            
			
 
				+        Yields:
			
 
				+            DocPreprocessorResult: 预处理结果
			
 
				+        """
			
 
				+        # 处理模型设置
			
 
				+        if use_doc_orientation_classify is None:
			
 
				+            use_doc_orientation_classify = self.use_doc_orientation_classify
			
 
				+        if use_doc_unwarping is None:
			
 
				+            use_doc_unwarping = self.use_doc_unwarping
			
 
				+        
			
 
				+        model_settings = {
			
 
				+            "use_doc_orientation_classify": use_doc_orientation_classify,
			
 
				+            "use_doc_unwarping": use_doc_unwarping,
			
 
				+        }
			
 
				+        
			
 
				+        print(f"\n{'='*60}")
			
 
				+        print(f"🎯 Enhanced DocPreprocessor - MinerU Algorithm")
			
 
				+        print(f"   Settings: orientation={use_doc_orientation_classify}, unwarping={use_doc_unwarping}")
			
 
				+        print(f"{'='*60}\n")
			
 
				+        
			
 
				+        # 批处理
			
 
				+        for batch_data in self.batch_sampler(input):
			
 
				+            # 读取图像
			
 
				+            image_arrays = self.img_reader(batch_data.instances)
			
 
				+            
			
 
				+            # 🎯 增强的方向分类和旋转逻辑
			
 
				+            angles = []
			
 
				+            rot_imgs = []
			
 
				+            
			
 
				+            for idx, img in enumerate(image_arrays):
			
 
				+                print(f"\n📄 Processing image {idx + 1}/{len(image_arrays)}")
			
 
				+                
			
 
				+                if use_doc_orientation_classify:
			
 
				+                    # 🎯 关键改进：先判断是否需要分类
			
 
				+                    if self._should_classify_orientation(img):
			
 
				+                        # 需要分类：调用模型预测角度
			
 
				+                        angle = self._predict_orientation(img)
			
 
				+                    else:
			
 
				+                        # 跳过分类：直接使用 0 度
			
 
				+                        angle = 0
			
 
				+                        print("   ⏭️  Skipped orientation classification")
			
 
				+                    
			
 
				+                    angles.append(angle)
			
 
				+                    if angle != 0:
			
 
				+                        rot_img = rotate_image(img, angle)
			
 
				+                    else:
			
 
				+                        rot_img = img
			
 
				+                    rot_imgs.append(rot_img)
			
 
				+                else:
			
 
				+                    angles.append(-1)  # -1 表示未进行方向分类
			
 
				+                    rot_imgs.append(img)
			
 
				+            
			
 
				+            # 文档矫正
			
 
				+            if use_doc_unwarping and self.doc_unwarping_model is not None:
			
 
				+                output_imgs = [
			
 
				+                    item["doctr_img"][:, :, ::-1]
			
 
				+                    for item in self.doc_unwarping_model(rot_imgs)
			
 
				+                ]
			
 
				+            else:
			
 
				+                output_imgs = rot_imgs
			
 
				+            
			
 
				+            # 生成结果
			
 
				+            for input_path, page_index, image_array, angle, rot_img, output_img in zip(
			
 
				+                batch_data.input_paths,
			
 
				+                batch_data.page_indexes,
			
 
				+                image_arrays,
			
 
				+                angles,
			
 
				+                rot_imgs,
			
 
				+                output_imgs,
			
 
				+            ):
			
 
				+                single_img_res = {
			
 
				+                    "input_path": input_path,
			
 
				+                    "page_index": page_index,
			
 
				+                    "input_img": image_array,
			
 
				+                    "model_settings": model_settings,
			
 
				+                    "angle": angle,
			
 
				+                    "rot_img": rot_img,
			
 
				+                    "output_img": output_img,
			
 
				+                }
			
 
				+                yield DocPreprocessorResult(single_img_res)
			
 
				+    
			
 
				+    def __call__(self, *args, **kwargs):
			
 
				+        """支持像函数一样调用"""
			
 
				+        return self.predict(*args, **kwargs)
			
 
				+
			
 
				+
			
 
				+class DocPreprocessorAdapter:
			
 
				+    """
			
 
				+    文档预处理适配器
			
 
				+    替换 _DocPreprocessorPipeline 的 predict 方法
			
 
				+    """
			
 
				+    
			
 
				+    _original_predict = None
			
 
				+    _shared_ocr_det_model = None  # 🎯 共享的 OCR 检测模型
			
 
				+    _enhanced_preprocessor_cache = {}  # 🎯 缓存 enhanced_preprocessor 实例
			
 
				+    
			
 
				+    @classmethod
			
 
				+    def _get_cache_key(cls, device: str, use_doc_orientation_classify: bool, 
			
 
				+                       use_doc_unwarping: bool, batch_size: int) -> str:
			
 
				+        """生成缓存键"""
			
 
				+        return f"{device}_{use_doc_orientation_classify}_{use_doc_unwarping}_{batch_size}"
			
 
				+    
			
 
				+    @classmethod
			
 
				+    def apply(cls, use_enhanced: bool = True):
			
 
				+        """
			
 
				+        应用适配器
			
 
				+        
			
 
				+        Args:
			
 
				+            use_enhanced: 是否使用增强版预处理器
			
 
				+        """
			
 
				+        if not use_enhanced:
			
 
				+            cls.restore()
			
 
				+            return False
			
 
				+        
			
 
				+        try:
			
 
				+            from paddlex.inference.pipelines.doc_preprocessor import pipeline
			
 
				+            
			
 
				+            # 保存原始方法
			
 
				+            if cls._original_predict is None:
			
 
				+                cls._original_predict = pipeline._DocPreprocessorPipeline.predict
			
 
				+            
			
 
				+            # 创建增强版 predict 方法
			
 
				+            def enhanced_predict(
			
 
				+                self,
			
 
				+                input: Union[str, List[str], np.ndarray, List[np.ndarray]],
			
 
				+                use_doc_orientation_classify: Optional[bool] = None,
			
 
				+                use_doc_unwarping: Optional[bool] = None,
			
 
				+            ):
			
 
				+                """增强版 predict 方法"""
			
 
				+                
			
 
				+                # 🎯 关键改进 1：初始化共享的 OCR 检测模型（只初始化一次）
			
 
				+                if cls._shared_ocr_det_model is None:
			
 
				+                    print("\n" + "="*80)
			
 
				+                    print(">>> [Adapter] Enhanced DocPreprocessor - First Time Initialization")
			
 
				+                    print("="*80)
			
 
				+                    print("🔧 Initializing shared OCR detection model...")
			
 
				+                    try:
			
 
				+                        from paddlex import create_model
			
 
				+                        cls._shared_ocr_det_model = create_model(
			
 
				+                            'PP-OCRv5_server_det',
			
 
				+                            device=self.device
			
 
				+                        )
			
 
				+                        print("✅ Shared OCR detection model initialized")
			
 
				+                    except Exception as e:
			
 
				+                        print(f"⚠️  Failed to initialize OCR detection model: {e}")
			
 
				+                        cls._shared_ocr_det_model = None
			
 
				+                
			
 
				+                # 🎯 关键改进 2：使用缓存的 enhanced_preprocessor（只创建一次）
			
 
				+                cache_key = cls._get_cache_key(
			
 
				+                    device=self.device,
			
 
				+                    use_doc_orientation_classify=self.use_doc_orientation_classify,
			
 
				+                    use_doc_unwarping=self.use_doc_unwarping,
			
 
				+                    batch_size=self.batch_sampler.batch_size
			
 
				+                )
			
 
				+                
			
 
				+                if cache_key not in cls._enhanced_preprocessor_cache:
			
 
				+                    print("🔧 Creating new enhanced preprocessor instance...")
			
 
				+                    enhanced_preprocessor = EnhancedDocPreprocessor(
			
 
				+                        doc_ori_classify_model=self.doc_ori_classify_model if self.use_doc_orientation_classify else None,
			
 
				+                        doc_unwarping_model=self.doc_unwarping_model if self.use_doc_unwarping else None,
			
 
				+                        ocr_det_model=cls._shared_ocr_det_model,  # 使用共享的模型
			
 
				+                        device=self.device,
			
 
				+                        use_doc_orientation_classify=self.use_doc_orientation_classify,
			
 
				+                        use_doc_unwarping=self.use_doc_unwarping,
			
 
				+                        batch_size=self.batch_sampler.batch_size,
			
 
				+                    )
			
 
				+                    cls._enhanced_preprocessor_cache[cache_key] = enhanced_preprocessor
			
 
				+                    print(f"✅ Enhanced preprocessor cached with key: {cache_key}")
			
 
				+                else:
			
 
				+                    enhanced_preprocessor = cls._enhanced_preprocessor_cache[cache_key]
			
 
				+                    print(f"♻️  Reusing cached enhanced preprocessor: {cache_key}")
			
 
				+                
			
 
				+                # 调用增强版处理逻辑
			
 
				+                return enhanced_preprocessor.predict(
			
 
				+                    input,
			
 
				+                    use_doc_orientation_classify,
			
 
				+                    use_doc_unwarping,
			
 
				+                )
			
 
				+            
			
 
				+            # 替换方法
			
 
				+            pipeline._DocPreprocessorPipeline.predict = enhanced_predict
			
 
				+            
			
 
				+            print("✅ DocPreprocessor adapter applied successfully (MinerU algorithm)")
			
 
				+            return True
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"❌ Failed to apply DocPreprocessor adapter: {e}")
			
 
				+            import traceback
			
 
				+            traceback.print_exc()
			
 
				+            return False
			
 
				+    
			
 
				+    @classmethod
			
 
				+    def restore(cls):
			
 
				+        """恢复原始方法"""
			
 
				+        if cls._original_predict is None:
			
 
				+            return False
			
 
				+        
			
 
				+        try:
			
 
				+            from paddlex.inference.pipelines.doc_preprocessor import pipeline
			
 
				+            
			
 
				+            pipeline._DocPreprocessorPipeline.predict = cls._original_predict
			
 
				+            cls._original_predict = None
			
 
				+            
			
 
				+            # 🎯 清理共享资源
			
 
				+            cls._shared_ocr_det_model = None
			
 
				+            cls._enhanced_preprocessor_cache.clear()
			
 
				+            
			
 
				+            print("✅ DocPreprocessor adapter restored")
			
 
				+            return True
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"❌ Failed to restore DocPreprocessor adapter: {e}")
			
 
				+            return False
			
 
				+
			
 
				+
			
 
				+# 🎯 便捷函数
			
 
				+def apply_enhanced_doc_preprocessor():
			
 
				+    """应用增强版文档预处理器"""
			
 
				+    return DocPreprocessorAdapter.apply(use_enhanced=True)
			
 
				+
			
 
				+
			
 
				+def restore_paddlex_doc_preprocessor():
			
 
				+    """恢复 PaddleX 原始文档预处理器"""
			
 
				+    return DocPreprocessorAdapter.restore()
			
 
				+
			
 
				+
			
 
				+# 导出
			
 
				+__all__ = [
			
 
				+    'EnhancedDocPreprocessor',
			
 
				+    'DocPreprocessorAdapter',
			
 
				+    'apply_enhanced_doc_preprocessor',
			
 
				+    'restore_paddlex_doc_preprocessor',
			
 
				+]
			
--- a/ocr_tools/paddle_common/adapters/table_recognition_adapter.py
+++ b/ocr_tools/paddle_common/adapters/table_recognition_adapter.py
@@ -0,0 +1,485 @@
 
				+"""
			
 
				+表格识别个性化适配器 (v6 - 行内重叠合并修正版)
			
 
				+
			
 
				+核心思想：
			
 
				+1. 废弃全局坐标聚类，改为按行分组和对齐，极大提升对倾斜、不规则表格的鲁棒性。
			
 
				+2. 结构生成与内容填充彻底分离：
			
 
				+   - `build_robust_html_from_cells`: 仅根据单元格几何位置，生成带`data-bbox`的HTML骨架。
			
 
				+   - `fill_html_with_ocr_by_bbox`: 根据`data-bbox`从全局OCR结果中查找文本并填充。
			
 
				+3. 通过适配器直接替换PaddleX Pipeline中的核心方法，实现无侵入式升级。
			
 
				+"""
			
 
				+import importlib
			
 
				+from typing import Any, Dict, List
			
 
				+import numpy as np
			
 
				+
			
 
				+from paddlex.inference.pipelines.table_recognition.result import SingleTableRecognitionResult
			
 
				+from paddlex.inference.pipelines.table_recognition.pipeline_v2 import OCRResult
			
 
				+
			
 
				+def _normalize_bbox(box: list) -> list:
			
 
				+    """
			
 
				+    将8点坐标或4点坐标统一转换为 [x1, y1, x2, y2]
			
 
				+    """
			
 
				+    if len(box) == 8:
			
 
				+        # 8点坐标：取最小和最大值
			
 
				+        xs = [box[0], box[2], box[4], box[6]]
			
 
				+        ys = [box[1], box[3], box[5], box[7]]
			
 
				+        return [min(xs), min(ys), max(xs), max(ys)]
			
 
				+    elif len(box) == 4:
			
 
				+        return box[:4]
			
 
				+    else:
			
 
				+        raise ValueError(f"Unsupported bbox format: {box}")
			
 
				+
			
 
				+# --- 1. 核心算法：基于排序和行分组的HTML结构生成 ---
			
 
				+def filter_nested_boxes(boxes: List[list]) -> List[list]:
			
 
				+    """
			
 
				+    移除被其他框完全包含的框。
			
 
				+    boxes: List[[x1, y1, x2, y2]]
			
 
				+    """
			
 
				+    if not boxes:
			
 
				+        return []
			
 
				+    
			
 
				+    filtered = []
			
 
				+    # 按面积从大到小排序，优先保留大框
			
 
				+    boxes.sort(key=lambda b: (b[2] - b[0]) * (b[3] - b[1]), reverse=True)
			
 
				+    
			
 
				+    for i, box in enumerate(boxes):
			
 
				+        is_nested = False
			
 
				+        for j in range(i): # 只需和排在前面的（更大的）框比较
			
 
				+            outer_box = boxes[j]
			
 
				+            # 判断 box 是否被 outer_box 包含
			
 
				+            if outer_box[0] <= box[0] and outer_box[1] <= box[1] and \
			
 
				+               outer_box[2] >= box[2] and outer_box[3] >= box[3]:
			
 
				+                is_nested = True
			
 
				+                break
			
 
				+        if not is_nested:
			
 
				+            filtered.append(box)
			
 
				+    return filtered
			
 
				+
			
 
				+def merge_overlapping_cells_in_row(row_cells: List[list], iou_threshold: float = 0.5) -> List[list]:
			
 
				+    """
			
 
				+    合并单行内水平方向上高度重叠的单元格。
			
 
				+    """
			
 
				+    if not row_cells:
			
 
				+        return []
			
 
				+
			
 
				+    # 按x坐标排序
			
 
				+    cells = sorted(row_cells, key=lambda c: c[0])
			
 
				+    
			
 
				+    merged_cells = []
			
 
				+    i = 0
			
 
				+    while i < len(cells):
			
 
				+        current_cell = list(cells[i]) # 使用副本
			
 
				+        j = i + 1
			
 
				+        while j < len(cells):
			
 
				+            next_cell = cells[j]
			
 
				+            
			
 
				+            # 计算交集
			
 
				+            inter_x1 = max(current_cell[0], next_cell[0])
			
 
				+            inter_y1 = max(current_cell[1], next_cell[1])
			
 
				+            inter_x2 = min(current_cell[2], next_cell[2])
			
 
				+            inter_y2 = min(current_cell[3], next_cell[3])
			
 
				+            
			
 
				+            inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
			
 
				+            
			
 
				+            # 如果交集面积大于其中一个框面积的阈值，则认为是重叠
			
 
				+            current_area = (current_cell[2] - current_cell[0]) * (current_cell[3] - current_cell[1])
			
 
				+            next_area = (next_cell[2] - next_cell[0]) * (next_cell[3] - next_cell[1])
			
 
				+            
			
 
				+            if inter_area > min(current_area, next_area) * iou_threshold:
			
 
				+                # 合并两个框，取外包围框
			
 
				+                current_cell[0] = min(current_cell[0], next_cell[0])
			
 
				+                current_cell[1] = min(current_cell[1], next_cell[1])
			
 
				+                current_cell[2] = max(current_cell[2], next_cell[2])
			
 
				+                current_cell[3] = max(current_cell[3], next_cell[3])
			
 
				+                j += 1
			
 
				+            else:
			
 
				+                break # 不再与更远的单元格合并
			
 
				+        
			
 
				+        merged_cells.append(current_cell)
			
 
				+        i = j
			
 
				+        
			
 
				+    return merged_cells
			
 
				+
			
 
				+def build_robust_html_from_cells(cells_det_results: List[list]) -> str:
			
 
				+    """
			
 
				+    通过按行排序、分组、合并和对齐，稳健地将单元格Bbox列表转换为带data-bbox的HTML结构。
			
 
				+    """
			
 
				+    if not cells_det_results:
			
 
				+        return "<table><tbody></tbody></table>"
			
 
				+
			
 
				+    # ✅ 关键修复：使用副本防止修改原始列表
			
 
				+    import copy
			
 
				+    cells_copy = copy.deepcopy(cells_det_results)
			
 
				+    cells = filter_nested_boxes(cells_copy)
			
 
				+    cells.sort(key=lambda c: (c[1], c[0]))
			
 
				+
			
 
				+    rows = []
			
 
				+    if cells:
			
 
				+        current_row = [cells[0]]
			
 
				+        # ✅ 使用该行的Y范围而不是单个锚点
			
 
				+        row_y1 = cells[0][1]
			
 
				+        row_y2 = cells[0][3]
			
 
				+
			
 
				+        for cell in cells[1:]:
			
 
				+            # ✅ 计算垂直方向的重叠
			
 
				+            overlap_y1 = max(row_y1, cell[1])
			
 
				+            overlap_y2 = min(row_y2, cell[3])
			
 
				+            overlap_height = max(0, overlap_y2 - overlap_y1)
			
 
				+            
			
 
				+            # 单元格和当前行的平均高度
			
 
				+            cell_height = cell[3] - cell[1]
			
 
				+            row_height = row_y2 - row_y1
			
 
				+            avg_height = (cell_height + row_height) / 2
			
 
				+            
			
 
				+            # ✅ 重叠高度超过平均高度的50%，认为是同一行
			
 
				+            if overlap_height > avg_height * 0.5:
			
 
				+                current_row.append(cell)
			
 
				+                # 更新该行的Y范围（扩展以包含新单元格）
			
 
				+                row_y1 = min(row_y1, cell[1])
			
 
				+                row_y2 = max(row_y2, cell[3])
			
 
				+            else:
			
 
				+                rows.append(current_row)
			
 
				+                current_row = [cell]
			
 
				+                row_y1 = cell[1]
			
 
				+                row_y2 = cell[3]
			
 
				+        rows.append(current_row)
			
 
				+
			
 
				+    html = "<table><tbody>"
			
 
				+    for row_cells in rows:
			
 
				+        # 🎯 核心修正：在生成HTML前，合并行内的重叠单元格
			
 
				+        merged_row_cells = merge_overlapping_cells_in_row(row_cells)
			
 
				+        
			
 
				+        html += "<tr>"
			
 
				+        for cell in merged_row_cells:
			
 
				+            bbox_str = f"[{','.join(map(str, map(int, cell)))}]"
			
 
				+            html += f'<td data-bbox="{bbox_str}"></td>'
			
 
				+        html += "</tr>"
			
 
				+    html += "</tbody></table>"
			
 
				+    
			
 
				+    return html
			
 
				+
			
 
				+# --- 2. 内容填充工具 ---
			
 
				+
			
 
				+def fill_html_with_ocr_by_bbox(html_skeleton: str, ocr_dt_boxes: list, ocr_texts: list) -> str:
			
 
				+    """
			
 
				+    根据带有 data-bbox 的 HTML 骨架和全局 OCR 结果填充表格内容。
			
 
				+    """
			
 
				+    try:
			
 
				+        from bs4 import BeautifulSoup
			
 
				+    except ImportError:
			
 
				+        print("⚠️  BeautifulSoup not installed. Cannot fill table content. Returning skeleton.")
			
 
				+        return html_skeleton
			
 
				+
			
 
				+    soup = BeautifulSoup(html_skeleton, 'html.parser')
			
 
				+    # # ocr_dt_boxes = cells_ocr_res.get("rec_boxes", [])
			
 
				+    # ocr_texts = cells_ocr_res.get("rec_texts", [])
			
 
				+
			
 
				+    # 为快速查找，将OCR结果组织起来
			
 
				+    ocr_items = []
			
 
				+    for box, text in zip(ocr_dt_boxes, ocr_texts):
			
 
				+        center_x = (box[0] + box[2]) / 2
			
 
				+        center_y = (box[1] + box[3]) / 2
			
 
				+        ocr_items.append({'box': box, 'text': text, 'center': (center_x, center_y)})
			
 
				+
			
 
				+    for td in soup.find_all('td'):
			
 
				+        if not td.has_attr('data-bbox'):
			
 
				+            continue
			
 
				+        
			
 
				+        bbox_str = td['data-bbox'].strip('[]')
			
 
				+        cell_box = list(map(float, bbox_str.split(',')))
			
 
				+        cx1, cy1, cx2, cy2 = cell_box
			
 
				+
			
 
				+        cell_texts_with_pos = []
			
 
				+        # 查找所有中心点在该单元格内的OCR文本
			
 
				+        for item in ocr_items:
			
 
				+            if cx1 <= item['center'][0] <= cx2 and cy1 <= item['center'][1] <= cy2:
			
 
				+                # 记录文本和其y坐标，用于后续排序
			
 
				+                cell_texts_with_pos.append((item['text'], item['box'][1]))
			
 
				+        
			
 
				+        if cell_texts_with_pos:
			
 
				+            # 按y坐标排序，确保多行文本的顺序正确
			
 
				+            cell_texts_with_pos.sort(key=lambda x: x[1])
			
 
				+            # 合并文本
			
 
				+            td.string = " ".join([text for text, y in cell_texts_with_pos])
			
 
				+            
			
 
				+    return str(soup)
			
 
				+
			
 
				+# --- 3. 适配器主函数和应用逻辑 ---
			
 
				+
			
 
# Reference to the original (unpatched) method. It stays None until
# apply_table_recognition_adapter() captures it, and is reset to None by
# restore_original_function().
_original_predict_single = None
			
 
				+
			
 
				+def infer_missing_cells_from_ocr(
			
 
				+    detected_cells: List[list],
			
 
				+    cells_texts_list: List[str],
			
 
				+    overall_ocr_boxes: List[list],
			
 
				+    overall_ocr_texts: List[str],
			
 
				+    table_box: list
			
 
				+) -> tuple[List[list], List[str]]:
			
 
				+    """
			
 
				+    根据全局OCR结果推断缺失的单元格
			
 
				+    
			
 
				+    Args:
			
 
				+        detected_cells: 已检测到的单元格坐标 [[x1,y1,x2,y2], ...]
			
 
				+        overall_ocr_boxes: 全局OCR框坐标
			
 
				+        overall_ocr_texts: 全局OCR文本
			
 
				+        table_box: 表格区域 [x1,y1,x2,y2]
			
 
				+    
			
 
				+    Returns:
			
 
				+        补全后的单元格列表
			
 
				+    """
			
 
				+    import copy
			
 
				+    
			
 
				+    # 1. 找出未被覆盖的OCR框
			
 
				+    uncovered_ocr_boxes = []
			
 
				+    uncovered_ocr_texts = []
			
 
				+    
			
 
				+    for ocr_box, ocr_text in zip(overall_ocr_boxes, overall_ocr_texts):
			
 
				+        # 计算OCR框中心点
			
 
				+        ocr_cx = (ocr_box[0] + ocr_box[2]) / 2
			
 
				+        ocr_cy = (ocr_box[1] + ocr_box[3]) / 2
			
 
				+        
			
 
				+        # 检查是否被任何单元格覆盖
			
 
				+        is_covered = False
			
 
				+        for cell in detected_cells:
			
 
				+            if cell[0] <= ocr_cx <= cell[2] and cell[1] <= ocr_cy <= cell[3]:
			
 
				+                is_covered = True
			
 
				+                break
			
 
				+        
			
 
				+        if not is_covered:
			
 
				+            uncovered_ocr_boxes.append(ocr_box)
			
 
				+            uncovered_ocr_texts.append(ocr_text)
			
 
				+    
			
 
				+    if not uncovered_ocr_boxes:
			
 
				+        return detected_cells, cells_texts_list  # 没有漏检
			
 
				+    
			
 
				+    # 2. 按行分组已检测的单元格
			
 
				+    cells_sorted = sorted(detected_cells, key=lambda c: (c[1], c[0]))
			
 
				+    rows = []
			
 
				+    if cells_sorted:
			
 
				+        current_row = [cells_sorted[0]]
			
 
				+        row_y = (cells_sorted[0][1] + cells_sorted[0][3]) / 2
			
 
				+        row_height = cells_sorted[0][3] - cells_sorted[0][1]
			
 
				+        
			
 
				+        for cell in cells_sorted[1:]:
			
 
				+            cell_y = (cell[1] + cell[3]) / 2
			
 
				+            if abs(cell_y - row_y) < row_height * 0.7:
			
 
				+                current_row.append(cell)
			
 
				+            else:
			
 
				+                rows.append(current_row)
			
 
				+                current_row = [cell]
			
 
				+                row_y = (cell[1] + cell[3]) / 2
			
 
				+                row_height = cell[3] - cell[1]
			
 
				+        rows.append(current_row)
			
 
				+    
			
 
				+    # 3. 为每个未覆盖的OCR框推断单元格
			
 
				+    inferred_cells = []
			
 
				+    inferred_texts = []
			
 
				+    for ocr_box, ocr_text in zip(uncovered_ocr_boxes, uncovered_ocr_texts):
			
 
				+        ocr_cy = (ocr_box[1] + ocr_box[3]) / 2
			
 
				+        
			
 
				+        # 找到OCR框所在的行
			
 
				+        target_row_idx = None
			
 
				+        for i, row_cells in enumerate(rows):
			
 
				+            row_y1 = min(c[1] for c in row_cells)
			
 
				+            row_y2 = max(c[3] for c in row_cells)
			
 
				+            if row_y1 <= ocr_cy <= row_y2:
			
 
				+                target_row_idx = i
			
 
				+                break
			
 
				+        
			
 
				+        if target_row_idx is None:
			
 
				+            # 无法确定所属行，跳过
			
 
				+            print(f"⚠️  无法为OCR文本 '{ocr_text}' 确定所属行")
			
 
				+            continue
			
 
				+        
			
 
				+        target_row = rows[target_row_idx]
			
 
				+        
			
 
				+        # 4. 推断单元格边界
			
 
				+        # 上下边界：使用该行的统一高度
			
 
				+        cell_y1 = min(c[1] for c in target_row)
			
 
				+        cell_y2 = max(c[3] for c in target_row)
			
 
				+        
			
 
				+        # 左右边界：根据OCR框位置和相邻单元格推断
			
 
				+        ocr_cx = (ocr_box[0] + ocr_box[2]) / 2
			
 
				+        
			
 
				+        # 找左边最近的单元格
			
 
				+        left_cells = [c for c in target_row if c[2] < ocr_cx]
			
 
				+        if left_cells:
			
 
				+            cell_x1 = max(c[2] for c in left_cells)  # 左边单元格的右边界
			
 
				+        else:
			
 
				+            cell_x1 = table_box[0]  # 表格左边界
			
 
				+        
			
 
				+        # 找右边最近的单元格
			
 
				+        right_cells = [c for c in target_row if c[0] > ocr_cx]
			
 
				+        if right_cells:
			
 
				+            cell_x2 = min(c[0] for c in right_cells)  # 右边单元格的左边界
			
 
				+        else:
			
 
				+            cell_x2 = table_box[2]  # 表格右边界
			
 
				+        
			
 
				+        # 创建推断的单元格
			
 
				+        inferred_cell = [cell_x1, cell_y1, cell_x2, cell_y2]
			
 
				+        inferred_cells.append(inferred_cell)
			
 
				+        inferred_texts.append(ocr_text)
			
 
				+
			
 
				+        print(f"✅ 为OCR文本 '{ocr_text}' 推断单元格: {inferred_cell}")
			
 
				+    
			
 
				+    # 5. 合并检测到的和推断的单元格
			
 
				+    all_cells = detected_cells + inferred_cells
			
 
				+    all_texts = cells_texts_list + inferred_texts
			
 
				+    return all_cells, all_texts
			
 
				+
			
 
				+
			
 
def enhanced_predict_single_table_recognition_res(
    self,
    image_array: np.ndarray,
    overall_ocr_res: OCRResult,
    table_box: list,
    use_e2e_wired_table_rec_model: bool = False,
    use_e2e_wireless_table_rec_model: bool = False,
    use_wired_table_cells_trans_to_html: bool = False,
    use_wireless_table_cells_trans_to_html: bool = False,
    use_ocr_results_with_table_cells: bool = True,
    flag_find_nei_text: bool = True,
) -> SingleTableRecognitionResult:
    """Enhanced replacement method: OCR-guided completion of missing table cells.

    Installed on the pipeline class by apply_table_recognition_adapter(), so
    ``self`` is the pipeline instance. The enhanced path runs only when one of
    the ``*_cells_trans_to_html`` flags is set together with
    ``use_ocr_results_with_table_cells``; otherwise, or when no OCR
    sub-pipeline can be obtained, the call is forwarded unchanged to the saved
    original method. The two ``use_e2e_*`` flags and ``flag_find_nei_text``
    are only forwarded to that fallback.

    Returns:
        A SingleTableRecognitionResult built from ``cell_box_list``,
        ``table_ocr_pred`` and ``pred_html``, with ``neighbor_texts`` set to
        an empty string; or whatever the original method returns on fallback.
    """
    print(">>> [Adapter] enhanced_predict_single_table_recognition_res called")
    
    # Step 1: obtain table_cells_result (same as the original logic)
    table_cls_pred = list(self.table_cls_model(image_array))[0]
    table_cls_result = self.extract_results(table_cls_pred, "cls")

    if table_cls_result == "wired_table":
        table_cells_pred = list(self.wired_table_cells_detection_model(image_array, threshold=0.3))[0]
    else: # wireless_table
        table_cells_pred = list(self.wireless_table_cells_detection_model(image_array, threshold=0.3))[0]
    
    table_cells_result, table_cells_score = self.extract_results(table_cells_pred, "det")
    table_cells_result, table_cells_score = self.cells_det_results_nms(table_cells_result, table_cells_score)
    # Order cells top-to-bottom, then left-to-right
    table_cells_result.sort(key=lambda c: (c[1], c[0]))
    
    # Step 2: coordinate conversion
    from paddlex.inference.pipelines.table_recognition.table_recognition_post_processing_v2 import (
        convert_to_four_point_coordinates,
        convert_table_structure_pred_bbox,
        get_sub_regions_ocr_res
    )
    import numpy as np
    
    # Convert to four-point coordinates
    table_cells_result_4pt = convert_to_four_point_coordinates(table_cells_result)
    
    # Prepare the parameters for the coordinate conversion
    table_box_array = np.array([table_box])
    crop_start_point = [table_box[0], table_box[1]]
    img_shape = overall_ocr_res["doc_preprocessor_res"]["output_img"].shape[0:2]
    
    # Convert into the coordinate system of the original image
    table_cells_result_orig = convert_table_structure_pred_bbox(
        table_cells_result_4pt, crop_start_point, img_shape
    )
    # The helper may return a NumPy array; work with plain lists from here on
    if isinstance(table_cells_result_orig, np.ndarray):
        table_cells_result_orig = table_cells_result_orig.tolist()
    # NOTE(review): this list is sorted independently of table_cells_result,
    # while cells_texts_list below follows table_cells_result's order; this
    # presumably relies on both sorts giving the same order - confirm.
    table_cells_result_orig.sort(key=lambda c: (c[1], c[0]))

    # Step 3: OCR results restricted to the table region
    table_ocr_pred = get_sub_regions_ocr_res(overall_ocr_res, table_box_array)
    
    # Step 4: key improvement - OCR-guided cell completion
    if (use_wired_table_cells_trans_to_html or use_wireless_table_cells_trans_to_html) and use_ocr_results_with_table_cells:
        # Fix: make sure general_ocr_pipeline is initialized
        if self.general_ocr_pipeline is None:
            if hasattr(self, 'general_ocr_config_bak') and self.general_ocr_config_bak is not None:
                print("🔧 [Adapter] Initializing general_ocr_pipeline from backup config")
                self.general_ocr_pipeline = self.create_pipeline(self.general_ocr_config_bak)
            else:
                print("⚠️  [Adapter] No OCR pipeline available, falling back to original implementation")
                # _original_predict_single is only set by
                # apply_table_recognition_adapter(); it is None otherwise.
                return _original_predict_single(
                    self, image_array, overall_ocr_res, table_box,
                    use_e2e_wired_table_rec_model, use_e2e_wireless_table_rec_model,
                    use_wired_table_cells_trans_to_html, use_wireless_table_cells_trans_to_html,
                    use_ocr_results_with_table_cells, flag_find_nei_text
                )
        
        # OCR each cell, using the pre-conversion (crop-relative) coordinates
        cells_texts_list = self.gen_ocr_with_table_cells(image_array, table_cells_result)
        
        # Complete the missing cells
        completed_cells, cells_texts_list = infer_missing_cells_from_ocr(
            detected_cells=table_cells_result_orig,
            cells_texts_list=cells_texts_list,
            overall_ocr_boxes=table_ocr_pred["rec_boxes"],
            overall_ocr_texts=table_ocr_pred["rec_texts"],
            table_box=table_box
        )

        # Build the HTML skeleton from the converted (original-image) coordinates
        html_skeleton = build_robust_html_from_cells(completed_cells)
        
        # Fill in the content, using the cell boxes and per-cell OCR texts
        pred_html = fill_html_with_ocr_by_bbox(
            html_skeleton,
            completed_cells,      # cell bounding boxes
            cells_texts_list      # per-cell OCR texts
        )
        
        single_img_res = {
            "cell_box_list": completed_cells,
            "table_ocr_pred": table_ocr_pred,  # keep the full OCR information
            "pred_html": pred_html,
        }
        
        res = SingleTableRecognitionResult(single_img_res)
        res["neighbor_texts"] = ""
        return res
    else:
        print(f"⚠️  Fallback to original implementation: {table_cls_result}")
        return _original_predict_single(
            self, image_array, overall_ocr_res, table_box,
            use_e2e_wired_table_rec_model, use_e2e_wireless_table_rec_model,
            use_wired_table_cells_trans_to_html, use_wireless_table_cells_trans_to_html,
            use_ocr_results_with_table_cells, flag_find_nei_text
        )
			
 
				+
			
 
				+
			
 
				+def apply_table_recognition_adapter():
			
 
				+    """
			
 
				+    应用表格识别适配器。
			
 
				+    我们直接替换 _TableRecognitionPipelineV2 类中的 `predict_single_table_recognition_res` 方法。
			
 
				+    """
			
 
				+    global _original_predict_single
			
 
				+    
			
 
				+    try:
			
 
				+        # 导入目标类
			
 
				+        from paddlex.inference.pipelines.table_recognition.pipeline_v2 import _TableRecognitionPipelineV2
			
 
				+        
			
 
				+        # 保存原函数，防止重复应用补丁
			
 
				+        if _original_predict_single is None:
			
 
				+             _original_predict_single = _TableRecognitionPipelineV2.predict_single_table_recognition_res
			
 
				+        
			
 
				+        # 替换为增强版
			
 
				+        _TableRecognitionPipelineV2.predict_single_table_recognition_res = enhanced_predict_single_table_recognition_res
			
 
				+        
			
 
				+        print("✅ Table recognition adapter applied successfully (v3 - corrected).")
			
 
				+        return True
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        print(f"❌ Failed to apply table recognition adapter: {e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def restore_original_function():
			
 
				+    """恢复原始函数"""
			
 
				+    global _original_predict_single
			
 
				+    try:
			
 
				+        from paddlex.inference.pipelines.table_recognition.pipeline_v2 import _TableRecognitionPipelineV2
			
 
				+        
			
 
				+        if _original_predict_single is not None:
			
 
				+            _TableRecognitionPipelineV2.predict_single_table_recognition_res = _original_predict_single
			
 
				+            _original_predict_single = None # 重置状态
			
 
				+            print("✅ Original function restored.")
			
 
				+            return True
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        print(f"❌ Failed to restore original function: {e}")
			
 
				+        return False
			
--- a/ocr_tools/paddle_common/config/PP-StructureV3-RT-DETR-H_layout_17cls.yaml
+++ b/ocr_tools/paddle_common/config/PP-StructureV3-RT-DETR-H_layout_17cls.yaml
@@ -0,0 +1,226 @@
 
				+
			
 
				+pipeline_name: PP-StructureV3
			
 
				+
			
 
				+batch_size: 8
			
 
				+
			
 
				+use_doc_preprocessor: True
			
 
				+use_seal_recognition: True
			
 
				+use_table_recognition: True
			
 
				+use_formula_recognition: False
			
 
				+use_chart_recognition: True
			
 
				+use_region_detection: True
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: RT-DETR-H_layout_17cls
			
 
				+    model_dir: null
			
 
				+    batch_size: 8
			
 
				+    threshold: 
			
 
				+      0: 0.3  # paragraph_title
			
 
				+      1: 0.5  # image
			
 
				+      2: 0.4  # text
			
 
				+      3: 0.5  # number
			
 
				+      4: 0.5  # abstract
			
 
				+      5: 0.5  # content
			
 
				+      6: 0.5  # figure_table_chart_title
			
 
				+      7: 0.3  # formula
			
 
				+      8: 0.5  # table
			
 
				+      9: 0.5  # reference
			
 
				+      10: 0.5 # doc_title
			
 
				+      11: 0.5 # footnote
			
 
				+      12: 0.5 # header
			
 
				+      13: 0.5 # algorithm
			
 
				+      14: 0.5 # footer
			
 
				+      15: 0.45 # seal
			
 
				+      16: 0.5 # chart
			
 
				+      17: 0.5 # formula_number
			
 
				+      18: 0.5 # aside_text
			
 
				+      19: 0.5 # reference_content
			
 
				+    layout_nms: True
			
 
				+    layout_unclip_ratio: [1.0, 1.0] 
			
 
				+    layout_merge_bboxes_mode: 
			
 
				+      0: "large"  # paragraph_title
			
 
				+      1: "large"  # image
			
 
				+      2: "union"  # text
			
 
				+      3: "union"  # number
			
 
				+      4: "union"  # abstract
			
 
				+      5: "union"  # content
			
 
				+      6: "union"  # figure_table_chart_title
			
 
				+      7: "large"  # formula
			
 
				+      8: "union"  # table
			
 
				+      9: "union"  # reference
			
 
				+      10: "union" # doc_title
			
 
				+      11: "union" # footnote
			
 
				+      12: "union" # header
			
 
				+      13: "union" # algorithm
			
 
				+      14: "union" # footer
			
 
				+      15: "union" # seal
			
 
				+      16: "large" # chart
			
 
				+      17: "union" # formula_number
			
 
				+      18: "union" # aside_text
			
 
				+      19: "union" # reference_content
			
 
				+  ChartRecognition:
			
 
				+    module_name: chart_recognition
			
 
				+    model_name: PP-Chart2Table
			
 
				+    model_dir: null
			
 
				+    batch_size: 1 
			
 
				+  RegionDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocBlockLayout
			
 
				+    model_dir: null
			
 
				+    layout_nms: True
			
 
				+    layout_merge_bboxes_mode: "small"
			
 
				+
			
 
				+SubPipelines:
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    batch_size: 8
			
 
				+    use_doc_orientation_classify: True
			
 
				+    use_doc_unwarping: True
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
 
				+
			
 
				+  GeneralOCR:
			
 
				+    pipeline_name: OCR
			
 
				+    batch_size: 8
			
 
				+    text_type: general
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_textline_orientation: True
			
 
				+    SubModules:
			
 
				+      TextDetection:
			
 
				+        module_name: text_detection
			
 
				+        model_name: PP-OCRv5_server_det
			
 
				+        model_dir: null
			
 
				+        limit_side_len: 2560
			
 
				+        limit_type: max
			
 
				+        max_side_limit: 4000
			
 
				+        thresh: 0.3
			
 
				+        box_thresh: 0.6
			
 
				+        unclip_ratio: 1.5
			
 
				+      TextLineOrientation:
			
 
				+        module_name: textline_orientation
			
 
				+        model_name: PP-LCNet_x1_0_textline_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      TextRecognition:
			
 
				+        module_name: text_recognition
			
 
				+        model_name: PP-OCRv5_server_rec
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+        score_thresh: 0.0
			
 
				+ 
			
 
				+
			
 
				+  TableRecognition:
			
 
				+    pipeline_name: table_recognition_v2
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_ocr_model: False
			
 
				+    SubModules:  
			
 
				+      TableClassification:
			
 
				+        module_name: table_classification
			
 
				+        model_name: PP-LCNet_x1_0_table_cls
			
 
				+        model_dir: null
			
 
				+
			
 
				+      WiredTableStructureRecognition:
			
 
				+        module_name: table_structure_recognition
			
 
				+        model_name: SLANeXt_wired
			
 
				+        model_dir: null
			
 
				+      
			
 
				+      WirelessTableStructureRecognition:
			
 
				+        module_name: table_structure_recognition
			
 
				+        model_name: SLANet_plus
			
 
				+        model_dir: null
			
 
				+      
			
 
				+      WiredTableCellsDetection:
			
 
				+        module_name: table_cells_detection
			
 
				+        model_name: RT-DETR-L_wired_table_cell_det
			
 
				+        model_dir: null
			
 
				+      
			
 
				+      WirelessTableCellsDetection:
			
 
				+        module_name: table_cells_detection
			
 
				+        model_name: RT-DETR-L_wireless_table_cell_det
			
 
				+        model_dir: null
			
 
				+
			
 
				+      TableOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+    SubPipelines:
			
 
				+      GeneralOCR:
			
 
				+        pipeline_name: OCR
			
 
				+        text_type: general
			
 
				+        use_doc_preprocessor: False
			
 
				+        use_textline_orientation: True
			
 
				+        SubModules:
			
 
				+          TextDetection:
			
 
				+            module_name: text_detection
			
 
				+            model_name: PP-OCRv5_server_det
			
 
				+            model_dir: null
			
 
				+            limit_side_len: 2560
			
 
				+            limit_type: max
			
 
				+            max_side_limit: 4000
			
 
				+            thresh: 0.3
			
 
				+            box_thresh: 0.4
			
 
				+            unclip_ratio: 1.5
			
 
				+          TextLineOrientation:
			
 
				+            module_name: textline_orientation
			
 
				+            model_name: PP-LCNet_x1_0_textline_ori
			
 
				+            model_dir: null
			
 
				+            batch_size: 8
			
 
				+          TextRecognition:
			
 
				+            module_name: text_recognition
			
 
				+            model_name: PP-OCRv5_server_rec
			
 
				+            model_dir: null
			
 
				+            batch_size: 8
			
 
				+        score_thresh: 0.0
			
 
				+
			
 
				+  SealRecognition:
			
 
				+    pipeline_name: seal_recognition
			
 
				+    batch_size: 8
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubPipelines:
			
 
				+      SealOCR:
			
 
				+        pipeline_name: OCR
			
 
				+        batch_size: 8
			
 
				+        text_type: seal
			
 
				+        use_doc_preprocessor: False
			
 
				+        use_textline_orientation: False
			
 
				+        SubModules:
			
 
				+          TextDetection:
			
 
				+            module_name: seal_text_detection
			
 
				+            model_name: PP-OCRv4_server_seal_det
			
 
				+            model_dir: null
			
 
				+            limit_side_len: 2560
			
 
				+            limit_type: max
			
 
				+            max_side_limit: 4000
			
 
				+            thresh: 0.2
			
 
				+            box_thresh: 0.6
			
 
				+            unclip_ratio: 0.5
			
 
				+          TextRecognition:
			
 
				+            module_name: text_recognition
			
 
				+            model_name: PP-OCRv5_server_rec
			
 
				+            model_dir: null
			
 
				+            batch_size: 8
			
 
				+            score_thresh: 0
			
 
				+    
			
 
				+  FormulaRecognition:
			
 
				+    pipeline_name: formula_recognition
			
 
				+    batch_size: 8
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubModules:
			
 
				+      FormulaRecognition:
			
 
				+        module_name: formula_recognition
			
 
				+        model_name: PP-FormulaNet_plus-L
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
--- a/ocr_tools/paddle_common/config/PP-StructureV3-zhch.yaml
+++ b/ocr_tools/paddle_common/config/PP-StructureV3-zhch.yaml
@@ -0,0 +1,226 @@
 
				+
			
 
				+pipeline_name: PP-StructureV3
			
 
				+
			
 
				+batch_size: 8
			
 
				+
			
 
				+use_doc_preprocessor: True
			
 
				+use_seal_recognition: True
			
 
				+use_table_recognition: True
			
 
				+use_formula_recognition: True
			
 
				+use_chart_recognition: False
			
 
				+use_region_detection: True
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocLayout_plus-L
			
 
				+    model_dir: null
			
 
				+    batch_size: 8
			
 
				+    threshold: 
			
 
				+      0: 0.3  # paragraph_title
			
 
				+      1: 0.5  # image
			
 
				+      2: 0.4  # text
			
 
				+      3: 0.5  # number
			
 
				+      4: 0.5  # abstract
			
 
				+      5: 0.5  # content
			
 
				+      6: 0.5  # figure_table_chart_title
			
 
				+      7: 0.3  # formula
			
 
				+      8: 0.5  # table
			
 
				+      9: 0.5  # reference
			
 
				+      10: 0.5 # doc_title
			
 
				+      11: 0.5 # footnote
			
 
				+      12: 0.5 # header
			
 
				+      13: 0.5 # algorithm
			
 
				+      14: 0.5 # footer
			
 
				+      15: 0.45 # seal
			
 
				+      16: 0.5 # chart
			
 
				+      17: 0.5 # formula_number
			
 
				+      18: 0.5 # aside_text
			
 
				+      19: 0.5 # reference_content
			
 
				+    layout_nms: True
			
 
				+    layout_unclip_ratio: [1.0, 1.0] 
			
 
				+    layout_merge_bboxes_mode: 
			
 
				+      0: "large"  # paragraph_title
			
 
				+      1: "large"  # image
			
 
				+      2: "union"  # text
			
 
				+      3: "union"  # number
			
 
				+      4: "union"  # abstract
			
 
				+      5: "union"  # content
			
 
				+      6: "union"  # figure_table_chart_title
			
 
				+      7: "large"  # formula
			
 
				+      8: "union"  # table
			
 
				+      9: "union"  # reference
			
 
				+      10: "union" # doc_title
			
 
				+      11: "union" # footnote
			
 
				+      12: "union" # header
			
 
				+      13: "union" # algorithm
			
 
				+      14: "union" # footer
			
 
				+      15: "union" # seal
			
 
				+      16: "large" # chart
			
 
				+      17: "union" # formula_number
			
 
				+      18: "union" # aside_text
			
 
				+      19: "union" # reference_content
			
 
				+  ChartRecognition:
			
 
				+    module_name: chart_recognition
			
 
				+    model_name: PP-Chart2Table
			
 
				+    model_dir: null
			
 
				+    batch_size: 1 
			
 
				+  RegionDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocBlockLayout
			
 
				+    model_dir: null
			
 
				+    layout_nms: True
			
 
				+    layout_merge_bboxes_mode: "small"
			
 
				+
			
 
				+SubPipelines:
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    batch_size: 8
			
 
				+    use_doc_orientation_classify: True
			
 
				+    use_doc_unwarping: False
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
 
				+
			
 
				+  GeneralOCR:
			
 
				+    pipeline_name: OCR
			
 
				+    batch_size: 8
			
 
				+    text_type: general
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_textline_orientation: True
			
 
				+    SubModules:
			
 
				+      TextDetection:
			
 
				+        module_name: text_detection
			
 
				+        model_name: PP-OCRv5_server_det
			
 
				+        model_dir: null
			
 
				+        limit_side_len: 1200
			
 
				+        limit_type: max
			
 
				+        max_side_limit: 4000
			
 
				+        thresh: 0.3
			
 
				+        box_thresh: 0.6
			
 
				+        unclip_ratio: 1.5
			
 
				+      TextLineOrientation:
			
 
				+        module_name: textline_orientation
			
 
				+        model_name: PP-LCNet_x1_0_textline_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      TextRecognition:
			
 
				+        module_name: text_recognition
			
 
				+        model_name: PP-OCRv5_server_rec
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+        score_thresh: 0.0
			
 
				+ 
			
 
				+
			
 
				+  TableRecognition:
			
 
				+    pipeline_name: table_recognition_v2
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_ocr_model: False
			
 
				+    SubModules:  
			
 
				+      TableClassification:
			
 
				+        module_name: table_classification
			
 
				+        model_name: PP-LCNet_x1_0_table_cls
			
 
				+        model_dir: null
			
 
				+
			
 
				+      WiredTableStructureRecognition:
			
 
				+        module_name: table_structure_recognition
			
 
				+        model_name: SLANeXt_wired
			
 
				+        model_dir: null
			
 
				+      
			
 
				+      WirelessTableStructureRecognition:
			
 
				+        module_name: table_structure_recognition
			
 
				+        model_name: SLANet_plus
			
 
				+        model_dir: null
			
 
				+      
			
 
				+      WiredTableCellsDetection:
			
 
				+        module_name: table_cells_detection
			
 
				+        model_name: RT-DETR-L_wired_table_cell_det
			
 
				+        model_dir: null
			
 
				+      
			
 
				+      WirelessTableCellsDetection:
			
 
				+        module_name: table_cells_detection
			
 
				+        model_name: RT-DETR-L_wireless_table_cell_det
			
 
				+        model_dir: null
			
 
				+
			
 
				+      TableOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+    SubPipelines:
			
 
				+      GeneralOCR:
			
 
				+        pipeline_name: OCR
			
 
				+        text_type: general
			
 
				+        use_doc_preprocessor: False
			
 
				+        use_textline_orientation: True
			
 
				+        SubModules:
			
 
				+          TextDetection:
			
 
				+            module_name: text_detection
			
 
				+            model_name: PP-OCRv5_server_det
			
 
				+            model_dir: null
			
 
				+            limit_side_len: 1600
			
 
				+            limit_type: max
			
 
				+            max_side_limit: 4000
			
 
				+            thresh: 0.3
			
 
				+            box_thresh: 0.4
			
 
				+            unclip_ratio: 1.5
			
 
				+          TextLineOrientation:
			
 
				+            module_name: textline_orientation
			
 
				+            model_name: PP-LCNet_x1_0_textline_ori
			
 
				+            model_dir: null
			
 
				+            batch_size: 8
			
 
				+          TextRecognition:
			
 
				+            module_name: text_recognition
			
 
				+            model_name: PP-OCRv5_server_rec
			
 
				+            model_dir: null
			
 
				+            batch_size: 8
			
 
				+        score_thresh: 0.0
			
 
				+
			
 
				+  SealRecognition:
			
 
				+    pipeline_name: seal_recognition
			
 
				+    batch_size: 8
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubPipelines:
			
 
				+      SealOCR:
			
 
				+        pipeline_name: OCR
			
 
				+        batch_size: 8
			
 
				+        text_type: seal
			
 
				+        use_doc_preprocessor: False
			
 
				+        use_textline_orientation: False
			
 
				+        SubModules:
			
 
				+          TextDetection:
			
 
				+            module_name: seal_text_detection
			
 
				+            model_name: PP-OCRv4_server_seal_det
			
 
				+            model_dir: null
			
 
				+            limit_side_len: 736
			
 
				+            limit_type: min
			
 
				+            max_side_limit: 4000
			
 
				+            thresh: 0.2
			
 
				+            box_thresh: 0.6
			
 
				+            unclip_ratio: 0.5
			
 
				+          TextRecognition:
			
 
				+            module_name: text_recognition
			
 
				+            model_name: PP-OCRv5_server_rec
			
 
				+            model_dir: null
			
 
				+            batch_size: 8
			
 
				+            score_thresh: 0
			
 
				+    
			
 
				+  FormulaRecognition:
			
 
				+    pipeline_name: formula_recognition
			
 
				+    batch_size: 8
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubModules:
			
 
				+      FormulaRecognition:
			
 
				+        module_name: formula_recognition
			
 
				+        model_name: PP-FormulaNet_plus-L
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
--- a/ocr_tools/paddle_common/config/PP-StructureV3.yaml
+++ b/ocr_tools/paddle_common/config/PP-StructureV3.yaml
@@ -0,0 +1,226 @@
 
				+
			
 
				+pipeline_name: PP-StructureV3
			
 
				+
			
 
				+batch_size: 8
			
 
				+
			
 
				+use_doc_preprocessor: True
			
 
				+use_seal_recognition: True
			
 
				+use_table_recognition: True
			
 
				+use_formula_recognition: False
			
 
				+use_chart_recognition: True
			
 
				+use_region_detection: True
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocLayout_plus-L
			
 
				+    model_dir: null
			
 
				+    batch_size: 8
			
 
				+    threshold: 
			
 
				+      0: 0.3  # paragraph_title
			
 
				+      1: 0.5  # image
			
 
				+      2: 0.4  # text
			
 
				+      3: 0.5  # number
			
 
				+      4: 0.5  # abstract
			
 
				+      5: 0.5  # content
			
 
				+      6: 0.5  # figure_table_chart_title
			
 
				+      7: 0.3  # formula
			
 
				+      8: 0.5  # table
			
 
				+      9: 0.5  # reference
			
 
				+      10: 0.5 # doc_title
			
 
				+      11: 0.5 # footnote
			
 
				+      12: 0.5 # header
			
 
				+      13: 0.5 # algorithm
			
 
				+      14: 0.5 # footer
			
 
				+      15: 0.45 # seal
			
 
				+      16: 0.5 # chart
			
 
				+      17: 0.5 # formula_number
			
 
				+      18: 0.5 # aside_text
			
 
				+      19: 0.5 # reference_content
			
 
				+    layout_nms: True
			
 
				+    layout_unclip_ratio: [1.0, 1.0] 
			
 
				+    layout_merge_bboxes_mode: 
			
 
				+      0: "large"  # paragraph_title
			
 
				+      1: "large"  # image
			
 
				+      2: "union"  # text
			
 
				+      3: "union"  # number
			
 
				+      4: "union"  # abstract
			
 
				+      5: "union"  # content
			
 
				+      6: "union"  # figure_table_chart_title
			
 
				+      7: "large"  # formula
			
 
				+      8: "union"  # table
			
 
				+      9: "union"  # reference
			
 
				+      10: "union" # doc_title
			
 
				+      11: "union" # footnote
			
 
				+      12: "union" # header
			
 
				+      13: "union" # algorithm
			
 
				+      14: "union" # footer
			
 
				+      15: "union" # seal
			
 
				+      16: "large" # chart
			
 
				+      17: "union" # formula_number
			
 
				+      18: "union" # aside_text
			
 
				+      19: "union" # reference_content
			
 
				+  ChartRecognition:
			
 
				+    module_name: chart_recognition
			
 
				+    model_name: PP-Chart2Table
			
 
				+    model_dir: null
			
 
				+    batch_size: 1 
			
 
				+  RegionDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocBlockLayout
			
 
				+    model_dir: null
			
 
				+    layout_nms: True
			
 
				+    layout_merge_bboxes_mode: "small"
			
 
				+
			
 
				+SubPipelines:
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    batch_size: 8
			
 
				+    use_doc_orientation_classify: True
			
 
				+    use_doc_unwarping: True
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
 
				+
			
 
				+  GeneralOCR:
			
 
				+    pipeline_name: OCR
			
 
				+    batch_size: 8
			
 
				+    text_type: general
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_textline_orientation: True
			
 
				+    SubModules:
			
 
				+      TextDetection:
			
 
				+        module_name: text_detection
			
 
				+        model_name: PP-OCRv5_server_det
			
 
				+        model_dir: null
			
 
				+        limit_side_len: 2560
			
 
				+        limit_type: max
			
 
				+        max_side_limit: 4000
			
 
				+        thresh: 0.3
			
 
				+        box_thresh: 0.6
			
 
				+        unclip_ratio: 1.5
			
 
				+      TextLineOrientation:
			
 
				+        module_name: textline_orientation
			
 
				+        model_name: PP-LCNet_x1_0_textline_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      TextRecognition:
			
 
				+        module_name: text_recognition
			
 
				+        model_name: PP-OCRv5_server_rec
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+        score_thresh: 0.0
			
 
				+ 
			
 
				+
			
 
				+  TableRecognition:
			
 
				+    pipeline_name: table_recognition_v2
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_ocr_model: False
			
 
				+    SubModules:  
			
 
				+      TableClassification:
			
 
				+        module_name: table_classification
			
 
				+        model_name: PP-LCNet_x1_0_table_cls
			
 
				+        model_dir: null
			
 
				+
			
 
				+      WiredTableStructureRecognition:
			
 
				+        module_name: table_structure_recognition
			
 
				+        model_name: SLANeXt_wired
			
 
				+        model_dir: null
			
 
				+      
			
 
				+      WirelessTableStructureRecognition:
			
 
				+        module_name: table_structure_recognition
			
 
				+        model_name: SLANet_plus
			
 
				+        model_dir: null
			
 
				+      
			
 
				+      WiredTableCellsDetection:
			
 
				+        module_name: table_cells_detection
			
 
				+        model_name: RT-DETR-L_wired_table_cell_det
			
 
				+        model_dir: null
			
 
				+      
			
 
				+      WirelessTableCellsDetection:
			
 
				+        module_name: table_cells_detection
			
 
				+        model_name: RT-DETR-L_wireless_table_cell_det
			
 
				+        model_dir: null
			
 
				+
			
 
				+      TableOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+    SubPipelines:
			
 
				+      GeneralOCR:
			
 
				+        pipeline_name: OCR
			
 
				+        text_type: general
			
 
				+        use_doc_preprocessor: False
			
 
				+        use_textline_orientation: True
			
 
				+        SubModules:
			
 
				+          TextDetection:
			
 
				+            module_name: text_detection
			
 
				+            model_name: PP-OCRv5_server_det
			
 
				+            model_dir: null
			
 
				+            limit_side_len: 2560
			
 
				+            limit_type: max
			
 
				+            max_side_limit: 4000
			
 
				+            thresh: 0.3
			
 
				+            box_thresh: 0.4
			
 
				+            unclip_ratio: 1.5
			
 
				+          TextLineOrientation:
			
 
				+            module_name: textline_orientation
			
 
				+            model_name: PP-LCNet_x1_0_textline_ori
			
 
				+            model_dir: null
			
 
				+            batch_size: 8
			
 
				+          TextRecognition:
			
 
				+            module_name: text_recognition
			
 
				+            model_name: PP-OCRv5_server_rec
			
 
				+            model_dir: null
			
 
				+            batch_size: 8
			
 
				+            score_thresh: 0.0
			
 
				+
			
 
				+  SealRecognition:
			
 
				+    pipeline_name: seal_recognition
			
 
				+    batch_size: 8
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubPipelines:
			
 
				+      SealOCR:
			
 
				+        pipeline_name: OCR
			
 
				+        batch_size: 8
			
 
				+        text_type: seal
			
 
				+        use_doc_preprocessor: False
			
 
				+        use_textline_orientation: False
			
 
				+        SubModules:
			
 
				+          TextDetection:
			
 
				+            module_name: seal_text_detection
			
 
				+            model_name: PP-OCRv4_server_seal_det
			
 
				+            model_dir: null
			
 
				+            limit_side_len: 2560
			
 
				+            limit_type: max
			
 
				+            max_side_limit: 4000
			
 
				+            thresh: 0.2
			
 
				+            box_thresh: 0.6
			
 
				+            unclip_ratio: 0.5
			
 
				+          TextRecognition:
			
 
				+            module_name: text_recognition
			
 
				+            model_name: PP-OCRv5_server_rec
			
 
				+            model_dir: null
			
 
				+            batch_size: 8
			
 
				+            score_thresh: 0
			
 
				+    
			
 
				+  FormulaRecognition:
			
 
				+    pipeline_name: formula_recognition
			
 
				+    batch_size: 8
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubModules:
			
 
				+      FormulaRecognition:
			
 
				+        module_name: formula_recognition
			
 
				+        model_name: PP-FormulaNet_plus-L
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
--- a/ocr_tools/paddle_common/config/PaddleOCR-VL-Client-RT-DETR-H_layout_17cls.yaml
+++ b/ocr_tools/paddle_common/config/PaddleOCR-VL-Client-RT-DETR-H_layout_17cls.yaml
@@ -0,0 +1,98 @@
 
				+
			
 
				+pipeline_name: PaddleOCR-VL
			
 
				+
			
 
				+batch_size: 64
			
 
				+
			
 
				+use_queues: True
			
 
				+
			
 
				+use_doc_preprocessor: True
			
 
				+use_layout_detection: True
			
 
				+use_chart_recognition: False
			
 
				+format_block_content: False
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: RT-DETR-H_layout_17cls
			
 
				+    model_dir: null
			
 
				+    batch_size: 8
			
 
				+    threshold: 
			
 
				+      0: 0.5 # abstract
			
 
				+      1: 0.5 # algorithm
			
 
				+      2: 0.5 # aside_text
			
 
				+      3: 0.5 # chart
			
 
				+      4: 0.5 # content
			
 
				+      5: 0.4 # formula
			
 
				+      6: 0.4 # doc_title
			
 
				+      7: 0.5 # figure_title
			
 
				+      8: 0.5 # footer
			
 
				+      9: 0.5 # footer
			
 
				+      10: 0.5 # footnote
			
 
				+      11: 0.5 # formula_number
			
 
				+      12: 0.5 # header
			
 
				+      13: 0.5 # header
			
 
				+      14: 0.5 # image
			
 
				+      15: 0.4 # formula
			
 
				+      16: 0.5 # number
			
 
				+      17: 0.4 # paragraph_title
			
 
				+      18: 0.5 # reference
			
 
				+      19: 0.5 # reference_content
			
 
				+      20: 0.45 # seal
			
 
				+      21: 0.5 # table
			
 
				+      22: 0.4 # text
			
 
				+      23: 0.4 # text
			
 
				+      24: 0.5 # vision_footnote
			
 
				+    layout_nms: True
			
 
				+    layout_unclip_ratio: [1.0, 1.0] 
			
 
				+    layout_merge_bboxes_mode: 
			
 
				+      0: "union" # abstract
			
 
				+      1: "union" # algorithm
			
 
				+      2: "union" # aside_text
			
 
				+      3: "large" # chart
			
 
				+      4: "union" # content
			
 
				+      5: "large" # display_formula
			
 
				+      6: "large" # doc_title
			
 
				+      7: "union" # figure_title
			
 
				+      8: "union" # footer
			
 
				+      9: "union" # footer
			
 
				+      10: "union" # footnote
			
 
				+      11: "union" # formula_number
			
 
				+      12: "union" # header
			
 
				+      13: "union" # header
			
 
				+      14: "union" # image
			
 
				+      15: "large" # inline_formula
			
 
				+      16: "union" # number
			
 
				+      17: "large" # paragraph_title
			
 
				+      18: "union" # reference
			
 
				+      19: "union" # reference_content
			
 
				+      20: "union" # seal
			
 
				+      21: "union" # table
			
 
				+      22: "union" # text
			
 
				+      23: "union" # text
			
 
				+      24: "union" # vision_footnote
			
 
				+  VLRecognition:
			
 
				+    module_name: vl_recognition
			
 
				+    model_name: PaddleOCR-VL-0.9B
			
 
				+    model_dir: null
			
 
				+    batch_size: 2048
			
 
				+    genai_config:
			
 
				+      backend: vllm-server
			
 
				+      server_url: http://10.192.72.11:20016/v1
			
 
				+
			
 
				+SubPipelines:
			
 
				+
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    batch_size: 8
			
 
				+    use_doc_orientation_classify: True
			
 
				+    use_doc_unwarping: True
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
--- a/ocr_tools/paddle_common/config/PaddleOCR-VL-Client.yaml
+++ b/ocr_tools/paddle_common/config/PaddleOCR-VL-Client.yaml
@@ -0,0 +1,98 @@
 
				+
			
 
				+pipeline_name: PaddleOCR-VL
			
 
				+
			
 
				+batch_size: 64
			
 
				+
			
 
				+use_queues: True
			
 
				+
			
 
				+use_doc_preprocessor: True
			
 
				+use_layout_detection: True
			
 
				+use_chart_recognition: False
			
 
				+format_block_content: False
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocLayoutV2
			
 
				+    model_dir: null
			
 
				+    batch_size: 8
			
 
				+    threshold: 
			
 
				+      0: 0.5 # abstract
			
 
				+      1: 0.5 # algorithm
			
 
				+      2: 0.5 # aside_text
			
 
				+      3: 0.5 # chart
			
 
				+      4: 0.5 # content
			
 
				+      5: 0.4 # display_formula
			
 
				+      6: 0.4 # doc_title
			
 
				+      7: 0.5 # figure_title
			
 
				+      8: 0.5 # footer
			
 
				+      9: 0.5 # footer
			
 
				+      10: 0.5 # footnote
			
 
				+      11: 0.5 # formula_number
			
 
				+      12: 0.5 # header
			
 
				+      13: 0.5 # header
			
 
				+      14: 0.5 # image
			
 
				+      15: 0.4 # inline_formula
			
 
				+      16: 0.5 # number
			
 
				+      17: 0.4 # paragraph_title
			
 
				+      18: 0.5 # reference
			
 
				+      19: 0.5 # reference_content
			
 
				+      20: 0.45 # seal
			
 
				+      21: 0.5 # table
			
 
				+      22: 0.4 # text
			
 
				+      23: 0.4 # text
			
 
				+      24: 0.5 # vision_footnote
			
 
				+    layout_nms: True
			
 
				+    layout_unclip_ratio: [1.0, 1.0] 
			
 
				+    layout_merge_bboxes_mode: 
			
 
				+      0: "union" # abstract
			
 
				+      1: "union" # algorithm
			
 
				+      2: "union" # aside_text
			
 
				+      3: "large" # chart
			
 
				+      4: "union" # content
			
 
				+      5: "large" # display_formula
			
 
				+      6: "large" # doc_title
			
 
				+      7: "union" # figure_title
			
 
				+      8: "union" # footer
			
 
				+      9: "union" # footer
			
 
				+      10: "union" # footnote
			
 
				+      11: "union" # formula_number
			
 
				+      12: "union" # header
			
 
				+      13: "union" # header
			
 
				+      14: "union" # image
			
 
				+      15: "large" # inline_formula
			
 
				+      16: "union" # number
			
 
				+      17: "large" # paragraph_title
			
 
				+      18: "union" # reference
			
 
				+      19: "union" # reference_content
			
 
				+      20: "union" # seal
			
 
				+      21: "union" # table
			
 
				+      22: "union" # text
			
 
				+      23: "union" # text
			
 
				+      24: "union" # vision_footnote
			
 
				+  VLRecognition:
			
 
				+    module_name: vl_recognition
			
 
				+    model_name: PaddleOCR-VL-0.9B
			
 
				+    model_dir: null
			
 
				+    batch_size: 2048
			
 
				+    genai_config:
			
 
				+      backend: vllm-server
			
 
				+      server_url: http://10.192.72.11:20016/v1
			
 
				+
			
 
				+SubPipelines:
			
 
				+
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    batch_size: 8
			
 
				+    use_doc_orientation_classify: True
			
 
				+    use_doc_unwarping: True
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
--- a/ocr_tools/paddle_common/config/PaddleOCR-VL.yaml
+++ b/ocr_tools/paddle_common/config/PaddleOCR-VL.yaml
@@ -0,0 +1,101 @@
 
				+
			
 
				+pipeline_name: PaddleOCR-VL
			
 
				+
			
 
				+batch_size: 64
			
 
				+
			
 
				+use_queues: True
			
 
				+
			
 
				+use_doc_preprocessor: False
			
 
				+use_layout_detection: True
			
 
				+use_chart_recognition: False
			
 
				+format_block_content: False
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocLayoutV2
			
 
				+    model_dir: null
			
 
				+    batch_size: 8
			
 
				+    threshold: 
			
 
				+      0: 0.5 # abstract
			
 
				+      1: 0.5 # algorithm
			
 
				+      2: 0.5 # aside_text
			
 
				+      3: 0.5 # chart
			
 
				+      4: 0.5 # content
			
 
				+      5: 0.4 # display_formula
			
 
				+      6: 0.4 # doc_title
			
 
				+      7: 0.5 # figure_title
			
 
				+      8: 0.5 # footer
			
 
				+      9: 0.5 # footer
			
 
				+      10: 0.5 # footnote
			
 
				+      11: 0.5 # formula_number
			
 
				+      12: 0.5 # header
			
 
				+      13: 0.5 # header
			
 
				+      14: 0.5 # image
			
 
				+      15: 0.4 # inline_formula
			
 
				+      16: 0.5 # number
			
 
				+      17: 0.4 # paragraph_title
			
 
				+      18: 0.5 # reference
			
 
				+      19: 0.5 # reference_content
			
 
				+      20: 0.45 # seal
			
 
				+      21: 0.5 # table
			
 
				+      22: 0.4 # text
			
 
				+      23: 0.4 # text
			
 
				+      24: 0.5 # vision_footnote
			
 
				+    layout_nms: True
			
 
				+    layout_unclip_ratio: [1.0, 1.0] 
			
 
				+    layout_merge_bboxes_mode: 
			
 
				+      0: "union" # abstract
			
 
				+      1: "union" # algorithm
			
 
				+      2: "union" # aside_text
			
 
				+      3: "large" # chart
			
 
				+      4: "union" # content
			
 
				+      5: "large" # display_formula
			
 
				+      6: "large" # doc_title
			
 
				+      7: "union" # figure_title
			
 
				+      8: "union" # footer
			
 
				+      9: "union" # footer
			
 
				+      10: "union" # footnote
			
 
				+      11: "union" # formula_number
			
 
				+      12: "union" # header
			
 
				+      13: "union" # header
			
 
				+      14: "union" # image
			
 
				+      15: "large" # inline_formula
			
 
				+      16: "union" # number
			
 
				+      17: "large" # paragraph_title
			
 
				+      18: "union" # reference
			
 
				+      19: "union" # reference_content
			
 
				+      20: "union" # seal
			
 
				+      21: "union" # table
			
 
				+      22: "union" # text
			
 
				+      23: "union" # text
			
 
				+      24: "union" # vision_footnote
			
 
				+  VLRecognition:
			
 
				+    module_name: vl_recognition
			
 
				+    model_name: PaddleOCR-VL-0.9B
			
 
				+    model_dir: null
			
 
				+    batch_size: 2048
			
 
				+    genai_config:
			
 
				+      backend: native
			
 
				+      gpu-memory-utilization: 0.3
			
 
				+      max-num-seqs: 64
			
 
				+      max-model-len: 16384
			
 
				+      enforce-eager: true
			
 
				+      disable-cuda-graph: true
			
 
				+
			
 
				+SubPipelines:
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    batch_size: 8
			
 
				+    use_doc_orientation_classify: True
			
 
				+    use_doc_unwarping: True
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
--- a/ocr_tools/paddle_common/config/README.md
+++ b/ocr_tools/paddle_common/config/README.md
@@ -0,0 +1,89 @@
 
				+# PaddleX Pipeline 配置文件
			
 
				+
			
 
				+本目录包含 PaddleX 的 pipeline 配置文件，用于配置不同的文档解析 pipeline。
			
 
				+
			
 
				+## 配置文件分类
			
 
				+
			
 
				+### PaddleOCR-VL 相关配置
			
 
				+
			
 
				+- **PaddleOCR-VL.yaml**: 基础 PaddleOCR-VL pipeline 配置（VLRecognition 使用本地 `native` 后端推理）
			
 
				+- **PaddleOCR-VL-Client.yaml**: PaddleOCR-VL 客户端配置（VLRecognition 通过 `vllm-server` 后端访问 `server_url` 指定的远程推理服务）
			
 
				+- **PaddleOCR-VL-Client-RT-DETR-H_layout_17cls.yaml**: 使用 RT-DETR-H 布局检测模型的 PaddleOCR-VL 配置（17 类布局）
			
 
				+
			
 
				+### PP-StructureV3 相关配置
			
 
				+
			
 
				+- **PP-StructureV3.yaml**: 基础 PP-StructureV3 pipeline 配置
			
 
				+- **PP-StructureV3-zhch.yaml**: 自定义的 PP-StructureV3 配置（zhch 版本）
			
 
				+- **PP-StructureV3-RT-DETR-H_layout_17cls.yaml**: 使用 RT-DETR-H 布局检测模型的 PP-StructureV3 配置（17 类布局）
			
 
				+
			
 
				+### 其他配置
			
 
				+
			
 
				+- **layout_parsing.yaml**: 布局解析配置
			
 
				+- **table_recognition_v2.yaml**: 表格识别 V2 配置
			
 
				+- **table_recognition_v2-zhch.yaml**: 自定义的表格识别 V2 配置（zhch 版本）
			
 
				+
			
 
				+## 使用方法
			
 
				+
			
 
				+### 在命令行中使用
			
 
				+
			
 
				+```bash
			
 
				+# 使用相对路径（从工具目录运行）
			
 
				+python main.py --input document.pdf --output_dir ./output \
			
 
				+  --pipeline ../paddle_common/config/PaddleOCR-VL-Client-RT-DETR-H_layout_17cls.yaml
			
 
				+
			
 
				+# 使用绝对路径
			
 
				+python main.py --input document.pdf --output_dir ./output \
			
 
				+  --pipeline /path/to/ocr_platform/ocr_tools/paddle_common/config/PP-StructureV3-zhch.yaml
			
 
				+```
			
 
				+
			
 
				+### 在代码中使用
			
 
				+
			
 
				+```python
			
 
				+from pathlib import Path
			
 
				+
			
 
				+# 获取配置文件路径
			
 
				+config_dir = Path(__file__).parent / "config"
			
 
				+config_path = config_dir / "PaddleOCR-VL-Client-RT-DETR-H_layout_17cls.yaml"
			
 
				+
			
 
				+# 使用配置文件初始化 pipeline
			
 
				+processor = PaddleXProcessor(
			
 
				+    pipeline_name=str(config_path),
			
 
				+    device="gpu:0"
			
 
				+)
			
 
				+```
			
 
				+
			
 
				+## 配置文件说明
			
 
				+
			
 
				+### PaddleOCR-VL vs PP-StructureV3
			
 
				+
			
 
				+- **PaddleOCR-VL**: 基于视觉语言模型的文档解析，专注于视觉理解
			
 
				+- **PP-StructureV3**: 更全面的文档结构分析，包括表格、公式、图表等识别
			
 
				+
			
 
				+### RT-DETR-H 布局检测模型
			
 
				+
			
 
				+使用 RT-DETR-H 作为布局检测模型，支持 17 类布局检测：
			
 
				+- paragraph_title, image, text, number, abstract, content
			
 
				+- figure_title, formula, table, table_title, reference
			
 
				+- doc_title, footnote, header, algorithm, footer, seal
			
 
				+
			
 
				+### 自定义配置（zhch 版本）
			
 
				+
			
 
				+带有 `-zhch` 后缀的配置文件是自定义版本，可能包含：
			
 
				+- 调整的阈值参数
			
 
				+- 优化的模型配置
			
 
				+- 特定的功能开关设置
			
 
				+
			
 
				+## 注意事项
			
 
				+
			
 
				+1. **路径引用**：配置文件路径可以是相对路径或绝对路径
			
 
				+2. **Pipeline 名称**：也可以直接使用 pipeline 名称（如 `PaddleOCR-VL`），无需指定配置文件
			
 
				+3. **设备配置**：某些配置可能需要特定的设备（GPU/CPU）支持
			
 
				+4. **模型文件**：确保配置文件中指定的模型文件已正确安装
			
 
				+
			
 
				+## 相关工具
			
 
				+
			
 
				+- `paddle_vl_tool`: PaddleOCR-VL 批量处理工具
			
 
				+- `ppstructure_tool`: PP-StructureV3 批量处理工具
			
 
				+- `paddle_common`: PaddleX 共享核心模块
			
 
				+
			
--- a/ocr_tools/paddle_common/config/layout_parsing.yaml
+++ b/ocr_tools/paddle_common/config/layout_parsing.yaml
@@ -0,0 +1,102 @@
 
				+
			
 
				+pipeline_name: layout_parsing
			
 
				+
			
 
				+use_doc_preprocessor: True
			
 
				+use_seal_recognition: True
			
 
				+use_table_recognition: True
			
 
				+use_formula_recognition: False
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: RT-DETR-H_layout_17cls
			
 
				+    model_dir: null
			
 
				+
			
 
				+SubPipelines:
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    use_doc_orientation_classify: True
			
 
				+    use_doc_unwarping: True
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
 
				+
			
 
				+  GeneralOCR:
			
 
				+    pipeline_name: OCR
			
 
				+    text_type: general
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_textline_orientation: False
			
 
				+    SubModules:
			
 
				+      TextDetection:
			
 
				+        module_name: text_detection
			
 
				+        model_name: PP-OCRv5_server_det
			
 
				+        model_dir: null
			
 
				+        limit_side_len: 960
			
 
				+        limit_type: max
			
 
				+        max_side_limit: 4000
			
 
				+        thresh: 0.3
			
 
				+        box_thresh: 0.6
			
 
				+        unclip_ratio: 1.5
			
 
				+        
			
 
				+      TextRecognition:
			
 
				+        module_name: text_recognition
			
 
				+        model_name: PP-OCRv5_server_rec
			
 
				+        model_dir: null
			
 
				+        batch_size: 6
			
 
				+        score_thresh: 0
			
 
				+
			
 
				+  TableRecognition:
			
 
				+    pipeline_name: table_recognition
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_ocr_model: False
			
 
				+    SubModules:
			
 
				+      TableStructureRecognition:
			
 
				+        module_name: table_structure_recognition
			
 
				+        model_name: SLANet_plus
			
 
				+        model_dir: null
			
 
				+
			
 
				+  SealRecognition:
			
 
				+    pipeline_name: seal_recognition
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubPipelines:
			
 
				+      SealOCR:
			
 
				+        pipeline_name: OCR
			
 
				+        text_type: seal
			
 
				+        use_doc_preprocessor: False
			
 
				+        use_textline_orientation: False
			
 
				+        SubModules:
			
 
				+          TextDetection:
			
 
				+            module_name: seal_text_detection
			
 
				+            model_name: PP-OCRv4_server_seal_det
			
 
				+            model_dir: null
			
 
				+            limit_side_len: 736
			
 
				+            limit_type: min
			
 
				+            max_side_limit: 4000
			
 
				+            thresh: 0.2
			
 
				+            box_thresh: 0.6
			
 
				+            unclip_ratio: 0.5
			
 
				+          TextRecognition:
			
 
				+            module_name: text_recognition
			
 
				+            model_name: PP-OCRv4_server_rec
			
 
				+            model_dir: null
			
 
				+            batch_size: 1
			
 
				+            score_thresh: 0
			
 
				+    
			
 
				+  FormulaRecognition:
			
 
				+    pipeline_name: formula_recognition
			
 
				+    use_layout_detection: False
			
 
				+    use_doc_preprocessor: False
			
 
				+    SubModules:
			
 
				+      FormulaRecognition:
			
 
				+        module_name: formula_recognition
			
 
				+        model_name: PP-FormulaNet-L
			
 
				+        model_dir: null
			
 
				+        batch_size: 5
			
--- a/ocr_tools/paddle_common/config/table_recognition_v2-zhch.yaml
+++ b/ocr_tools/paddle_common/config/table_recognition_v2-zhch.yaml
@@ -0,0 +1,127 @@
 
				+
			
 
				+pipeline_name: table_recognition_v2
			
 
				+
			
 
				+use_doc_preprocessor: False
			
 
				+use_layout_detection: True
			
 
				+use_ocr_model: True
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocLayout_plus-L
			
 
				+    model_dir: null
			
 
				+    batch_size: 1
			
 
				+    threshold: 
			
 
				+      0: 0.3  # paragraph_title
			
 
				+      1: 0.5  # image
			
 
				+      2: 0.4  # text
			
 
				+      3: 0.5  # number
			
 
				+      4: 0.5  # abstract
			
 
				+      5: 0.5  # content
			
 
				+      6: 0.5  # figure_table_chart_title
			
 
				+      7: 0.3  # formula
			
 
				+      8: 0.5  # table
			
 
				+      9: 0.5  # reference
			
 
				+      10: 0.5 # doc_title
			
 
				+      11: 0.5 # footnote
			
 
				+      12: 0.5 # header
			
 
				+      13: 0.5 # algorithm
			
 
				+      14: 0.5 # footer
			
 
				+      15: 0.45 # seal
			
 
				+      16: 0.5 # chart
			
 
				+      17: 0.5 # formula_number
			
 
				+      18: 0.5 # aside_text
			
 
				+      19: 0.5 # reference_content
			
 
				+    layout_nms: True
			
 
				+    layout_unclip_ratio: [1.0, 1.0] 
			
 
				+    layout_merge_bboxes_mode: 
			
 
				+      0: "large"  # paragraph_title
			
 
				+      1: "large"  # image
			
 
				+      2: "union"  # text
			
 
				+      3: "union"  # number
			
 
				+      4: "union"  # abstract
			
 
				+      5: "union"  # content
			
 
				+      6: "union"  # figure_table_chart_title
			
 
				+      7: "large"  # formula
			
 
				+      8: "union"  # table
			
 
				+      9: "union"  # reference
			
 
				+      10: "union" # doc_title
			
 
				+      11: "union" # footnote
			
 
				+      12: "union" # header
			
 
				+      13: "union" # algorithm
			
 
				+      14: "union" # footer
			
 
				+      15: "union" # seal
			
 
				+      16: "large" # chart
			
 
				+      17: "union" # formula_number
			
 
				+      18: "union" # aside_text
			
 
				+      19: "union" # reference_content  
			
 
				+
			
 
				+  TableOrientationClassify:
			
 
				+    module_name: doc_text_orientation
			
 
				+    model_name: PP-LCNet_x1_0_doc_ori
			
 
				+    model_dir: null
			
 
				+  
			
 
				+  TableClassification:
			
 
				+    module_name: table_classification
			
 
				+    model_name: PP-LCNet_x1_0_table_cls
			
 
				+    model_dir: null
			
 
				+
			
 
				+  WiredTableStructureRecognition:
			
 
				+    module_name: table_structure_recognition
			
 
				+    model_name: SLANeXt_wired
			
 
				+    model_dir: null
			
 
				+  
			
 
				+  WirelessTableStructureRecognition:
			
 
				+    module_name: table_structure_recognition
			
 
				+    model_name: SLANeXt_wireless
			
 
				+    model_dir: null
			
 
				+  
			
 
				+  WiredTableCellsDetection:
			
 
				+    module_name: table_cells_detection
			
 
				+    model_name: RT-DETR-L_wired_table_cell_det
			
 
				+    model_dir: null
			
 
				+  
			
 
				+  WirelessTableCellsDetection:
			
 
				+    module_name: table_cells_detection
			
 
				+    model_name: RT-DETR-L_wireless_table_cell_det
			
 
				+    model_dir: null
			
 
				+
			
 
				+SubPipelines:
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    use_doc_orientation_classify: False
			
 
				+    use_doc_unwarping: False
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
 
				+
			
 
				+  GeneralOCR:
			
 
				+    pipeline_name: OCR
			
 
				+    text_type: general
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_textline_orientation: False
			
 
				+    SubModules:
			
 
				+      TextDetection:
			
 
				+        module_name: text_detection
			
 
				+        model_name: PP-OCRv5_server_det
			
 
				+        model_dir: null
			
 
				+        limit_side_len: 1600
			
 
				+        limit_type: max
			
 
				+        max_side_limit: 4000
			
 
				+        thresh: 0.3
			
 
				+        box_thresh: 0.4
			
 
				+        unclip_ratio: 1.5
			
 
				+        
			
 
				+      TextRecognition:
			
 
				+        module_name: text_recognition
			
 
				+        model_name: PP-OCRv5_server_rec
			
 
				+        model_dir: null
			
 
				+        batch_size: 1
			
 
				+        score_thresh: 0
			
--- a/ocr_tools/paddle_common/config/table_recognition_v2.yaml
+++ b/ocr_tools/paddle_common/config/table_recognition_v2.yaml
@@ -0,0 +1,86 @@
 
				+
			
 
				+pipeline_name: table_recognition_v2
			
 
				+
			
 
				+use_doc_preprocessor: True
			
 
				+use_layout_detection: True
			
 
				+use_ocr_model: True
			
 
				+
			
 
				+SubModules:
			
 
				+  LayoutDetection:
			
 
				+    module_name: layout_detection
			
 
				+    model_name: PP-DocLayout-L
			
 
				+    model_dir: null
			
 
				+  
			
 
				+  TableOrientationClassify:
			
 
				+    module_name: doc_text_orientation
			
 
				+    model_name: PP-LCNet_x1_0_doc_ori
			
 
				+    model_dir: null
			
 
				+  
			
 
				+  TableClassification:
			
 
				+    module_name: table_classification
			
 
				+    model_name: PP-LCNet_x1_0_table_cls
			
 
				+    model_dir: null
			
 
				+
			
 
				+  WiredTableStructureRecognition:
			
 
				+    module_name: table_structure_recognition
			
 
				+    model_name: SLANeXt_wired
			
 
				+    model_dir: null
			
 
				+  
			
 
				+  WirelessTableStructureRecognition:
			
 
				+    module_name: table_structure_recognition
			
 
				+    model_name: SLANeXt_wireless
			
 
				+    model_dir: null
			
 
				+  
			
 
				+  WiredTableCellsDetection:
			
 
				+    module_name: table_cells_detection
			
 
				+    model_name: RT-DETR-L_wired_table_cell_det
			
 
				+    model_dir: null
			
 
				+  
			
 
				+  WirelessTableCellsDetection:
			
 
				+    module_name: table_cells_detection
			
 
				+    model_name: RT-DETR-L_wireless_table_cell_det
			
 
				+    model_dir: null
			
 
				+
			
 
				+SubPipelines:
			
 
				+  DocPreprocessor:
			
 
				+    pipeline_name: doc_preprocessor
			
 
				+    use_doc_orientation_classify: True
			
 
				+    use_doc_unwarping: False
			
 
				+    SubModules:
			
 
				+      DocOrientationClassify:
			
 
				+        module_name: doc_text_orientation
			
 
				+        model_name: PP-LCNet_x1_0_doc_ori
			
 
				+        model_dir: null
			
 
				+
			
 
				+      DocUnwarping:
			
 
				+        module_name: image_unwarping
			
 
				+        model_name: UVDoc
			
 
				+        model_dir: null
			
 
				+
			
 
				+  GeneralOCR:
			
 
				+    pipeline_name: OCR
			
 
				+    text_type: general
			
 
				+    use_doc_preprocessor: False
			
 
				+    use_textline_orientation: True
			
 
				+    SubModules:
			
 
				+      TextDetection:
			
 
				+        module_name: text_detection
			
 
				+        model_name: PP-OCRv5_server_det
			
 
				+        model_dir: null
			
 
				+        limit_side_len: 1200
			
 
				+        limit_type: max
			
 
				+        max_side_limit: 4000
			
 
				+        thresh: 0.3
			
 
				+        box_thresh: 0.4
			
 
				+        unclip_ratio: 1.5
			
 
				+      TextLineOrientation:
			
 
				+        module_name: textline_orientation
			
 
				+        model_name: PP-LCNet_x1_0_textline_ori
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+      TextRecognition:
			
 
				+        module_name: text_recognition
			
 
				+        model_name: PP-OCRv5_server_rec
			
 
				+        model_dir: null
			
 
				+        batch_size: 8
			
 
				+        score_thresh: 0.0
			
--- a/ocr_tools/paddle_common/processor.py
+++ b/ocr_tools/paddle_common/processor.py
@@ -0,0 +1,269 @@
 
				+"""
			
 
				+PaddleX 统一处理器
			
 
				+
			
 
				+支持多种 pipeline（PaddleOCR-VL 和 PP-StructureV3）的文档处理类
			
 
				+"""
			
 
				+import os
			
 
				+import time
			
 
				+import traceback
			
 
				+import warnings
			
 
				+from pathlib import Path
			
 
				+from typing import List, Dict, Any
			
 
				+from loguru import logger
			
 
				+
			
 
				+# 抑制特定警告
			
 
				+warnings.filterwarnings("ignore", message="To copy construct from a tensor")
			
 
				+warnings.filterwarnings("ignore", message="Setting `pad_token_id`")
			
 
				+warnings.filterwarnings("ignore", category=UserWarning, module="paddlex")
			
 
				+
			
 
				+from paddlex import create_pipeline
			
 
				+
			
 
				+# 导入工具函数
			
 
				+import sys
			
 
				+paddle_common_root = Path(__file__).parent
			
 
				+if str(paddle_common_root) not in sys.path:
			
 
				+    sys.path.insert(0, str(paddle_common_root))
			
 
				+
			
 
				+from .utils import (
			
 
				+    convert_pruned_result_to_json,
			
 
				+    save_output_images,
			
 
				+    save_markdown_content
			
 
				+)
			
 
				+
			
 
				+# 导入适配器
			
 
				+from .adapters import (
			
 
				+    apply_table_recognition_adapter,
			
 
				+    restore_original_function,
			
 
				+    apply_enhanced_doc_preprocessor,
			
 
				+    restore_paddlex_doc_preprocessor
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class PaddleXProcessor:
			
 
				+    """PaddleX 统一处理器，支持多种 pipeline"""
			
 
				+    
			
 
				+    def __init__(self,
			
 
				+                 pipeline_name: str = "PP-StructureV3",
			
 
				+                 device: str = "gpu:0",
			
 
				+                 normalize_numbers: bool = True,
			
 
				+                 use_enhanced_adapter: bool = True,
			
 
				+                 log_level: str = "INFO",
			
 
				+                 **kwargs):
			
 
				+        """
			
 
				+        初始化处理器
			
 
				+        
			
 
				+        Args:
			
 
				+            pipeline_name: Pipeline 名称或配置文件路径
			
 
				+            device: 设备字符串（如 'gpu:0', 'cpu'）
			
 
				+            normalize_numbers: 是否标准化数字
			
 
				+            use_enhanced_adapter: 是否使用增强适配器
			
 
				+            log_level: 日志级别（DEBUG, INFO, WARNING, ERROR），当为 DEBUG 时会打印详细错误信息
			
 
				+            **kwargs: 其他预测参数
			
 
				+        """
			
 
				+        self.pipeline_name = pipeline_name
			
 
				+        self.device = device
			
 
				+        self.normalize_numbers = normalize_numbers
			
 
				+        self.use_enhanced_adapter = use_enhanced_adapter
			
 
				+        self.log_level = log_level
			
 
				+        self.predict_kwargs = kwargs
			
 
				+        
			
 
				+        # 检测 pipeline 类型
			
 
				+        self.is_paddleocr_vl = 'PaddleOCR-VL'.lower() in str(pipeline_name).lower()
			
 
				+        
			
 
				+        # 应用适配器
			
 
				+        self.adapter_applied = False
			
 
				+        if use_enhanced_adapter:
			
 
				+            self.adapter_applied = apply_table_recognition_adapter() and apply_enhanced_doc_preprocessor()
			
 
				+            if self.adapter_applied:
			
 
				+                logger.info("🎯 Enhanced table recognition adapter activated and document preprocessor applied")
			
 
				+            else:
			
 
				+                logger.warning("⚠️  Failed to apply adapter, using original implementation")
			
 
				+        
			
 
				+        # 初始化 pipeline
			
 
				+        self.pipeline = None
			
 
				+        self._initialize_pipeline()
			
 
				+        
			
 
				+        logger.info(f"PaddleX Processor 初始化完成:")
			
 
				+        logger.info(f"  - Pipeline: {pipeline_name}")
			
 
				+        logger.info(f"  - 设备: {device}")
			
 
				+        logger.info(f"  - Pipeline 类型: {'PaddleOCR-VL' if self.is_paddleocr_vl else 'PP-StructureV3'}")
			
 
				+        logger.info(f"  - 数字标准化: {normalize_numbers}")
			
 
				+        logger.info(f"  - 增强适配器: {use_enhanced_adapter}")
			
 
				+        logger.info(f"  - 日志级别: {log_level}")
			
 
				+    
			
 
				+    def _initialize_pipeline(self):
			
 
				+        """初始化 pipeline"""
			
 
				+        try:
			
 
				+            # 设置环境变量以减少警告
			
 
				+            os.environ['PYTHONWARNINGS'] = 'ignore::UserWarning'
			
 
				+            
			
 
				+            logger.info(f"Initializing pipeline '{self.pipeline_name}' on device '{self.device}'...")
			
 
				+            self.pipeline = create_pipeline(self.pipeline_name, device=self.device)
			
 
				+            logger.info(f"Pipeline initialized successfully on {self.device}")
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"Failed to initialize pipeline: {e}")
			
 
				+            if self.log_level == "DEBUG":
			
 
				+                traceback.print_exc()
			
 
				+            if self.adapter_applied:
			
 
				+                restore_original_function()
			
 
				+                restore_paddlex_doc_preprocessor()
			
 
				+            raise
			
 
				+    
			
 
				+    def _get_predict_kwargs(self) -> Dict[str, Any]:
			
 
				+        """根据 pipeline 类型获取预测参数"""
			
 
				+        if self.is_paddleocr_vl:
			
 
				+            # PaddleOCR-VL 使用驼峰命名
			
 
				+            return {
			
 
				+                'use_layout_detection': self.predict_kwargs.get('use_layout_detection', True),
			
 
				+                'use_doc_orientation_classify': self.predict_kwargs.get('use_doc_orientation', True),
			
 
				+                'use_doc_unwarping': self.predict_kwargs.get('use_doc_unwarping', False),
			
 
				+            }
			
 
				+        else:
			
 
				+            # PP-StructureV3 使用下划线命名
			
 
				+            return {
			
 
				+                'use_doc_orientation_classify': self.predict_kwargs.get('use_doc_orientation', True),
			
 
				+                'use_doc_unwarping': self.predict_kwargs.get('use_doc_unwarping', False),
			
 
				+                'use_layout_detection': self.predict_kwargs.get('use_layout_detection', True),
			
 
				+                'use_seal_recognition': self.predict_kwargs.get('use_seal_recognition', True),
			
 
				+                'use_table_recognition': self.predict_kwargs.get('use_table_recognition', True),
			
 
				+                'use_formula_recognition': self.predict_kwargs.get('use_formula_recognition', False),
			
 
				+                'use_chart_recognition': self.predict_kwargs.get('use_chart_recognition', True),
			
 
				+                'use_ocr_results_with_table_cells': self.predict_kwargs.get('use_ocr_results_with_table_cells', True),
			
 
				+                'use_table_orientation_classify': self.predict_kwargs.get('use_table_orientation_classify', False),
			
 
				+                'use_wired_table_cells_trans_to_html': self.predict_kwargs.get('use_wired_table_cells_trans_to_html', True),
			
 
				+                'use_wireless_table_cells_trans_to_html': self.predict_kwargs.get('use_wireless_table_cells_trans_to_html', True),
			
 
				+            }
			
 
				+    
			
 
				+    def process_single_image(self, image_path: str, output_dir: str) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        处理单张图片
			
 
				+        
			
 
				+        Args:
			
 
				+            image_path: 图片路径
			
 
				+            output_dir: 输出目录
			
 
				+            
			
 
				+        Returns:
			
 
				+            dict: 处理结果，包含 success 字段（基于输出文件存在性判断）
			
 
				+        """
			
 
				+        start_time = time.time()
			
 
				+        image_path_obj = Path(image_path)
			
 
				+        image_name = image_path_obj.stem
			
 
				+        
			
 
				+        # 判断是否为PDF页面（根据文件名模式）
			
 
				+        is_pdf_page = "_page_" in image_path_obj.name
			
 
				+        
			
 
				+        result_info = {
			
 
				+            "image_path": image_path,
			
 
				+            "processing_time": 0,
			
 
				+            "success": False,
			
 
				+            "device": self.device,
			
 
				+            "error": None,
			
 
				+            "output_files": {},
			
 
				+            "is_pdf_page": is_pdf_page,
			
 
				+            "processing_info": {}
			
 
				+        }
			
 
				+        
			
 
				+        try:
			
 
				+            if self.pipeline is None:
			
 
				+                raise Exception("Pipeline not initialized")
			
 
				+            
			
 
				+            # 准备预测参数
			
 
				+            predict_kwargs = self._get_predict_kwargs()
			
 
				+            predict_kwargs['input'] = image_path
			
 
				+            
			
 
				+            # 使用 pipeline 预测
			
 
				+            results = self.pipeline.predict(**predict_kwargs)
			
 
				+            
			
 
				+            # 处理结果（应该只有一个结果）
			
 
				+            # 使用迭代方式处理生成器，与原始实现保持一致
			
 
				+            result = None
			
 
				+            for idx, res in enumerate(results):
			
 
				+                if idx > 0:
			
 
				+                    raise ValueError("Multiple results found for a single image")
			
 
				+                result = res
			
 
				+                break  # 只处理第一个结果
			
 
				+            
			
 
				+            if result is None:
			
 
				+                raise Exception("No results returned from pipeline")
			
 
				+            input_path = Path(result["input_path"])
			
 
				+            
			
 
				+            # 生成输出文件名
			
 
				+            # 使用输入文件名（PaddleX 的 result["input_path"] 可能包含页面信息）
			
 
				+            output_filename = input_path.stem
			
 
				+            
			
 
				+            # 转换并保存标准JSON格式
			
 
				+            json_content = result.json['res']
			
 
				+            json_output_path, converted_json = convert_pruned_result_to_json(
			
 
				+                json_content,
			
 
				+                str(input_path),
			
 
				+                output_dir,
			
 
				+                output_filename,
			
 
				+                normalize_numbers=self.normalize_numbers
			
 
				+            )
			
 
				+            
			
 
				+            # 保存输出图像
			
 
				+            img_content = result.img
			
 
				+            saved_images = save_output_images(img_content, str(output_dir), output_filename)
			
 
				+            
			
 
				+            # 保存Markdown内容
			
 
				+            markdown_content = result.markdown
			
 
				+            md_output_path = save_markdown_content(
			
 
				+                markdown_content,
			
 
				+                output_dir,
			
 
				+                output_filename,
			
 
				+                normalize_numbers=self.normalize_numbers,
			
 
				+                key_text='markdown_texts',
			
 
				+                key_images='markdown_images',
			
 
				+                json_data=converted_json
			
 
				+            )
			
 
				+            
			
 
				+            # 根据实际保存的文件路径判断成功（成功判断标准：.md 和 .json 文件都存在）
			
 
				+            # 使用实际保存的文件路径
			
 
				+            actual_md_path = Path(md_output_path) if md_output_path else Path(output_dir) / f"{output_filename}.md"
			
 
				+            actual_json_path = Path(json_output_path) if json_output_path else Path(output_dir) / f"{output_filename}.json"
			
 
				+            
			
 
				+            if actual_md_path.exists() and actual_json_path.exists():
			
 
				+                result_info.update({
			
 
				+                    "success": True,
			
 
				+                    "output_files": {
			
 
				+                        "md": str(actual_md_path),
			
 
				+                        "json": str(actual_json_path),
			
 
				+                        **saved_images
			
 
				+                    },
			
 
				+                    "processing_info": converted_json.get('processing_info', {})
			
 
				+                })
			
 
				+                logger.info(f"✅ 处理成功: {image_name}")
			
 
				+            else:
			
 
				+                # 文件不存在，标记为失败
			
 
				+                missing_files = []
			
 
				+                if not actual_md_path.exists():
			
 
				+                    missing_files.append("md")
			
 
				+                if not actual_json_path.exists():
			
 
				+                    missing_files.append("json")
			
 
				+                result_info["error"] = f"输出文件不存在: {', '.join(missing_files)}"
			
 
				+                result_info["success"] = False
			
 
				+                logger.error(f"❌ 处理失败: {image_name} - {result_info['error']}")
			
 
				+                
			
 
				+        except Exception as e:
			
 
				+            result_info["error"] = str(e)
			
 
				+            result_info["success"] = False
			
 
				+            logger.error(f"Error processing {image_name}: {e}")
			
 
				+            if self.log_level == "DEBUG":
			
 
				+                traceback.print_exc()
			
 
				+        
			
 
				+        finally:
			
 
				+            result_info["processing_time"] = time.time() - start_time
			
 
				+        
			
 
				+        return result_info
			
 
				+    
			
 
				+    def __del__(self):
			
 
				+        """清理资源"""
			
 
				+        if self.adapter_applied:
			
 
				+            try:
			
 
				+                restore_original_function()
			
 
				+                restore_paddlex_doc_preprocessor()
			
 
				+                logger.info("🔄 Original function restored")
			
 
				+            except Exception as e:
			
 
				+                logger.warning(f"Failed to restore original function: {e}")
			
 
				+
			
--- a/ocr_tools/paddle_common/utils.py
+++ b/ocr_tools/paddle_common/utils.py
@@ -0,0 +1,333 @@
 
				+"""PaddleX 公共工具函数"""
			
 
				+import json
			
 
				+import traceback
			
 
				+import warnings
			
 
				+import base64
			
 
				+from pathlib import Path
			
 
				+from PIL import Image
			
 
				+from typing import List, Dict, Any, Union
			
 
				+import numpy as np
			
 
				+
			
 
				+# 导入 ocr_utils
			
 
				+import sys
			
 
				+ocr_platform_root = Path(__file__).parents[2]
			
 
				+if str(ocr_platform_root) not in sys.path:
			
 
				+    sys.path.insert(0, str(ocr_platform_root))
			
 
				+
			
 
				+from ocr_utils import (
			
 
				+    normalize_markdown_table,
			
 
				+    normalize_financial_numbers
			
 
				+)
			
 
				+
			
 
				+# 注意：load_images_from_pdf 不再需要，因为 PDF 转图片由 ocr_utils.get_input_files() 统一处理
			
 
				+
			
 
				+
			
 
				+def convert_pruned_result_to_json(pruned_result: Dict[str, Any], 
			
 
				+                              input_image_path: str, 
			
 
				+                              output_dir: str, 
			
 
				+                              filename: str,
			
 
				+                              normalize_numbers: bool = True) -> tuple[str, Dict[str, Any]]:
			
 
				+    """
			
 
				+    将API返回结果转换为标准JSON格式，并支持数字标准化
			
 
				+    """
			
 
				+    if not pruned_result:
			
 
				+        return "", {}
			
 
				+    
			
 
				+    # 构造标准格式的JSON
			
 
				+    converted_json = {
			
 
				+        "input_path": input_image_path,
			
 
				+        "page_index": None,
			
 
				+        "model_settings": pruned_result.get('model_settings', {}),
			
 
				+        "parsing_res_list": pruned_result.get('parsing_res_list', []),
			
 
				+        "doc_preprocessor_res": {
			
 
				+            "input_path": None,
			
 
				+            "page_index": None,
			
 
				+            "model_settings": pruned_result.get('doc_preprocessor_res', {}).get('model_settings', {}),
			
 
				+            "angle": pruned_result.get('doc_preprocessor_res', {}).get('angle', 0)
			
 
				+        },
			
 
				+        "layout_det_res": {
			
 
				+            "input_path": None,
			
 
				+            "page_index": None,
			
 
				+            "boxes": pruned_result.get('layout_det_res', {}).get('boxes', [])
			
 
				+        },
			
 
				+        "overall_ocr_res": {
			
 
				+            "input_path": None,
			
 
				+            "page_index": None,
			
 
				+            "model_settings": pruned_result.get('overall_ocr_res', {}).get('model_settings', {}),
			
 
				+            "dt_polys": pruned_result.get('overall_ocr_res', {}).get('dt_polys', []),
			
 
				+            "text_det_params": pruned_result.get('overall_ocr_res', {}).get('text_det_params', {}),
			
 
				+            "text_type": pruned_result.get('overall_ocr_res', {}).get('text_type', 'general'),
			
 
				+            "textline_orientation_angles": pruned_result.get('overall_ocr_res', {}).get('textline_orientation_angles', []),
			
 
				+            "text_rec_score_thresh": pruned_result.get('overall_ocr_res', {}).get('text_rec_score_thresh', 0.0),
			
 
				+            "return_word_box": pruned_result.get('overall_ocr_res', {}).get('return_word_box', False),
			
 
				+            "rec_texts": pruned_result.get('overall_ocr_res', {}).get('rec_texts', []),
			
 
				+            "rec_scores": pruned_result.get('overall_ocr_res', {}).get('rec_scores', []),
			
 
				+            "rec_polys": pruned_result.get('overall_ocr_res', {}).get('rec_polys', []),
			
 
				+            "rec_boxes": pruned_result.get('overall_ocr_res', {}).get('rec_boxes', [])
			
 
				+        },
			
 
				+        "table_res_list": pruned_result.get('table_res_list', [])
			
 
				+    }
			
 
				+    
			
 
				+    # 数字标准化处理
			
 
				+    original_json = converted_json.copy()
			
 
				+    changes_count = 0
			
 
				+    
			
 
				+    if normalize_numbers:
			
 
				+        # 1. 标准化 parsing_res_list 中的文本内容
			
 
				+        for item in converted_json.get('parsing_res_list', []):
			
 
				+            if 'block_content' in item:
			
 
				+                original_content = item['block_content']
			
 
				+                normalized_content = original_content
			
 
				+                # 根据block_label类型选择标准化方法
			
 
				+                if item.get('block_label') == 'table':
			
 
				+                    normalized_content = normalize_markdown_table(original_content)
			
 
				+                
			
 
				+                if original_content != normalized_content:
			
 
				+                    item['block_content'] = normalized_content
			
 
				+                    changes_count += len([1 for o, n in zip(original_content, normalized_content) if o != n])
			
 
				+        
			
 
				+        # 2. 标准化 table_res_list 中的HTML表格
			
 
				+        for table_item in converted_json.get('table_res_list', []):
			
 
				+            if 'pred_html' in table_item:
			
 
				+                original_html = table_item['pred_html']
			
 
				+                normalized_html = normalize_markdown_table(original_html)
			
 
				+                
			
 
				+                if original_html != normalized_html:
			
 
				+                    table_item['pred_html'] = normalized_html
			
 
				+                    changes_count += len([1 for o, n in zip(original_html, normalized_html) if o != n])
			
 
				+
			
 
				+        # 统计表格数量
			
 
				+        parsing_res_tables_count = 0
			
 
				+        table_res_list_count = 0
			
 
				+        if 'parsing_res_list' in converted_json:
			
 
				+            parsing_res_tables_count = len([item for item in converted_json['parsing_res_list'] 
			
 
				+                                          if 'block_label' in item and item['block_label'] == 'table'])
			
 
				+        if 'table_res_list' in converted_json:
			
 
				+            table_res_list_count = len(converted_json["table_res_list"])
			
 
				+        table_consistency_fixed = False
			
 
				+        if parsing_res_tables_count != table_res_list_count:
			
 
				+            warnings.warn(f"⚠️ Warning: {filename} Table count mismatch - parsing_res_list has {parsing_res_tables_count} tables, "
			
 
				+                          f"but table_res_list has {table_res_list_count} tables.")
			
 
				+            table_consistency_fixed = True
			
 
				+        
			
 
				+        # 添加标准化处理信息
			
 
				+        converted_json['processing_info'] = {
			
 
				+            "normalize_numbers": normalize_numbers,
			
 
				+            "changes_applied": changes_count > 0,
			
 
				+            "character_changes_count": changes_count,
			
 
				+            "parsing_res_tables_count": parsing_res_tables_count,
			
 
				+            "table_res_list_count": table_res_list_count,
			
 
				+            "table_consistency_fixed": table_consistency_fixed
			
 
				+        }
			
 
				+    else:
			
 
				+        converted_json['processing_info'] = {
			
 
				+            "normalize_numbers": False,
			
 
				+            "changes_applied": False,
			
 
				+            "character_changes_count": 0
			
 
				+        }
			
 
				+    
			
 
				+    # 保存JSON文件
			
 
				+    output_path = Path(output_dir).resolve()
			
 
				+    output_path.mkdir(parents=True, exist_ok=True)
			
 
				+    
			
 
				+    json_file_path = output_path / f"{filename}.json"
			
 
				+    with open(json_file_path, 'w', encoding='utf-8') as f:
			
 
				+        json.dump(converted_json, f, ensure_ascii=False, indent=2)
			
 
				+    
			
 
				+    # 如果启用了标准化且有变化，保存原始版本用于对比
			
 
				+    if normalize_numbers and changes_count > 0:
			
 
				+        original_output_path = output_path / f"{filename}_original.json"
			
 
				+        with open(original_output_path, 'w', encoding='utf-8') as f:
			
 
				+            json.dump(original_json, f, ensure_ascii=False, indent=2)
			
 
				+    
			
 
				+    return str(output_path), converted_json
			
 
				+
			
 
				+def save_image(image: Union[Image.Image, str, np.ndarray], output_path: str) -> str:
			
 
				+    """
			
 
				+    保存单个图像到指定路径
			
 
				+
			
 
				+    Args:
			
 
				+        image: 要保存的图像，可以是PIL Image对象、base64字符串或numpy数组
			
 
				+        output_path: 输出文件路径
			
 
				+
			
 
				+    Returns:
			
 
				+        保存的图像文件路径
			
 
				+    """
			
 
				+    try:
			
 
				+        if isinstance(image, Image.Image):
			
 
				+            image.save(output_path)
			
 
				+        elif isinstance(image, str):
			
 
				+            # 处理base64字符串
			
 
				+            img_data = base64.b64decode(image)
			
 
				+            with open(output_path, 'wb') as f:
			
 
				+                f.write(img_data)
			
 
				+        elif isinstance(image, np.ndarray):
			
 
				+            # 处理numpy数组
			
 
				+            pil_image = Image.fromarray(image)
			
 
				+            pil_image.save(output_path)
			
 
				+        else:
			
 
				+            raise ValueError(f"Unsupported image type: {type(image)}")
			
 
				+
			
 
				+        return str(output_path)
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"❌ Error saving image {output_path}: {e}")
			
 
				+        return ""
			
 
				+
			
 
				+def save_output_images(output_images: Dict[str, Any], output_dir: str, output_filename: str) -> Dict[str, str]:
			
 
				+    """
			
 
				+    保存API返回的输出图像
			
 
				+    
			
 
				+    Args:
			
 
				+        output_images: 图像数组字典或PIL Image对象字典
			
 
				+        output_dir: 输出目录
			
 
				+        output_filename: 输出文件名前缀
			
 
				+        
			
 
				+    Returns:
			
 
				+        保存的图像文件路径字典
			
 
				+    """
			
 
				+    if not output_images:
			
 
				+        return {}
			
 
				+    
			
 
				+    output_path = Path(output_dir).resolve()
			
 
				+    output_path.mkdir(parents=True, exist_ok=True)
			
 
				+    
			
 
				+    saved_images = {}
			
 
				+    
			
 
				+    for img_name, img_data in output_images.items():
			
 
				+        try:
			
 
				+            # 生成文件名
			
 
				+            img_filename = f"{output_filename}_{img_name}.jpg"
			
 
				+            img_path = output_path / img_filename
			
 
				+            save_image(img_data, str(img_path))
			
 
				+            saved_images[img_name] = str(img_path)
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"❌ Error saving image {img_name}: {e}")
			
 
				+            print(f"   Image data type: {type(img_data)}")
			
 
				+            if hasattr(img_data, 'shape'):
			
 
				+                print(f"   Image shape: {img_data.shape}")
			
 
				+            traceback.print_exc()
			
 
				+    
			
 
				+    return saved_images
			
 
				+
			
 
				+def save_markdown_content(markdown_data: Dict[str, Any], output_dir: str, 
			
 
				+                         filename: str, normalize_numbers: bool = True, 
			
 
				+                         key_text: str = 'text', key_images: str = 'images',
			
 
				+                         json_data: Dict[str, Any] = None) -> str:
			
 
				+    """
			
 
				+    保存Markdown内容，支持数字标准化和表格补全
			
 
				+    """
			
 
				+    if not markdown_data and not json_data:
			
 
				+        return ""
			
 
				+    
			
 
				+    output_path = Path(output_dir).resolve()
			
 
				+    output_path.mkdir(parents=True, exist_ok=True)
			
 
				+    
			
 
				+    # 🎯 优先使用json_data生成完整内容
			
 
				+    if json_data:
			
 
				+        return save_markdown_content_enhanced(json_data, str(output_path), filename, normalize_numbers)
			
 
				+    
			
 
				+    # 原有逻辑保持不变
			
 
				+    markdown_text = markdown_data.get(key_text, '')
			
 
				+    
			
 
				+    # 数字标准化处理
			
 
				+    changes_count = 0
			
 
				+    if normalize_numbers and markdown_text:
			
 
				+        original_markdown_text = markdown_text
			
 
				+        markdown_text = normalize_markdown_table(markdown_text)
			
 
				+        
			
 
				+        changes_count = len([1 for o, n in zip(original_markdown_text, markdown_text) if o != n])
			
 
				+    
			
 
				+    md_file_path = output_path / f"{filename}.md"
			
 
				+    with open(md_file_path, 'w', encoding='utf-8') as f:
			
 
				+        f.write(markdown_text)
			
 
				+    
			
 
				+    # 如果启用了标准化且有变化，保存原始版本用于对比
			
 
				+    if normalize_numbers and changes_count > 0:
			
 
				+        original_output_path = output_path / f"{filename}_original.md"
			
 
				+        with open(original_output_path, 'w', encoding='utf-8') as f:
			
 
				+            f.write(original_markdown_text)
			
 
				+
			
 
				+    # 保存Markdown中的图像
			
 
				+    markdown_images = markdown_data.get(key_images, {})
			
 
				+    for img_path, img_data in markdown_images.items():
			
 
				+        try:
			
 
				+            full_img_path = output_path / img_path
			
 
				+            full_img_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				+            save_image(img_data, str(full_img_path))
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"❌ Error saving Markdown image {img_path}: {e}")
			
 
				+
			
 
				+    return str(md_file_path)
			
 
				+
			
 
				+def save_markdown_content_enhanced(json_data: Dict[str, Any], output_dir: str, 
			
 
				+                         filename: str, normalize_numbers: bool = True) -> str:
			
 
				+    """
			
 
				+    增强版Markdown内容保存，同时处理parsing_res_list和table_res_list
			
 
				+    """
			
 
				+    if not json_data:
			
 
				+        return ""
			
 
				+    
			
 
				+    output_path = Path(output_dir).resolve()
			
 
				+    output_path.mkdir(parents=True, exist_ok=True)
			
 
				+    
			
 
				+    markdown_content = []
			
 
				+    
			
 
				+    # 处理 parsing_res_list
			
 
				+    parsing_res_list = json_data.get('parsing_res_list', [])
			
 
				+    table_res_list = json_data.get('table_res_list', [])
			
 
				+    
			
 
				+    table_index = 0  # 用于匹配table_res_list中的表格
			
 
				+    
			
 
				+    for item in parsing_res_list:
			
 
				+        block_label = item.get('block_label', '')
			
 
				+        block_content = item.get('block_content', '')
			
 
				+        
			
 
				+        if block_label == 'table':
			
 
				+            # 如果是表格，优先使用table_res_list中的详细HTML
			
 
				+            if table_index < len(table_res_list):
			
 
				+                detailed_html = table_res_list[table_index].get('pred_html', block_content)
			
 
				+                if normalize_numbers:
			
 
				+                    detailed_html = normalize_markdown_table(detailed_html)
			
 
				+                
			
 
				+                # 转换为居中显示的HTML
			
 
				+                markdown_content.append(f'<div style="text-align: center;">{detailed_html}</div>')
			
 
				+                table_index += 1
			
 
				+            else:
			
 
				+                # 如果table_res_list中没有对应项，使用parsing_res_list中的内容
			
 
				+                if normalize_numbers:
			
 
				+                    block_content = normalize_markdown_table(block_content)
			
 
				+                markdown_content.append(f'<div style="text-align: center;">{block_content}</div>')
			
 
				+        else:
			
 
				+            # 非表格内容直接添加
			
 
				+            if normalize_numbers:
			
 
				+                block_content = normalize_financial_numbers(block_content)
			
 
				+            markdown_content.append(block_content)
			
 
				+    
			
 
				+    # 🎯 关键修复：处理剩余的table_res_list项目
			
 
				+    # 如果table_res_list中还有未处理的表格（比parsing_res_list中的表格多）
			
 
				+    remaining_tables = table_res_list[table_index:]
			
 
				+    for table_item in remaining_tables:
			
 
				+        detailed_html = table_item.get('pred_html', '')
			
 
				+        if detailed_html:
			
 
				+            if normalize_numbers:
			
 
				+                detailed_html = normalize_markdown_table(detailed_html)
			
 
				+            markdown_content.append(f'<div style="text-align: center;">{detailed_html}</div>')
			
 
				+    
			
 
				+    # 合并所有内容
			
 
				+    final_markdown = '\n\n'.join(markdown_content)
			
 
				+    
			
 
				+    # 保存文件
			
 
				+    md_file_path = output_path / f"{filename}.md"
			
 
				+    with open(md_file_path, 'w', encoding='utf-8') as f:
			
 
				+        f.write(final_markdown)
			
 
				+    
			
 
				+    print(f"📄 Enhanced Markdown saved: {md_file_path}")
			
 
				+    print(f"   - parsing_res_list tables: {sum(1 for item in parsing_res_list if item.get('block_label') == 'table')}")
			
 
				+    print(f"   - table_res_list tables: {len(table_res_list)}")
			
 
				+    print(f"   - remaining tables added: {len(remaining_tables)}")
			
 
				+    
			
 
				+    return str(md_file_path)
			
 
				+