Sfoglia il codice sorgente

feat: Implement intelligent table recognition mode selection system

- Added TableModeSelector class for analyzing table features including type, complexity, regularity, and border clarity.
- Introduced TableModeDecisionEngine class to evaluate conditions and select the best processing mode based on defined rules.
- Developed IntelligentTableProcessor class to orchestrate the feature analysis, mode selection, and processing execution.
- Included a demo script for showcasing intelligent table processing capabilities.
- Documented decision flow and mode selection summary for better understanding of the system's functionality.
- Explained why wireless tables are not recommended for HTML mode and the advantages of using end-to-end processing.
zhch158_admin 3 mesi fa
parent
commit
0a7c65af46
2 file modificati con 1293 aggiunte e 0 eliminazioni
  1. 739 0
      zhch/table_mode_selector.py
  2. 554 0
      zhch/智能表格识别模式选择.md

+ 739 - 0
zhch/table_mode_selector.py

@@ -0,0 +1,739 @@
+# zhch/table_mode_selector.py
+import cv2
+import numpy as np
+from paddlex import create_pipeline, create_model
+
+class TableModeSelector:
+    def __init__(self):
+        # 使用配置中的layout模型
+        self.layout_model = create_model(model_name="PP-DocLayout_plus-L")
+        # 使用配置中的模型进行预分析
+        self.table_cls_model = create_model(model_name="PP-LCNet_x1_0_table_cls")
+    
+    def analyze_table_features(self, table_image):
+        """分析表格特征,返回特征字典"""
+        features = {}
+        
+        # 1. 表格类型检测
+        table_type = self.get_table_type(table_image)
+        features['table_type'] = table_type
+        
+        # 2. 复杂度分析
+        complexity = self.analyze_complexity(table_image)
+        features.update(complexity)
+        
+        # 3. 结构规整度分析
+        regularity = self.analyze_regularity(table_image)
+        features.update(regularity)
+        
+        # 4. 边框清晰度分析
+        border_clarity = self.analyze_border_clarity(table_image)
+        features['border_clarity'] = border_clarity
+        
+        return features
+    
+    def get_table_type(self, image):
+        """获取表格类型"""
+        try:
+            result = next(self.table_cls_model.predict(image))
+            
+            # 调试输出,查看实际的结果格式
+            print(f"表格分类模型输出类型: {type(result).__name__}")
+            
+            # 根据实际输出格式调整
+            if hasattr(result, 'keys') or isinstance(result, dict):
+                # 处理TopkResult对象或字典
+                
+                # 标准的PaddleX输出格式
+                if 'class_ids' in result and 'scores' in result and 'label_names' in result:
+                    scores = result['scores']
+                    label_names = result['label_names']
+                    
+                    # 找到最高分数的索引
+                    max_score_idx = np.argmax(scores)
+                    best_label = label_names[max_score_idx]
+                    best_score = scores[max_score_idx]
+                    
+                    print(f"分类结果: {best_label} (置信度: {best_score:.4f})")
+                    return best_label
+                    
+                # 其他可能的格式处理...
+                elif 'class_ids' in result:
+                    class_ids = result['class_ids']
+                    if hasattr(class_ids, '__len__') and len(class_ids) > 0:
+                        class_id = int(class_ids[0])
+                    else:
+                        class_id = int(class_ids)
+                    return 'wired_table' if class_id == 0 else 'wireless_table'
+                    
+                elif 'label_names' in result:
+                    label_names = result['label_names']
+                    return label_names[0] if label_names else 'wired_table'
+                    
+                # 传统的字段名
+                elif 'label' in result:
+                    return result['label']
+                elif 'class_name' in result:
+                    return result['class_name'] 
+                elif 'prediction' in result:
+                    return result['prediction']
+                else:
+                    # 默认返回第一个可用值
+                    first_key = list(result.keys())[0]
+                    return str(result[first_key])
+            
+            # 如果上述方法都失败,使用备用方法
+            print("使用备用的线条检测方法判断表格类型")
+            return self.detect_table_type_by_lines(image)
+            
+        except Exception as e:
+            print(f"表格分类出错: {e},使用备用方法")
+            return self.detect_table_type_by_lines(image)
+
+    def detect_table_type_by_lines(self, image):
+        """通过线条检测判断表格类型(备用方法)"""
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        edges = cv2.Canny(gray, 50, 150)
+        
+        # 检测直线
+        lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=100)
+        
+        if lines is not None and len(lines) > 10:
+            print("检测到较多直线,判断为有线表格")
+            return 'wired_table'
+        else:
+            print("检测到较少直线,判断为无线表格")
+            return 'wireless_table'
+    
+    def analyze_complexity(self, image):
+        """分析表格复杂度"""
+        h, w = image.shape[:2]
+        
+        # 检测线条密度
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        edges = cv2.Canny(gray, 50, 150)
+        line_density = np.sum(edges > 0) / (h * w)
+        
+        # 检测合并单元格(简化实现)
+        merged_cells_ratio = self.detect_merged_cells(image)
+        
+        # 文本密度分析(简化实现)
+        text_density = self.analyze_text_density(image)
+        
+        return {
+            'line_density': line_density,
+            'merged_cells_ratio': merged_cells_ratio,
+            'text_density': text_density,
+            'size_complexity': (h * w) / (1000 * 1000)  # 图像尺寸复杂度
+        }
+    
+    def detect_merged_cells(self, image):
+        """检测合并单元格比例(简化实现)"""
+        # 这里使用简化的启发式方法
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        
+        # 检测水平线
+        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
+        horizontal_lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, horizontal_kernel)
+        
+        # 检测垂直线
+        vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
+        vertical_lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, vertical_kernel)
+        
+        # 计算线条覆盖率作为合并单元格的指标
+        h_coverage = np.sum(horizontal_lines > 0) / horizontal_lines.size
+        v_coverage = np.sum(vertical_lines > 0) / vertical_lines.size
+        
+        # 简化的合并单元格比例估算
+        merged_ratio = 1.0 - min(h_coverage, v_coverage) * 2
+        return max(0.0, min(1.0, merged_ratio))
+    
+    def analyze_text_density(self, image):
+        """分析文本密度(简化实现)"""
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        
+        # 使用简单的阈值化来估算文本区域
+        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        
+        # 计算非空白像素比例作为文本密度
+        text_pixels = np.sum(binary == 0)  # 黑色像素(文本)
+        total_pixels = binary.size
+        
+        return text_pixels / total_pixels
+    
+    def analyze_regularity(self, image):
+        """分析表格结构规整度"""
+        # 检测水平和垂直线条的规律性
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        
+        # 水平线检测
+        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
+        horizontal_lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, horizontal_kernel)
+        
+        # 垂直线检测
+        vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
+        vertical_lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, vertical_kernel)
+        
+        # 计算规整度分数
+        h_regularity = self.calculate_line_regularity(horizontal_lines, axis='horizontal')
+        v_regularity = self.calculate_line_regularity(vertical_lines, axis='vertical')
+        
+        return {
+            'horizontal_regularity': h_regularity,
+            'vertical_regularity': v_regularity,
+            'overall_regularity': (h_regularity + v_regularity) / 2
+        }
+    
+    def calculate_line_regularity(self, lines_image, axis='horizontal'):
+        """计算线条规整度"""
+        if axis == 'horizontal':
+            # 水平方向投影
+            projection = np.sum(lines_image, axis=1)
+        else:
+            # 垂直方向投影
+            projection = np.sum(lines_image, axis=0)
+        
+        # 找到投影峰值
+        peaks = []
+        threshold = np.max(projection) * 0.3
+        for i in range(1, len(projection) - 1):
+            if projection[i] > threshold and projection[i] > projection[i-1] and projection[i] > projection[i+1]:
+                peaks.append(i)
+        
+        if len(peaks) < 2:
+            return 0.5  # 默认中等规整度
+        
+        # 计算峰值间距的标准差
+        intervals = [peaks[i+1] - peaks[i] for i in range(len(peaks)-1)]
+        if len(intervals) == 0:
+            return 0.5
+        
+        mean_interval = np.mean(intervals)
+        std_interval = np.std(intervals)
+        
+        # 规整度 = 1 - (标准差 / 平均值),值越大越规整
+        if mean_interval == 0:
+            return 0.5
+        
+        regularity = 1.0 - min(1.0, std_interval / mean_interval)
+        return max(0.0, regularity)
+    
+    def analyze_border_clarity(self, image):
+        """分析边框清晰度"""
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        
+        # 使用Sobel算子检测边缘强度
+        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
+        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
+        edge_magnitude = np.sqrt(sobelx**2 + sobely**2)
+        
+        # 计算边缘清晰度分数
+        clarity_score = np.mean(edge_magnitude) / 255.0
+        
+        return clarity_score
+
+class TableModeDecisionEngine:
+    def __init__(self):
+        self.rules = self.load_decision_rules()
+    
+    def load_decision_rules(self):
+        """加载决策规则"""
+        return {
+            'wired_html_mode': {
+                'conditions': [
+                    ('table_type', 'in', ['wired_table', 'wired', '0']),  # 支持多种格式
+                    ('border_clarity', '>', 0.6),
+                    ('merged_cells_ratio', '>', 0.3),
+                    ('overall_regularity', '<', 0.7),
+                    ('size_complexity', '>', 0.5)
+                ],
+                'weight': 0.9,
+                'description': '复杂有线表格,几何匹配更准确'
+            },
+            'wired_e2e_mode': {
+                'conditions': [
+                    ('table_type', 'in', ['wired_table', 'wired', '0']),
+                    ('overall_regularity', '>', 0.8),
+                    ('merged_cells_ratio', '<', 0.2),
+                    ('text_density', '>', 0.3)
+                ],
+                'weight': 0.8,
+                'description': '规整有线表格,端到端效果好'
+            },
+            'wireless_e2e_mode': {
+                'conditions': [
+                    ('table_type', 'in', ['wireless_table', 'wireless', '1']),
+                    ('line_density', '<', 0.1),
+                    ('text_density', '>', 0.2)
+                ],
+                'weight': 0.85,
+                'description': '无线表格,端到端预测最适合'
+            },
+            'regular_mode': {
+                'conditions': [
+                    ('size_complexity', '>', 1.0),
+                    ('OR', [
+                        ('border_clarity', '<', 0.4),
+                        ('overall_regularity', '<', 0.5)
+                    ])
+                ],
+                'weight': 0.7,
+                'description': '复杂场景,需要多模型协同'
+            }
+        }
+    
+    def check_single_condition(self, features, condition):
+        """检查单个条件"""
+        feature_name, operator, threshold = condition
+        
+        if feature_name not in features:
+            return False
+        
+        value = features[feature_name]
+        
+        if operator == '>':
+            return value > threshold
+        elif operator == '<':
+            return value < threshold
+        elif operator == '==':
+            return value == threshold
+        elif operator == '>=':
+            return value >= threshold
+        elif operator == '<=':
+            return value <= threshold
+        elif operator == 'in':
+            return value in threshold  # threshold 是一个列表
+        
+        return False
+    
+    def evaluate_conditions(self, features, conditions):
+        """评估条件是否满足"""
+        score = 0
+        total_conditions = 0
+        
+        for condition in conditions:
+            if condition[0] == 'OR':
+                # 处理OR条件
+                or_satisfied = any(
+                    self.check_single_condition(features, sub_cond) 
+                    for sub_cond in condition[1]
+                )
+                if or_satisfied:
+                    score += 1
+                total_conditions += 1
+            else:
+                # 处理单个条件
+                if self.check_single_condition(features, condition):
+                    score += 1
+                total_conditions += 1
+        
+        return score / total_conditions if total_conditions > 0 else 0
+    
+    def select_best_mode(self, features):
+        """选择最佳模式"""
+        mode_scores = {}
+        
+        for mode_name, rule in self.rules.items():
+            conditions_score = self.evaluate_conditions(features, rule['conditions'])
+            final_score = conditions_score * rule['weight']
+            mode_scores[mode_name] = {
+                'score': final_score,
+                'description': rule['description']
+            }
+        
+        # 选择得分最高的模式
+        best_mode = max(mode_scores.items(), key=lambda x: x[1]['score'])
+        
+        return best_mode[0], best_mode[1]
+
+class IntelligentTableProcessor:
+    def __init__(self, config_path="./PP-StructureV3-zhch.yaml"):
+        self.selector = TableModeSelector()
+        self.decision_engine = TableModeDecisionEngine()
+        # 暂时不初始化完整的pipeline,避免配置问题
+        self.config_path = config_path
+        self.pp_structure = None
+    
+    def execute_with_mode(self, image_path, mode, optimized_config=None):
+        """根据选择的模式执行表格识别"""
+        try:
+            print(f"正在使用 {mode} 模式处理表格...")
+            print(f"优化配置: {optimized_config}")
+            
+            # 创建动态配置的pipeline
+            result = self.create_and_run_pipeline(image_path, mode, optimized_config)
+            
+            return result
+            
+        except Exception as e:
+            print(f"执行 {mode} 模式时出错: {e}")
+            print("回退到基础处理模式")
+            return self.fallback_processing(image_path)
+    
+    def create_and_run_pipeline(self, image_path, mode, optimized_config):
+        """创建并运行特定模式的pipeline"""
+        
+        if mode == 'wired_html_mode':
+            return self.run_wired_html_mode(image_path, optimized_config)
+        elif mode == 'wired_e2e_mode':
+            return self.run_wired_e2e_mode(image_path, optimized_config)
+        elif mode == 'wireless_e2e_mode':
+            return self.run_wireless_e2e_mode(image_path, optimized_config)
+        elif mode == 'regular_mode':
+            return self.run_regular_mode(image_path, optimized_config)
+        else:
+            print(f"未知模式: {mode},使用默认处理")
+            return self.fallback_processing(image_path)
+    
+    def run_wired_html_mode(self, image_path, config):
+        """运行有线表格转HTML模式"""
+        print("执行有线表格转HTML模式...")
+        
+        try:
+            # 使用表格识别pipeline,启用HTML模式
+            from paddlex import create_pipeline
+            
+            # 创建表格识别pipeline
+            table_pipeline = create_pipeline(
+                pipeline=self.config_path,
+                model_dir=None
+            )
+            
+            # 模拟配置HTML模式的参数
+            # 注意:这里需要根据实际的PaddleX API调整
+            result = list(table_pipeline.predict(
+                image_path,
+                use_wired_table_html_mode=True,
+                use_wired_table_e2e_mode=False
+            ))
+            
+            return self.format_result(result, mode='wired_html_mode')
+            
+        except Exception as e:
+            print(f"有线表格HTML模式执行失败: {e}")
+            return self.create_mock_result(mode='wired_html_mode')
+    
+    def run_wired_e2e_mode(self, image_path, config):
+        """运行有线表格端到端模式"""
+        print("执行有线表格端到端模式...")
+        
+        try:
+            from paddlex import create_pipeline
+            
+            table_pipeline = create_pipeline(
+                pipeline=self.config_path,
+                model_dir=None
+            )
+            
+            result = list(table_pipeline.predict(
+                image_path,
+                use_wired_table_html_mode=False,
+                use_wired_table_e2e_mode=True
+            ))
+            
+            return self.format_result(result, mode='wired_e2e_mode')
+            
+        except Exception as e:
+            print(f"有线表格端到端模式执行失败: {e}")
+            return self.create_mock_result(mode='wired_e2e_mode')
+    
+    def run_wireless_e2e_mode(self, image_path, config):
+        """运行无线表格端到端模式"""
+        print("执行无线表格端到端模式...")
+        
+        try:
+            from paddlex import create_pipeline
+            
+            table_pipeline = create_pipeline(
+                pipeline=self.config_path,
+                model_dir=None
+            )
+            
+            result = list(table_pipeline.predict(
+                image_path,
+                use_wireless_table_e2e_mode=True
+            ))
+            
+            return self.format_result(result, mode='wireless_e2e_mode')
+            
+        except Exception as e:
+            print(f"无线表格端到端模式执行失败: {e}")
+            return self.create_mock_result(mode='wireless_e2e_mode')
+    
+    def run_regular_mode(self, image_path, config):
+        """运行常规模式"""
+        print("执行常规模式...")
+        
+        try:
+            # 使用完整的PP-StructureV3 pipeline
+            if self.pp_structure is None:
+                from paddlex import create_pipeline
+                self.pp_structure = create_pipeline(
+                    pipeline=self.config_path
+                )
+            
+            result = list(self.pp_structure.predict(image_path))
+            
+            return self.format_result(result, mode='regular_mode')
+            
+        except Exception as e:
+            print(f"常规模式执行失败: {e}")
+            return self.create_mock_result(mode='regular_mode')
+    
+    def format_result(self, raw_result, mode):
+        """格式化结果"""
+        try:
+            if not raw_result:
+                return self.create_mock_result(mode)
+            
+            formatted_result = {
+                'mode': mode,
+                'status': 'success',
+                'raw_output': raw_result,
+                'table_count': 0,
+                'tables': []
+            }
+            
+            # 提取表格结果
+            for item in raw_result:
+                if hasattr(item, 'table_recognition_res') or 'table_recognition_res' in item:
+                    table_res = item.get('table_recognition_res', item.table_recognition_res)
+                    if table_res and len(table_res) > 0:
+                        formatted_result['table_count'] = len(table_res)
+                        for i, table in enumerate(table_res):
+                            formatted_result['tables'].append({
+                                'table_id': i,
+                                'html': getattr(table, 'html', 'HTML不可用'),
+                                'bbox': getattr(table, 'bbox', [0, 0, 100, 100])
+                            })
+            
+            return formatted_result
+            
+        except Exception as e:
+            print(f"结果格式化失败: {e}")
+            return self.create_mock_result(mode)
+    
+    def create_mock_result(self, mode):
+        """创建模拟结果(用于测试和错误回退)"""
+        return {
+            'mode': mode,
+            'status': 'mock',
+            'message': f'{mode} 模式执行完成(模拟结果)',
+            'table_count': 1,
+            'tables': [{
+                'table_id': 0,
+                'html': f'<table><tr><td>模拟{mode}结果</td></tr></table>',
+                'bbox': [237, 201, 1416, 2044]
+            }]
+        }
+    
+    def fallback_processing(self, image_path):
+        """回退处理方法"""
+        print("使用基础OCR处理...")
+        
+        try:
+            from paddlex import create_pipeline
+            
+            # 使用基础OCR pipeline
+            ocr_pipeline = create_pipeline(pipeline="OCR")
+            result = list(ocr_pipeline.predict(image_path))
+            
+            return {
+                'mode': 'fallback_ocr',
+                'status': 'success',
+                'raw_output': result,
+                'message': '使用基础OCR处理'
+            }
+            
+        except Exception as e:
+            print(f"回退处理也失败: {e}")
+            return {
+                'mode': 'error',
+                'status': 'failed',
+                'message': f'所有处理方法都失败: {e}'
+            }
+
+    def extract_all_table_regions(self, image_path):
+        """提取所有表格区域(如果有多个表格)"""
+        original_image = cv2.imread(image_path)
+        layout_results = list(self.selector.layout_model.predict(image_path))
+        
+        all_tables = []
+        for layout_result in layout_results:
+            for i, box_info in enumerate(layout_result['boxes']):
+                if box_info['label'] == 'table':
+                    coordinate = box_info['coordinate']
+                    x1, y1, x2, y2 = [int(coord) for coord in coordinate]
+                    
+                    table_image = original_image[y1:y2, x1:x2]
+                    
+                    table_info = {
+                        'table_id': i,
+                        'image': table_image,
+                        'bbox': [x1, y1, x2, y2],
+                        'score': box_info['score']
+                    }
+                    all_tables.append(table_info)
+                    
+                    # 保存每个表格区域
+                    cv2.imwrite(f'./debug_table_{i}.jpg', table_image)
+                    print(f"表格 {i}: bbox=[{x1}, {y1}, {x2}, {y2}], score={box_info['score']:.4f}")
+        
+        return all_tables
+    
+    def extract_table_region(self, image_path):
+        """从图像中提取表格区域"""
+        # 读取原图
+        original_image = cv2.imread(image_path)
+        
+        # 使用layout模型检测版面
+        layout_results = list(self.selector.layout_model.predict(image_path))
+        
+        table_regions = []
+        for layout_result in layout_results:
+            # 遍历检测到的所有区域
+            for box_info in layout_result['boxes']:
+                if box_info['label'] == 'table':
+                    # 提取表格坐标
+                    coordinate = box_info['coordinate']
+                    x1, y1, x2, y2 = [int(coord) for coord in coordinate]
+                    
+                    # 裁剪表格区域
+                    table_image = original_image[y1:y2, x1:x2]
+                    
+                    table_regions.append({
+                        'image': table_image,
+                        'bbox': [x1, y1, x2, y2],
+                        'score': box_info['score']
+                    })
+                    
+                    print(f"检测到表格区域: bbox=[{x1}, {y1}, {x2}, {y2}], score={box_info['score']:.4f}")
+        
+        if len(table_regions) == 0:
+            print("未检测到表格区域,使用整个图像")
+            return original_image
+        
+        # 返回得分最高的表格区域
+        best_table = max(table_regions, key=lambda x: x['score'])
+        return best_table['image']
+
+    def process_table_intelligently(self, image_path, use_layout_model=True):
+        """智能处理表格"""
+        
+        try:
+            # 1. 提取表格区域
+            if use_layout_model:
+                table_image = self.extract_table_region(image_path)
+            else:
+                table_image = cv2.imread(image_path)
+            
+            if table_image is None or table_image.size == 0:
+                print("表格区域提取失败,使用原图")
+                table_image = cv2.imread(image_path)
+            
+            # 保存表格区域用于调试
+            cv2.imwrite('./debug_table_region.jpg', table_image)
+            print(f"表格区域已保存到: ./debug_table_region.jpg")
+            print(f"表格区域尺寸: {table_image.shape}")
+            
+            # 2. 分析表格特征
+            features = self.selector.analyze_table_features(table_image)
+            print(f"表格特征分析: {features}")
+            
+            # 3. 选择最佳模式
+            best_mode, mode_info = self.decision_engine.select_best_mode(features)
+            print(f"选择模式: {best_mode}, 分数: {mode_info['score']:.3f}")
+            
+            # # 4. 动态调整配置
+            # optimized_config = self.optimize_config_for_mode(best_mode, features)
+            # print(f"优化配置: {optimized_config}")
+            
+            # 5. 执行处理
+            result = self.execute_with_mode(image_path, best_mode, optimized_config=None)
+            
+            return {
+                'result': result,
+                'selected_mode': best_mode,
+                'mode_description': mode_info['description'],
+                'confidence_score': mode_info['score'],
+                'table_features': features,
+                'table_region_shape': table_image.shape
+            }
+            
+        except Exception as e:
+            print(f"智能处理过程出错: {e}")
+            import traceback
+            traceback.print_exc()
+            
+            # 返回错误信息
+            return {
+                'result': None,
+                'selected_mode': 'error',
+                'mode_description': f'处理失败: {e}',
+                'confidence_score': 0.0,
+                'table_features': {},
+                'error': str(e)
+            }
+
+# 修改demo函数,更好地处理结果
+def demo_intelligent_table_processing():
+    """演示智能表格处理"""
+    
+    try:
+        processor = IntelligentTableProcessor("./PP-StructureV3-zhch.yaml")
+        
+        # 处理您之前的复杂财务表格
+        result = processor.process_table_intelligently(
+            "./sample_data/600916_中国黄金_2002年报_83_94_2.png", 
+            use_layout_model=True
+        )
+        
+        print("\n" + "="*50)
+        print("智能表格处理结果:")
+        print("="*50)
+        print(f"选择的模式: {result['selected_mode']}")
+        print(f"选择原因: {result['mode_description']}")
+        print(f"置信度分数: {result['confidence_score']:.3f}")
+        
+        if 'table_region_shape' in result:
+            print(f"表格区域尺寸: {result['table_region_shape']}")
+        
+        print(f"\n表格特征分析:")
+        for key, value in result.get('table_features', {}).items():
+            if isinstance(value, float):
+                print(f"  {key}: {value:.4f}")
+            else:
+                print(f"  {key}: {value}")
+        
+        # 处理结果
+        if result['result']:
+            process_result = result['result']
+            print(f"\n处理结果:")
+            print(f"  模式: {process_result.get('mode', 'unknown')}")
+            print(f"  状态: {process_result.get('status', 'unknown')}")
+            print(f"  表格数量: {process_result.get('table_count', 0)}")
+            
+            if process_result.get('tables'):
+                for i, table in enumerate(process_result['tables']):
+                    print(f"  表格 {i}: bbox={table.get('bbox', 'N/A')}")
+                    html_preview = table.get('html', '')[:100]
+                    print(f"  HTML预览: {html_preview}...")
+        
+        # 根据置信度给出建议
+        if result['confidence_score'] > 0.8:
+            print("\n✅ 高置信度,推荐使用该模式")
+        elif result['confidence_score'] > 0.6:
+            print("\n⚠️  中等置信度,可能需要人工验证")
+        else:
+            print("\n❌ 低置信度,建议尝试其他模式或人工处理")
+        
+        return result
+        
+    except Exception as e:
+        print(f"演示程序出错: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+# Run the demo only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    demo_intelligent_table_processing()

+ 554 - 0
zhch/智能表格识别模式选择.md

@@ -0,0 +1,554 @@
+# 智能表格识别模式选择
+```
+@workspace 如何能提前判断要处理的表格应该使用常规、端到端预测,转html模式,哪种模式更好?
+```
+
+## 1. 智能模式选择算法
+
+### 1. 表格特征分析器
+
+```python
+# zhch/table_mode_selector.py
+import cv2
+import numpy as np
+from paddlex import create_pipeline
+
+class TableModeSelector:
+    """Extract the image features used to choose a table-recognition mode.
+
+    The feature dict combines the classifier's table type with complexity,
+    regularity and border-clarity measurements computed with OpenCV.
+
+    NOTE(review): detect_merged_cells, analyze_text_density and
+    calculate_line_regularity are called below but are not defined in this
+    snippet.
+    """
+    def __init__(self):
+        # Use the models from your configuration for pre-analysis
+        self.layout_model = create_pipeline("layout_detection", 
+                                          model_name="PP-DocLayout_plus-L")
+        self.table_cls_model = create_pipeline("table_classification",
+                                             model_name="PP-LCNet_x1_0_table_cls")
+    
+    def analyze_table_features(self, table_image):
+        """Analyze the table image and return its features as a dict."""
+        features = {}
+        
+        # 1. Table type detection
+        table_type = self.get_table_type(table_image)
+        features['table_type'] = table_type
+        
+        # 2. Complexity analysis
+        complexity = self.analyze_complexity(table_image)
+        features.update(complexity)
+        
+        # 3. Structural regularity analysis
+        regularity = self.analyze_regularity(table_image)
+        features.update(regularity)
+        
+        # 4. Border clarity analysis
+        border_clarity = self.analyze_border_clarity(table_image)
+        features['border_clarity'] = border_clarity
+        
+        return features
+    
+    def get_table_type(self, image):
+        """Return the label of the first classifier prediction for the image."""
+        result = next(self.table_cls_model.predict(image))
+        return result['label']  # 'wired_table' or 'wireless_table'
+    
+    def analyze_complexity(self, image):
+        """Measure complexity: line density, merged cells, text density, image size."""
+        h, w = image.shape[:2]
+        
+        # Line density: fraction of pixels that Canny marks as edges
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        edges = cv2.Canny(gray, 50, 150)
+        line_density = np.sum(edges > 0) / (h * w)
+        
+        # Detect merged cells
+        merged_cells_ratio = self.detect_merged_cells(image)
+        
+        # Text density analysis
+        text_density = self.analyze_text_density(image)
+        
+        return {
+            'line_density': line_density,
+            'merged_cells_ratio': merged_cells_ratio,
+            'text_density': text_density,
+            'size_complexity': (h * w) / (1000 * 1000)  # image-size complexity (pixel count in millions)
+        }
+    
+    def analyze_regularity(self, image):
+        """Score how regular the table structure is along both axes."""
+        # Check the regularity of the horizontal and vertical lines
+        # NOTE(review): the opening below runs on the raw grayscale image (no
+        # thresholding or inversion) — confirm this really isolates ruling lines
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        
+        # Horizontal line detection
+        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
+        horizontal_lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, horizontal_kernel)
+        
+        # Vertical line detection
+        vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
+        vertical_lines = cv2.morphologyEx(gray, cv2.MORPH_OPEN, vertical_kernel)
+        
+        # Compute the regularity scores
+        h_regularity = self.calculate_line_regularity(horizontal_lines, axis='horizontal')
+        v_regularity = self.calculate_line_regularity(vertical_lines, axis='vertical')
+        
+        return {
+            'horizontal_regularity': h_regularity,
+            'vertical_regularity': v_regularity,
+            'overall_regularity': (h_regularity + v_regularity) / 2
+        }
+    
+    def analyze_border_clarity(self, image):
+        """Score border clarity as the mean Sobel gradient magnitude divided by 255."""
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        
+        # Measure edge strength with the Sobel operator
+        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
+        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
+        edge_magnitude = np.sqrt(sobelx**2 + sobely**2)
+        
+        # Edge clarity score (the result is not clamped to any range)
+        clarity_score = np.mean(edge_magnitude) / 255.0
+        
+        return clarity_score
+```
+
+### 2. 决策规则引擎
+
+```python
+class TableModeDecisionEngine:
+    """Rule-based scorer that picks a processing mode from a feature dict.
+
+    Each rule has a list of conditions, a weight and a description. A mode's
+    score is the fraction of its conditions that hold multiplied by its
+    weight, so the highest score a mode can reach equals its weight.
+
+    NOTE(review): the table_type condition is scored like any other
+    condition rather than acting as a hard gate, so a mode whose table_type
+    does not match can still obtain the highest score — confirm this is
+    intended.
+    """
+    def __init__(self):
+        self.rules = self.load_decision_rules()
+    
+    def load_decision_rules(self):
+        """Return the decision rules keyed by mode name."""
+        return {
+            'wired_html_mode': {
+                'conditions': [
+                    ('table_type', '==', 'wired_table'),
+                    ('border_clarity', '>', 0.6),
+                    ('merged_cells_ratio', '>', 0.3),
+                    ('overall_regularity', '<', 0.7),  # complex tables that are not very regular
+                    ('size_complexity', '>', 0.5)
+                ],
+                'weight': 0.9,
+                'description': '复杂有线表格,几何匹配更准确'
+            },
+            'wired_e2e_mode': {
+                'conditions': [
+                    ('table_type', '==', 'wired_table'),
+                    ('overall_regularity', '>', 0.8),  # regular tables
+                    ('merged_cells_ratio', '<', 0.2),  # few merged cells
+                    ('text_density', '>', 0.3)
+                ],
+                'weight': 0.8,
+                'description': '规整有线表格,端到端效果好'
+            },
+            'wireless_e2e_mode': {
+                'conditions': [
+                    ('table_type', '==', 'wireless_table'),
+                    ('line_density', '<', 0.1),
+                    ('text_density', '>', 0.2)
+                ],
+                'weight': 0.85,
+                'description': '无线表格,端到端预测最适合'
+            },
+            'regular_mode': {
+                'conditions': [
+                    ('size_complexity', '>', 1.0),  # very large tables
+                    ('OR', [
+                        ('border_clarity', '<', 0.4),  # unclear borders
+                        ('overall_regularity', '<', 0.5)  # very irregular
+                    ])
+                ],
+                'weight': 0.7,
+                'description': '复杂场景,需要多模型协同'
+            }
+        }
+    
+    def evaluate_conditions(self, features, conditions):
+        """Return the fraction of conditions that hold for the given features.
+
+        An ('OR', [...]) entry counts as a single condition that holds when
+        any of its sub-conditions holds. An empty condition list yields 0.
+        """
+        score = 0
+        total_conditions = 0
+        
+        for condition in conditions:
+            if condition[0] == 'OR':
+                # Handle an OR group
+                or_satisfied = any(
+                    self.check_single_condition(features, sub_cond) 
+                    for sub_cond in condition[1]
+                )
+                if or_satisfied:
+                    score += 1
+                total_conditions += 1
+            else:
+                # Handle a single condition
+                if self.check_single_condition(features, condition):
+                    score += 1
+                total_conditions += 1
+        
+        return score / total_conditions if total_conditions > 0 else 0
+    
+    def check_single_condition(self, features, condition):
+        """Check one (feature_name, operator, threshold) condition.
+
+        Returns False when the feature is missing from `features` or the
+        operator is not one of '>', '<', '==', '>=', '<='.
+        """
+        feature_name, operator, threshold = condition
+        
+        if feature_name not in features:
+            return False
+        
+        value = features[feature_name]
+        
+        if operator == '>':
+            return value > threshold
+        elif operator == '<':
+            return value < threshold
+        elif operator == '==':
+            return value == threshold
+        elif operator == '>=':
+            return value >= threshold
+        elif operator == '<=':
+            return value <= threshold
+        
+        return False
+    
+    def select_best_mode(self, features):
+        """Score every rule and return the best one.
+
+        Returns a (mode_name, info) tuple where info is a dict with the keys
+        'score' (satisfied fraction multiplied by the rule weight) and
+        'description'.
+        """
+        mode_scores = {}
+        
+        for mode_name, rule in self.rules.items():
+            conditions_score = self.evaluate_conditions(features, rule['conditions'])
+            final_score = conditions_score * rule['weight']
+            mode_scores[mode_name] = {
+                'score': final_score,
+                'description': rule['description']
+            }
+        
+        # Pick the mode with the highest score
+        best_mode = max(mode_scores.items(), key=lambda x: x[1]['score'])
+        
+        return best_mode[0], best_mode[1]
+```
+
+### 3. 智能选择器主类
+
+```python
+class IntelligentTableProcessor:
+    """Tie together feature analysis, mode selection and pipeline execution.
+
+    NOTE(review): extract_table_region and execute_with_mode are called
+    below but are not defined in this snippet.
+    """
+    def __init__(self, config_path="zhch/PP-StructureV3-zhch.yaml"):
+        self.selector = TableModeSelector()
+        self.decision_engine = TableModeDecisionEngine()
+        self.pp_structure = create_pipeline("PP-StructureV3", config_path)
+    
+    def process_table_intelligently(self, image_path, table_bbox=None):
+        """Analyze the table, select a mode, and run the processing with it.
+
+        Returns a dict with the keys 'result', 'selected_mode',
+        'mode_description', 'confidence_score' and 'table_features'.
+        """
+        
+        # 1. Extract the table region
+        if table_bbox:
+            table_image = self.extract_table_region(image_path, table_bbox)
+        else:
+            # NOTE(review): the cv2.imread result is used without a None
+            # check — confirm callers always pass a readable image path
+            table_image = cv2.imread(image_path)
+        
+        # 2. Analyze the table features
+        features = self.selector.analyze_table_features(table_image)
+        
+        # 3. Select the best mode
+        best_mode, mode_info = self.decision_engine.select_best_mode(features)
+        
+        # 4. Adjust the configuration dynamically
+        optimized_config = self.optimize_config_for_mode(best_mode, features)
+        
+        # 5. Run the processing
+        result = self.execute_with_mode(image_path, best_mode, optimized_config)
+        
+        return {
+            'result': result,
+            'selected_mode': best_mode,
+            'mode_description': mode_info['description'],
+            'confidence_score': mode_info['score'],
+            'table_features': features
+        }
+    
+    def optimize_config_for_mode(self, mode, features):
+        """Build the config overrides for the given mode and features.
+
+        Returns a dict of override keys; it stays empty for an unknown mode
+        unless the batch_size override below applies.
+        """
+        config_optimizations = {}
+        
+        if mode == 'wired_html_mode':
+            config_optimizations = {
+                'TableRecognition.use_wired_table_html_mode': True,
+                'TableRecognition.use_wired_table_e2e_mode': False
+            }
+        elif mode == 'wired_e2e_mode':
+            config_optimizations = {
+                'TableRecognition.use_wired_table_html_mode': False, 
+                'TableRecognition.use_wired_table_e2e_mode': True
+            }
+        elif mode == 'wireless_e2e_mode':
+            config_optimizations = {
+                'TableRecognition.use_wireless_table_e2e_mode': True
+            }
+        elif mode == 'regular_mode':
+            # Use the full regular mode
+            config_optimizations = {
+                'TableRecognition.use_layout_detection': True,
+                'TableRecognition.use_doc_preprocessor': True,
+                'TableRecognition.use_ocr_model': True
+            }
+        
+        # Adjust batch_size according to complexity
+        if features.get('size_complexity', 0) > 1.0:
+            config_optimizations['batch_size'] = 4  # lower the batch_size
+        
+        return config_optimizations
+```
+
+### 4. 使用示例
+
+```python
+# zhch/intelligent_table_demo.py
+def demo_intelligent_table_processing():
+    """Demonstrate intelligent table processing on a sample image.
+
+    Returns the processing result when the confidence score exceeds 0.8,
+    otherwise None.
+    """
+    
+    processor = IntelligentTableProcessor("zhch/PP-StructureV3-zhch.yaml")
+    
+    # Process the complex financial table from your earlier example
+    result = processor.process_table_intelligently(
+        "path/to/complex_financial_table.jpg"
+    )
+    
+    print(f"选择的模式: {result['selected_mode']}")
+    print(f"选择原因: {result['mode_description']}")
+    print(f"置信度分数: {result['confidence_score']:.3f}")
+    print(f"表格特征: {result['table_features']}")
+    
+    # Decide the follow-up based on the result
+    # NOTE(review): the score is the matched-condition ratio multiplied by the
+    # rule weight, so only rules weighted above 0.8 can ever pass this check
+    if result['confidence_score'] > 0.8:
+        print("高置信度,直接使用结果")
+        return result['result']
+    else:
+        print("置信度较低,建议人工review或尝试其他模式")
+        return None
+
+# Run the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    demo_intelligent_table_processing()
+```
+
+## 决策流程图
+
+```mermaid
+graph TD
+    A[输入表格图像] --> B[表格特征分析]
+    B --> C{表格类型}
+    
+    C -->|有线表| D[分析复杂度和规整度]
+    C -->|无线表| E[无线表端到端模式]
+    
+    D --> F{合并单元格比例}
+    F -->|>30%| G[有线表转HTML模式]
+    F -->|<20%| H{规整度}
+    
+    H -->|>80%| I[有线表端到端模式]
+    H -->|<80%| J[常规模式]
+    
+    G --> K[执行处理]
+    I --> K
+    J --> K
+    E --> K
+    
+    K --> L[返回结果和置信度]
+```
+
+## 模式选择总结
+
+| 模式 | 适用场景 | 特征要求 | 优势 |
+|------|----------|----------|------|
+| **有线表转HTML** | 复杂财务表格、不规整布局 | 边框清晰、大量合并单元格 | 几何匹配准确 |
+| **有线表端到端** | 规整有线表格 | 规整度高、合并单元格少 | 处理速度快 |
+| **无线表端到端** | 无边框表格 | 线条密度低、文本对齐 | 专门优化 |
+| **常规模式** | 超复杂场景、边框模糊 | 需要多模型协同 | 最高准确性 |
+
+这个智能选择系统可以根据具体的表格特征自动选择最适合的处理模式,提高整体的识别准确性和处理效率。
+
+## 2. 无线表格为什么不推荐使用转HTML模式
+
+```
+@workspace  无线表格为什么不推荐使用转html模式
+```
+
+### 结构特征
+```python
+# Characteristics of wireless tables (tables without clear border lines)
+wireless_table_characteristics = {
+    "border_lines": "无边框或边框不清晰",
+    "cell_boundaries": "依赖空白间隔或对齐方式",
+    "structure_cues": "主要依靠文本对齐和空间位置",
+    "detection_difficulty": "单元格边界难以精确检测"
+}
+```
+
+### 与有线表格的对比
+| 特征 | 有线表格 | 无线表格 |
+|------|----------|----------|
+| **边框清晰度** | 明确的线条边界 | 无边框或边框模糊 |
+| **单元格检测** | 基于线条检测,准确度高 | 基于空间对齐,难度大 |
+| **几何关系** | 边界框明确 | 边界模糊,依赖推理 |
+
+## 2. 转HTML模式的技术局限
+
+### 几何检测精度问题
+```python
+# Cell-detection challenges for wireless tables
+def wireless_cell_detection_challenges():
+    """Return the challenges of detecting cells in wireless tables.
+
+    The result maps each challenge name to a dict with the keys
+    "问题" (problem), "影响" (impact) and "结果" (outcome).
+    """
+    
+    challenges = {
+        "边界模糊": {
+            "问题": "无明确边框线条",
+            "影响": "RT-DETR检测器难以准确定位单元格边界",
+            "结果": "边界框不准确,影响几何匹配"
+        },
+        
+        "空白区域歧义": {
+            "问题": "空白可能是单元格内容或分隔符",
+            "影响": "难以区分真实单元格和空白间隔",
+            "结果": "虚假单元格或遗漏单元格"
+        },
+        
+        "对齐依赖": {
+            "问题": "依赖文本对齐判断列边界",
+            "影响": "轻微的对齐偏差影响检测",
+            "结果": "列划分错误"
+        }
+    }
+    
+    return challenges
+```
+
+### 基于 [`RT-DETR-L_wireless_table_cell_det`](table_recognition_v2.en.md) 模型的限制
+
+从您工作空间的配置可以看出,无线表格使用的是专门的检测模型,但其精度受限:
+
+```yaml
+# zhch/PP-StructureV3-zhch.yaml 中的配置
+WirelessTableCellsDetection:
+  module_name: table_cells_detection
+  model_name: RT-DETR-L_wireless_table_cell_det
+  model_dir: null
+```
+
+## 3. 为什么端到端模式更适合无线表格
+
+### 端到端模型的优势
+```python
+# Advantages of end-to-end processing for wireless tables
+def wireless_e2e_advantages():
+    """Return the advantages of the end-to-end mode for wireless tables.
+
+    The result maps each advantage name to a dict with the keys
+    "描述" (description), "技术" (technique) and "优势" (benefit).
+    """
+    
+    return {
+        "结构理解": {
+            "描述": "SLANeXt_wireless直接理解表格逻辑结构",
+            "技术": "基于Transformer的序列建模",
+            "优势": "不依赖精确的几何边界检测"
+        },
+        
+        "上下文感知": {
+            "描述": "利用全局上下文信息推理结构",
+            "技术": "注意力机制捕获长距离依赖",
+            "优势": "能处理对齐不完美的表格"
+        },
+        
+        "语义理解": {
+            "描述": "结合文本内容和空间位置",
+            "技术": "多模态特征融合",
+            "优势": "更智能的结构推理"
+        }
+    }
+```
+
+### 技术对比分析
+
+```python
+# Comparison of the technical approaches
+def compare_technical_approaches():
+    """Return a side-by-side comparison of the HTML-conversion and end-to-end modes.
+
+    Each entry holds the keys "流程" (pipeline), "依赖" (dependency) and
+    "适用" (applicability), plus a list of wireless-table problems or
+    advantages for that mode.
+    """
+    
+    comparison = {
+        "转HTML模式": {
+            "流程": "单元格检测 → 几何匹配 → HTML构建",
+            "依赖": "精确的边界框检测",
+            "适用": "有清晰边框的表格",
+            "无线表格问题": [
+                "检测精度不足",
+                "几何关系模糊",
+                "匹配容易出错"
+            ]
+        },
+        
+        "端到端模式": {
+            "流程": "图像 → 结构序列 → HTML",
+            "依赖": "深度学习的结构理解",
+            "适用": "各种表格类型",
+            "无线表格优势": [
+                "直接学习结构模式",
+                "不依赖边界检测",
+                "更好的泛化能力"
+            ]
+        }
+    }
+    
+    return comparison
+```
+
+## 4. 实际案例说明
+
+### 无线表格示例问题
+```
+# 典型无线表格场景
+姓名    年龄    职业      薪资
+张三    25     工程师    8000
+李四    30     设计师    7500
+王五    28     产品经理  9000
+```
+
+在这种表格中:
+- **转HTML模式问题**:难以准确检测"张三"、"25"等文本块的单元格边界
+- **端到端模式优势**:直接理解这是1行表头加3行数据、共4列的表格结构
+
+### 根据文档建议
+从 [`docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition_v2.en.md`](table_recognition_v2.en.md) 可以看到:
+
+> "Generally speaking, it is recommended to enable this mode in scenarios where the wireless table cell detection is very accurate"
+
+这说明转HTML模式需要**非常准确**的单元格检测,而无线表格往往难以达到这个要求。
+
+## 5. 智能选择策略
+
+基于 [`zhch/智能表格识别模式选择.md`](智能表格识别模式选择.md) 中的算法:
+
+```python
+# Decision rule for wireless tables
+wireless_table_rules = {
+    'wireless_e2e_mode': {
+        'conditions': [
+            ('table_type', '==', 'wireless_table'),
+            ('line_density', '<', 0.1),  # low line density
+            ('text_density', '>', 0.2)   # adequate text density
+        ],
+        'weight': 0.85,
+        'description': '无线表格,端到端预测最适合'
+    }
+}
+
+# Why the HTML-conversion mode is not recommended for wireless tables
+def why_not_html_mode_for_wireless():
+    """Return a dict mapping each reason (short name) to its explanation."""
+    return {
+        "检测精度不足": "无线表格的单元格边界模糊,检测精度不够",
+        "几何匹配困难": "缺乏明确边界,几何关系难以建立",
+        "错误率高": "容易产生单元格划分错误",
+        "不如端到端": "端到端模式专门针对无线表格优化"
+    }
+```
+
+## 总结
+
+**无线表格不推荐使用转HTML模式的核心原因**:
+
+1. **检测精度限制**:无边框导致单元格边界检测不准确
+2. **几何匹配困难**:缺乏明确的几何边界进行匹配
+3. **结构推理复杂**:需要更多上下文信息,不适合纯几何方法
+4. **端到端更优**:专门的端到端模型能更好地处理无线表格的结构推理
+
+因此,对于无线表格,推荐使用**端到端预测模式**,利用深度学习模型的结构理解能力,而不是依赖可能不准确的几何检测结果。