SHA1
--- a/table_line_generator/editor/__init__.py
+++ b/table_line_generator/editor/__init__.py
@@ -61,14 +61,13 @@ from .adjustments import create_adjustment_section
 
				 # 配置加载
			
 
				 from .config_loader import (
			
 
				     load_structure_from_config,
			
 
				-    save_structure_to_config,
			
 
				     load_table_editor_config,
			
 
				     parse_table_editor_cli_args,
			
 
				     build_data_source_catalog,
			
 
				 )
			
 
				 
			
 
				 # 数据处理
			
 
				-from .data_processor import parse_ocr_data
			
 
				+from .data_processor import get_structure_from_ocr
			
 
				 
			
 
				 # 图片查看器
			
 
				 from .viewer import show_image_with_scroll
			
@@ -117,13 +116,12 @@ __all__ = [
 
				     
			
 
				     # 配置加载
			
 
				     'load_structure_from_config',
			
 
				-    'save_structure_to_config',
			
 
				     'load_table_editor_config',
			
 
				     'parse_table_editor_cli_args',
			
 
				     'build_data_source_catalog',
			
 
				     
			
 
				     # 数据处理
			
 
				-    'parse_ocr_data',
			
 
				+    'get_structure_from_ocr',
			
 
				     
			
 
				     # 图片查看器
			
 
				     'show_image_with_scroll',
			
--- a/table_line_generator/editor/adjustments.py
+++ b/table_line_generator/editor/adjustments.py
@@ -47,7 +47,9 @@ def create_adjustment_section(structure):
 
				             if new_y != horizontal_lines[line_index]:
			
 
				                 save_state_for_undo(structure)
			
 
				                 structure['horizontal_lines'][line_index] = new_y
			
 
				-                structure.setdefault('modified_h_lines', set()).add(line_index)
			
 
				+                structure.setdefault('modified_h_lines', [])
			
 
				+                if line_index not in structure['modified_h_lines']:
			
 
				+                    structure['modified_h_lines'].append(line_index)
			
 
				                 _update_row_intervals(structure)
			
 
				                 clear_table_image_cache()
			
 
				                 adjusted = True
			
@@ -65,7 +67,9 @@ def create_adjustment_section(structure):
 
				             structure['horizontal_lines'].append(new_h_y)
			
 
				             structure['horizontal_lines'].sort()
			
 
				             idx = structure['horizontal_lines'].index(new_h_y)
			
 
				-            structure.setdefault('modified_h_lines', set()).add(idx)
			
 
				+            structure.setdefault('modified_h_lines', [])
			
 
				+            if idx not in structure['modified_h_lines']:
			
 
				+                structure['modified_h_lines'].append(idx)
			
 
				             _update_row_intervals(structure)
			
 
				             clear_table_image_cache()
			
 
				             adjusted = True
			
@@ -81,7 +85,7 @@ def create_adjustment_section(structure):
 
				             save_state_for_undo(structure)
			
 
				             for idx in sorted(to_delete, reverse=True):
			
 
				                 del structure['horizontal_lines'][idx]
			
 
				-            structure['modified_h_lines'] = set()
			
 
				+            structure['modified_h_lines'] = []
			
 
				             _update_row_intervals(structure)
			
 
				             clear_table_image_cache()
			
 
				             adjusted = True
			
@@ -105,7 +109,9 @@ def create_adjustment_section(structure):
 
				             if new_x != vertical_lines[line_index]:
			
 
				                 save_state_for_undo(structure)
			
 
				                 structure['vertical_lines'][line_index] = new_x
			
 
				-                structure.setdefault('modified_v_lines', set()).add(line_index)
			
 
				+                structure.setdefault('modified_v_lines', [])
			
 
				+                if line_index not in structure['modified_v_lines']:
			
 
				+                    structure['modified_v_lines'].append(line_index)
			
 
				                 _update_column_intervals(structure)
			
 
				                 clear_table_image_cache()
			
 
				                 adjusted = True
			
@@ -119,11 +125,13 @@ def create_adjustment_section(structure):
 
				             key="add_v_value"
			
 
				         )
			
 
				         if st.button("➕ 确认添加竖线"):
			
 
				-            save_state_for_undo(structure)
			
 
				             structure['vertical_lines'].append(new_v_x)
			
 
				             structure['vertical_lines'].sort()
			
 
				             idx = structure['vertical_lines'].index(new_v_x)
			
 
				-            structure.setdefault('modified_v_lines', set()).add(idx)
			
 
				+            structure.setdefault('modified_v_lines', [])
			
 
				+            if idx not in structure['modified_v_lines']:
			
 
				+                structure['modified_v_lines'].append(idx)
			
 
				+            _update_column_intervals(structure)
			
 
				             _update_column_intervals(structure)
			
 
				             clear_table_image_cache()
			
 
				             adjusted = True
			
@@ -139,7 +147,7 @@ def create_adjustment_section(structure):
 
				             save_state_for_undo(structure)
			
 
				             for idx in sorted(to_delete, reverse=True):
			
 
				                 del structure['vertical_lines'][idx]
			
 
				-            structure['modified_v_lines'] = set()
			
 
				+            structure['modified_v_lines'] = []
			
 
				             _update_column_intervals(structure)
			
 
				             clear_table_image_cache()
			
 
				             adjusted = True
			
--- a/table_line_generator/editor/analysis_controls.py
+++ b/table_line_generator/editor/analysis_controls.py
@@ -1,60 +1,125 @@
 
				 """
			
 
				-表格结构分析控件
			
 
				+分析功能控件
			
 
				 """
			
 
				 import streamlit as st
			
 
				-from .drawing import clear_table_image_cache
			
 
				+from typing import Dict, Optional
			
 
				+import json
			
 
				 
			
 
				 
			
 
				-def create_analysis_section(y_tolerance: int, x_tolerance: int, min_row_height: int):
			
 
				+def create_analysis_section(generator, tool: str = "ppstructv3") -> Optional[Dict]:
			
 
				     """
			
 
				-    创建分析区域
			
 
				+    创建分析控件
			
 
				     
			
 
				     Args:
			
 
				-        y_tolerance: Y轴聚类容差
			
 
				-        x_tolerance: X轴聚类容差
			
 
				-        min_row_height: 最小行高
			
 
				+        generator: TableLineGenerator 实例
			
 
				+        tool: 工具类型
			
 
				+    
			
 
				+    Returns:
			
 
				+        分析后的表格结构（如果点击了分析按钮）
			
 
				     """
			
 
				-    if st.button("🔍 分析表格结构"):
			
 
				-        with st.spinner("分析中..."):
			
 
				-            try:
			
 
				-                generator = st.session_state.generator
			
 
				-                structure = generator.analyze_table_structure(
			
 
				-                    y_tolerance=y_tolerance,
			
 
				-                    x_tolerance=x_tolerance,
			
 
				-                    min_row_height=min_row_height
			
 
				-                )
			
 
				-                
			
 
				-                if not structure:
			
 
				-                    st.warning("⚠️ 未检测到表格结构")
			
 
				-                    st.stop()
			
 
				-                
			
 
				-                structure['modified_h_lines'] = set()
			
 
				-                structure['modified_v_lines'] = set()
			
 
				-                
			
 
				-                st.session_state.structure = structure
			
 
				-                st.session_state.undo_stack = []
			
 
				-                st.session_state.redo_stack = []
			
 
				-                clear_table_image_cache()
			
 
				-                
			
 
				-                st.success(
			
 
				-                    f"✅ 检测到 {len(structure['rows'])} 行"
			
 
				-                    f"（{len(structure['horizontal_lines'])} 条横线），"
			
 
				-                    f"{len(structure['columns'])} 列"
			
 
				-                    f"（{len(structure['vertical_lines'])} 条竖线）"
			
 
				-                )
			
 
				-                
			
 
				-                col1, col2, col3, col4 = st.columns(4)
			
 
				-                with col1:
			
 
				-                    st.metric("行数", len(structure['rows']))
			
 
				-                with col2:
			
 
				-                    st.metric("横线数", len(structure['horizontal_lines']))
			
 
				-                with col3:
			
 
				-                    st.metric("列数", len(structure['columns']))
			
 
				-                with col4:
			
 
				-                    st.metric("竖线数", len(structure['vertical_lines']))
			
 
				-            
			
 
				-            except Exception as e:
			
 
				-                st.error(f"❌ 分析失败: {e}")
			
 
				-                import traceback
			
 
				-                st.code(traceback.format_exc())
			
 
				-                st.stop()
			
 
				+    st.sidebar.subheader("🔍 表格结构分析")
			
 
				+    
			
 
				+    # 🔑 根据工具类型显示不同的参数
			
 
				+    if tool.lower() == "mineru":
			
 
				+        st.sidebar.info("📋 MinerU 格式：直接使用 table_cells 生成结构")
			
 
				+        
			
 
				+        if st.sidebar.button("🚀 生成表格结构", type="primary"):
			
 
				+            with st.spinner("正在分析表格结构..."):
			
 
				+                try:
			
 
				+                    # 🔑 MinerU 格式：从原始 JSON 重新解析
			
 
				+                    current_catalog = st.session_state.get('current_catalog', [])
			
 
				+                    current_index = st.session_state.get('current_catalog_index', 0)
			
 
				+                    
			
 
				+                    if not current_catalog or current_index >= len(current_catalog):
			
 
				+                        st.error("❌ 未找到当前文件")
			
 
				+                        return None
			
 
				+                    
			
 
				+                    entry = current_catalog[current_index]
			
 
				+                    
			
 
				+                    # 加载原始 JSON
			
 
				+                    with open(entry["json"], "r", encoding="utf-8") as fp:
			
 
				+                        raw = json.load(fp)
			
 
				+                    
			
 
				+                    # 重新解析获取完整结构
			
 
				+                    from .data_processor import get_structure_from_ocr
			
 
				+                    
			
 
				+                    table_bbox, structure = get_structure_from_ocr(raw, tool)
			
 
				+                    
			
 
				+                    # 保存到 session_state
			
 
				+                    st.session_state.structure = structure
			
 
				+                    st.session_state.table_bbox = table_bbox
			
 
				+                    st.session_state.undo_stack = []
			
 
				+                    st.session_state.redo_stack = []
			
 
				+                    
			
 
				+                    # 清除缓存的图片
			
 
				+                    from .drawing import clear_table_image_cache
			
 
				+                    clear_table_image_cache()
			
 
				+                    
			
 
				+                    st.success(
			
 
				+                        f"✅ 表格结构生成成功！\n\n"
			
 
				+                        f"检测到 {structure['total_rows']} 行，{structure['total_cols']} 列"
			
 
				+                    )
			
 
				+                    return structure
			
 
				+                    
			
 
				+                except Exception as e:
			
 
				+                    st.error(f"❌ 分析失败: {e}")
			
 
				+                    import traceback
			
 
				+                    with st.expander("🔍 详细错误"):
			
 
				+                        st.code(traceback.format_exc())
			
 
				+    
			
 
				+    else:
			
 
				+        # 🔑 PPStructure V3 格式：使用参数调整
			
 
				+        y_tolerance = st.sidebar.slider(
			
 
				+            "Y轴聚类容差（行检测）",
			
 
				+            min_value=1,
			
 
				+            max_value=20,
			
 
				+            value=5,
			
 
				+            help="相邻文本框Y坐标差小于此值时合并为同一行"
			
 
				+        )
			
 
				+        
			
 
				+        x_tolerance = st.sidebar.slider(
			
 
				+            "X轴聚类容差（列检测）",
			
 
				+            min_value=5,
			
 
				+            max_value=30,
			
 
				+            value=10,
			
 
				+            help="相邻文本框X坐标差小于此值时合并为同一列"
			
 
				+        )
			
 
				+        
			
 
				+        min_row_height = st.sidebar.slider(
			
 
				+            "最小行高",
			
 
				+            min_value=10,
			
 
				+            max_value=50,
			
 
				+            value=20,
			
 
				+            help="行高小于此值的将被过滤"
			
 
				+        )
			
 
				+        
			
 
				+        if st.sidebar.button("🚀 分析表格结构", type="primary"):
			
 
				+            with st.spinner("正在分析表格结构..."):
			
 
				+                try:
			
 
				+                    structure = generator.analyze_table_structure(
			
 
				+                        y_tolerance=y_tolerance,
			
 
				+                        x_tolerance=x_tolerance,
			
 
				+                        min_row_height=min_row_height
			
 
				+                    )
			
 
				+                    
			
 
				+                    st.session_state.structure = structure
			
 
				+                    st.session_state.undo_stack = []
			
 
				+                    st.session_state.redo_stack = []
			
 
				+                    
			
 
				+                    # 清除缓存的图片
			
 
				+                    from .drawing import clear_table_image_cache
			
 
				+                    clear_table_image_cache()
			
 
				+                    
			
 
				+                    st.success(
			
 
				+                        f"✅ 分析完成！\n\n"
			
 
				+                        f"检测到 {len(structure['rows'])} 行，{len(structure['columns'])} 列"
			
 
				+                    )
			
 
				+                    return structure
			
 
				+                    
			
 
				+                except Exception as e:
			
 
				+                    st.error(f"❌ 分析失败: {e}")
			
 
				+                    import traceback
			
 
				+                    with st.expander("🔍 详细错误"):
			
 
				+                        st.code(traceback.format_exc())
			
 
				+    
			
 
				+    return None
			
--- a/table_line_generator/editor/batch_template_controls.py
+++ b/table_line_generator/editor/batch_template_controls.py
@@ -67,7 +67,15 @@ def create_batch_template_section(current_line_width: int, current_line_color: s
 
				         st.error("❌ 未找到当前页的结构文件，请先保存")
			
 
				         st.info(f"期望文件: {current_structure_file}")
			
 
				         return
			
 
				-    
			
 
				+
			
 
				+    # 🔑 检测当前结构文件的模式
			
 
				+    try:
			
 
				+        with open(current_structure_file, 'r', encoding='utf-8') as f:
			
 
				+            template_structure = json.load(f)
			
 
				+        template_mode = template_structure.get('mode', 'fixed')
			
 
				+    except:
			
 
				+        template_mode = 'fixed'    
			
 
				+
			
 
				     unlabeled_pages = []
			
 
				     for entry in catalog:
			
 
				         if entry["index"] == current_page:
			
@@ -113,17 +121,19 @@ def create_batch_template_section(current_line_width: int, current_line_color: s
 
				             unlabeled_pages,
			
 
				             output_dir,
			
 
				             structure_suffix,
			
 
				+            template_mode,  # 🔑 默认使用固定行高模式
			
 
				             current_line_width,
			
 
				             line_color
			
 
				         )
			
 
				 
			
 
				 
			
 
				 def _apply_template_batch(
			
 
				-    template_file: Path,  # 🔑 改为直接传入模板文件路径
			
 
				+    template_file: Path,
			
 
				     template_entry: Dict,
			
 
				     target_entries: List[Dict],
			
 
				     output_dir: Path,
			
 
				     structure_suffix: str,
			
 
				+    template_mode: str,
			
 
				     line_width: int,
			
 
				     line_color: tuple
			
 
				 ):
			
@@ -136,6 +146,7 @@ def _apply_template_batch(
 
				         target_entries: 目标页面列表
			
 
				         output_dir: 输出目录
			
 
				         structure_suffix: 结构文件后缀
			
 
				+        template_mode: 模板模式 ('fixed' / 'hybrid')
			
 
				         line_width: 线条宽度
			
 
				         line_color: 线条颜色 (r, g, b)
			
 
				     """
			
@@ -144,6 +155,7 @@ def _apply_template_batch(
 
				         applier = TableTemplateApplier(str(template_file))
			
 
				         
			
 
				         st.info(f"📋 使用模板: {template_file.name}")
			
 
				+        st.info(f"🔧 模式: {'混合模式 (MinerU)' if template_mode == 'hybrid' else '固定行高模式'}")
			
 
				         
			
 
				         # 进度条
			
 
				         progress_bar = st.progress(0)
			
@@ -160,59 +172,39 @@ def _apply_template_batch(
 
				             status_text.text(f"处理中: {entry['display']} ({idx + 1}/{len(target_entries)})")
			
 
				             
			
 
				             try:
			
 
				-                # 加载 OCR 数据
			
 
				-                with open(entry["json"], "r", encoding="utf-8") as fp:
			
 
				-                    raw = json.load(fp)
			
 
				+                # ✅ 直接调用统一的处理函数
			
 
				+                from table_template_applier import apply_template_to_single_file
			
 
				                 
			
 
				-                # 解析 OCR 数据
			
 
				-                if 'parsing_res_list' in raw and 'overall_ocr_res' in raw:
			
 
				-                    table_bbox, ocr_data = TableLineGenerator.parse_ppstructure_result(raw)
			
 
				-                else:
			
 
				-                    raise ValueError("不支持的 OCR 格式")
			
 
				+                # 确定是否使用混合模式
			
 
				+                use_hybrid = (template_mode == 'hybrid')
			
 
				+                
			
 
				+                success = apply_template_to_single_file(
			
 
				+                    applier=applier,
			
 
				+                    image_file=entry["image"],
			
 
				+                    json_file=entry["json"],
			
 
				+                    output_dir=output_dir,
			
 
				+                    structure_suffix=structure_suffix,
			
 
				+                    use_hybrid_mode=use_hybrid,
			
 
				+                    line_width=line_width,
			
 
				+                    line_color=line_color
			
 
				+                )
			
 
				                 
			
 
				-                # 加载图片
			
 
				-                if entry["image"] and entry["image"].exists():
			
 
				-                    image = Image.open(entry["image"])
			
 
				+                if success:
			
 
				+                    success_count += 1
			
 
				+                    base_name = entry["json"].stem
			
 
				+                    results.append({
			
 
				+                        'page': entry['index'],
			
 
				+                        'status': 'success',
			
 
				+                        'image': str(output_dir / f"{base_name}.png"),
			
 
				+                        'structure': str(output_dir / f"{base_name}{structure_suffix}")
			
 
				+                    })
			
 
				                 else:
			
 
				-                    st.warning(f"⚠️ 跳过 {entry['display']}: 未找到图片")
			
 
				                     failed_count += 1
			
 
				                     results.append({
			
 
				                         'page': entry['index'],
			
 
				-                        'status': 'skipped',
			
 
				-                        'reason': 'no_image'
			
 
				+                        'status': 'error',
			
 
				+                        'error': 'Processing failed'
			
 
				                     })
			
 
				-                    continue
			
 
				-                
			
 
				-                # 应用模板生成图片
			
 
				-                img_with_lines = applier.apply_to_image(
			
 
				-                    image,
			
 
				-                    ocr_data,
			
 
				-                    line_width=line_width,
			
 
				-                    line_color=line_color
			
 
				-                )
			
 
				-                
			
 
				-                # 生成结构配置
			
 
				-                structure = applier.generate_structure_for_image(ocr_data)
			
 
				-                
			
 
				-                # 保存图片
			
 
				-                base_name = entry["json"].stem
			
 
				-                image_suffix = st.session_state.current_output_config.get("image_suffix", ".png")
			
 
				-                output_image_path = output_dir / f"{base_name}{image_suffix}"
			
 
				-                img_with_lines.save(output_image_path)
			
 
				-                
			
 
				-                # 🔑 保存结构（确保 set 转为 list）
			
 
				-                structure_path = output_dir / f"{base_name}{structure_suffix}"
			
 
				-                
			
 
				-                with open(structure_path, 'w', encoding='utf-8') as f:
			
 
				-                    json.dump(structure, f, indent=2, ensure_ascii=False)
			
 
				-                
			
 
				-                success_count += 1
			
 
				-                results.append({
			
 
				-                    'page': entry['index'],
			
 
				-                    'status': 'success',
			
 
				-                    'image': str(output_image_path),
			
 
				-                    'structure': str(structure_path)
			
 
				-                })
			
 
				                 
			
 
				             except Exception as e:
			
 
				                 failed_count += 1
			
@@ -233,6 +225,7 @@ def _apply_template_batch(
 
				             json.dump({
			
 
				                 'template': template_entry['display'],
			
 
				                 'template_file': str(template_file),
			
 
				+                'template_mode': template_mode,
			
 
				                 'total': len(target_entries),
			
 
				                 'success': success_count,
			
 
				                 'failed': failed_count,
			
@@ -249,7 +242,7 @@ def _apply_template_batch(
 
				                 f"失败: {failed_count} 页"
			
 
				             )
			
 
				             
			
 
				-            # 🔑 提供下载批处理结果
			
 
				+            # 🔑 提供下载批处理报告
			
 
				             with open(batch_result_path, 'r', encoding='utf-8') as f:
			
 
				                 st.download_button(
			
 
				                     "📥 下载批处理报告",
			
--- a/table_line_generator/editor/config_loader.py
+++ b/table_line_generator/editor/config_loader.py
@@ -275,25 +275,25 @@ def load_structure_from_config(config_path: Path) -> dict:
 
				     return structure
			
 
				 
			
 
				 
			
 
				-def save_structure_to_config(structure: dict, output_path: Path):
			
 
				-    """
			
 
				-    保存表格结构到配置文件
			
 
				+# def save_structure_to_config(structure: dict, output_path: Path):
			
 
				+#     """
			
 
				+#     保存表格结构到配置文件
			
 
				     
			
 
				-    Args:
			
 
				-        structure: 表格结构字典
			
 
				-        output_path: 输出文件路径
			
 
				-    """
			
 
				-    save_data = {
			
 
				-        'rows': structure['rows'],
			
 
				-        'columns': structure['columns'],
			
 
				-        'horizontal_lines': structure.get('horizontal_lines', []),
			
 
				-        'vertical_lines': structure.get('vertical_lines', []),
			
 
				-        'row_height': structure['row_height'],
			
 
				-        'col_widths': structure['col_widths'],
			
 
				-        'table_bbox': structure['table_bbox'],
			
 
				-        'modified_h_lines': list(structure.get('modified_h_lines', set())),
			
 
				-        'modified_v_lines': list(structure.get('modified_v_lines', set()))
			
 
				-    }
			
 
				+#     Args:
			
 
				+#         structure: 表格结构字典
			
 
				+#         output_path: 输出文件路径
			
 
				+#     """
			
 
				+#     save_data = {
			
 
				+#         'rows': structure['rows'],
			
 
				+#         'columns': structure['columns'],
			
 
				+#         'horizontal_lines': structure.get('horizontal_lines', []),
			
 
				+#         'vertical_lines': structure.get('vertical_lines', []),
			
 
				+#         'row_height': structure['row_height'],
			
 
				+#         'col_widths': structure['col_widths'],
			
 
				+#         'table_bbox': structure['table_bbox'],
			
 
				+#         'modified_h_lines': list(structure.get('modified_h_lines', set())),
			
 
				+#         'modified_v_lines': list(structure.get('modified_v_lines', set()))
			
 
				+#     }
			
 
				     
			
 
				-    with open(output_path, 'w', encoding='utf-8') as f:
			
 
				-        json.dump(save_data, f, indent=2, ensure_ascii=False)
			
 
				+#     with open(output_path, 'w', encoding='utf-8') as f:
			
 
				+#         json.dump(save_data, f, indent=2, ensure_ascii=False)
			
--- a/table_line_generator/editor/data_processor.py
+++ b/table_line_generator/editor/data_processor.py
@@ -1,53 +1,52 @@
 
				-import streamlit as st
			
 
				-import json
			
 
				-
			
 
				-# 当直接运行时
			
 
				+"""
			
 
				+OCR 数据处理
			
 
				+"""
			
 
				 import sys
			
 
				 from pathlib import Path
			
 
				+from typing import List, Dict, Tuple
			
 
				+
			
 
				+# 添加父目录到路径
			
 
				 sys.path.insert(0, str(Path(__file__).parent.parent))
			
 
				-from table_line_generator import TableLineGenerator  # 上级目录
			
 
				 
			
 
				-def parse_ocr_data(ocr_data):
			
 
				-    """解析OCR数据，支持多种格式"""
			
 
				-    # 如果是字符串，尝试解析
			
 
				-    if isinstance(ocr_data, str):
			
 
				-        try:
			
 
				-            ocr_data = json.loads(ocr_data)
			
 
				-        except json.JSONDecodeError:
			
 
				-            st.error("❌ JSON 格式错误，无法解析")
			
 
				-            return []
			
 
				-    
			
 
				-    # 检查是否为 PPStructure V3 格式
			
 
				-    if isinstance(ocr_data, dict) and 'parsing_res_list' in ocr_data and 'overall_ocr_res' in ocr_data:
			
 
				-        st.info("🔍 检测到 PPStructure V3 格式")
			
 
				-        
			
 
				-        try:
			
 
				-            table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(ocr_data)
			
 
				-            st.success(f"✅ 表格区域: {table_bbox}")
			
 
				-            st.success(f"✅ 表格内文本框: {len(text_boxes)} 个")
			
 
				-            return text_boxes
			
 
				-        except Exception as e:
			
 
				-            st.error(f"❌ 解析 PPStructure 结果失败: {e}")
			
 
				-            return []
			
 
				-    
			
 
				-    # 确保是列表
			
 
				-    if not isinstance(ocr_data, list):
			
 
				-        st.error(f"❌ OCR 数据应该是列表，实际类型: {type(ocr_data)}")
			
 
				-        return []
			
 
				+try:
			
 
				+	from table_line_generator import TableLineGenerator
			
 
				+except ImportError:
			
 
				+	from ..table_line_generator import TableLineGenerator
			
 
				+
			
 
				+
			
 
				+def get_structure_from_ocr(
			
 
				+    raw_data: Dict, 
			
 
				+    tool: str = "ppstructv3"
			
 
				+) -> Tuple[List[int], Dict]:
			
 
				+    """
			
 
				+    从 OCR 数据生成表格结构（统一处理流程）
			
 
				     
			
 
				-    if not ocr_data:
			
 
				-        st.warning("⚠️ OCR 数据为空")
			
 
				-        return []
			
 
				+    Args:
			
 
				+        raw_data: 原始 OCR 结果
			
 
				+        tool: 工具类型 ("ppstructv3" / "mineru")
			
 
				     
			
 
				-    first_item = ocr_data[0]
			
 
				-    if not isinstance(first_item, dict):
			
 
				-        st.error(f"❌ OCR 数据项应该是字典，实际类型: {type(first_item)}")
			
 
				-        return []
			
 
				+    Returns:
			
 
				+        (table_bbox, structure): 表格边界框和结构信息
			
 
				+    """
			
 
				+    # 🎯 第一步：解析数据（统一接口）
			
 
				+    table_bbox, ocr_data = TableLineGenerator.parse_ocr_data(raw_data, tool)
			
 
				     
			
 
				-    if 'bbox' not in first_item:
			
 
				-        st.error("❌ OCR 数据缺少 'bbox' 字段")
			
 
				-        st.info("💡 支持的格式示例:\n```json\n[\n  {\n    \"text\": \"文本\",\n    \"bbox\": [x1, y1, x2, y2]\n  }\n]\n```")
			
 
				-        return []
			
 
				+    # 🎯 第二步：分析结构（根据工具选择算法）
			
 
				+    if tool.lower() == "mineru":
			
 
				+        # ✅ 使用静态方法，无需图片
			
 
				+        structure = TableLineGenerator.analyze_structure_only(
			
 
				+            ocr_data,
			
 
				+            method="mineru"
			
 
				+        )
			
 
				+    else:
			
 
				+        # PPStructure 使用聚类算法
			
 
				+        structure = TableLineGenerator.analyze_structure_only(
			
 
				+            ocr_data,
			
 
				+            y_tolerance=5,
			
 
				+            x_tolerance=10,
			
 
				+            min_row_height=20,
			
 
				+            method="cluster"
			
 
				+        )
			
 
				     
			
 
				-    return ocr_data
			
 
				+    return table_bbox, structure
			
 
				 
			
--- a/table_line_generator/editor/directory_selector.py
+++ b/table_line_generator/editor/directory_selector.py
@@ -8,9 +8,14 @@ from PIL import Image
 
				 from typing import Dict, List
			
 
				 
			
 
				 from .config_loader import load_structure_from_config, build_data_source_catalog
			
 
				-from .data_processor import parse_ocr_data
			
 
				+from .data_processor import get_structure_from_ocr
			
 
				 from .drawing import clear_table_image_cache
			
 
				 
			
 
				+try:
			
 
				+	from table_line_generator import TableLineGenerator
			
 
				+except ImportError:
			
 
				+	from ..table_line_generator import TableLineGenerator
			
 
				+
			
 
				 
			
 
				 def create_directory_selector(
			
 
				     data_sources: List[Dict], 
			
@@ -37,7 +42,9 @@ def create_directory_selector(
 
				     source_cfg = next(src for src in data_sources if src["name"] == selected_name)
			
 
				     
			
 
				     # 🔑 保存当前选择的数据源配置到 session_state
			
 
				-    st.session_state.current_data_source = source_cfg
			
 
				+    if "current_data_source" not in st.session_state or st.session_state.current_data_source != source_cfg:
			
 
				+        st.session_state.current_data_source = source_cfg
			
 
				+        st.session_state.dir_selected_index = 0
			
 
				     
			
 
				     # 获取输出配置（优先使用数据源自己的 output）
			
 
				     output_cfg = source_cfg.get("output", global_output_config)
			
@@ -46,8 +53,13 @@ def create_directory_selector(
 
				     output_dir = Path(output_cfg.get("directory", "output/table_structures"))
			
 
				     structure_suffix = output_cfg.get("structure_suffix", "_structure.json")
			
 
				     
			
 
				+    # 🔑 获取工具类型
			
 
				+    tool = source_cfg.get("tool", "ppstructv3")
			
 
				+    st.session_state.current_tool = tool
			
 
				+    st.sidebar.info(f"🔧 工具: {tool.upper()}")
			
 
				+    
			
 
				     # 构建/缓存目录清单
			
 
				-    catalog_key = f"catalog::{selected_name}"
			
 
				+    catalog_key = f"catalog::{source_cfg['json_dir']}"
			
 
				     if catalog_key not in st.session_state:
			
 
				         st.session_state[catalog_key] = build_data_source_catalog(source_cfg)
			
 
				     catalog = st.session_state[catalog_key]
			
@@ -93,7 +105,8 @@ def create_directory_selector(
 
				             catalog[selected], 
			
 
				             output_dir, 
			
 
				             structure_suffix, 
			
 
				-            current_entry_key
			
 
				+            current_entry_key,
			
 
				+            tool  # 🔑 传入工具类型
			
 
				         )
			
 
				     
			
 
				     # 页码跳转处理
			
@@ -109,8 +122,23 @@ def create_directory_selector(
 
				     return st.session_state.get('dir_auto_mode', 'new')
			
 
				 
			
 
				 
			
 
				-def _load_catalog_entry(entry: Dict, output_dir: Path, structure_suffix: str, entry_key: str):
			
 
				-    """加载目录条目（JSON + 图片 + 结构）"""
			
 
				+def _load_catalog_entry(
			
 
				+    entry: Dict, 
			
 
				+    output_dir: Path, 
			
 
				+    structure_suffix: str, 
			
 
				+    entry_key: str,
			
 
				+    tool: str = "ppstructv3"  # 🔑 新增参数
			
 
				+):
			
 
				+    """
			
 
				+    加载目录条目（JSON + 图片 + 结构）
			
 
				+    
			
 
				+    Args:
			
 
				+        entry: 目录条目
			
 
				+        output_dir: 输出目录
			
 
				+        structure_suffix: 结构文件后缀
			
 
				+        entry_key: 条目唯一键
			
 
				+        tool: 工具类型
			
 
				+    """
			
 
				     base_name = entry["json"].stem
			
 
				     structure_file = output_dir / f"{base_name}{structure_suffix}"
			
 
				     has_structure = structure_file.exists()
			
@@ -119,10 +147,20 @@ def _load_catalog_entry(entry: Dict, output_dir: Path, structure_suffix: str, en
 
				     try:
			
 
				         with open(entry["json"], "r", encoding="utf-8") as fp:
			
 
				             raw = json.load(fp)
			
 
				-        st.session_state.ocr_data = parse_ocr_data(raw)
			
 
				+        
			
 
				+        # 🔑 根据工具类型解析数据
			
 
				+        table_bbox, ocr_data = TableLineGenerator.parse_ocr_data(raw, tool=tool)
			
 
				+        
			
 
				+        st.session_state.ocr_data = ocr_data
			
 
				+        st.session_state.table_bbox = table_bbox
			
 
				         st.session_state.loaded_json_name = entry["json"].name
			
 
				+        st.info(f"🔧 使用 {tool.upper()} 解析 JSON")
			
 
				+        
			
 
				     except Exception as e:
			
 
				         st.error(f"❌ 加载 JSON 失败: {e}")
			
 
				+        import traceback
			
 
				+        with st.expander("🔍 详细错误"):
			
 
				+            st.code(traceback.format_exc())
			
 
				         return
			
 
				 
			
 
				     # 🖼️ 加载图片
			
--- a/table_line_generator/editor/file_handlers.py
+++ b/table_line_generator/editor/file_handlers.py
@@ -7,10 +7,13 @@ import tempfile
 
				 from pathlib import Path
			
 
				 from PIL import Image
			
 
				 
			
 
				-from .data_processor import parse_ocr_data
			
 
				 from .config_loader import load_structure_from_config
			
 
				 from .drawing import clear_table_image_cache
			
 
				 
			
 
				+try:
			
 
				+	from table_line_generator import TableLineGenerator
			
 
				+except ImportError:
			
 
				+	from ..table_line_generator import TableLineGenerator
			
 
				 
			
 
				 def handle_json_upload(uploaded_json):
			
 
				     """处理 JSON 文件上传"""
			
@@ -34,7 +37,7 @@ def handle_json_upload(uploaded_json):
 
				             else:
			
 
				                 st.json(raw_data[:3] if len(raw_data) > 3 else raw_data)
			
 
				         
			
 
				-        ocr_data = parse_ocr_data(raw_data)
			
 
				+        ocr_data = TableLineGenerator.parse_ocr_data(raw_data, tool="ppstructv3")
			
 
				         
			
 
				         if not ocr_data:
			
 
				             st.error("❌ 无法解析 OCR 数据，请检查 JSON 格式")
			
--- a/table_line_generator/editor/mode_setup.py
+++ b/table_line_generator/editor/mode_setup.py
@@ -4,6 +4,11 @@
 
				 import streamlit as st
			
 
				 from PIL import Image
			
 
				 from typing import Dict, Tuple
			
 
				+import sys
			
 
				+from pathlib import Path
			
 
				+
			
 
				+# 添加父目录到路径
			
 
				+sys.path.insert(0, str(Path(__file__).parent.parent))
			
 
				 
			
 
				 try:
			
 
				     from ..table_line_generator import TableLineGenerator
			
@@ -14,85 +19,72 @@ from .display_controls import create_display_settings_section, create_undo_redo_
 
				 from .analysis_controls import create_analysis_section
			
 
				 
			
 
				 
			
 
				-def setup_new_annotation_mode(ocr_data, image, config: Dict) -> Tuple:
			
 
				+def setup_new_annotation_mode(
			
 
				+    ocr_data: list,
			
 
				+    image: Image.Image,
			
 
				+    display_config: Dict
			
 
				+) -> Tuple:
			
 
				     """
			
 
				-    设置新建标注模式的通用逻辑
			
 
				-    
			
 
				-    Args:
			
 
				-        ocr_data: OCR 数据
			
 
				-        image: 图片对象
			
 
				-        config: 显示配置
			
 
				+    设置新建标注模式
			
 
				     
			
 
				     Returns:
			
 
				-        tuple: (y_tolerance, x_tolerance, min_row_height, line_width, 
			
 
				-                display_mode, zoom_level, show_line_numbers)
			
 
				+        (generator, structure, undo_stack, line_width, display_mode, zoom_level, show_line_numbers)
			
 
				     """
			
 
				-    # 参数调整
			
 
				-    st.sidebar.header("🔧 参数调整")
			
 
				-    y_tolerance = st.sidebar.slider(
			
 
				-        "Y轴聚类容差（像素）", 
			
 
				-        1, 20, 5, 
			
 
				-        key="new_y_tol"
			
 
				-    )
			
 
				-    x_tolerance = st.sidebar.slider(
			
 
				-        "X轴聚类容差（像素）", 
			
 
				-        5, 50, 10, 
			
 
				-        key="new_x_tol"
			
 
				-    )
			
 
				-    min_row_height = st.sidebar.slider(
			
 
				-        "最小行高（像素）", 
			
 
				-        10, 100, 20, 
			
 
				-        key="new_min_h"
			
 
				-    )
			
 
				-    
			
 
				-    # 显示设置
			
 
				-    line_width, display_mode, zoom_level, show_line_numbers = \
			
 
				-        create_display_settings_section(config)
			
 
				-    create_undo_redo_section()
			
 
				+    # 🔑 获取当前工具类型
			
 
				+    tool = st.session_state.get('current_tool', 'ppstructv3')
			
 
				     
			
 
				     # 初始化生成器
			
 
				-    if 'generator' not in st.session_state or st.session_state.generator is None:
			
 
				-        try:
			
 
				-            generator = TableLineGenerator(image, ocr_data)
			
 
				-            st.session_state.generator = generator
			
 
				-        except Exception as e:
			
 
				-            st.error(f"❌ 初始化生成器失败: {e}")
			
 
				-            st.stop()
			
 
				+    if 'generator' not in st.session_state:
			
 
				+        st.session_state.generator = TableLineGenerator(image, ocr_data)
			
 
				     
			
 
				-    # 分析按钮
			
 
				-    create_analysis_section(y_tolerance, x_tolerance, min_row_height)
			
 
				+    # 分析控件
			
 
				+    structure = create_analysis_section(
			
 
				+        st.session_state.generator,
			
 
				+        tool=tool  # 🔑 传入工具类型
			
 
				+    )
			
 
				+    
			
 
				+    # 显示控件
			
 
				+    line_width, display_mode, zoom_level, show_line_numbers = create_display_settings_section(
			
 
				+        display_config
			
 
				+    )
			
 
				+    
			
 
				+    # 撤销/重做
			
 
				+    undo_stack = []
			
 
				     
			
 
				-    return (y_tolerance, x_tolerance, min_row_height, 
			
 
				-            line_width, display_mode, zoom_level, show_line_numbers)
			
 
				+    return (
			
 
				+        st.session_state.generator,
			
 
				+        structure,
			
 
				+        undo_stack,
			
 
				+        line_width,
			
 
				+        display_mode,
			
 
				+        zoom_level,
			
 
				+        show_line_numbers
			
 
				+    )
			
 
				 
			
 
				 
			
 
				-def setup_edit_annotation_mode(structure: Dict, image, config: Dict) -> Tuple:
			
 
				+def setup_edit_annotation_mode(
			
 
				+    structure: Dict,
			
 
				+    image: Image.Image,
			
 
				+    display_config: Dict
			
 
				+) -> Tuple:
			
 
				     """
			
 
				-    设置编辑标注模式的通用逻辑
			
 
				-    
			
 
				-    Args:
			
 
				-        structure: 表格结构
			
 
				-        image: 图片对象（可为 None）
			
 
				-        config: 显示配置
			
 
				+    设置编辑标注模式
			
 
				     
			
 
				     Returns:
			
 
				-        tuple: (image, line_width, display_mode, zoom_level, show_line_numbers)
			
 
				+        (image, line_width, display_mode, zoom_level, show_line_numbers)
			
 
				     """
			
 
				-    # 如果没有图片，创建虚拟画布
			
 
				-    if image is None:
			
 
				-        if 'table_bbox' in structure:
			
 
				-            bbox = structure['table_bbox']
			
 
				-            dummy_width = bbox[2] + 100
			
 
				-            dummy_height = bbox[3] + 100
			
 
				-        else:
			
 
				-            dummy_width = 2000
			
 
				-            dummy_height = 2000
			
 
				-        image = Image.new('RGB', (dummy_width, dummy_height), color='white')
			
 
				-        st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height})")
			
 
				+    # 显示控件
			
 
				+    line_width, display_mode, zoom_level, show_line_numbers = create_display_settings_section(
			
 
				+        display_config
			
 
				+    )
			
 
				     
			
 
				-    # 显示设置
			
 
				-    line_width, display_mode, zoom_level, show_line_numbers = \
			
 
				-        create_display_settings_section(config)
			
 
				+    # 撤销/重做控件
			
 
				     create_undo_redo_section()
			
 
				     
			
 
				-    return image, line_width, display_mode, zoom_level, show_line_numbers
			
 
				+    return (
			
 
				+        image,
			
 
				+        line_width,
			
 
				+        display_mode,
			
 
				+        zoom_level,
			
 
				+        show_line_numbers
			
 
				+    )
			
--- a/table_line_generator/editor/save_controls.py
+++ b/table_line_generator/editor/save_controls.py
@@ -3,10 +3,10 @@
 
				 """
			
 
				 import streamlit as st
			
 
				 import io
			
 
				+import json
			
 
				 from pathlib import Path
			
 
				 from typing import Dict
			
 
				 
			
 
				-from .config_loader import save_structure_to_config
			
 
				 from .drawing import draw_clean_table_lines
			
 
				 
			
 
				 
			
@@ -142,7 +142,9 @@ def _save_structure_file(structure, output_dir, base_name, suffix, saved_files):
 
				     """保存结构配置文件"""
			
 
				     structure_filename = f"{base_name}{suffix}"
			
 
				     structure_path = output_dir / structure_filename
			
 
				-    save_structure_to_config(structure, structure_path)
			
 
				+    # save_structure_to_config(structure, structure_path)
			
 
				+    with open(structure_path, 'w', encoding='utf-8') as f:
			
 
				+        json.dump(structure, f, indent=2, ensure_ascii=False)
			
 
				     saved_files.append(("配置文件", structure_path))
			
 
				     
			
 
				     with open(structure_path, 'r') as f:
			
--- a/table_line_generator/editor/ui_components_v1.py
+++ b/table_line_generator/editor/ui_components_v1.py
@@ -1,634 +0,0 @@
 
				-"""
			
 
				-UI 组件
			
 
				-"""
			
 
				-
			
 
				-import streamlit as st
			
 
				-import json
			
 
				-from pathlib import Path
			
 
				-from PIL import Image
			
 
				-import tempfile
			
 
				-from typing import Dict, List
			
 
				-
			
 
				-try:
			
 
				-    from ..table_line_generator import TableLineGenerator
			
 
				-except ImportError:
			
 
				-    from table_line_generator import TableLineGenerator
			
 
				-
			
 
				-from .config_loader import load_structure_from_config, build_data_source_catalog
			
 
				-from .drawing import clear_table_image_cache
			
 
				-
			
 
				-def create_file_uploader_section(work_mode: str):
			
 
				-    """
			
 
				-    创建文件上传区域
			
 
				-    
			
 
				-    Args:
			
 
				-        work_mode: 工作模式（"🆕 新建标注" 或 "📂 加载已有标注"）
			
 
				-    """
			
 
				-    if work_mode == "🆕 新建标注":
			
 
				-        st.sidebar.subheader("上传文件")
			
 
				-        uploaded_json = st.sidebar.file_uploader("上传OCR结果JSON", type=['json'], key="new_json")
			
 
				-        uploaded_image = st.sidebar.file_uploader("上传对应图片", type=['jpg', 'png'], key="new_image")
			
 
				-        
			
 
				-        # 处理 JSON 上传
			
 
				-        if uploaded_json is not None:
			
 
				-            if st.session_state.loaded_json_name != uploaded_json.name:
			
 
				-                try:
			
 
				-                    raw_data = json.load(uploaded_json)
			
 
				-                    
			
 
				-                    with st.expander("🔍 原始数据结构"):
			
 
				-                        if isinstance(raw_data, dict):
			
 
				-                            st.json({k: f"<{type(v).__name__}>" if not isinstance(v, (str, int, float, bool, type(None))) else v 
			
 
				-                                    for k, v in list(raw_data.items())[:5]})
			
 
				-                        else:
			
 
				-                            st.json(raw_data[:3] if len(raw_data) > 3 else raw_data)
			
 
				-                    
			
 
				-                    ocr_data = parse_ocr_data(raw_data)
			
 
				-                    
			
 
				-                    if not ocr_data:
			
 
				-                        st.error("❌ 无法解析 OCR 数据，请检查 JSON 格式")
			
 
				-                        st.stop()
			
 
				-                    
			
 
				-                    st.session_state.ocr_data = ocr_data
			
 
				-                    st.session_state.loaded_json_name = uploaded_json.name
			
 
				-                    st.session_state.loaded_config_name = None
			
 
				-                    
			
 
				-                    # 清除旧数据
			
 
				-                    if 'structure' in st.session_state:
			
 
				-                        del st.session_state.structure
			
 
				-                    if 'generator' in st.session_state:
			
 
				-                        del st.session_state.generator
			
 
				-                    st.session_state.undo_stack = []
			
 
				-                    st.session_state.redo_stack = []
			
 
				-                    clear_table_image_cache()
			
 
				-                    
			
 
				-                    st.success(f"✅ 成功加载 {len(ocr_data)} 条 OCR 记录")
			
 
				-                    
			
 
				-                except Exception as e:
			
 
				-                    st.error(f"❌ 加载数据失败: {e}")
			
 
				-                    st.stop()
			
 
				-        
			
 
				-        # 处理图片上传
			
 
				-        if uploaded_image is not None:
			
 
				-            if st.session_state.loaded_image_name != uploaded_image.name:
			
 
				-                try:
			
 
				-                    image = Image.open(uploaded_image)
			
 
				-                    st.session_state.image = image
			
 
				-                    st.session_state.loaded_image_name = uploaded_image.name
			
 
				-                    
			
 
				-                    # 清除旧数据
			
 
				-                    if 'structure' in st.session_state:
			
 
				-                        del st.session_state.structure
			
 
				-                    if 'generator' in st.session_state:
			
 
				-                        del st.session_state.generator
			
 
				-                    st.session_state.undo_stack = []
			
 
				-                    st.session_state.redo_stack = []
			
 
				-                    clear_table_image_cache()
			
 
				-                    
			
 
				-                    st.success(f"✅ 成功加载图片: {uploaded_image.name}")
			
 
				-                    
			
 
				-                except Exception as e:
			
 
				-                    st.error(f"❌ 加载图片失败: {e}")
			
 
				-                    st.stop()
			
 
				-    
			
 
				-    else:  # 加载已有标注
			
 
				-        st.sidebar.subheader("加载已保存的标注")
			
 
				-        
			
 
				-        uploaded_config = st.sidebar.file_uploader(
			
 
				-            "上传配置文件 (*_structure.json)",
			
 
				-            type=['json'],
			
 
				-            key="load_config"
			
 
				-        )
			
 
				-        
			
 
				-        uploaded_image_for_config = st.sidebar.file_uploader(
			
 
				-            "上传对应图片（可选）",
			
 
				-            type=['jpg', 'png'],
			
 
				-            key="load_image"
			
 
				-        )
			
 
				-        
			
 
				-        # 处理配置文件加载
			
 
				-        if uploaded_config is not None:
			
 
				-            if st.session_state.loaded_config_name != uploaded_config.name:
			
 
				-                try:
			
 
				-                    # 创建临时文件
			
 
				-                    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as tmp:
			
 
				-                        tmp.write(uploaded_config.getvalue().decode('utf-8'))
			
 
				-                        tmp_path = tmp.name
			
 
				-                    
			
 
				-                    # 加载结构
			
 
				-                    structure = load_structure_from_config(Path(tmp_path))
			
 
				-                    
			
 
				-                    # 清理临时文件
			
 
				-                    Path(tmp_path).unlink()
			
 
				-                    
			
 
				-                    st.session_state.structure = structure
			
 
				-                    st.session_state.loaded_config_name = uploaded_config.name
			
 
				-                    
			
 
				-                    # 清除历史记录和缓存
			
 
				-                    st.session_state.undo_stack = []
			
 
				-                    st.session_state.redo_stack = []
			
 
				-                    clear_table_image_cache()
			
 
				-                    
			
 
				-                    st.success(f"✅ 成功加载配置: {uploaded_config.name}")
			
 
				-                    st.info(
			
 
				-                        f"📊 表格结构: {len(structure['rows'])}行 x {len(structure['columns'])}列\n\n"
			
 
				-                        f"📏 横线数: {len(structure.get('horizontal_lines', []))}\n\n"
			
 
				-                        f"📏 竖线数: {len(structure.get('vertical_lines', []))}"
			
 
				-                    )
			
 
				-                    
			
 
				-                    # 显示配置文件详情
			
 
				-                    with st.expander("📋 配置详情"):
			
 
				-                        st.json({
			
 
				-                            "行数": len(structure['rows']),
			
 
				-                            "列数": len(structure['columns']),
			
 
				-                            "横线数": len(structure.get('horizontal_lines', [])),
			
 
				-                            "竖线数": len(structure.get('vertical_lines', [])),
			
 
				-                            "行高": structure.get('row_height'),
			
 
				-                            "列宽": structure.get('col_widths'),
			
 
				-                            "已修改的横线": list(structure.get('modified_h_lines', set())),
			
 
				-                            "已修改的竖线": list(structure.get('modified_v_lines', set()))
			
 
				-                        })
			
 
				-                    
			
 
				-                except Exception as e:
			
 
				-                    st.error(f"❌ 加载配置失败: {e}")
			
 
				-                    import traceback
			
 
				-                    st.code(traceback.format_exc())
			
 
				-                    st.stop()
			
 
				-        
			
 
				-        # 处理图片加载
			
 
				-        if uploaded_image_for_config is not None:
			
 
				-            if st.session_state.loaded_image_name != uploaded_image_for_config.name:
			
 
				-                try:
			
 
				-                    image = Image.open(uploaded_image_for_config)
			
 
				-                    st.session_state.image = image
			
 
				-                    st.session_state.loaded_image_name = uploaded_image_for_config.name
			
 
				-                    
			
 
				-                    clear_table_image_cache()
			
 
				-                    
			
 
				-                    st.success(f"✅ 成功加载图片: {uploaded_image_for_config.name}")
			
 
				-                    
			
 
				-                except Exception as e:
			
 
				-                    st.error(f"❌ 加载图片失败: {e}")
			
 
				-                    st.stop()
			
 
				-        
			
 
				-        # 提示信息
			
 
				-        if 'structure' in st.session_state and st.session_state.image is None:
			
 
				-            st.warning("⚠️ 已加载配置，但未加载对应图片。请上传图片以查看效果。")
			
 
				-            st.info("💡 提示：配置文件已加载，您可以：\n1. 上传对应图片查看效果\n2. 直接编辑配置并保存")
			
 
				-
			
 
				-
			
 
				-def create_display_settings_section(display_config: Dict):
			
 
				-    """显示设置（由配置驱动）"""
			
 
				-    st.sidebar.divider()
			
 
				-    st.sidebar.subheader("🖼️ 显示设置")
			
 
				-
			
 
				-    line_width = st.sidebar.slider(
			
 
				-        "线条宽度",
			
 
				-        int(display_config.get("line_width_min", 1)),
			
 
				-        int(display_config.get("line_width_max", 5)),
			
 
				-        int(display_config.get("default_line_width", 2)),
			
 
				-    )
			
 
				-    display_mode = st.sidebar.radio(
			
 
				-        "显示模式",
			
 
				-        ["对比显示", "仅显示划线图", "仅显示原图"],
			
 
				-        index=1,
			
 
				-    )
			
 
				-    zoom_level = st.sidebar.slider(
			
 
				-        "图片缩放",
			
 
				-        float(display_config.get("zoom_min", 0.25)),
			
 
				-        float(display_config.get("zoom_max", 2.0)),
			
 
				-        float(display_config.get("default_zoom", 1.0)),
			
 
				-        float(display_config.get("zoom_step", 0.25)),
			
 
				-    )
			
 
				-    show_line_numbers = st.sidebar.checkbox(
			
 
				-        "显示线条编号",
			
 
				-        value=bool(display_config.get("show_line_numbers", True)),
			
 
				-    )
			
 
				-
			
 
				-    return line_width, display_mode, zoom_level, show_line_numbers
			
 
				-
			
 
				-
			
 
				-def create_undo_redo_section():
			
 
				-    """创建撤销/重做区域"""
			
 
				-    from .state_manager import undo_last_action, redo_last_action
			
 
				-    from .drawing import clear_table_image_cache
			
 
				-    
			
 
				-    st.sidebar.divider()
			
 
				-    st.sidebar.subheader("↩️ 撤销/重做")
			
 
				-    
			
 
				-    col1, col2 = st.sidebar.columns(2)
			
 
				-    with col1:
			
 
				-        if st.button("↩️ 撤销", disabled=len(st.session_state.undo_stack) == 0):
			
 
				-            if undo_last_action():
			
 
				-                clear_table_image_cache()
			
 
				-                st.success("✅ 已撤销")
			
 
				-                st.rerun()
			
 
				-    
			
 
				-    with col2:
			
 
				-        if st.button("↪️ 重做", disabled=len(st.session_state.redo_stack) == 0):
			
 
				-            if redo_last_action():
			
 
				-                clear_table_image_cache()
			
 
				-                st.success("✅ 已重做")
			
 
				-                st.rerun()
			
 
				-    
			
 
				-    st.sidebar.info(f"📚 历史记录: {len(st.session_state.undo_stack)} 条")
			
 
				-
			
 
				-
			
 
				-def create_analysis_section(y_tolerance, x_tolerance, min_row_height):
			
 
				-    """
			
 
				-    创建分析区域
			
 
				-    
			
 
				-    Args:
			
 
				-        y_tolerance: Y轴聚类容差
			
 
				-        x_tolerance: X轴聚类容差
			
 
				-        min_row_height: 最小行高
			
 
				-    """
			
 
				-    if st.button("🔍 分析表格结构"):
			
 
				-        with st.spinner("分析中..."):
			
 
				-            try:
			
 
				-                generator = st.session_state.generator
			
 
				-                structure = generator.analyze_table_structure(
			
 
				-                    y_tolerance=y_tolerance,
			
 
				-                    x_tolerance=x_tolerance,
			
 
				-                    min_row_height=min_row_height
			
 
				-                )
			
 
				-                
			
 
				-                if not structure:
			
 
				-                    st.warning("⚠️ 未检测到表格结构")
			
 
				-                    st.stop()
			
 
				-                
			
 
				-                structure['modified_h_lines'] = set()
			
 
				-                structure['modified_v_lines'] = set()
			
 
				-                
			
 
				-                st.session_state.structure = structure
			
 
				-                st.session_state.undo_stack = []
			
 
				-                st.session_state.redo_stack = []
			
 
				-                clear_table_image_cache()
			
 
				-                
			
 
				-                st.success(
			
 
				-                    f"✅ 检测到 {len(structure['rows'])} 行（{len(structure['horizontal_lines'])} 条横线），"
			
 
				-                    f"{len(structure['columns'])} 列（{len(structure['vertical_lines'])} 条竖线）"
			
 
				-                )
			
 
				-                
			
 
				-                col1, col2, col3, col4 = st.columns(4)
			
 
				-                with col1:
			
 
				-                    st.metric("行数", len(structure['rows']))
			
 
				-                with col2:
			
 
				-                    st.metric("横线数", len(structure['horizontal_lines']))
			
 
				-                with col3:
			
 
				-                    st.metric("列数", len(structure['columns']))
			
 
				-                with col4:
			
 
				-                    st.metric("竖线数", len(structure['vertical_lines']))
			
 
				-            
			
 
				-            except Exception as e:
			
 
				-                st.error(f"❌ 分析失败: {e}")
			
 
				-                import traceback
			
 
				-                st.code(traceback.format_exc())
			
 
				-                st.stop()
			
 
				-
			
 
				-
			
 
				-def create_save_section(work_mode, structure, image, line_width, output_config: Dict):
			
 
				-    """
			
 
				-    保存设置（目录/命名来自配置）
			
 
				-    """
			
 
				-    from .config_loader import save_structure_to_config
			
 
				-    from .drawing import draw_clean_table_lines
			
 
				-    import io
			
 
				-
			
 
				-    st.divider()
			
 
				-
			
 
				-    defaults = output_config.get("defaults", {})
			
 
				-    line_colors = output_config.get("line_colors") or [
			
 
				-        {"name": "黑色", "rgb": [0, 0, 0]},
			
 
				-        {"name": "蓝色", "rgb": [0, 0, 255]},
			
 
				-        {"name": "红色", "rgb": [255, 0, 0]},
			
 
				-    ]
			
 
				-
			
 
				-    save_col1, save_col2, save_col3 = st.columns(3)
			
 
				-
			
 
				-    with save_col1:
			
 
				-        save_structure = st.checkbox(
			
 
				-            "保存表格结构配置",
			
 
				-            value=bool(defaults.get("save_structure", True)),
			
 
				-        )
			
 
				-
			
 
				-    with save_col2:
			
 
				-        save_image = st.checkbox(
			
 
				-            "保存表格线图片",
			
 
				-            value=bool(defaults.get("save_image", True)),
			
 
				-        )
			
 
				-
			
 
				-    color_names = [c["name"] for c in line_colors]
			
 
				-    default_color = defaults.get("line_color", color_names[0])
			
 
				-    default_index = color_names.index(default_color) if default_color in color_names else 0
			
 
				-
			
 
				-    with save_col3:
			
 
				-        line_color_option = st.selectbox(
			
 
				-            "保存时线条颜色",
			
 
				-            color_names,
			
 
				-            label_visibility="collapsed",
			
 
				-            index=default_index,
			
 
				-        )
			
 
				-
			
 
				-    if st.button("💾 保存", type="primary"):
			
 
				-        output_dir = Path(output_config.get("directory", "output/table_structures"))
			
 
				-        output_dir.mkdir(parents=True, exist_ok=True)
			
 
				-
			
 
				-        structure_suffix = output_config.get("structure_suffix", "_structure.json")
			
 
				-        image_suffix = output_config.get("image_suffix", "_with_lines.png")
			
 
				-
			
 
				-        # 确定文件名
			
 
				-        if work_mode == "🆕 新建标注":
			
 
				-            if st.session_state.loaded_json_name:
			
 
				-                base_name = Path(st.session_state.loaded_json_name).stem
			
 
				-            else:
			
 
				-                base_name = "table_structure"
			
 
				-        else:
			
 
				-            if st.session_state.loaded_config_name:
			
 
				-                base_name = Path(st.session_state.loaded_config_name).stem
			
 
				-                if base_name.endswith('_structure'):
			
 
				-                    base_name = base_name[:-10]
			
 
				-            elif st.session_state.loaded_image_name:
			
 
				-                base_name = Path(st.session_state.loaded_image_name).stem
			
 
				-            else:
			
 
				-                base_name = "table_structure"
			
 
				-        
			
 
				-        saved_files = []
			
 
				-        
			
 
				-        if save_structure:
			
 
				-            structure_filename = f"{base_name}{structure_suffix}"
			
 
				-            structure_path = output_dir / structure_filename
			
 
				-            save_structure_to_config(structure, structure_path)
			
 
				-            saved_files.append(("配置文件", structure_path))
			
 
				-            
			
 
				-            with open(structure_path, 'r') as f:
			
 
				-                st.download_button(
			
 
				-                    "📥 下载配置文件",
			
 
				-                    f.read(),
			
 
				-                    file_name=f"{base_name}_structure.json",
			
 
				-                    mime="application/json"
			
 
				-                )
			
 
				-        
			
 
				-        if save_image:
			
 
				-            if st.session_state.image is None:
			
 
				-                st.warning("⚠️ 无法保存图片：未加载图片文件")
			
 
				-            else:
			
 
				-                selected_color_rgb = next(
			
 
				-                    (tuple(c["rgb"]) for c in line_colors if c["name"] == line_color_option),
			
 
				-                    (0, 0, 0),
			
 
				-                )
			
 
				-                clean_img = draw_clean_table_lines(
			
 
				-                    st.session_state.image,
			
 
				-                    structure,
			
 
				-                    line_width=line_width,
			
 
				-                    line_color=selected_color_rgb,
			
 
				-                )
			
 
				-                image_filename = f"{base_name}{image_suffix}"
			
 
				-                output_image_path = output_dir / image_filename
			
 
				-                clean_img.save(output_image_path)
			
 
				-                saved_files.append(("表格线图片", output_image_path))
			
 
				-                
			
 
				-                buf = io.BytesIO()
			
 
				-                clean_img.save(buf, format='PNG')
			
 
				-                buf.seek(0)
			
 
				-                
			
 
				-                st.download_button(
			
 
				-                    "📥 下载表格线图片",
			
 
				-                    buf,
			
 
				-                    file_name=f"{base_name}_with_lines.png",
			
 
				-                    mime="image/png"
			
 
				-                )
			
 
				-        
			
 
				-        if saved_files:
			
 
				-            st.success(f"✅ 已保存 {len(saved_files)} 个文件:")
			
 
				-            for file_type, file_path in saved_files:
			
 
				-                st.info(f"  • {file_type}: {file_path}")
			
 
				-
			
 
				-def setup_new_annotation_mode(ocr_data, image, config: Dict):
			
 
				-    """
			
 
				-    设置新建标注模式的通用逻辑
			
 
				-    
			
 
				-    Args:
			
 
				-        ocr_data: OCR 数据
			
 
				-        image: 图片对象
			
 
				-        config: 显示配置
			
 
				-    
			
 
				-    Returns:
			
 
				-        tuple: (y_tolerance, x_tolerance, min_row_height, line_width, display_mode, zoom_level, show_line_numbers)
			
 
				-    """
			
 
				-    # 参数调整
			
 
				-    st.sidebar.header("🔧 参数调整")
			
 
				-    y_tolerance = st.sidebar.slider("Y轴聚类容差（像素）", 1, 20, 5, key="new_y_tol")
			
 
				-    x_tolerance = st.sidebar.slider("X轴聚类容差（像素）", 5, 50, 10, key="new_x_tol")
			
 
				-    min_row_height = st.sidebar.slider("最小行高（像素）", 10, 100, 20, key="new_min_h")
			
 
				-    
			
 
				-    # 显示设置
			
 
				-    line_width, display_mode, zoom_level, show_line_numbers = create_display_settings_section(config)
			
 
				-    create_undo_redo_section()
			
 
				-    
			
 
				-    # 初始化生成器
			
 
				-    if 'generator' not in st.session_state or st.session_state.generator is None:
			
 
				-        try:
			
 
				-            generator = TableLineGenerator(image, ocr_data)
			
 
				-            st.session_state.generator = generator
			
 
				-        except Exception as e:
			
 
				-            st.error(f"❌ 初始化生成器失败: {e}")
			
 
				-            st.stop()
			
 
				-    
			
 
				-    # 分析按钮
			
 
				-    create_analysis_section(y_tolerance, x_tolerance, min_row_height)
			
 
				-    
			
 
				-    return y_tolerance, x_tolerance, min_row_height, line_width, display_mode, zoom_level, show_line_numbers
			
 
				-
			
 
				-
			
 
				-def setup_edit_annotation_mode(structure, image, config: Dict):
			
 
				-    """
			
 
				-    设置编辑标注模式的通用逻辑
			
 
				-    
			
 
				-    Args:
			
 
				-        structure: 表格结构
			
 
				-        image: 图片对象（可为 None）
			
 
				-        config: 显示配置
			
 
				-    
			
 
				-    Returns:
			
 
				-        tuple: (image, line_width, display_mode, zoom_level, show_line_numbers)
			
 
				-    """
			
 
				-    # 如果没有图片，创建虚拟画布
			
 
				-    if image is None:
			
 
				-        if 'table_bbox' in structure:
			
 
				-            bbox = structure['table_bbox']
			
 
				-            dummy_width = bbox[2] + 100
			
 
				-            dummy_height = bbox[3] + 100
			
 
				-        else:
			
 
				-            dummy_width = 2000
			
 
				-            dummy_height = 2000
			
 
				-        image = Image.new('RGB', (dummy_width, dummy_height), color='white')
			
 
				-        st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height})")
			
 
				-    
			
 
				-    # 显示设置
			
 
				-    line_width, display_mode, zoom_level, show_line_numbers = create_display_settings_section(config)
			
 
				-    create_undo_redo_section()
			
 
				-    
			
 
				-    return image, line_width, display_mode, zoom_level, show_line_numbers
			
 
				-
			
 
				-
			
 
				-def render_table_structure_view(structure, image, line_width, display_mode, zoom_level, show_line_numbers, 
			
 
				-                                viewport_width, viewport_height):
			
 
				-    """
			
 
				-    渲染表格结构视图（统一三种模式的显示逻辑）
			
 
				-    
			
 
				-    Args:
			
 
				-        structure: 表格结构
			
 
				-        image: 图片对象
			
 
				-        line_width: 线条宽度
			
 
				-        display_mode: 显示模式
			
 
				-        zoom_level: 缩放级别
			
 
				-        show_line_numbers: 是否显示线条编号
			
 
				-        viewport_width: 视口宽度
			
 
				-        viewport_height: 视口高度
			
 
				-    """
			
 
				-    # 绘制表格线
			
 
				-    img_with_lines = get_cached_table_lines_image(
			
 
				-        image, structure, line_width=line_width, show_numbers=show_line_numbers
			
 
				-    )
			
 
				-    
			
 
				-    # 根据显示模式显示图片
			
 
				-    if display_mode == "对比显示":
			
 
				-        col1, col2 = st.columns(2)
			
 
				-        with col1:
			
 
				-            show_image_with_scroll(image, "原图", viewport_width, viewport_height, zoom_level)
			
 
				-        with col2:
			
 
				-            show_image_with_scroll(img_with_lines, "表格线", viewport_width, viewport_height, zoom_level)
			
 
				-    elif display_mode == "仅显示划线图":
			
 
				-        show_image_with_scroll(
			
 
				-            img_with_lines, 
			
 
				-            f"表格线图 (缩放: {zoom_level:.0%})", 
			
 
				-            viewport_width, 
			
 
				-            viewport_height, 
			
 
				-            zoom_level
			
 
				-        )
			
 
				-    else:
			
 
				-        show_image_with_scroll(
			
 
				-            image, 
			
 
				-            f"原图 (缩放: {zoom_level:.0%})", 
			
 
				-            viewport_width, 
			
 
				-            viewport_height, 
			
 
				-            zoom_level
			
 
				-        )
			
 
				-    
			
 
				-    # 手动调整区域
			
 
				-    create_adjustment_section(structure)
			
 
				-    
			
 
				-    # 显示详细信息
			
 
				-    with st.expander("📊 表格结构详情"):
			
 
				-        st.json({
			
 
				-            "行数": len(structure['rows']),
			
 
				-            "列数": len(structure['columns']),
			
 
				-            "横线数": len(structure.get('horizontal_lines', [])),
			
 
				-            "竖线数": len(structure.get('vertical_lines', [])),
			
 
				-            "横线坐标": structure.get('horizontal_lines', []),
			
 
				-            "竖线坐标": structure.get('vertical_lines', []),
			
 
				-            "标准行高": structure.get('row_height'),
			
 
				-            "列宽度": structure.get('col_widths'),
			
 
				-            "修改的横线": list(structure.get('modified_h_lines', set())),
			
 
				-            "修改的竖线": list(structure.get('modified_v_lines', set()))
			
 
				-        })
			
 
				-
			
 
				-
			
 
				-def create_directory_selector(data_sources: List[Dict], global_output_config: Dict):
			
 
				-    """目录模式选择器（优化：避免重复加载）"""
			
 
				-    st.sidebar.subheader("目录模式")
			
 
				-    source_names = [src["name"] for src in data_sources]
			
 
				-    selected_name = st.sidebar.selectbox("选择数据源", source_names, key="dir_mode_source")
			
 
				-    source_cfg = next(src for src in data_sources if src["name"] == selected_name)
			
 
				-    
			
 
				-    output_cfg = source_cfg.get("output", global_output_config)
			
 
				-    output_dir = Path(output_cfg.get("directory", "output/table_structures"))
			
 
				-    structure_suffix = output_cfg.get("structure_suffix", "_structure.json")
			
 
				-    
			
 
				-    catalog_key = f"catalog::{selected_name}"
			
 
				-    if catalog_key not in st.session_state:
			
 
				-        st.session_state[catalog_key] = build_data_source_catalog(source_cfg)
			
 
				-    catalog = st.session_state[catalog_key]
			
 
				-
			
 
				-    if not catalog:
			
 
				-        st.sidebar.warning("目录中没有 JSON 文件")
			
 
				-        return
			
 
				-
			
 
				-    if 'dir_selected_index' not in st.session_state:
			
 
				-        st.session_state.dir_selected_index = 0
			
 
				-
			
 
				-    selected = st.sidebar.selectbox(
			
 
				-        "选择文件",
			
 
				-        range(len(catalog)),
			
 
				-        format_func=lambda i: catalog[i]["display"],
			
 
				-        index=st.session_state.dir_selected_index,
			
 
				-        key="dir_select_box"
			
 
				-    )
			
 
				-
			
 
				-    page_input = st.sidebar.number_input(
			
 
				-        "页码跳转",
			
 
				-        min_value=1,
			
 
				-        max_value=len(catalog),
			
 
				-        value=catalog[selected]["index"],
			
 
				-        step=1,
			
 
				-        key="dir_page_input"
			
 
				-    )
			
 
				-    
			
 
				-    # 🔑 关键优化：只在切换文件时才重新加载
			
 
				-    current_entry_key = f"{selected_name}::{catalog[selected]['json']}"
			
 
				-    
			
 
				-    if 'last_loaded_entry' not in st.session_state or st.session_state.last_loaded_entry != current_entry_key:
			
 
				-        # 文件切换，重新加载
			
 
				-        entry = catalog[selected]
			
 
				-        base_name = entry["json"].stem
			
 
				-        structure_file = output_dir / f"{base_name}{structure_suffix}"
			
 
				-        has_structure = structure_file.exists()
			
 
				-        
			
 
				-        # 📂 加载 JSON
			
 
				-        with open(entry["json"], "r", encoding="utf-8") as fp:
			
 
				-            raw = json.load(fp)
			
 
				-        st.session_state.ocr_data = parse_ocr_data(raw)
			
 
				-        st.session_state.loaded_json_name = entry["json"].name
			
 
				-
			
 
				-        # 🖼️ 加载图片
			
 
				-        if entry["image"] and entry["image"].exists():
			
 
				-            st.session_state.image = Image.open(entry["image"])
			
 
				-            st.session_state.loaded_image_name = entry["image"].name
			
 
				-        else:
			
 
				-            st.session_state.image = None
			
 
				-
			
 
				-        # 🎯 自动模式判断
			
 
				-        if has_structure:
			
 
				-            st.session_state.dir_auto_mode = "edit"
			
 
				-            st.session_state.loaded_config_name = structure_file.name
			
 
				-            
			
 
				-            try:
			
 
				-                structure = load_structure_from_config(structure_file)
			
 
				-                st.session_state.structure = structure
			
 
				-                st.session_state.undo_stack = []
			
 
				-                st.session_state.redo_stack = []
			
 
				-                clear_table_image_cache()
			
 
				-                st.sidebar.success(f"✅ 编辑模式")
			
 
				-            except Exception as e:
			
 
				-                st.error(f"❌ 加载标注失败: {e}")
			
 
				-                st.session_state.dir_auto_mode = "new"
			
 
				-        else:
			
 
				-            st.session_state.dir_auto_mode = "new"
			
 
				-            if 'structure' in st.session_state:
			
 
				-                del st.session_state.structure
			
 
				-            if 'generator' in st.session_state:
			
 
				-                del st.session_state.generator
			
 
				-            st.sidebar.info(f"🆕 新建模式")
			
 
				-        
			
 
				-        # 标记已加载
			
 
				-        st.session_state.last_loaded_entry = current_entry_key
			
 
				-        st.info(f"📂 已加载: {entry['json'].name}")
			
 
				-    
			
 
				-    # 页码跳转处理
			
 
				-    if page_input != catalog[selected]["index"]:
			
 
				-        target = next((i for i, item in enumerate(catalog) if item["index"] == page_input), None)
			
 
				-        if target is not None:
			
 
				-            st.session_state.dir_selected_index = target
			
 
				-            st.rerun()
			
 
				-
			
 
				-    return st.session_state.get('dir_auto_mode', 'new')
			
--- a/table_line_generator/streamlit_table_line_editor.py
+++ b/table_line_generator/streamlit_table_line_editor.py
@@ -3,6 +3,10 @@
 
				 支持人工调整表格线位置
			
 
				 """
			
 
				 
			
 
				+import warnings
			
 
				+# 过滤 PaddleX 的语法警告
			
 
				+warnings.filterwarnings('ignore', category=SyntaxWarning, module='paddlex')
			
 
				+
			
 
				 import streamlit as st
			
 
				 from pathlib import Path
			
 
				 from PIL import Image
			
@@ -189,74 +193,56 @@ def create_table_line_editor():
 
				         
			
 
				         return
			
 
				     
			
 
				-    # 🆕 新建标注模式
			
 
				+    # 🎯 新建标注模式
			
 
				     if work_mode == "🆕 新建标注":
			
 
				         create_file_uploader_section(work_mode)
			
 
				         
			
 
				-        if not (st.session_state.ocr_data and st.session_state.image):
			
 
				-            st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
			
 
				-            with st.expander("📖 使用说明"):
			
 
				-                st.markdown("""
			
 
				-                ### 🆕 新建标注模式
			
 
				-                
			
 
				-                **支持的OCR格式**
			
 
				-                
			
 
				-                **1. PPStructure V3 格式 (推荐)**
			
 
				-                ```json
			
 
				-                {
			
 
				-                  "parsing_res_list": [...],
			
 
				-                  "overall_ocr_res": {
			
 
				-                    "rec_boxes": [[x1, y1, x2, y2], ...],
			
 
				-                    "rec_texts": ["文本1", "文本2", ...]
			
 
				-                  }
			
 
				-                }
			
 
				-                ```
			
 
				-                
			
 
				-                **2. 标准格式**
			
 
				-                ```json
			
 
				-                [
			
 
				-                  {
			
 
				-                    "text": "文本内容",
			
 
				-                    "bbox": [x1, y1, x2, y2]
			
 
				-                  }
			
 
				-                ]
			
 
				-                ```
			
 
				-                
			
 
				-                ### 📂 加载已有标注模式
			
 
				-                
			
 
				-                1. 上传之前保存的 `*_structure.json` 配置文件
			
 
				-                2. 上传对应的图片（可选）
			
 
				-                3. 继续调整表格线位置
			
 
				-                4. 保存更新后的配置
			
 
				-                """)
			
 
				-            return
			
 
				-        
			
 
				-        st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")
			
 
				-        
			
 
				-        _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
			
 
				-            st.session_state.ocr_data,
			
 
				-            st.session_state.image,
			
 
				-            TABLE_EDITOR_CONFIG["display"]
			
 
				-        )
			
 
				-        
			
 
				-        if 'structure' in st.session_state and st.session_state.structure:
			
 
				-            render_table_structure_view(
			
 
				-                st.session_state.structure,
			
 
				-                st.session_state.image,
			
 
				-                line_width,
			
 
				-                display_mode,
			
 
				-                zoom_level,
			
 
				-                show_line_numbers,
			
 
				-                VIEWPORT_WIDTH,
			
 
				-                VIEWPORT_HEIGHT
			
 
				-            )
			
 
				-            create_save_section(
			
 
				-                work_mode,
			
 
				-                st.session_state.structure,
			
 
				-                st.session_state.image,
			
 
				-                line_width,
			
 
				-                TABLE_EDITOR_CONFIG["output"]
			
 
				+        if st.session_state.ocr_data and st.session_state.image:
			
 
				+            st.info(f"📂 已加载: {st.session_state.loaded_json_name}")
			
 
				+            
			
 
				+            # 🔧 显示分析参数设置（统一处理）
			
 
				+            st.sidebar.subheader("🔬 分析参数")
			
 
				+            
			
 
				+            analysis_method = st.sidebar.selectbox(
			
 
				+                "分析算法",
			
 
				+                ["auto", "cluster", "mineru"],
			
 
				+                format_func=lambda x: {
			
 
				+                    "auto": "🤖 自动选择（推荐）",
			
 
				+                    "cluster": "📊 聚类算法（通用）",
			
 
				+                    "mineru": "🎯 MinerU 索引算法"
			
 
				+                }[x]
			
 
				             )
			
 
				+            
			
 
				+            if analysis_method in ["auto", "cluster"]:
			
 
				+                y_tolerance = st.sidebar.slider("Y轴容差", 1, 20, 5)
			
 
				+                x_tolerance = st.sidebar.slider("X轴容差", 1, 30, 10)
			
 
				+                min_row_height = st.sidebar.slider("最小行高", 10, 50, 20)
			
 
				+            
			
 
				+            # 🎯 分析按钮
			
 
				+            if st.button("🔍 分析表格结构"):
			
 
				+                with st.spinner("正在分析..."):
			
 
				+                    # 统一的分析流程
			
 
				+                    generator = TableLineGenerator(
			
 
				+                        st.session_state.image, 
			
 
				+                        st.session_state.ocr_data
			
 
				+                    )
			
 
				+                    
			
 
				+                    if analysis_method == "auto":
			
 
				+                        # 根据数据特征自动选择
			
 
				+                        has_cell_index = any('row' in item for item in st.session_state.ocr_data)
			
 
				+                        method = "mineru" if has_cell_index else "cluster"
			
 
				+                    else:
			
 
				+                        method = analysis_method
			
 
				+                    
			
 
				+                    st.session_state.structure = generator.analyze_table_structure(
			
 
				+                        y_tolerance=y_tolerance if method == "cluster" else 5,
			
 
				+                        x_tolerance=x_tolerance if method == "cluster" else 10,
			
 
				+                        min_row_height=min_row_height if method == "cluster" else 20,
			
 
				+                        method=method
			
 
				+                    )
			
 
				+                    
			
 
				+                    st.success(f"✅ 分析完成（使用 {method} 算法）")
			
 
				+        
			
 
				         return
			
 
				     
			
 
				     # 📂 加载已有标注模式
			
--- a/table_line_generator/table_line_generator.py
+++ b/table_line_generator/table_line_generator.py
@@ -9,51 +9,139 @@ from PIL import Image, ImageDraw
 
				 from pathlib import Path
			
 
				 from typing import List, Dict, Tuple, Optional, Union
			
 
				 import json
			
 
				+from bs4 import BeautifulSoup
			
 
				 
			
 
				 
			
 
				 class TableLineGenerator:
			
 
				     """表格线生成器"""
			
 
				     
			
 
				-    def __init__(self, image: Union[str, Image.Image], ocr_data: List[Dict]):
			
 
				+    def __init__(self, image: Union[str, Image.Image, None], ocr_data: Dict):
			
 
				         """
			
 
				         初始化表格线生成器
			
 
				         
			
 
				         Args:
			
 
				-            image: 图片路径(str) 或 PIL.Image 对象
			
 
				+            image: 图片路径(str) 或 PIL.Image 对象，或 None（仅分析结构时）
			
 
				             ocr_data: OCR识别结果（包含bbox）
			
 
				         """
			
 
				-        if isinstance(image, str):
			
 
				-            # 传入的是路径
			
 
				+        if image is None:
			
 
				+            # 🆕 无图片模式：仅用于结构分析
			
 
				+            self.image_path = None
			
 
				+            self.image = None
			
 
				+        elif isinstance(image, str):
			
 
				             self.image_path = image
			
 
				             self.image = Image.open(image)
			
 
				         elif isinstance(image, Image.Image):
			
 
				-            # 传入的是 PIL Image 对象
			
 
				-            self.image_path = None  # 没有路径
			
 
				+            self.image_path = None
			
 
				             self.image = image
			
 
				         else:
			
 
				             raise TypeError(
			
 
				-                f"image 参数必须是 str (路径) 或 PIL.Image.Image 对象，"
			
 
				+                f"image 参数必须是 str (路径)、PIL.Image.Image 对象或 None，"
			
 
				                 f"实际类型: {type(image)}"
			
 
				             )
			
 
				         
			
 
				         self.ocr_data = ocr_data
			
 
				         
			
 
				         # 表格结构参数
			
 
				-        self.rows = []          # 行坐标列表 [(y_start, y_end), ...]
			
 
				-        self.columns = []       # 列坐标列表 [(x_start, x_end), ...]
			
 
				-        self.row_height = 0     # 标准行高
			
 
				-        self.col_widths = []    # 各列宽度
			
 
				+        self.rows = []
			
 
				+        self.columns = []
			
 
				+        self.row_height = 0
			
 
				+        self.col_widths = []
			
 
				+
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def parse_ocr_data(ocr_result: Dict, tool: str = "ppstructv3") -> Tuple[List[int], Dict]:
			
 
				+        """
			
 
				+        统一的 OCR 数据解析接口（第一步：仅读取数据）
			
 
				+        
			
 
				+        Args:
			
 
				+            ocr_result: OCR 识别结果（完整 JSON）
			
 
				+            tool: tool type, matched case-insensitively ("ppstructv3" / "ppstructure" / "mineru")
			
 
				+        
			
 
				+        Returns:
			
 
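				+            (table_bbox, ocr_data): table bounding box, and a dict holding 'table_bbox' and 'text_boxes' (MinerU input also adds 'actual_rows' / 'actual_cols')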
			
 
				+        """
			
 
				+        if tool.lower() == "mineru":
			
 
				+            return TableLineGenerator._parse_mineru_data(ocr_result)
			
 
				+        elif tool.lower() in ["ppstructv3", "ppstructure"]:
			
 
				+            return TableLineGenerator._parse_ppstructure_data(ocr_result)
			
 
				+        else:
			
 
				+            raise ValueError(f"不支持的工具类型: {tool}")
			
 
				     
			
 
				     @staticmethod
			
 
				-    def parse_ppstructure_result(ocr_result: Dict) -> Tuple[List[int], List[Dict]]:
			
 
				+    def _parse_mineru_data(mineru_result: Union[Dict, List]) -> Tuple[List[int], Dict]:
			
 
				+        """
			
 
				+        解析 MinerU 格式数据（仅提取数据，不分析结构）
			
 
				+        
			
 
				+        Args:
			
 
				+            mineru_result: MinerU 的完整 JSON 结果
			
 
				+            
			
 
				+        Returns:
			
 
				+            (table_bbox, ocr_data): table bounding box, and a dict with keys 'table_bbox', 'actual_rows', 'actual_cols', 'text_boxes'
			
 
				+        """
			
 
				+        # 🔑 提取 table 数据
			
 
				+        table_data = _extract_table_data(mineru_result)
			
 
				+        
			
 
				+        if not table_data:
			
 
				+            raise ValueError("未找到 MinerU 格式的表格数据 (type='table')")
			
 
				+        
			
 
				+        # 验证必要字段
			
 
				+        if 'table_cells' not in table_data:
			
 
				+            raise ValueError("表格数据中未找到 table_cells 字段")
			
 
				+        
			
 
				+        table_cells = table_data['table_cells']
			
 
				+        if not table_cells:
			
 
				+            raise ValueError("table_cells 为空")
			
 
				+        
			
 
				+        # 🔑 优先使用 table_body 确定准确的行列数
			
 
				+        if 'table_body' in table_data:
			
 
				+            actual_rows, actual_cols = _parse_table_body_structure(table_data['table_body'])
			
 
				+            print(f"📋 从 table_body 解析: {actual_rows} 行 × {actual_cols} 列")
			
 
				+        else:
			
 
				+            # 回退：从 table_cells 推断
			
 
				+            actual_rows = max(cell.get('row', 0) for cell in table_cells if 'row' in cell)
			
 
				+            actual_cols = max(cell.get('col', 0) for cell in table_cells if 'col' in cell)
			
 
				+            print(f"📋 从 table_cells 推断: {actual_rows} 行 × {actual_cols} 列")        
			
 
				+        if not table_data or 'table_cells' not in table_data:
			
 
				+            raise ValueError("未找到有效的 MinerU 表格数据")
			
 
				+        
			
 
				+        table_cells = table_data['table_cells']
			
 
				+        
			
 
				+        # 🔑 计算表格边界框
			
 
				+        all_bboxes = [cell['bbox'] for cell in table_cells if 'bbox' in cell]
			
 
				+        
			
 
				+        if all_bboxes:
			
 
				+            x_min = min(bbox[0] for bbox in all_bboxes)
			
 
				+            y_min = min(bbox[1] for bbox in all_bboxes)
			
 
				+            x_max = max(bbox[2] for bbox in all_bboxes)
			
 
				+            y_max = max(bbox[3] for bbox in all_bboxes)
			
 
				+            table_bbox = [x_min, y_min, x_max, y_max]
			
 
				+        else:
			
 
				+            table_bbox = table_data.get('bbox', [0, 0, 2000, 2000])
			
 
				+        
			
 
				+        # 按位置排序（从上到下，从左到右）
			
 
				+        table_cells.sort(key=lambda x: (x['bbox'][1], x['bbox'][0]))
			
 
				+        # 🔑 转换为统一的 ocr_data 格式
			
 
				+        ocr_data = {
			
 
				+            'table_bbox': table_bbox,
			
 
				+            'actual_rows': actual_rows,
			
 
				+            'actual_cols': actual_cols,
			
 
				+            'text_boxes': table_cells
			
 
				+        }
			
 
				+        
			
 
				+        print(f"📊 MinerU 数据解析完成: {len(table_cells)} 个文本框")
			
 
				+        
			
 
				+        return table_bbox, ocr_data
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _parse_ppstructure_data(ocr_result: Dict) -> Tuple[List[int], Dict]:
			
 
				         """
			
 
				-        解析 PPStructure V3 的 OCR 结果
			
 
				+        解析 PPStructure V3 格式数据
			
 
				         
			
 
				         Args:
			
 
				             ocr_result: PPStructure V3 的完整 JSON 结果
			
 
				         
			
 
				         Returns:
			
 
				-            (table_bbox, text_boxes): 表格边界框和文本框列表
			
 
				+            (table_bbox, ocr_data): table bounding box, and a dict with keys 'table_bbox' and 'text_boxes'
			
 
				         """
			
 
				         # 1. 从 parsing_res_list 中找到 table 区域
			
 
				         table_bbox = None
			
@@ -66,7 +154,7 @@ class TableLineGenerator:
 
				         if not table_bbox:
			
 
				             raise ValueError("未找到表格区域 (block_label='table')")
			
 
				         
			
 
				-        # 2. 从 overall_ocr_res 中提取文本框（使用 rec_boxes）
			
 
				+        # 2. 从 overall_ocr_res 中提取文本框
			
 
				         text_boxes = []
			
 
				         if 'overall_ocr_res' in ocr_result:
			
 
				             rec_boxes = ocr_result['overall_ocr_res'].get('rec_boxes', [])
			
@@ -75,7 +163,6 @@ class TableLineGenerator:
 
				             # 过滤出表格区域内的文本框
			
 
				             for i, bbox in enumerate(rec_boxes):
			
 
				                 if len(bbox) >= 4:
			
 
				-                    # bbox 格式: [x1, y1, x2, y2]
			
 
				                     x1, y1, x2, y2 = bbox[:4]
			
 
				                     
			
 
				                     # 判断文本框是否在表格区域内
			
@@ -85,39 +172,196 @@ class TableLineGenerator:
 
				                             'bbox': [int(x1), int(y1), int(x2), int(y2)],
			
 
				                             'text': rec_texts[i] if i < len(rec_texts) else ''
			
 
				                         })
			
 
				-            # 对text_boxes从上到下，从左到右排序
			
 
				-            text_boxes.sort(key=lambda x: (x['bbox'][1], x['bbox'][0]))
			
 
				         
			
 
				-        return table_bbox, text_boxes
			
 
				+        # 按位置排序
			
 
				+        text_boxes.sort(key=lambda x: (x['bbox'][1], x['bbox'][0]))
			
 
				+        
			
 
				+        print(f"📊 PPStructure 数据解析完成: {len(text_boxes)} 个文本框")
			
 
				+        ocr_data = {
			
 
				+            'table_bbox': table_bbox,
			
 
				+            'text_boxes': text_boxes
			
 
				+        }
			
 
				         
			
 
				+        return table_bbox, ocr_data
			
 
				+    
			
 
				+    # ==================== 统一接口：第二步 - 分析结构 ====================
			
 
				+    
			
 
				     def analyze_table_structure(self, 
			
 
				                                y_tolerance: int = 5,
			
 
				                                x_tolerance: int = 10,
			
 
				-                               min_row_height: int = 20) -> Dict:
			
 
				+                               min_row_height: int = 20,
			
 
				+                               method: str = "auto",
			
 
				+                               ) -> Dict:
			
 
				         """
			
 
				-        分析表格结构（行列分布）
			
 
				+        分析表格结构（支持多种算法）
			
 
				         
			
 
				         Args:
			
 
				             y_tolerance: Y轴聚类容差（像素）
			
 
				             x_tolerance: X轴聚类容差（像素）
			
 
				             min_row_height: 最小行高（像素）
			
 
				+            method: 分析方法 ("auto" / "cluster" / "mineru")
			
 
				+            (with method="auto", "mineru" is chosen when any text box carries both 'row' and 'col', otherwise "cluster")
			
 
				+        
			
 
				+        Returns:
			
 
				+            表格结构信息
			
 
				+        """
			
 
				+        if not self.ocr_data:
			
 
				+            return {}
			
 
				+        
			
 
				+        # 🔑 自动选择方法
			
 
				+        if method == "auto":
			
 
				+            # 根据数据特征自动选择
			
 
				+            has_cell_index = any('row' in item and 'col' in item for item in self.ocr_data.get('text_boxes', []))
			
 
				+            method = "mineru" if has_cell_index else "cluster"
			
 
				+            print(f"🤖 自动选择分析方法: {method}")
			
 
				+        
			
 
				+        # 🔑 根据方法选择算法
			
 
				+        if method == "mineru":
			
 
				+            return self._analyze_by_cell_index()
			
 
				+        else:
			
 
				+            return self._analyze_by_clustering(y_tolerance, x_tolerance, min_row_height)
			
 
				+
			
 
				+    def _analyze_by_cell_index(self) -> Dict:
			
 
				+        """
			
 
				+        基于单元格的 row/col 索引分析（MinerU 专用）
			
 
				         
			
 
				         Returns:
			
 
				-            表格结构信息，包含:
			
 
				-            - rows: 行区间列表
			
 
				-            - columns: 列区间列表
			
 
				-            - horizontal_lines: 横线Y坐标列表 [y1, y2, ..., y_{n+1}]
			
 
				-            - vertical_lines: 竖线X坐标列表 [x1, x2, ..., x_{m+1}]
			
 
				-            - row_height: 标准行高
			
 
				-            - col_widths: 各列宽度
			
 
				-            - table_bbox: 表格边界框
			
 
				+            表格结构信息
			
 
				         """
			
 
				         if not self.ocr_data:
			
 
				             return {}
			
 
				+
			
 
				+        # 🔑 确定实际行列数
			
 
				+        actual_rows = self.ocr_data.get('actual_rows', 0)
			
 
				+        actual_cols = self.ocr_data.get('actual_cols', 0)
			
 
				+        print(f"📋 检测到: {actual_rows} 行 × {actual_cols} 列")
			
 
				+
			
 
				+        ocr_data = self.ocr_data.get('text_boxes', [])
			
 
				+        
			
 
				+        # 🔑 按行列索引分组单元格
			
 
				+        cells_by_row = {}
			
 
				+        cells_by_col = {}
			
 
				+        
			
 
				+        for item in ocr_data:
			
 
				+            if 'row' not in item or 'col' not in item:
			
 
				+                continue
			
 
				+            
			
 
				+            row = item['row']
			
 
				+            col = item['col']
			
 
				+            bbox = item['bbox']
			
 
				+            
			
 
				+            if row <= actual_rows and col <= actual_cols:
			
 
				+                if row not in cells_by_row:
			
 
				+                    cells_by_row[row] = []
			
 
				+                cells_by_row[row].append(bbox)
			
 
				+                
			
 
				+                if col not in cells_by_col:
			
 
				+                    cells_by_col[col] = []
			
 
				+                cells_by_col[col].append(bbox)
			
 
				+        
			
 
				+        # 🔑 计算每行的 y 边界
			
 
				+        row_boundaries = {}
			
 
				+        for row_num in range(1, actual_rows + 1):
			
 
				+            if row_num in cells_by_row:
			
 
				+                bboxes = cells_by_row[row_num]
			
 
				+                y_min = min(bbox[1] for bbox in bboxes)
			
 
				+                y_max = max(bbox[3] for bbox in bboxes)
			
 
				+                row_boundaries[row_num] = (y_min, y_max)        
			
 
				+
			
 
				+        # 🔑 计算横线（现在使用的是过滤后的数据）
			
 
				+        horizontal_lines = _calculate_horizontal_lines_with_spacing(row_boundaries)
			
 
				+        
			
 
				+        # 🔑 列边界计算（同样需要过滤异常值）
			
 
				+        col_boundaries = {}
			
 
				+        for col_num in range(1, actual_cols + 1):
			
 
				+            if col_num in cells_by_col:
			
 
				+                bboxes = cells_by_col[col_num]
			
 
				+                
			
 
				+                # 🎯 过滤 x 方向的异常值（使用 IQR）
			
 
				+                if len(bboxes) > 1:
			
 
				+                    x_centers = [(bbox[0] + bbox[2]) / 2 for bbox in bboxes]
			
 
				+                    x_center_q1 = np.percentile(x_centers, 25)
			
 
				+                    x_center_q3 = np.percentile(x_centers, 75)
			
 
				+                    x_center_iqr = x_center_q3 - x_center_q1
			
 
				+                    x_center_median = np.median(x_centers)
			
 
				+                    
			
 
				+                    # 允许偏移 3 倍 IQR 或至少 100px
			
 
				+                    x_threshold = max(3 * x_center_iqr, 100)
			
 
				+                    
			
 
				+                    valid_bboxes = [
			
 
				+                        bbox for bbox in bboxes
			
 
				+                        if abs((bbox[0] + bbox[2]) / 2 - x_center_median) <= x_threshold
			
 
				+                    ]
			
 
				+                else:
			
 
				+                    valid_bboxes = bboxes
			
 
				+                
			
 
				+                if valid_bboxes:
			
 
				+                    x_min = min(bbox[0] for bbox in valid_bboxes)
			
 
				+                    x_max = max(bbox[2] for bbox in valid_bboxes)
			
 
				+                    col_boundaries[col_num] = (x_min, x_max)
			
 
				+    
			
 
				+        # 🔑 计算竖线
			
 
				+        vertical_lines = _calculate_vertical_lines_with_spacing(col_boundaries)
			
 
				+        
			
 
				+        # 🔑 生成行区间
			
 
				+        self.rows = []
			
 
				+        for row_num in sorted(row_boundaries.keys()):
			
 
				+            y_min, y_max = row_boundaries[row_num]
			
 
				+            self.rows.append({
			
 
				+                'y_start': y_min,
			
 
				+                'y_end': y_max,
			
 
				+                'bboxes': cells_by_row.get(row_num, []),
			
 
				+                'row_index': row_num
			
 
				+            })
			
 
				+        
			
 
				+        # 🔑 生成列区间
			
 
				+        self.columns = []
			
 
				+        for col_num in sorted(col_boundaries.keys()):
			
 
				+            x_min, x_max = col_boundaries[col_num]
			
 
				+            self.columns.append({
			
 
				+                'x_start': x_min,
			
 
				+                'x_end': x_max,
			
 
				+                'col_index': col_num
			
 
				+            })
			
 
				         
			
 
				+        # 计算行高和列宽
			
 
				+        self.row_height = int(np.median([r['y_end'] - r['y_start'] for r in self.rows])) if self.rows else 0
			
 
				+        self.col_widths = [c['x_end'] - c['x_start'] for c in self.columns]
			
 
				+        
			
 
				+        return {
			
 
				+            'rows': self.rows,
			
 
				+            'columns': self.columns,
			
 
				+            'horizontal_lines': horizontal_lines,
			
 
				+            'vertical_lines': vertical_lines,
			
 
				+            'row_height': self.row_height,
			
 
				+            'col_widths': self.col_widths,
			
 
				+            'table_bbox': self._get_table_bbox(),
			
 
				+            'total_rows': actual_rows,
			
 
				+            'total_cols': actual_cols,
			
 
				+            'mode': 'hybrid',  # ✅ 添加 mode 字段
			
 
				+            'modified_h_lines': [],  # ✅ 添加修改记录字段
			
 
				+            'modified_v_lines': []   # ✅ 添加修改记录字段
			
 
				+        }
			
 
				+    
			
 
				+    def _analyze_by_clustering(self, y_tolerance: int, x_tolerance: int, min_row_height: int) -> Dict:
			
 
				+        """
			
 
				+        基于坐标聚类分析（通用方法）
			
 
				+        
			
 
				+        Args:
			
 
				+            y_tolerance: Y轴聚类容差
			
 
				+            x_tolerance: X轴聚类容差
			
 
				+            min_row_height: 最小行高
			
 
				+        
			
 
				+        Returns:
			
 
				+            表格结构信息
			
 
				+        """
			
 
				+        if not self.ocr_data:
			
 
				+            return {}
			
 
				+
			
 
				+        ocr_data = self.ocr_data.get('text_boxes', [])
			
 
				         # 1. 提取所有bbox的Y坐标（用于行检测）
			
 
				         y_coords = []
			
 
				-        for item in self.ocr_data:
			
 
				+        for item in ocr_data:
			
 
				             bbox = item.get('bbox', [])
			
 
				             if len(bbox) >= 4:
			
 
				                 y1, y2 = bbox[1], bbox[3]
			
@@ -126,62 +370,80 @@ class TableLineGenerator:
 
				         # 按Y坐标排序
			
 
				         y_coords.sort(key=lambda x: x[0])
			
 
				         
			
 
				-        # 2. 聚类检测行（基于Y坐标相近的bbox）
			
 
				+        # 2. 聚类检测行
			
 
				         self.rows = self._cluster_rows(y_coords, y_tolerance, min_row_height)
			
 
				         
			
 
				-        # 3. 计算标准行高（中位数）
			
 
				+        # 3. 计算标准行高
			
 
				         row_heights = [row['y_end'] - row['y_start'] for row in self.rows]
			
 
				         self.row_height = int(np.median(row_heights)) if row_heights else 30
			
 
				         
			
 
				         # 4. 提取所有bbox的X坐标（用于列检测）
			
 
				         x_coords = []
			
 
				-        for item in self.ocr_data:
			
 
				+        for item in ocr_data:
			
 
				             bbox = item.get('bbox', [])
			
 
				             if len(bbox) >= 4:
			
 
				                 x1, x2 = bbox[0], bbox[2]
			
 
				                 x_coords.append((x1, x2))
			
 
				         
			
 
				-        # 5. 聚类检测列（基于X坐标相近的bbox）
			
 
				+        # 5. 聚类检测列
			
 
				         self.columns = self._cluster_columns(x_coords, x_tolerance)
			
 
				         
			
 
				-        # 6. 计算各列宽度
			
 
				+        # 6. 计算列宽
			
 
				         self.col_widths = [col['x_end'] - col['x_start'] for col in self.columns]
			
 
				         
			
 
				-        # 🆕 7. 生成横线坐标列表（共 n+1 条）
			
 
				+        # 7. 生成横线坐标
			
 
				         horizontal_lines = []
			
 
				         for row in self.rows:
			
 
				             horizontal_lines.append(row['y_start'])
			
 
				-        # 添加最后一条横线
			
 
				         if self.rows:
			
 
				             horizontal_lines.append(self.rows[-1]['y_end'])
			
 
				         
			
 
				-        # 🆕 8. 生成竖线坐标列表（共 m+1 条）
			
 
				+        # 8. 生成竖线坐标
			
 
				         vertical_lines = []
			
 
				         for col in self.columns:
			
 
				             vertical_lines.append(col['x_start'])
			
 
				-        # 添加最后一条竖线
			
 
				         if self.columns:
			
 
				             vertical_lines.append(self.columns[-1]['x_end'])
			
 
				         
			
 
				         return {
			
 
				             'rows': self.rows,
			
 
				             'columns': self.columns,
			
 
				-            'horizontal_lines': horizontal_lines,  # 🆕 横线Y坐标列表
			
 
				-            'vertical_lines': vertical_lines,      # 🆕 竖线X坐标列表
			
 
				+            'horizontal_lines': horizontal_lines,
			
 
				+            'vertical_lines': vertical_lines,
			
 
				             'row_height': self.row_height,
			
 
				             'col_widths': self.col_widths,
			
 
				-            'table_bbox': self._get_table_bbox()
			
 
				+            'table_bbox': self._get_table_bbox(),
			
 
				+            'mode': 'fixed',  # ✅ 添加 mode 字段
			
 
				+            'modified_h_lines': [],  # ✅ 添加修改记录字段
			
 
				+            'modified_v_lines': []   # ✅ 添加修改记录字段
			
 
				         }
			
 
				-    
			
 
				-    def _cluster_rows(self, y_coords: List[Tuple], tolerance: int, min_height: int) -> List[Dict]:
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def parse_mineru_table_result(mineru_result: Union[Dict, List], use_table_body: bool = True) -> Tuple[List[int], Dict]:
			
 
				+        """
			
 
				+        [已弃用] 建议使用 parse_ocr_data() + analyze_table_structure()
			
 
				+        
			
 
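				+        Kept only as a stub for backward compatibility: calling it emits a DeprecationWarning and then raises NotImplementedError.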
			
 
				+        """
			
 
				+        import warnings
			
 
				+        warnings.warn(
			
 
				+            "parse_mineru_table_result() 已弃用，请使用 "
			
 
				+            "parse_ocr_data() + analyze_table_structure()",
			
 
				+            DeprecationWarning
			
 
				+        )
			
 
				+        raise NotImplementedError( "parse_mineru_table_result() 已弃用，请使用 " "parse_ocr_data() + analyze_table_structure()")
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def parse_ppstructure_result(ocr_result: Dict) -> Tuple[List[int], Dict]:
			
 
				         """
			
 
				-        聚类检测行
			
 
				+        [推荐] 解析 PPStructure V3 的 OCR 结果
			
 
				         
			
 
				-        策略：
			
 
				-        1. 按Y坐标排序
			
 
				-        2. 相近的Y坐标（容差内）归为同一行
			
 
				-        3. 过滤掉高度过小的行
			
 
				+        这是第一步操作，建议继续使用
			
 
				         """
			
 
				+        return TableLineGenerator._parse_ppstructure_data(ocr_result)
			
 
				+        
			
 
				+    def _cluster_rows(self, y_coords: List[Tuple], tolerance: int, min_height: int) -> List[Dict]:
			
 
				+        """聚类检测行"""
			
 
				         if not y_coords:
			
 
				             return []
			
 
				         
			
@@ -195,43 +457,30 @@ class TableLineGenerator:
 
				         for i in range(1, len(y_coords)):
			
 
				             y1, y2, bbox = y_coords[i]
			
 
				             
			
 
				-            # 判断是否属于当前行（Y坐标相近）
			
 
				             if abs(y1 - current_row['y_start']) <= tolerance:
			
 
				-                # 更新行的Y范围
			
 
				                 current_row['y_start'] = min(current_row['y_start'], y1)
			
 
				                 current_row['y_end'] = max(current_row['y_end'], y2)
			
 
				                 current_row['bboxes'].append(bbox)
			
 
				             else:
			
 
				-                # 保存当前行（如果高度足够）
			
 
				                 if current_row['y_end'] - current_row['y_start'] >= min_height:
			
 
				                     rows.append(current_row)
			
 
				                 
			
 
				-                # 开始新行
			
 
				                 current_row = {
			
 
				                     'y_start': y1,
			
 
				                     'y_end': y2,
			
 
				                     'bboxes': [bbox]
			
 
				                 }
			
 
				         
			
 
				-        # 保存最后一行
			
 
				         if current_row['y_end'] - current_row['y_start'] >= min_height:
			
 
				             rows.append(current_row)
			
 
				         
			
 
				         return rows
			
 
				     
			
 
				     def _cluster_columns(self, x_coords: List[Tuple], tolerance: int) -> List[Dict]:
			
 
				-        """
			
 
				-        聚类检测列
			
 
				-        
			
 
				-        策略：
			
 
				-        1. 提取所有bbox的左边界和右边界
			
 
				-        2. 聚类相近的X坐标
			
 
				-        3. 生成列分界线
			
 
				-        """
			
 
				+        """聚类检测列"""
			
 
				         if not x_coords:
			
 
				             return []
			
 
				         
			
 
				-        # 提取所有X坐标（左边界和右边界）
			
 
				         all_x = []
			
 
				         for x1, x2 in x_coords:
			
 
				             all_x.append(x1)
			
@@ -239,19 +488,16 @@ class TableLineGenerator:
 
				         
			
 
				         all_x = sorted(set(all_x))
			
 
				         
			
 
				-        # 聚类X坐标
			
 
				         columns = []
			
 
				         current_x = all_x[0]
			
 
				         
			
 
				         for x in all_x[1:]:
			
 
				             if x - current_x > tolerance:
			
 
				-                # 新列开始
			
 
				                 columns.append(current_x)
			
 
				                 current_x = x
			
 
				         
			
 
				         columns.append(current_x)
			
 
				         
			
 
				-        # 生成列区间
			
 
				         column_regions = []
			
 
				         for i in range(len(columns) - 1):
			
 
				             column_regions.append({
			
@@ -276,117 +522,259 @@ class TableLineGenerator:
 
				     def generate_table_lines(self, 
			
 
				                             line_color: Tuple[int, int, int] = (0, 0, 255),
			
 
				                             line_width: int = 2) -> Image.Image:
			
 
				-        """
			
 
				-        在原图上绘制表格线
			
 
				-        
			
 
				-        Args:
			
 
				-            line_color: 线条颜色 (R, G, B)
			
 
				-            line_width: 线条宽度
			
 
				+        """在原图上绘制表格线"""
			
 
				+        if self.image is None:
			
 
				+            raise ValueError(
			
 
				+                "无图片模式下不能调用 generate_table_lines()，"
			
 
				+                "请在初始化时提供图片"
			
 
				+            )
			
 
				         
			
 
				-        Returns:
			
 
				-            绘制了表格线的图片
			
 
				-        """
			
 
				-        # 复制原图
			
 
				         img_with_lines = self.image.copy()
			
 
				         draw = ImageDraw.Draw(img_with_lines)
			
 
				         
			
 
				-        # 🔧 简化：使用行列区间而不是重复计算
			
 
				         x_start = self.columns[0]['x_start'] if self.columns else 0
			
 
				         x_end = self.columns[-1]['x_end'] if self.columns else img_with_lines.width
			
 
				         y_start = self.rows[0]['y_start'] if self.rows else 0
			
 
				         y_end = self.rows[-1]['y_end'] if self.rows else img_with_lines.height
			
 
				         
			
 
				-        # 绘制横线（包括最后一条）
			
 
				+        # 绘制横线
			
 
				         for row in self.rows:
			
 
				             y = row['y_start']
			
 
				             draw.line([(x_start, y), (x_end, y)], fill=line_color, width=line_width)
			
 
				         
			
 
				-        # 绘制最后一条横线
			
 
				         if self.rows:
			
 
				             y = self.rows[-1]['y_end']
			
 
				             draw.line([(x_start, y), (x_end, y)], fill=line_color, width=line_width)
			
 
				         
			
 
				-        # 绘制竖线（包括最后一条）
			
 
				+        # 绘制竖线
			
 
				         for col in self.columns:
			
 
				             x = col['x_start']
			
 
				             draw.line([(x, y_start), (x, y_end)], fill=line_color, width=line_width)
			
 
				         
			
 
				-        # 绘制最后一条竖线
			
 
				         if self.columns:
			
 
				             x = self.columns[-1]['x_end']
			
 
				             draw.line([(x, y_start), (x, y_end)], fill=line_color, width=line_width)
			
 
				         
			
 
				         return img_with_lines
			
 
				-    
			
 
				-    def save_table_structure(self, output_path: str):
			
 
				-        """保存表格结构配置（用于应用到其他页）"""
			
 
				-        structure = {
			
 
				-            'row_height': self.row_height,
			
 
				-            'col_widths': self.col_widths,
			
 
				-            'columns': self.columns,
			
 
				-            'first_row_y': self.rows[0]['y_start'] if self.rows else 0,
			
 
				-            'table_bbox': self._get_table_bbox()
			
 
				-        }
			
 
				-        
			
 
				-        with open(output_path, 'w', encoding='utf-8') as f:
			
 
				-            json.dump(structure, f, indent=2, ensure_ascii=False)
			
 
				-        
			
 
				-        return structure
			
 
				-    
			
 
				-    def apply_structure_to_image(self, 
			
 
				-                                target_image: Union[str, Image.Image],
			
 
				-                                structure: Dict,
			
 
				-                                output_path: str) -> str:
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def analyze_structure_only(
			
 
				+        ocr_data: Dict,
			
 
				+        y_tolerance: int = 5,
			
 
				+        x_tolerance: int = 10,
			
 
				+        min_row_height: int = 20,
			
 
				+        method: str = "auto"
			
 
				+    ) -> Dict:
			
 
				         """
			
 
				-        将表格结构应用到其他页
			
 
				+        仅分析表格结构（无需图片）
			
 
				         
			
 
				         Args:
			
 
				-            target_image: 目标图片路径(str) 或 PIL.Image 对象
			
 
				-            structure: 表格结构配置
			
 
				-            output_path: 输出路径
			
 
				+            ocr_data: OCR识别结果
			
 
				+            y_tolerance: Y轴聚类容差（像素）
			
 
				+            x_tolerance: X轴聚类容差（像素）
			
 
				+            min_row_height: 最小行高（像素）
			
 
				+            method: 分析方法 ("auto" / "cluster" / "mineru")
			
 
				         
			
 
				         Returns:
			
 
				-            生成的有线表格图片路径
			
 
				+            表格结构信息
			
 
				         """
			
 
				-        # 🔧 修改：支持传入 Image 对象或路径
			
 
				-        if isinstance(target_image, str):
			
 
				-            target_img = Image.open(target_image)
			
 
				-        elif isinstance(target_image, Image.Image):
			
 
				-            target_img = target_image
			
 
				-        else:
			
 
				-            raise TypeError(
			
 
				-                f"target_image 参数必须是 str (路径) 或 PIL.Image.Image 对象，"
			
 
				-                f"实际类型: {type(target_image)}"
			
 
				-            )
			
 
				-        
			
 
				-        draw = ImageDraw.Draw(target_img)
			
 
				+        # 🔑 创建无图片模式的生成器
			
 
				+        temp_generator = TableLineGenerator(None, ocr_data)
			
 
				+        
			
 
				+        # 🔑 分析结构
			
 
				+        return temp_generator.analyze_table_structure(
			
 
				+            y_tolerance=y_tolerance,
			
 
				+            x_tolerance=x_tolerance,
			
 
				+            min_row_height=min_row_height,
			
 
				+            method=method
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+def _calculate_horizontal_lines_with_spacing(row_boundaries: Dict[int, Tuple[int, int]]) -> List[int]:
			
 
				+    """
			
 
				+    计算横线位置（考虑行间距）
			
 
				+    
			
 
				+    Args:
			
 
				+        row_boundaries: {row_num: (y_min, y_max)}
			
 
				+        
			
 
				+    Returns:
			
 
				+        横线 y 坐标列表
			
 
				+    """
			
 
				+    if not row_boundaries:
			
 
				+        return []
			
 
				+    
			
 
				+    sorted_rows = sorted(row_boundaries.items())
			
 
				+    
			
 
				+    # 🔑 分析相邻行之间的间隔
			
 
				+    gaps = []
			
 
				+    gap_info = []  # 保存详细信息用于调试
			
 
				+    
			
 
				+    for i in range(len(sorted_rows) - 1):
			
 
				+        row_num1, (y_min1, y_max1) = sorted_rows[i]
			
 
				+        row_num2, (y_min2, y_max2) = sorted_rows[i + 1]
			
 
				+        gap = y_min2 - y_max1  # 行间距（可能为负，表示重叠）
			
 
				+        
			
 
				+        gaps.append(gap)
			
 
				+        gap_info.append({
			
 
				+            'row1': row_num1,
			
 
				+            'row2': row_num2,
			
 
				+            'gap': gap
			
 
				+        })
			
 
				+    
			
 
				+    print(f"📏 行间距详情:")
			
 
				+    for info in gap_info:
			
 
				+        status = "重叠" if info['gap'] < 0 else "正常"
			
 
				+        print(f"   行 {info['row1']} → {info['row2']}: {info['gap']:.1f}px ({status})")
			
 
				+    
			
 
				+    # 🔑 过滤掉负数 gap（重叠情况）和极小的 gap
			
 
				+    valid_gaps = [g for g in gaps if g > 2]  # 至少 2px 间隔才算有效
			
 
				+    
			
 
				+    if valid_gaps:
			
 
				+        gap_median = np.median(valid_gaps)
			
 
				+        gap_std = np.std(valid_gaps)
			
 
				         
			
 
				-        row_height = structure['row_height']
			
 
				-        col_widths = structure['col_widths']
			
 
				-        columns = structure['columns']
			
 
				-        first_row_y = structure['first_row_y']
			
 
				-        table_bbox = structure['table_bbox']
			
 
				+        print(f"📏 行间距统计: 中位数={gap_median:.1f}px, 标准差={gap_std:.1f}px")
			
 
				+        print(f"   有效间隔数: {len(valid_gaps)}/{len(gaps)}")
			
 
				+    
			
 
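				+    # 🔑 Generate horizontal line coordinates (each separator is placed just above the next row, not at the midpoint between rows)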
			
 
				+    horizontal_lines = []
			
 
				+    
			
 
				+    for i, (row_num, (y_min, y_max)) in enumerate(sorted_rows):
			
 
				+        if i == 0:
			
 
				+            # 第一行的上边界
			
 
				+            horizontal_lines.append(y_min)
			
 
				+        
			
 
				+        if i < len(sorted_rows) - 1:
			
 
				+            next_row_num, (next_y_min, next_y_max) = sorted_rows[i + 1]
			
 
				+            gap = next_y_min - y_max
			
 
				+            
			
 
				+            if gap > 0:
			
 
				+                # Gap present: the midpoint placement on the next line is disabled (kept for reference)
			
 
				+                # separator_y = int((y_max + next_y_min) / 2)
			
 
				+                # 有间隔：更靠近下一行的位置
			
 
				+                separator_y = int(next_y_min) - max(int(gap / 4), 2)
			
 
				+                horizontal_lines.append(separator_y)
			
 
				+            else:
			
 
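				+                # Overlapping or touching rows: gap <= 0, so max(int(gap / 4), 2) is 2 and the line lands 2px above the next row's top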
			
 
				+                separator_y = int(next_y_min) - max(int(gap / 4), 2)
			
 
				+                horizontal_lines.append(separator_y)
			
 
				+        else:
			
 
				+            # 最后一行的下边界
			
 
				+            horizontal_lines.append(y_max)
			
 
				+    
			
 
				+    return sorted(set(horizontal_lines))
			
 
				+
			
 
				+
			
 
				+def _calculate_vertical_lines_with_spacing(col_boundaries: Dict[int, Tuple[int, int]]) -> List[int]:
			
 
				+    """
			
 
				+    计算竖线位置（考虑列间距和重叠）
			
 
				+    
			
 
				+    Args:
			
 
				+        col_boundaries: {col_num: (x_min, x_max)}
			
 
				+        
			
 
				+    Returns:
			
 
				+        竖线 x 坐标列表
			
 
				+    """
			
 
				+    if not col_boundaries:
			
 
				+        return []
			
 
				+    
			
 
				+    sorted_cols = sorted(col_boundaries.items())
			
 
				+    
			
 
				+    # 🔑 分析相邻列之间的间隔
			
 
				+    gaps = []
			
 
				+    gap_info = []
			
 
				+    
			
 
				+    for i in range(len(sorted_cols) - 1):
			
 
				+        col_num1, (x_min1, x_max1) = sorted_cols[i]
			
 
				+        col_num2, (x_min2, x_max2) = sorted_cols[i + 1]
			
 
				+        gap = x_min2 - x_max1  # 列间距（可能为负）
			
 
				+        
			
 
				+        gaps.append(gap)
			
 
				+        gap_info.append({
			
 
				+            'col1': col_num1,
			
 
				+            'col2': col_num2,
			
 
				+            'gap': gap
			
 
				+        })
			
 
				+    
			
 
				+    print(f"📏 列间距详情:")
			
 
				+    for info in gap_info:
			
 
				+        status = "重叠" if info['gap'] < 0 else "正常"
			
 
				+        print(f"   列 {info['col1']} → {info['col2']}: {info['gap']:.1f}px ({status})")
			
 
				+    
			
 
				+    # 🔑 Keep only gaps larger than 2px (drops overlapping and near-touching columns)
			
 
				+    valid_gaps = [g for g in gaps if g > 2]
			
 
				+    
			
 
				+    if valid_gaps:
			
 
				+        gap_median = np.median(valid_gaps)
			
 
				+        gap_std = np.std(valid_gaps)
			
 
				+        print(f"📏 列间距统计: 中位数={gap_median:.1f}px, 标准差={gap_std:.1f}px")
			
 
				+    
			
 
				+    # 🔑 生成竖线坐标（在相邻列中间）
			
 
				+    vertical_lines = []
			
 
				+    
			
 
				+    for i, (col_num, (x_min, x_max)) in enumerate(sorted_cols):
			
 
				+        if i == 0:
			
 
				+            # 第一列的左边界
			
 
				+            vertical_lines.append(x_min)
			
 
				+        
			
 
				+        if i < len(sorted_cols) - 1:
			
 
				+            next_col_num, (next_x_min, next_x_max) = sorted_cols[i + 1]
			
 
				+            gap = next_x_min - x_max
			
 
				+            
			
 
				+            if gap > 0:
			
 
				+                # 有间隔：在间隔中间画线
			
 
				+                separator_x = int((x_max + next_x_min) / 2)
			
 
				+                vertical_lines.append(separator_x)
			
 
				+            else:
			
 
				+                # 重叠或紧贴：在当前列的右边界画线
			
 
				+                vertical_lines.append(x_max)
			
 
				+        else:
			
 
				+            # 最后一列的右边界
			
 
				+            vertical_lines.append(x_max)
			
 
				+    
			
 
				+    return sorted(set(vertical_lines))
			
 
				+
			
 
				+
			
 
				+def _extract_table_data(mineru_result: Union[Dict, List]) -> Optional[Dict]:
			
 
				+    """提取 table 数据"""
			
 
				+    if isinstance(mineru_result, list):
			
 
				+        for item in mineru_result:
			
 
				+            if isinstance(item, dict) and item.get('type') == 'table':
			
 
				+                return item
			
 
				+    elif isinstance(mineru_result, dict):
			
 
				+        if mineru_result.get('type') == 'table':
			
 
				+            return mineru_result
			
 
				+        # 递归查找
			
 
				+        for value in mineru_result.values():
			
 
				+            if isinstance(value, dict) and value.get('type') == 'table':
			
 
				+                return value
			
 
				+            elif isinstance(value, list):
			
 
				+                result = _extract_table_data(value)
			
 
				+                if result:
			
 
				+                    return result
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+def _parse_table_body_structure(table_body: str) -> Tuple[int, int]:
			
 
				+    """从 table_body HTML 中解析准确的行列数"""
			
 
				+    try:
			
 
				+        soup = BeautifulSoup(table_body, 'html.parser')
			
 
				+        table = soup.find('table')
			
 
				         
			
 
				-        # 计算行数（根据图片高度）
			
 
				-        num_rows = int((target_img.height - first_row_y) / row_height)
			
 
				+        if not table:
			
 
				+            raise ValueError("未找到 <table> 标签")
			
 
				         
			
 
				-        # 绘制横线
			
 
				-        for i in range(num_rows + 1):
			
 
				-            y = first_row_y + i * row_height
			
 
				-            draw.line([(table_bbox[0], y), (table_bbox[2], y)], 
			
 
				-                     fill=(0, 0, 255), width=2)
			
 
				+        rows = table.find_all('tr')
			
 
				+        if not rows:
			
 
				+            raise ValueError("未找到 <tr> 标签")
			
 
				         
			
 
				-        # 绘制竖线
			
 
				-        for col in columns:
			
 
				-            x = col['x_start']
			
 
				-            draw.line([(x, first_row_y), (x, first_row_y + num_rows * row_height)],
			
 
				-                     fill=(0, 0, 255), width=2)
			
 
				+        num_rows = len(rows)
			
 
				+        first_row = rows[0]
			
 
				+        num_cols = len(first_row.find_all(['td', 'th']))
			
 
				         
			
 
				-        # 绘制最后一条竖线
			
 
				-        x = columns[-1]['x_end']
			
 
				-        draw.line([(x, first_row_y), (x, first_row_y + num_rows * row_height)],
			
 
				-                 fill=(0, 0, 255), width=2)
			
 
				+        return num_rows, num_cols
			
 
				         
			
 
				-        # 保存
			
 
				-        target_img.save(output_path)
			
 
				-        return output_path
			
 
				+    except Exception as e:
			
 
				+        print(f"⚠️ 解析 table_body 失败: {e}")
			
 
				+        return 0, 0
			
 
				+
			
--- a/table_line_generator/table_line_generator.yaml
+++ b/table_line_generator/table_line_generator.yaml
@@ -28,7 +28,22 @@ table_editor:
 
				         rgb: [255, 0, 0]
			
 
				 
			
 
				   data_sources:
			
 
				+    - name: "B用户_扫描流水"
			
 
				+      tool: "mineru"  # 格式同 MinerU
			
 
				+      base_dir: "/Users/zhch158/workspace/data/流水分析"
			
 
				+      json_dir: "{{name}}/mineru_vllm_results_cell_bbox"
			
 
				+      image_dir: "{{name}}/mineru_vllm_results/{{name}}"
			
 
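				+      # json_pattern matches JSON file names of the form {{ name }}_page_NNN.json: the template variable {{ name }} is replaced with the actual prefix at runtime, "_page_" is a literal, and the named group (?P<page>\d{3}) requires exactly three digits and captures them as "page".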
			
 
				+      json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
			
 
				+      image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
			
 
				+      sort_key: "page"
			
 
				+      output:
			
 
				+        directory: "{{base_dir}}/{{ name }}.wiredtable"
			
 
				+        structure_suffix: "_structure.json"
			
 
				+        image_suffix: ".png"
			
 
				+
			
 
				     - name: "康强_北京农村商业银行"
			
 
				+      tool: "ppstructv3"
			
 
				       base_dir: "/Users/zhch158/workspace/data/流水分析"
			
 
				       json_dir: "{{name}}/ppstructurev3_client_results"
			
 
				       image_dir: "{{name}}/ppstructurev3_client_results/{{name}}"
			
@@ -40,6 +55,3 @@ table_editor:
 
				         directory: "{{base_dir}}/{{ name }}.wiredtable"
			
 
				         structure_suffix: "_structure.json"
			
 
				         image_suffix: ".png"
			
 
				-    # - name: "示例文档"
			
 
				-    #   json_dir: "../demo/json"
			
 
				-    #   image_dir: "../demo/img"
			
--- a/table_line_generator/table_template_applier.py
+++ b/table_line_generator/table_template_applier.py
@@ -9,63 +9,60 @@ from PIL import Image, ImageDraw
 
				 from typing import Dict, List, Tuple
			
 
				 import numpy as np
			
 
				 import argparse
			
 
				+import sys
			
 
				+
			
 
				+# 添加父目录到路径
			
 
				+sys.path.insert(0, str(Path(__file__).parent))
			
 
				 
			
 
				 try:
			
 
				+    from editor.data_processor import get_structure_from_ocr
			
 
				     from table_line_generator import TableLineGenerator
			
 
				 except ImportError:
			
 
				+    from .editor.data_processor import get_structure_from_ocr
			
 
				     from .table_line_generator import TableLineGenerator
			
 
				 
			
 
				 
			
 
				 class TableTemplateApplier:
			
 
				-    """表格模板应用器"""
			
 
				+    """表格模板应用器（混合模式）"""
			
 
				     
			
 
				     def __init__(self, template_config_path: str):
			
 
				-        """
			
 
				-        初始化模板应用器
			
 
				-        
			
 
				-        Args:
			
 
				-            template_config_path: 模板配置文件路径（人工标注的结果）
			
 
				-        """
			
 
				+        """初始化时只提取列信息和表头信息"""
			
 
				         with open(template_config_path, 'r', encoding='utf-8') as f:
			
 
				             self.template = json.load(f)
			
 
				         
			
 
				-        # 🎯 从标注结果提取固定参数
			
 
				+        # ✅ 只提取列宽（固定）
			
 
				         self.col_widths = self.template['col_widths']
			
 
				         
			
 
				-        # 🔧 计算数据行的标准行高（排除表头）
			
 
				-        rows = self.template['rows']
			
 
				-        if len(rows) > 1:
			
 
				-            # 计算每行的实际高度
			
 
				-            row_heights = [row['y_end'] - row['y_start'] for row in rows]
			
 
				-            
			
 
				-            # 🎯 假设第一行是表头，从第二行开始计算
			
 
				-            data_row_heights = row_heights[1:] if len(row_heights) > 1 else row_heights
			
 
				-            
			
 
				-            # 使用中位数作为标准行高（更稳健）
			
 
				-            self.row_height = int(np.median(data_row_heights))
			
 
				-            self.header_height = row_heights[0] if row_heights else self.row_height
			
 
				-            
			
 
				-            print(f"📏 表头高度: {self.header_height}px")
			
 
				-            print(f"📏 数据行高度: {self.row_height}px")
			
 
				-            print(f"   （从 {len(data_row_heights)} 行数据中计算，中位数）")
			
 
				-        else:
			
 
				-            # 兜底方案
			
 
				-            self.row_height = self.template.get('row_height', 60)
			
 
				-            self.header_height = self.row_height
			
 
				-        
			
 
				-        # 🎯 计算列的相对位置（从第一列开始的偏移量）
			
 
				+        # ✅ 计算列的相对位置
			
 
				         self.col_offsets = [0]
			
 
				         for width in self.col_widths:
			
 
				             self.col_offsets.append(self.col_offsets[-1] + width)
			
 
				         
			
 
				-        # 🎯 提取表头的Y坐标（作为参考）
			
 
				-        self.template_header_y = rows[0]['y_start'] if rows else 0
			
 
				+        # ✅ 提取表头高度（通常固定）
			
 
				+        rows = self.template['rows']
			
 
				+        if rows:
			
 
				+            self.header_height = rows[0]['y_end'] - rows[0]['y_start']
			
 
				+        else:
			
 
				+            self.header_height = 40
			
 
				+        
			
 
				+        # ✅ 计算数据行高度（用于固定行高模式）
			
 
				+        if len(rows) > 1:
			
 
				+            data_row_heights = [row['y_end'] - row['y_start'] for row in rows[1:]]
			
 
				+            # 使用中位数作为典型行高
			
 
				+            self.row_height = int(np.median(data_row_heights)) if data_row_heights else 40
			
 
				+            # 兜底行高（同样使用中位数）
			
 
				+            self.fallback_row_height = self.row_height
			
 
				+        else:
			
 
				+            # 如果只有表头，使用默认值
			
 
				+            self.row_height = 40
			
 
				+            self.fallback_row_height = 40
			
 
				         
			
 
				         print(f"\n✅ 加载模板配置:")
			
 
				-        print(f"   表头高度: {self.header_height}px")
			
 
				-        print(f"   数据行高度: {self.row_height}px")
			
 
				         print(f"   列数: {len(self.col_widths)}")
			
 
				         print(f"   列宽: {self.col_widths}")
			
 
				+        print(f"   表头高度: {self.header_height}px")
			
 
				+        print(f"   数据行高: {self.row_height}px (用于固定行高模式)")
			
 
				+        print(f"   兜底行高: {self.fallback_row_height}px (OCR失败时使用)")
			
 
				     
			
 
				     def detect_table_anchor(self, ocr_data: List[Dict]) -> Tuple[int, int]:
			
 
				         """
			
@@ -128,14 +125,14 @@ class TableTemplateApplier:
 
				         
			
 
				         return total_rows
			
 
				     
			
 
				-    def apply_to_image(self, 
			
 
				+    def apply_template_fixed(self, 
			
 
				                        image: Image.Image,
			
 
				                        ocr_data: List[Dict],
			
 
				                        anchor_x: int = None,
			
 
				                        anchor_y: int = None,
			
 
				                        num_rows: int = None,
			
 
				                        line_width: int = 2,
			
 
				-                       line_color: Tuple[int, int, int] = (0, 0, 0)) -> Image.Image:
			
 
				+                       line_color: Tuple[int, int, int] = (0, 0, 0)) -> Tuple[Image.Image, Dict]:
			
 
				         """
			
 
				         将模板应用到图片
			
 
				         
			
@@ -202,62 +199,208 @@ class TableTemplateApplier:
 
				         y_end = horizontal_lines[-1]
			
 
				         for x in vertical_lines:
			
 
				             draw.line([(x, y_start), (x, y_end)], fill=line_color, width=line_width)
			
 
				+
			
 
				+        print(f"✅ 表格绘制完成: {len(horizontal_lines)}行 × {len(vertical_lines)-1}列")
			
 
				+
			
 
				+                # 🔑 生成结构信息
			
 
				+        structure = self._build_structure(
			
 
				+            horizontal_lines, 
			
 
				+            vertical_lines, 
			
 
				+            anchor_x, 
			
 
				+            anchor_y,
			
 
				+            mode='fixed'
			
 
				+        )
			
 
				         
			
 
				-        return img_with_lines
			
 
				-    
			
 
				-    def generate_structure_for_image(self,
			
 
				-                                    ocr_data: List[Dict],
			
 
				-                                    anchor_x: int = None,
			
 
				-                                    anchor_y: int = None,
			
 
				-                                    num_rows: int = None) -> Dict:
			
 
				+        return img_with_lines, structure
			
 
				+    
			
 
				+    def apply_template_hybrid(self,
			
 
				+                             image: Image.Image,
			
 
				+                             ocr_data_dict: Dict,
			
 
				+                             use_ocr_rows: bool = True,
			
 
				+                             anchor_x: int = None,
			
 
				+                             anchor_y: int = None,
			
 
				+                             y_tolerance: int = 5,
			
 
				+                             line_width: int = 2,
			
 
				+                             line_color: Tuple[int, int, int] = (0, 0, 0)) -> Tuple[Image.Image, Dict]:
			
 
				         """
			
 
				-        为新图片生成表格结构配置
			
 
				+        混合模式：使用模板的列 + OCR的行
			
 
				         
			
 
				         Args:
			
 
				-            ocr_data: OCR识别结果
			
 
				+            image: 目标图片
			
 
				+            ocr_data: OCR识别结果（用于检测行）
			
 
				+            use_ocr_rows: 是否使用OCR检测的行（True=自适应行高）
			
 
				             anchor_x: 表格起始X坐标（None=自动检测）
			
 
				             anchor_y: 表头起始Y坐标（None=自动检测）
			
 
				-            num_rows: 总行数（None=自动检测）
			
 
				+            y_tolerance: Y轴聚类容差（像素）
			
 
				+            line_width: 线条宽度
			
 
				+            line_color: 线条颜色
			
 
				         
			
 
				         Returns:
			
 
				-            表格结构配置
			
 
				+            绘制了表格线的图片, 结构信息
			
 
				         """
			
 
				+        img_with_lines = image.copy()
			
 
				+        draw = ImageDraw.Draw(img_with_lines)
			
 
				+        
			
 
				+        ocr_data = ocr_data_dict.get('text_boxes', [])
			
 
				+        
			
 
				         # 🔍 自动检测锚点
			
 
				         if anchor_x is None or anchor_y is None:
			
 
				             detected_x, detected_y = self.detect_table_anchor(ocr_data)
			
 
				             anchor_x = anchor_x or detected_x
			
 
				             anchor_y = anchor_y or detected_y
			
 
				         
			
 
				-        # 🔍 自动检测行数
			
 
				-        if num_rows is None:
			
 
				-            num_rows = self.detect_table_rows(ocr_data, anchor_y)
			
 
				+        print(f"\n📍 表格锚点: ({anchor_x}, {anchor_y})")
			
 
				         
			
 
				-        # 🎨 生成横线坐标
			
 
				-        horizontal_lines = []
			
 
				-        horizontal_lines.append(anchor_y)
			
 
				+        # ✅ 竖线：使用模板的列宽（固定）
			
 
				+        vertical_lines = [anchor_x + offset for offset in self.col_offsets]
			
 
				+        print(f"📏 竖线坐标: {vertical_lines} (使用模板，共{len(vertical_lines)}条)")
			
 
				+        
			
 
				+        # ✅ 横线：根据模式选择
			
 
				+        if use_ocr_rows and ocr_data:
			
 
				+            horizontal_lines = self._detect_rows_from_ocr(
			
 
				+                ocr_data, anchor_y, y_tolerance
			
 
				+            )
			
 
				+            print(f"📏 横线坐标: 使用OCR检测 (共{len(horizontal_lines)}条，自适应行高)")
			
 
				+        else:
			
 
				+            num_rows = self.detect_table_rows(ocr_data, anchor_y) if ocr_data else 10
			
 
				+            horizontal_lines = self._generate_fixed_rows(anchor_y, num_rows)
			
 
				+            print(f"📏 横线坐标: 使用固定行高 (共{len(horizontal_lines)}条)")
			
 
				+        
			
 
				+        # 🖊️ 绘制横线
			
 
				+        x_start = vertical_lines[0]
			
 
				+        x_end = vertical_lines[-1]
			
 
				+        for y in horizontal_lines:
			
 
				+            draw.line([(x_start, y), (x_end, y)], fill=line_color, width=line_width)
			
 
				+        
			
 
				+        # 🖊️ 绘制竖线
			
 
				+        y_start = horizontal_lines[0]
			
 
				+        y_end = horizontal_lines[-1]
			
 
				+        for x in vertical_lines:
			
 
				+            draw.line([(x, y_start), (x, y_end)], fill=line_color, width=line_width)
			
 
				+        
			
 
				+        print(f"✅ 表格绘制完成: {len(horizontal_lines)}行 × {len(vertical_lines)-1}列")
			
 
				+        
			
 
				+        # 🔑 生成结构信息
			
 
				+        structure = self._build_structure(
			
 
				+            horizontal_lines, 
			
 
				+            vertical_lines, 
			
 
				+            anchor_x, 
			
 
				+            anchor_y,
			
 
				+            mode='hybrid'
			
 
				+        )
			
 
				+        
			
 
				+        return img_with_lines, structure
			
 
				+
			
 
				+    def _detect_rows_from_ocr(self, 
			
 
				+                              ocr_data: List[Dict], 
			
 
				+                              anchor_y: int,
			
 
				+                              y_tolerance: int = 5) -> List[int]:
			
 
				+        """
			
 
				+        从OCR结果中检测行（自适应行高）
			
 
				+        复用 get_structure_from_ocr 统一接口
			
 
				+        
			
 
				+        Args:
			
 
				+            ocr_data: OCR识别结果（MinerU 格式的 text_boxes）
			
 
				+            anchor_y: 表格起始Y坐标
			
 
				+            y_tolerance: Y轴聚类容差（未使用，保留参数兼容性）
			
 
				+        
			
 
				+        Returns:
			
 
				+            横线 y 坐标列表
			
 
				+        """
			
 
				+        if not ocr_data:
			
 
				+            return [anchor_y, anchor_y + self.header_height]
			
 
				+        
			
 
				+        print(f"\n🔍 OCR行检测 (使用 MinerU 算法):")
			
 
				+        print(f"   有效文本框数: {len(ocr_data)}")
			
 
				+        
			
 
				+        # 🔑 验证是否为 MinerU 格式
			
 
				+        has_cell_index = any('row' in item and 'col' in item for item in ocr_data)
			
 
				+        
			
 
				+        if not has_cell_index:
			
 
				+            print("   ⚠️ 警告: OCR数据不包含 row/col 索引，可能不是 MinerU 格式")
			
 
				+            print("   ⚠️ 混合模式需要 MinerU 格式的 JSON 文件")
			
 
				+            return [anchor_y, anchor_y + self.header_height]
			
 
				+        
			
 
				+        # 🔑 重构原始数据格式（MinerU 需要完整的 table 结构）
			
 
				+        raw_data = {
			
 
				+            'type': 'table',
			
 
				+            'table_cells': ocr_data
			
 
				+        }
			
 
				+        
			
 
				+        try:
			
 
				+            # ✅ 使用统一接口解析和分析（无需 dummy_image）
			
 
				+            table_bbox, structure = get_structure_from_ocr(
			
 
				+                raw_data, 
			
 
				+                tool="mineru"
			
 
				+            )
			
 
				+            
			
 
				+            if not structure or 'horizontal_lines' not in structure:
			
 
				+                print("   ⚠️ MinerU 分析失败，使用兜底方案")
			
 
				+                return [anchor_y, anchor_y + self.header_height]
			
 
				+            
			
 
				+            # 🔑 获取横线坐标
			
 
				+            horizontal_lines = structure['horizontal_lines']
			
 
				+            
			
 
				+            # 🔑 调整第一条线到 anchor_y（表头顶部）
			
 
				+            if horizontal_lines:
			
 
				+                offset = anchor_y - horizontal_lines[0]
			
 
				+                horizontal_lines = [y + offset for y in horizontal_lines]
			
 
				+            
			
 
				+            print(f"   检测到行数: {len(horizontal_lines) - 1}")
			
 
				+            
			
 
				+            # 🔑 分析行高分布
			
 
				+            if len(horizontal_lines) > 1:
			
 
				+                row_heights = []
			
 
				+                for i in range(len(horizontal_lines) - 1):
			
 
				+                    h = horizontal_lines[i+1] - horizontal_lines[i]
			
 
				+                    row_heights.append(h)
			
 
				+                
			
 
				+                if len(row_heights) > 1:
			
 
				+                    import numpy as np
			
 
				+                    print(f"   行高分布: min={min(row_heights)}, "
			
 
				+                          f"median={int(np.median(row_heights))}, "
			
 
				+                          f"max={max(row_heights)}")
			
 
				+            
			
 
				+            return horizontal_lines
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"   ⚠️ 解析失败: {e}")
			
 
				+            import traceback
			
 
				+            traceback.print_exc()
			
 
				+            return [anchor_y, anchor_y + self.header_height]
			
 
				+    
			
 
				+    def _generate_fixed_rows(self, anchor_y: int, num_rows: int) -> List[int]:
			
 
				+        """生成固定行高的横线（兜底方案）"""
			
 
				+        horizontal_lines = [anchor_y]
			
 
				+        
			
 
				+        # 表头
			
 
				         horizontal_lines.append(anchor_y + self.header_height)
			
 
				         
			
 
				+        # 数据行
			
 
				         current_y = anchor_y + self.header_height
			
 
				         for i in range(num_rows - 1):
			
 
				-            current_y += self.row_height
			
 
				+            current_y += self.fallback_row_height
			
 
				             horizontal_lines.append(current_y)
			
 
				         
			
 
				-        # 🎨 生成竖线坐标
			
 
				-        vertical_lines = []
			
 
				-        for offset in self.col_offsets:
			
 
				-            x = anchor_x + offset
			
 
				-            vertical_lines.append(x)
			
 
				-        
			
 
				-        # 🎨 生成行区间
			
 
				+        return horizontal_lines
			
 
				+    
			
 
				+    def _build_structure(self,
			
 
				+                        horizontal_lines: List[int],
			
 
				+                        vertical_lines: List[int],
			
 
				+                        anchor_x: int,
			
 
				+                        anchor_y: int,
			
 
				+                        mode: str = 'fixed') -> Dict:
			
 
				+        """构建表格结构信息（统一）"""
			
 
				+        # 生成行区间
			
 
				         rows = []
			
 
				-        for i in range(num_rows):
			
 
				+        for i in range(len(horizontal_lines) - 1):
			
 
				             rows.append({
			
 
				                 'y_start': horizontal_lines[i],
			
 
				                 'y_end': horizontal_lines[i + 1],
			
 
				                 'bboxes': []
			
 
				             })
			
 
				         
			
 
				-        # 🎨 生成列区间
			
 
				+        # 生成列区间
			
 
				         columns = []
			
 
				         for i in range(len(vertical_lines) - 1):
			
 
				             columns.append({
			
@@ -265,30 +408,40 @@ class TableTemplateApplier:
 
				                 'x_end': vertical_lines[i + 1]
			
 
				             })
			
 
				         
			
 
				+        # ✅ 根据模式设置正确的 mode 值
			
 
				+        if mode == 'hybrid':
			
 
				+            mode_value = 'hybrid'
			
 
				+        elif mode == 'fixed':
			
 
				+            mode_value = 'fixed'
			
 
				+        else:
			
 
				+            mode_value = mode  # 保留原始值
			
 
				+        
			
 
				         return {
			
 
				             'rows': rows,
			
 
				             'columns': columns,
			
 
				             'horizontal_lines': horizontal_lines,
			
 
				             'vertical_lines': vertical_lines,
			
 
				-            'header_height': self.header_height,
			
 
				-            'row_height': self.row_height,
			
 
				             'col_widths': self.col_widths,
			
 
				+            'row_height': self.row_height if mode == 'fixed' else None,
			
 
				             'table_bbox': [
			
 
				                 vertical_lines[0],
			
 
				                 horizontal_lines[0],
			
 
				                 vertical_lines[-1],
			
 
				                 horizontal_lines[-1]
			
 
				             ],
			
 
				+            'mode': mode_value,  # ✅ 确保有 mode 字段
			
 
				             'anchor': {'x': anchor_x, 'y': anchor_y},
			
 
				-            'num_rows': num_rows
			
 
				+            'modified_h_lines': [],  # ✅ 添加修改记录字段
			
 
				+            'modified_v_lines': []   # ✅ 添加修改记录字段
			
 
				         }
			
 
				 
			
 
				-
			
 
				 def apply_template_to_single_file(
			
 
				     applier: TableTemplateApplier,
			
 
				     image_file: Path,
			
 
				     json_file: Path,
			
 
				     output_dir: Path,
			
 
				+    structure_suffix: str = "_structure.json",
			
 
				+    use_hybrid_mode: bool = True,
			
 
				     line_width: int = 2,
			
 
				     line_color: Tuple[int, int, int] = (0, 0, 0)
			
 
				 ) -> bool:
			
@@ -300,6 +453,7 @@ def apply_template_to_single_file(
 
				         image_file: 图片文件路径
			
 
				         json_file: OCR JSON文件路径
			
 
				         output_dir: 输出目录
			
 
				+        use_hybrid_mode: 是否使用混合模式（需要 MinerU 格式）
			
 
				         line_width: 线条宽度
			
 
				         line_color: 线条颜色
			
 
				     
			
@@ -313,39 +467,79 @@ def apply_template_to_single_file(
 
				         with open(json_file, 'r', encoding='utf-8') as f:
			
 
				             raw_data = json.load(f)
			
 
				         
			
 
				-        # 🔧 解析OCR数据（支持PPStructure格式）
			
 
				+        # 🔑 自动检测 OCR 格式
			
 
				+        ocr_format = None
			
 
				+        
			
 
				         if 'parsing_res_list' in raw_data and 'overall_ocr_res' in raw_data:
			
 
				-            table_bbox, ocr_data = TableLineGenerator.parse_ppstructure_result(raw_data)
			
 
				+            # PPStructure 格式
			
 
				+            ocr_format = 'ppstructure'
			
 
				+        elif isinstance(raw_data, (list, dict)):
			
 
				+            # 尝试提取 MinerU 格式
			
 
				+            table_data = None
			
 
				+            if isinstance(raw_data, list):
			
 
				+                for item in raw_data:
			
 
				+                    if isinstance(item, dict) and item.get('type') == 'table':
			
 
				+                        table_data = item
			
 
				+                        break
			
 
				+            elif isinstance(raw_data, dict) and raw_data.get('type') == 'table':
			
 
				+                table_data = raw_data
			
 
				+            if table_data and 'table_cells' in table_data:
			
 
				+                ocr_format = 'mineru'
			
 
				+            else:
			
 
				+                raise ValueError("未识别的 OCR 格式")
			
 
				         else:
			
 
				-            raise ValueError("不是PPStructure格式的OCR结果")
			
 
				+            raise ValueError("未识别的 OCR 格式（仅支持 PPStructure 或 MinerU）")
			
 
				+
			
 
				+        table_bbox, ocr_data = TableLineGenerator.parse_ocr_data(
			
 
				+            raw_data, 
			
 
				+            tool=ocr_format
			
 
				+        )
			
 
				         
			
 
				-        print(f"  ✅ 加载OCR数据: {len(ocr_data)} 个文本框")
			
 
				+        text_boxes = ocr_data.get('text_boxes', [])
			
 
				+        print(f"  ✅ 加载OCR数据: {len(text_boxes)} 个文本框")
			
 
				+        print(f"  📋 OCR格式: {ocr_format}")
			
 
				         
			
 
				         # 加载图片
			
 
				         image = Image.open(image_file)
			
 
				         print(f"  ✅ 加载图片: {image.size}")
			
 
				         
			
 
				-        # 🎯 应用模板
			
 
				-        img_with_lines = applier.apply_to_image(
			
 
				-            image,
			
 
				-            ocr_data,
			
 
				-            line_width=line_width,
			
 
				-            line_color=line_color
			
 
				-        )
			
 
				+        # 🔑 验证混合模式的格式要求
			
 
				+        if use_hybrid_mode and ocr_format != 'mineru':
			
 
				+            print(f"  ⚠️ 警告: 混合模式需要 MinerU 格式，当前格式为 {ocr_format}")
			
 
				+            print(f"  ℹ️  自动切换到完全模板模式")
			
 
				+            use_hybrid_mode = False
			
 
				+        
			
 
				+        # 🆕 根据模式选择处理方式
			
 
				+        if use_hybrid_mode:
			
 
				+            print(f"  🔧 使用混合模式 (模板列 + MinerU 行)")
			
 
				+            img_with_lines, structure  = applier.apply_template_hybrid(
			
 
				+                image,
			
 
				+                ocr_data,
			
 
				+                use_ocr_rows=True,
			
 
				+                line_width=line_width,
			
 
				+                line_color=line_color
			
 
				+            )
			
 
				+        else:
			
 
				+            print(f"  🔧 使用完全模板模式 (固定行高)")
			
 
				+            img_with_lines, structure = applier.apply_template_fixed(
			
 
				+                image,
			
 
				+                text_boxes,
			
 
				+                line_width=line_width,
			
 
				+                line_color=line_color
			
 
				+            )
			
 
				         
			
 
				         # 保存图片
			
 
				-        output_file = output_dir / f"{image_file.stem}_with_lines.png"
			
 
				+        output_file = output_dir / f"{image_file.stem}.png"
			
 
				         img_with_lines.save(output_file)
			
 
				         
			
 
				-        # 🆕 生成并保存结构配置
			
 
				-        structure = applier.generate_structure_for_image(ocr_data)
			
 
				-        structure_file = output_dir / f"{image_file.stem}_structure.json"
			
 
				+        # 保存结构配置
			
 
				+        structure_file = output_dir / f"{image_file.stem}{structure_suffix}"
			
 
				         with open(structure_file, 'w', encoding='utf-8') as f:
			
 
				             json.dump(structure, f, indent=2, ensure_ascii=False)
			
 
				         
			
 
				         print(f"  ✅ 保存图片: {output_file.name}")
			
 
				         print(f"  ✅ 保存配置: {structure_file.name}")
			
 
				-        print(f"  📊 表格: {structure['num_rows']}行 x {len(structure['columns'])}列")
			
 
				+        print(f"  📊 表格: {len(structure['rows'])}行 x {len(structure['columns'])}列")
			
 
				         
			
 
				         return True
			
 
				         
			
@@ -361,6 +555,8 @@ def apply_template_batch(
 
				     image_dir: str,
			
 
				     json_dir: str,
			
 
				     output_dir: str,
			
 
				+    structure_suffix: str = "_structure.json",
			
 
				+    use_hybrid_mode: bool = False,
			
 
				     line_width: int = 2,
			
 
				     line_color: Tuple[int, int, int] = (0, 0, 0)
			
 
				 ):
			
@@ -414,7 +610,7 @@ def apply_template_batch(
 
				             continue
			
 
				         
			
 
				         if apply_template_to_single_file(
			
 
				-            applier, image_file, json_file, output_path, 
			
 
				+            applier, image_file, json_file, output_path, structure_suffix, use_hybrid_mode,
			
 
				             line_width, line_color
			
 
				         ):
			
 
				             results.append({
			
@@ -454,29 +650,31 @@ def apply_template_batch(
 
				 def main():
			
 
				     """主函数"""
			
 
				     parser = argparse.ArgumentParser(
			
 
				-        description='应用表格模板到其他页面',
			
 
				+        description='应用表格模板到其他页面（支持混合模式）',
			
 
				         formatter_class=argparse.RawDescriptionHelpFormatter,
			
 
				         epilog="""
			
 
				 示例用法:
			
 
				 
			
 
				-  1. 批量处理整个目录:
			
 
				+  1. 混合模式（推荐，自适应行高）:
			
 
				      python table_template_applier.py \\
			
 
				-         --template output/康强_北京农村商业银行_page_001_structure.json \\
			
 
				+         --template template.json \\
			
 
				          --image-dir /path/to/images \\
			
 
				          --json-dir /path/to/jsons \\
			
 
				-         --output-dir /path/to/output
			
 
				+         --output-dir /path/to/output \\
			
 
				+         --structure-suffix _structure.json \\
			
 
				+         --hybrid
			
 
				 
			
 
				-  2. 处理单个文件:
			
 
				+  2. 完全模板模式（固定行高）:
			
 
				      python table_template_applier.py \\
			
 
				-         --template output/康强_北京农村商业银行_page_001_structure.json \\
			
 
				-         --image-file /path/to/page_002.png \\
			
 
				-         --json-file /path/to/page_002.json \\
			
 
				-         --output-dir /path/to/output
			
 
				+         --template template.json \\
			
 
				+         --image-file page.png \\
			
 
				+         --json-file page.json \\
			
 
				+         --output-dir /path/to/output \\
			
 
				+         --structure-suffix _structure.json \\
			
 
				 
			
 
				-输出内容:
			
 
				-  - {name}_with_lines.png: 带表格线的图片
			
 
				-  - {name}_structure.json: 表格结构配置
			
 
				-  - batch_results.json: 批处理统计结果
			
 
				+模式说明:
			
 
				+  - 混合模式（--hybrid）: 列宽使用模板，行高根据OCR自适应
			
 
				+  - 完全模板模式: 列宽和行高都使用模板（适合固定格式表格）
			
 
				         """
			
 
				     )
			
 
				     
			
@@ -522,6 +720,12 @@ def main():
 
				         required=True,
			
 
				         help='输出目录（必需）'
			
 
				     )
			
 
				+    output_group.add_argument(
			
 
				+        '--structure-suffix',
			
 
				+        type=str,
			
 
				+        default='_structure.json',
			
 
				+        help='输出结构配置文件后缀（默认: _structure.json）'
			
 
				+    )
			
 
				     
			
 
				     # 绘图参数组
			
 
				     draw_group = parser.add_argument_group('绘图参数')
			
@@ -538,6 +742,14 @@ def main():
 
				         help='线条颜色（默认: black）'
			
 
				     )
			
 
				     
			
 
				+    # 🆕 新增模式参数
			
 
				+    mode_group = parser.add_argument_group('模式参数')
			
 
				+    mode_group.add_argument(
			
 
				+        '--hybrid',
			
 
				+        action='store_true',
			
 
				+        help='使用混合模式（模板列 + OCR行，自适应行高，推荐）'
			
 
				+    )
			
 
				+    
			
 
				     args = parser.parse_args()
			
 
				     
			
 
				     # 颜色映射
			
@@ -581,7 +793,9 @@ def main():
 
				         
			
 
				         success = apply_template_to_single_file(
			
 
				             applier, image_file, json_file, output_path,
			
 
				-            args.width, line_color
			
 
				+            use_hybrid_mode=args.hybrid,  # 🆕 传递混合模式参数
			
 
				+            line_width=args.width, 
			
 
				+            line_color=line_color
			
 
				         )
			
 
				         
			
 
				         if success:
			
@@ -610,8 +824,10 @@ def main():
 
				             str(image_dir),
			
 
				             str(json_dir),
			
 
				             str(output_path),
			
 
				-            args.width,
			
 
				-            line_color
			
 
				+            structure_suffix=args.structure_suffix,
			
 
				+            use_hybrid_mode=args.hybrid,  # 🆕 传递混合模式参数
			
 
				+            line_width=args.width,
			
 
				+            line_color=line_color,
			
 
				         )
			
 
				     
			
 
				     else:
			
@@ -633,14 +849,21 @@ if __name__ == "__main__":
 
				         
			
 
				         # 默认配置
			
 
				         default_config = {
			
 
				-            "template": "output/table_structures/康强_北京农村商业银行_page_001_structure.json",
			
 
				+            "template": "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行.wiredtable/康强_北京农村商业银行_page_001_structure.json",
			
 
				             "image-file": "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行/ppstructurev3_client_results/康强_北京农村商业银行/康强_北京农村商业银行_page_002.png",
			
 
				             "json-file": "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行/ppstructurev3_client_results/康强_北京农村商业银行_page_002.json",
			
 
				             "output-dir": "output/batch_results",
			
 
				             "width": "2",
			
 
				             "color": "black"
			
 
				         }
			
 
				-        
			
 
				+        # default_config = {
			
 
				+        #     "template": "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水.wiredtable/B用户_扫描流水_page_001_structure.json",
			
 
				+        #     "image-file": "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/mineru_vllm_results/B用户_扫描流水/B用户_扫描流水_page_002.png",
			
 
				+        #     "json-file": "/Users/zhch158/workspace/data/流水分析/B用户_扫描流水/mineru_vllm_results_cell_bbox/B用户_扫描流水_page_002.json",
			
 
				+        #     "output-dir": "output/batch_results",
			
 
				+        #     "width": "2",
			
 
				+        #     "color": "black"
			
 
				+        # }        
			
 
				         print("⚙️  默认参数:")
			
 
				         for key, value in default_config.items():
			
 
				             print(f"  --{key}: {value}")
			
@@ -649,5 +872,7 @@ if __name__ == "__main__":
 
				         sys.argv = [sys.argv[0]]
			
 
				         for key, value in default_config.items():
			
 
				             sys.argv.extend([f"--{key}", str(value)])
			
 
				+        
			
 
				+        sys.argv.append("--hybrid")  # 使用混合模式
			
 
				     
			
 
				     sys.exit(main())
Author	SHA1 Message	Date
zhch158_admin	74c95e92f5 feat: 添加无图片模式以仅分析表格结构，优化行列边界计算逻辑	22 hours ago
zhch158_admin	adb6af311f feat: 支持混合模式，优化模板应用逻辑并增强OCR数据处理	22 hours ago
zhch158_admin	3716bf591e refactor: 移除未使用的保存结构函数，优化结构数据处理逻辑	22 hours ago
zhch158_admin	3327051a35 fix: 优化目录选择器，避免重复加载数据源配置并重置选择索引	1 day ago
zhch158_admin	446cf46bcb fix: 修正计算横线位置的间隔处理，确保最小间隔为2	1 day ago
zhch158_admin	7e26b885b4 feat: 统一OCR数据解析接口，支持多种工具类型并优化数据提取流程	1 day ago
zhch158_admin	2adb273fda feat: 替换OCR数据解析函数为TableLineGenerator的解析方法	1 day ago
zhch158_admin	cdf4150736 feat: 替换OCR数据解析函数，使用TableLineGenerator解析工具	1 day ago
zhch158_admin	26b22366fa feat: 重构OCR数据解析流程，统一接口并优化结构分析	1 day ago
zhch158_admin	5d1c15b040 feat: 更新数据处理模块，替换解析函数为获取结构函数	1 day ago
zhch158_admin	b7b1d5e55d feat: 优化新建标注模式，添加分析参数设置和分析按钮功能	1 day ago
zhch158_admin	0562a5f7b1 feat: 添加对B用户扫描流水的支持，更新数据源配置	1 day ago
zhch158_admin	13326c5e69 feat: 添加对MinerU格式结果的解析，支持行列分割线的自动提取	1 day ago
zhch158_admin	b44105d0da feat: 添加对PaddleX语法警告的过滤	1 day ago
zhch158_admin	8c92209400 feat: 优化分析控件和数据处理，支持多种工具类型解析	1 day ago