8 Commits 7b6a80f651 ... 47892de44f

Autor SHA1 Mensagem Data
  zhch158_admin 47892de44f feat: 添加批量应用模板功能,优化状态管理和错误处理 3 dias atrás
  zhch158_admin 8f5ffb2a25 feat: 添加批量模板控件,优化目录模式和保存功能 3 dias atrás
  zhch158_admin 4fbd3a1c7e feat: 添加表格编辑器配置文件,支持数据源和输出设置 3 dias atrás
  zhch158_admin a66962ed97 feat: 移除过时的表格线生成函数,简化代码结构 3 dias atrás
  zhch158_admin 244e079db9 feat: 添加目录模式支持,优化配置加载和状态管理 3 dias atrás
  zhch158_admin 880446a21e docs: 更新README.md,添加可视化编辑器的配置说明 3 dias atrás
  zhch158_admin d1ed886bfa feat: Add display controls, file handlers, and save functionality for table annotation tool 3 dias atrás
  zhch158_admin 8810bcad1d feat: add table line visualization editor with adjustable line positions and undo/redo functionality 4 dias atrás

+ 4 - 0
table_line_generator/README.md

@@ -34,7 +34,11 @@ OCR识别(bbox) → 自动分析 → 人工调整 → 保存模板 → 批量应
 ### 2️⃣ 打开可视化编辑器
 
 ```bash
+# 读取缺省配置文件./table_line_generator.yaml
 streamlit run streamlit_table_line_editor.py
+
+# 关键是在入口脚本名后加 --,后面的参数才会传递给脚本自身
+streamlit run streamlit_table_line_editor.py -- --config /path/to/custom.yaml
 ```
 
 ### 3️⃣ 新建标注(第一页)

+ 0 - 113
table_line_generator/batch_apply_table_lines.py

@@ -1,113 +0,0 @@
-"""
-批量将表格结构应用到所有页
-"""
-
-import json
-from pathlib import Path
-from table_line_generator import TableLineGenerator
-from PIL import Image
-from typing import List
-import argparse
-
-
-def batch_apply_table_structure(
-    source_json_path: str,
-    target_image_dir: str,
-    output_dir: str,
-    structure_config_path: str = None
-):
-    """
-    批量应用表格结构
-    
-    Args:
-        source_json_path: 源OCR结果JSON路径(用于生成初始结构)
-        target_image_dir: 目标图片目录
-        output_dir: 输出目录
-        structure_config_path: 表格结构配置路径(可选)
-    """
-    # 1. 加载或生成表格结构
-    if structure_config_path and Path(structure_config_path).exists():
-        # 加载已有配置
-        with open(structure_config_path, 'r') as f:
-            structure = json.load(f)
-        print(f"📂 加载表格结构: {structure_config_path}")
-    else:
-        # 生成新配置
-        with open(source_json_path, 'r') as f:
-            ocr_data = json.load(f)
-        
-        source_image_path = Path(source_json_path).with_suffix('.jpg')
-        generator = TableLineGenerator(str(source_image_path), ocr_data)
-        
-        structure_info = generator.analyze_table_structure()
-        structure = generator.save_table_structure(
-            f"{output_dir}/table_structure.json"
-        )
-        print(f"✅ 生成表格结构配置")
-    
-    # 2. 查找所有目标图片
-    target_images = list(Path(target_image_dir).glob("*.jpg"))
-    target_images.extend(list(Path(target_image_dir).glob("*.png")))
-    target_images = sorted(target_images)
-    
-    print(f"📁 找到 {len(target_images)} 个图片文件")
-    
-    # 3. 批量应用
-    output_path = Path(output_dir)
-    output_path.mkdir(parents=True, exist_ok=True)
-    
-    results = []
-    for image_path in target_images:
-        try:
-            # 创建临时生成器(用于应用结构)
-            generator = TableLineGenerator(str(image_path), [])
-            generator.rows = structure.get('rows', [])
-            generator.columns = structure.get('columns', [])
-            generator.row_height = structure.get('row_height', 30)
-            
-            # 应用结构
-            output_file = output_path / f"{image_path.stem}_with_lines.jpg"
-            generator.apply_structure_to_image(
-                str(image_path),
-                structure,
-                str(output_file)
-            )
-            
-            results.append({
-                'source': str(image_path),
-                'output': str(output_file),
-                'status': 'success'
-            })
-            print(f"✅ {image_path.name} → {output_file.name}")
-            
-        except Exception as e:
-            results.append({
-                'source': str(image_path),
-                'status': 'error',
-                'error': str(e)
-            })
-            print(f"❌ {image_path.name} 失败: {e}")
-    
-    # 保存结果
-    with open(output_path / "batch_results.json", 'w') as f:
-        json.dump(results, f, indent=2, ensure_ascii=False)
-    
-    success_count = sum(1 for r in results if r['status'] == 'success')
-    print(f"\n🎉 完成!成功: {success_count}/{len(results)}")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="批量应用表格结构")
-    parser.add_argument('-s', '--source', required=True, help="源OCR结果JSON路径")
-    parser.add_argument('-t', '--target', required=True, help="目标图片目录")
-    parser.add_argument('-o', '--output', required=True, help="输出目录")
-    parser.add_argument('-c', '--config', help="表格结构配置路径")
-    
-    args = parser.parse_args()
-    
-    batch_apply_table_structure(
-        args.source,
-        args.target,
-        args.output,
-        args.config
-    )

+ 130 - 0
table_line_generator/editor/__init__.py

@@ -0,0 +1,130 @@
+"""
+表格线编辑器核心模块
+"""
+import sys
+from pathlib import Path
+
+# ✅ 确保父目录在路径中
+_parent_dir = Path(__file__).parent.parent
+if str(_parent_dir) not in sys.path:
+    sys.path.insert(0, str(_parent_dir))
+
+# 文件处理
+from .file_handlers import create_file_uploader_section
+
+# 显示控件
+from .display_controls import (
+    create_display_settings_section,
+    create_undo_redo_section,
+)
+
+# 分析控件
+from .analysis_controls import create_analysis_section
+
+# 保存控件
+from .save_controls import create_save_section
+
+# 🆕 批量模板控件
+from .batch_template_controls import create_batch_template_section
+
+# 模式设置
+from .mode_setup import (
+    setup_new_annotation_mode,
+    setup_edit_annotation_mode,
+)
+
+# 目录选择器
+from .directory_selector import create_directory_selector
+
+# 表格视图
+from .table_viewer import render_table_structure_view
+
+# 绘图
+from .drawing import (
+    draw_table_lines_with_numbers,
+    draw_clean_table_lines,
+    get_cached_table_lines_image,
+    clear_table_image_cache,
+)
+
+# 状态管理
+from .state_manager import (
+    init_undo_stack,
+    save_state_for_undo,
+    undo_last_action,
+    redo_last_action,
+)
+
+# 调整
+from .adjustments import create_adjustment_section
+
+# 配置加载
+from .config_loader import (
+    load_structure_from_config,
+    save_structure_to_config,
+    load_table_editor_config,
+    parse_table_editor_cli_args,
+    build_data_source_catalog,
+)
+
+# 数据处理
+from .data_processor import parse_ocr_data
+
+# 图片查看器
+from .viewer import show_image_with_scroll
+
+__all__ = [
+    # 文件处理
+    'create_file_uploader_section',
+    
+    # 显示控件
+    'create_display_settings_section',
+    'create_undo_redo_section',
+    
+    # 分析控件
+    'create_analysis_section',
+    
+    # 保存控件
+    'create_save_section',
+    
+    # 🆕 批量模板控件
+    'create_batch_template_section',
+    
+    # 模式设置
+    'setup_new_annotation_mode',
+    'setup_edit_annotation_mode',
+    
+    # 目录选择器
+    'create_directory_selector',
+    
+    # 表格视图
+    'render_table_structure_view',
+    
+    # 绘图
+    'draw_table_lines_with_numbers',
+    'draw_clean_table_lines',
+    'get_cached_table_lines_image',
+    'clear_table_image_cache',
+    
+    # 状态管理
+    'init_undo_stack',
+    'save_state_for_undo',
+    'undo_last_action',
+    'redo_last_action',
+    
+    # 调整
+    'create_adjustment_section',
+    
+    # 配置加载
+    'load_structure_from_config',
+    'save_structure_to_config',
+    'load_table_editor_config',
+    'parse_table_editor_cli_args',
+    'build_data_source_catalog',
+    
+    # 数据处理
+    'parse_ocr_data',
+    
+    # 图片查看器
+    'show_image_with_scroll',
+]

+ 201 - 0
table_line_generator/editor/adjustments.py

@@ -0,0 +1,201 @@
+"""
+手动调整功能
+"""
+
+from .state_manager import save_state_for_undo
+from .drawing import clear_table_image_cache
+
+
+def create_adjustment_section(structure):
+    """
+    Render the manual line-adjustment controls and apply the chosen edit.
+
+    A radio group selects one of six actions (adjust / add / delete a
+    horizontal or vertical line).  When the matching confirm button is
+    pressed, the structure is handed to ``save_state_for_undo``, the line
+    list is edited in place, the derived row/column intervals are rebuilt
+    and ``clear_table_image_cache`` is called.
+
+    Args:
+        structure: Table structure dict.  ``horizontal_lines`` and
+            ``vertical_lines`` are read and mutated in place (created as
+            empty lists when missing); ``modified_h_lines`` /
+            ``modified_v_lines`` hold the indices of edited lines.
+
+    Returns:
+        True when the structure was changed during this run, else False.
+    """
+    import streamlit as st
+
+    st.divider()
+    st.header("🛠️ 手动调整")
+
+    # setdefault (not get) so the "add line" branches append to the list
+    # that is actually stored in ``structure`` even when the key was missing.
+    horizontal_lines = structure.setdefault('horizontal_lines', [])
+    vertical_lines = structure.setdefault('vertical_lines', [])
+    adjusted = False
+
+    # Row and column operations share one radio group; nothing is selected
+    # initially (index=None).
+    adjustment_action = st.radio(
+        "行&列操作",
+        ["调整横线", "添加横线", "删除横线", "调整竖线", "添加竖线", "删除竖线"],
+        horizontal=True,
+        index=None,
+        label_visibility="collapsed",
+        key="adjustment_action_radio"
+    )
+
+    if adjustment_action == "调整横线" and horizontal_lines:
+        line_index = st.selectbox(
+            "选择横线",
+            range(len(horizontal_lines)),
+            format_func=lambda i: f"R{i+1} (Y={horizontal_lines[i]})",
+            key="adjust_h_select"
+        )
+        new_y = st.number_input(
+            "新的Y坐标",
+            min_value=0,
+            value=horizontal_lines[line_index],
+            step=1,
+            key="adjust_h_value"
+        )
+        if st.button("✅ 应用横线调整"):
+            # Skip no-op edits so they do not pollute the undo history.
+            if new_y != horizontal_lines[line_index]:
+                save_state_for_undo(structure)
+                horizontal_lines[line_index] = new_y
+                _mark_line_modified(structure, 'modified_h_lines', line_index)
+                _update_row_intervals(structure)
+                clear_table_image_cache()
+                adjusted = True
+                st.success(f"✅ R{line_index+1} 已更新")
+    elif adjustment_action == "添加横线":
+        new_h_y = st.number_input(
+            "新横线的Y坐标",
+            min_value=0,
+            value=horizontal_lines[-1] + 50 if horizontal_lines else 100,
+            step=1,
+            key="add_h_value"
+        )
+        if st.button("➕ 确认添加横线"):
+            save_state_for_undo(structure)
+            horizontal_lines.append(new_h_y)
+            horizontal_lines.sort()
+            idx = horizontal_lines.index(new_h_y)
+            # Lines at or after the insertion point moved down by one, so
+            # their recorded indices must move with them.
+            _mark_line_modified(structure, 'modified_h_lines', idx, inserted=True)
+            _update_row_intervals(structure)
+            clear_table_image_cache()
+            adjusted = True
+            st.success(f"✅ 新增横线 Y={new_h_y}")
+    elif adjustment_action == "删除横线" and len(horizontal_lines) > 2:
+        to_delete = st.multiselect(
+            "选择要删除的横线",
+            range(len(horizontal_lines)),
+            format_func=lambda i: f"R{i+1} (Y={horizontal_lines[i]})",
+            key="del_h_select"
+        )
+        if to_delete and st.button("🗑️ 确认删除横线"):
+            save_state_for_undo(structure)
+            # Delete from the highest index down so earlier deletions do not
+            # shift the positions still waiting to be removed.
+            for idx in sorted(to_delete, reverse=True):
+                del horizontal_lines[idx]
+            # Remaining indices no longer match; the markers are reset.
+            structure['modified_h_lines'] = set()
+            _update_row_intervals(structure)
+            clear_table_image_cache()
+            adjusted = True
+            st.success(f"✅ 已删除 {len(to_delete)} 条横线")
+
+    elif adjustment_action == "调整竖线" and vertical_lines:
+        line_index = st.selectbox(
+            "选择竖线",
+            range(len(vertical_lines)),
+            format_func=lambda i: f"C{i+1} (X={vertical_lines[i]})",
+            key="adjust_v_select"
+        )
+        new_x = st.number_input(
+            "新的X坐标",
+            min_value=0,
+            value=vertical_lines[line_index],
+            step=1,
+            key="adjust_v_value"
+        )
+        if st.button("✅ 应用竖线调整"):
+            if new_x != vertical_lines[line_index]:
+                save_state_for_undo(structure)
+                vertical_lines[line_index] = new_x
+                _mark_line_modified(structure, 'modified_v_lines', line_index)
+                _update_column_intervals(structure)
+                clear_table_image_cache()
+                adjusted = True
+                st.success(f"✅ C{line_index+1} 已更新")
+    elif adjustment_action == "添加竖线":
+        new_v_x = st.number_input(
+            "新竖线的X坐标",
+            min_value=0,
+            value=vertical_lines[-1] + 100 if vertical_lines else 100,
+            step=1,
+            key="add_v_value"
+        )
+        if st.button("➕ 确认添加竖线"):
+            save_state_for_undo(structure)
+            vertical_lines.append(new_v_x)
+            vertical_lines.sort()
+            idx = vertical_lines.index(new_v_x)
+            _mark_line_modified(structure, 'modified_v_lines', idx, inserted=True)
+            _update_column_intervals(structure)
+            clear_table_image_cache()
+            adjusted = True
+            st.success(f"✅ 新增竖线 X={new_v_x}")
+    elif adjustment_action == "删除竖线" and len(vertical_lines) > 2:
+        to_delete = st.multiselect(
+            "选择要删除的竖线",
+            range(len(vertical_lines)),
+            format_func=lambda i: f"C{i+1} (X={vertical_lines[i]})",
+            key="del_v_select"
+        )
+        if to_delete and st.button("🗑️ 确认删除竖线"):
+            save_state_for_undo(structure)
+            for idx in sorted(to_delete, reverse=True):
+                del vertical_lines[idx]
+            structure['modified_v_lines'] = set()
+            _update_column_intervals(structure)
+            clear_table_image_cache()
+            adjusted = True
+            st.success(f"✅ 已删除 {len(to_delete)} 条竖线")
+
+    return adjusted
+
+
+def _mark_line_modified(structure, key, index, inserted=False):
+    """
+    Record ``index`` in the modified-line index set stored under ``key``.
+
+    The stored collection is normalised to a ``set`` first, so a list (for
+    example one that came back from a JSON file) is accepted as well.  With
+    ``inserted=True`` every recorded index at or after ``index`` is shifted
+    up by one before ``index`` itself is added, keeping earlier markers
+    attached to the lines they were recorded for.
+    """
+    marked = set(structure.get(key) or ())
+    if inserted:
+        marked = {i + 1 if i >= index else i for i in marked}
+    marked.add(index)
+    structure[key] = marked
+
+
+def _update_row_intervals(structure):
+    """
+    Rebuild ``structure['rows']`` from the horizontal line coordinates.
+
+    Each pair of consecutive horizontal lines becomes one row entry with
+    ``y_start``, ``y_end`` and an empty ``bboxes`` list.  When the structure
+    already has a ``table_bbox``, it is recomputed as
+    ``[first vertical, first horizontal, last vertical, last horizontal]``;
+    a missing/empty line list contributes 0 for its two coordinates instead
+    of raising ``IndexError`` (all lines can be removed through the delete
+    control).
+
+    Args:
+        structure: Table structure dict, updated in place.
+    """
+    horizontal_lines = structure.get('horizontal_lines', [])
+
+    structure['rows'] = [
+        {'y_start': top, 'y_end': bottom, 'bboxes': []}
+        for top, bottom in zip(horizontal_lines, horizontal_lines[1:])
+    ]
+
+    # Only refresh the bounding box if the structure tracks one.
+    if 'table_bbox' in structure:
+        vertical_lines = structure.get('vertical_lines', [])
+        structure['table_bbox'] = [
+            vertical_lines[0] if vertical_lines else 0,
+            horizontal_lines[0] if horizontal_lines else 0,
+            vertical_lines[-1] if vertical_lines else 0,
+            horizontal_lines[-1] if horizontal_lines else 0,
+        ]
+
+
+def _update_column_intervals(structure):
+    """
+    Rebuild ``structure['columns']`` and ``structure['col_widths']`` from the
+    vertical line coordinates.
+
+    Each pair of consecutive vertical lines becomes one column entry with
+    ``x_start`` and ``x_end``; ``col_widths`` holds ``x_end - x_start`` per
+    column.  When the structure already has a ``table_bbox``, it is
+    recomputed as
+    ``[first vertical, first horizontal, last vertical, last horizontal]``;
+    a missing/empty line list contributes 0 for its two coordinates instead
+    of raising ``IndexError`` (all lines can be removed through the delete
+    control).
+
+    Args:
+        structure: Table structure dict, updated in place.
+    """
+    vertical_lines = structure.get('vertical_lines', [])
+
+    columns = [
+        {'x_start': left, 'x_end': right}
+        for left, right in zip(vertical_lines, vertical_lines[1:])
+    ]
+    structure['columns'] = columns
+    structure['col_widths'] = [col['x_end'] - col['x_start'] for col in columns]
+
+    # Only refresh the bounding box if the structure tracks one.
+    if 'table_bbox' in structure:
+        horizontal_lines = structure.get('horizontal_lines', [])
+        structure['table_bbox'] = [
+            vertical_lines[0] if vertical_lines else 0,
+            horizontal_lines[0] if horizontal_lines else 0,
+            vertical_lines[-1] if vertical_lines else 0,
+            horizontal_lines[-1] if horizontal_lines else 0,
+        ]

+ 60 - 0
table_line_generator/editor/analysis_controls.py

@@ -0,0 +1,60 @@
+"""
+表格结构分析控件
+"""
+import streamlit as st
+from .drawing import clear_table_image_cache
+
+
+def create_analysis_section(y_tolerance: int, x_tolerance: int, min_row_height: int):
+    """
+    Render the "analyze table structure" button and run the analysis on click.
+
+    On click, ``st.session_state.generator.analyze_table_structure`` is
+    called with the given parameters.  A non-empty result is stored in
+    ``st.session_state.structure`` with empty modified-line sets, the
+    undo/redo stacks are reset, the table image cache is cleared and a
+    summary of row/column/line counts is shown.  An empty result or any
+    exception stops the script run after reporting it.
+
+    Args:
+        y_tolerance: Clustering tolerance along the Y axis.
+        x_tolerance: Clustering tolerance along the X axis.
+        min_row_height: Minimum row height.
+    """
+    if not st.button("🔍 分析表格结构"):
+        return
+
+    with st.spinner("分析中..."):
+        try:
+            detected = st.session_state.generator.analyze_table_structure(
+                y_tolerance=y_tolerance,
+                x_tolerance=x_tolerance,
+                min_row_height=min_row_height
+            )
+
+            if not detected:
+                st.warning("⚠️ 未检测到表格结构")
+                st.stop()
+
+            # A fresh analysis starts with no hand-edited lines.
+            for marker_key in ('modified_h_lines', 'modified_v_lines'):
+                detected[marker_key] = set()
+
+            # Publish the result and discard history from the previous one.
+            st.session_state.structure = detected
+            st.session_state.undo_stack = []
+            st.session_state.redo_stack = []
+            clear_table_image_cache()
+
+            row_count = len(detected['rows'])
+            h_line_count = len(detected['horizontal_lines'])
+            col_count = len(detected['columns'])
+            v_line_count = len(detected['vertical_lines'])
+
+            st.success(
+                f"✅ 检测到 {row_count} 行"
+                f"({h_line_count} 条横线),"
+                f"{col_count} 列"
+                f"({v_line_count} 条竖线)"
+            )
+
+            # One metric per column, in the same order as the summary.
+            metrics = (
+                ("行数", row_count),
+                ("横线数", h_line_count),
+                ("列数", col_count),
+                ("竖线数", v_line_count),
+            )
+            for column, (label, value) in zip(st.columns(4), metrics):
+                with column:
+                    st.metric(label, value)
+
+        except Exception as e:
+            st.error(f"❌ 分析失败: {e}")
+            import traceback
+            st.code(traceback.format_exc())
+            st.stop()

+ 277 - 0
table_line_generator/editor/batch_template_controls.py

@@ -0,0 +1,277 @@
+"""
+批量模板应用控件
+"""
+import streamlit as st
+import json
+from pathlib import Path
+from PIL import Image
+from typing import Dict, List
+import sys
+
+# 添加父目录到路径
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from table_template_applier import TableTemplateApplier
+from table_line_generator import TableLineGenerator
+
+
+def create_batch_template_section(current_line_width: int, current_line_color: str):
+    """
+    Render the "apply template to all pages" controls.
+
+    Nothing is rendered unless the session holds a non-empty ``structure``,
+    a ``current_catalog`` and a ``current_output_config``.  A warning is
+    shown (and nothing else) when ``loaded_config_name`` is missing or
+    empty, and an error when the structure file expected for the current
+    page does not exist in the output directory.  Otherwise the pages that
+    have no structure file yet are listed as pending, and a button applies
+    the current page's structure file to them.
+
+    Args:
+        current_line_width: Line width used on the current page.
+        current_line_color: Name of the line colour used on the current page.
+    """
+    session = st.session_state
+
+    # Preconditions: silently render nothing when any of them is missing.
+    if not session.get('structure'):
+        return
+    if 'current_catalog' not in session or 'current_output_config' not in session:
+        return
+
+    # The current page must have been saved to a structure file first.
+    if not session.get('loaded_config_name'):
+        st.warning("⚠️ 当前页未保存结构文件,请先保存后再批量应用")
+        return
+
+    st.divider()
+    st.subheader("🔄 批量应用模板")
+
+    catalog = session.current_catalog
+    current_entry = catalog[session.get('current_catalog_index', 0)]
+
+    total_files = len(catalog)
+    current_page = current_entry["index"]
+
+    output_config = session.current_output_config
+    output_dir = Path(output_config.get("directory", "output/table_structures"))
+    structure_suffix = output_config.get("structure_suffix", "_structure.json")
+
+    # Structure file of the current page; it acts as the template.
+    current_structure_file = output_dir / f"{current_entry['json'].stem}{structure_suffix}"
+
+    if not current_structure_file.exists():
+        st.error("❌ 未找到当前页的结构文件,请先保存")
+        st.info(f"期望文件: {current_structure_file}")
+        return
+
+    # Every page other than the current one that has no structure file yet.
+    unlabeled_pages = [
+        entry
+        for entry in catalog
+        if entry["index"] != current_page
+        and not (output_dir / f"{entry['json'].stem}{structure_suffix}").exists()
+    ]
+    pending_count = len(unlabeled_pages)
+
+    st.info(
+        f"📊 当前页: {current_page}/{total_files}\n\n"
+        f"📄 模板文件: {current_structure_file.name}\n\n"
+        f"✅ 已标注: {total_files - pending_count} 页\n\n"
+        f"⏳ 待处理: {pending_count} 页"
+    )
+
+    if pending_count == 0:
+        st.success("🎉 所有页面都已标注!")
+        return
+
+    # The batch run reuses the drawing settings of the current page.
+    st.info(
+        f"🎨 将使用当前页设置:\n\n"
+        f"• 线条宽度: {current_line_width}px\n\n"
+        f"• 线条颜色: {current_line_color}"
+    )
+
+    # Configured colours, or a built-in palette when none are configured.
+    palette = output_config.get("line_colors") or [
+        {"name": "黑色", "rgb": [0, 0, 0]},
+        {"name": "蓝色", "rgb": [0, 0, 255]},
+        {"name": "红色", "rgb": [255, 0, 0]},
+    ]
+
+    # Translate the colour name to an RGB tuple; unknown names map to black.
+    rgb_by_name = {item["name"]: tuple(item["rgb"]) for item in palette}
+    line_color = rgb_by_name.get(current_line_color, (0, 0, 0))
+
+    if st.button("🚀 批量应用到所有未标注页面", type="primary"):
+        _apply_template_batch(
+            current_structure_file,
+            current_entry,
+            unlabeled_pages,
+            output_dir,
+            structure_suffix,
+            current_line_width,
+            line_color
+        )
+
+
+def _apply_template_batch(
+    template_file: Path,
+    template_entry: Dict,
+    target_entries: List[Dict],
+    output_dir: Path,
+    structure_suffix: str,
+    line_width: int,
+    line_color: tuple
+):
+    """
+    Apply a saved table-structure template to every target page.
+
+    For each entry the OCR JSON is loaded and parsed, the template is
+    applied to the page image, and both the rendered image and the generated
+    structure are written to ``output_dir``.  Per-page failures are reported
+    and recorded without aborting the batch.  A ``batch_results.json``
+    report is written at the end and offered for download when at least one
+    page succeeded.
+
+    Args:
+        template_file: Path of the structure file used as the template.
+        template_entry: Catalog entry of the template page.
+        target_entries: Catalog entries of the pages to process.
+        output_dir: Directory receiving images, structures and the report.
+        structure_suffix: Suffix appended to the JSON stem for structure files.
+        line_width: Line width passed to the template applier.
+        line_color: Line colour as an ``(r, g, b)`` tuple.
+    """
+    def _json_fallback(value):
+        # json cannot encode sets; emit them as lists in case the generated
+        # structure carries any (e.g. modified-line index sets).
+        if isinstance(value, (set, frozenset)):
+            try:
+                return sorted(value)
+            except TypeError:
+                return list(value)
+        raise TypeError(
+            f"Object of type {type(value).__name__} is not JSON serializable"
+        )
+
+    try:
+        # The applier is built directly from the saved structure file.
+        applier = TableTemplateApplier(str(template_file))
+
+        st.info(f"📋 使用模板: {template_file.name}")
+
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+
+        # Loop-invariant: suffix appended to each generated image name.
+        image_suffix = st.session_state.current_output_config.get("image_suffix", ".png")
+        total = len(target_entries)
+
+        success_count = 0
+        failed_count = 0
+        results = []
+
+        for position, entry in enumerate(target_entries, start=1):
+            progress_bar.progress(position / total)
+            status_text.text(f"处理中: {entry['display']} ({position}/{total})")
+
+            try:
+                with open(entry["json"], "r", encoding="utf-8") as fp:
+                    raw = json.load(fp)
+
+                # Only the PP-Structure result layout is understood here.
+                if 'parsing_res_list' in raw and 'overall_ocr_res' in raw:
+                    _, ocr_data = TableLineGenerator.parse_ppstructure_result(raw)
+                else:
+                    raise ValueError("不支持的 OCR 格式")
+
+                image_path = entry["image"]
+                if not (image_path and image_path.exists()):
+                    st.warning(f"⚠️ 跳过 {entry['display']}: 未找到图片")
+                    # Skipped pages count towards the failure total.
+                    failed_count += 1
+                    results.append({
+                        'page': entry['index'],
+                        'status': 'skipped',
+                        'reason': 'no_image'
+                    })
+                    continue
+
+                base_name = entry["json"].stem
+                output_image_path = output_dir / f"{base_name}{image_suffix}"
+                structure_path = output_dir / f"{base_name}{structure_suffix}"
+
+                # Keep the source image open only while it is needed so the
+                # file handle is released after every page.
+                with Image.open(image_path) as image:
+                    img_with_lines = applier.apply_to_image(
+                        image,
+                        ocr_data,
+                        line_width=line_width,
+                        line_color=line_color
+                    )
+                    structure = applier.generate_structure_for_image(ocr_data)
+
+                    # Serialise before touching the disk: a structure that
+                    # cannot be encoded must not leave a truncated file
+                    # behind, because an existing structure file marks the
+                    # page as already annotated.
+                    structure_text = json.dumps(
+                        structure,
+                        indent=2,
+                        ensure_ascii=False,
+                        default=_json_fallback
+                    )
+
+                    img_with_lines.save(output_image_path)
+
+                structure_path.write_text(structure_text, encoding='utf-8')
+
+                success_count += 1
+                results.append({
+                    'page': entry['index'],
+                    'status': 'success',
+                    'image': str(output_image_path),
+                    'structure': str(structure_path)
+                })
+
+            except Exception as e:
+                # One bad page must not abort the rest of the batch.
+                failed_count += 1
+                results.append({
+                    'page': entry['index'],
+                    'status': 'error',
+                    'error': str(e)
+                })
+                st.error(f"❌ 处理失败 {entry['display']}: {e}")
+
+        progress_bar.progress(1.0)
+        status_text.empty()
+
+        # Build the report once; the same text is saved and offered for
+        # download.
+        report_text = json.dumps({
+            'template': template_entry['display'],
+            'template_file': str(template_file),
+            'total': total,
+            'success': success_count,
+            'failed': failed_count,
+            'line_width': line_width,
+            'line_color': line_color,
+            'results': results
+        }, indent=2, ensure_ascii=False)
+        batch_result_path = output_dir / "batch_results.json"
+        batch_result_path.write_text(report_text, encoding='utf-8')
+
+        if success_count > 0:
+            st.success(
+                f"✅ 批量应用完成!\n\n"
+                f"成功: {success_count} 页\n\n"
+                f"失败: {failed_count} 页"
+            )
+
+            st.download_button(
+                "📥 下载批处理报告",
+                report_text,
+                file_name="batch_results.json",
+                mime="application/json"
+            )
+        else:
+            st.error("❌ 批量应用失败,没有成功处理任何页面")
+
+        with st.expander("📋 详细结果"):
+            for result in results:
+                if result['status'] == 'success':
+                    st.success(f"✅ 第 {result['page']} 页")
+                elif result['status'] == 'error':
+                    st.error(f"❌ 第 {result['page']} 页: {result.get('error', '未知错误')}")
+                else:
+                    st.warning(f"⚠️ 第 {result['page']} 页: {result.get('reason', '跳过')}")
+
+    except Exception as e:
+        # Failures outside the per-page loop (template loading, report
+        # writing) end up here.
+        st.error(f"❌ 批量应用过程中发生错误: {e}")
+        import traceback
+        with st.expander("🔍 详细错误信息"):
+            st.code(traceback.format_exc())

+ 299 - 0
table_line_generator/editor/config_loader.py

@@ -0,0 +1,299 @@
+"""
+配置文件加载/保存
+"""
+
+import argparse
+import json
+import sys
+import yaml
+import re
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+from jinja2 import Template
+
+# Built-in defaults for the table editor.  load_table_editor_config() overlays
+# the ``table_editor`` section of the YAML config on top of these values.
+TABLE_EDITOR_DEFAULTS = {
+    # Size of the editor viewport (units not shown here; presumably pixels).
+    "viewport": {"width": 1200, "height": 600},
+    # Initial value and bounds for the zoom and line-width controls.
+    "display": {
+        "default_zoom": 1.0,
+        "zoom_min": 0.25,
+        "zoom_max": 2.0,
+        "zoom_step": 0.25,
+        "default_line_width": 2,
+        "line_width_min": 1,
+        "line_width_max": 5,
+        "show_line_numbers": True,
+    },
+    # Where results are written and how the output files are named.
+    "output": {
+        "directory": "output/table_structures",
+        "structure_suffix": "_structure.json",
+        "image_suffix": "_with_lines.png",
+        # Initial state of the save options; ``line_color`` holds a colour
+        # name, presumably one of the ``line_colors`` entries below.
+        "defaults": {
+            "save_structure": True,
+            "save_image": True,
+            "line_color": "黑色",
+        },
+        # Selectable line colours: display name plus RGB triple.
+        "line_colors": [
+            {"name": "黑色", "rgb": [0, 0, 0]},
+            {"name": "蓝色", "rgb": [0, 0, 255]},
+            {"name": "红色", "rgb": [255, 0, 0]},
+        ],
+    },
+}
+
+
+def parse_table_editor_cli_args(argv: Optional[List[str]] = None):
+    """
+    Parse the editor's own command-line options.
+
+    Only ``--config`` is recognised; every other argument is ignored, so the
+    parser does not choke on options meant for something else.
+
+    Args:
+        argv: Argument list to parse; ``sys.argv[1:]`` when None.
+
+    Returns:
+        The argparse namespace; ``config`` is the given path or None.
+    """
+    cli_args = sys.argv[1:] if argv is None else argv
+
+    arg_parser = argparse.ArgumentParser(add_help=False)
+    arg_parser.add_argument(
+        "--config",
+        type=str,
+        default=None,
+        help="table_line_generator 配置文件路径",
+    )
+
+    known_args, _unknown = arg_parser.parse_known_args(cli_args)
+    return known_args
+
+
+def load_table_editor_config(config_path: Path) -> Dict:
+    """
+    Load the table editor configuration and merge it over the defaults.
+
+    The ``table_editor`` section of the YAML file at ``config_path`` is
+    overlaid, section by section, on ``TABLE_EDITOR_DEFAULTS``.  When the
+    file does not exist a notice is printed and the defaults are used.  A
+    section that is absent or explicitly empty/null in the file counts as
+    "not configured".
+
+    Args:
+        config_path: Path of the YAML configuration file.
+
+    Returns:
+        Dict with the keys ``viewport``, ``display``, ``output`` and
+        ``data_sources``.  The returned containers are copies, so changing
+        them does not alter ``TABLE_EDITOR_DEFAULTS``.
+    """
+    from copy import deepcopy
+
+    config_path = Path(config_path)
+    table_cfg = {}
+
+    if config_path.exists():
+        with open(config_path, "r", encoding="utf-8") as fp:
+            data = yaml.safe_load(fp) or {}
+        # ``table_editor:`` with no value parses to None; treat it as empty.
+        table_cfg = data.get("table_editor") or {}
+    else:
+        print(f"[table_editor] config not found: {config_path}, using defaults")
+
+    def merge(section):
+        # Deep copy so nested defaults (e.g. the colour list) are never
+        # shared with the module-level constant.
+        merged = deepcopy(TABLE_EDITOR_DEFAULTS[section])
+        merged.update(table_cfg.get(section) or {})
+        return merged
+
+    user_output = table_cfg.get("output") or {}
+
+    result = {
+        "viewport": merge("viewport"),
+        "display": merge("display"),
+        "output": merge("output"),
+        "data_sources": _prepare_data_sources(table_cfg.get("data_sources"), config_path.parent),
+    }
+
+    # ``line_colors`` replaces the default list wholesale when configured.
+    if "line_colors" in user_output:
+        result["output"]["line_colors"] = user_output["line_colors"]
+    else:
+        result["output"]["line_colors"] = deepcopy(
+            TABLE_EDITOR_DEFAULTS["output"]["line_colors"]
+        )
+
+    # ``defaults`` is merged key by key rather than replaced.
+    result["output"]["defaults"] = {
+        **TABLE_EDITOR_DEFAULTS["output"]["defaults"],
+        **(user_output.get("defaults") or {}),
+    }
+    return result
+
+
+def _compile_pattern(pattern: Optional[str], context: Dict) -> Optional[re.Pattern]:
+    """
+    Render ``pattern`` as a Jinja2 template and compile the result as a regex.
+
+    Args:
+        pattern: Template text of the regular expression, or None/empty.
+        context: Variables made available to the template.
+
+    Returns:
+        The compiled pattern, or None when ``pattern`` is None or empty.
+    """
+    if pattern:
+        return re.compile(Template(pattern).render(**context))
+    return None
+
+
+def _render_template(value, context):
+    """
+    Render ``value`` as a Jinja2 template using ``context``.
+
+    None and plain numbers/booleans are returned unchanged; anything else is
+    converted to ``str`` and rendered.
+    """
+    passthrough = value is None or isinstance(value, (int, float, bool))
+    if passthrough:
+        return value
+    template_text = str(value)
+    return Template(template_text).render(**context)
+
+
+def _resolve_path(path_str: str, base_dir: Optional[Path], config_root: Path) -> Path:
+    """
+    Turn a configured path into an absolute, resolved ``Path``.
+
+    ``~`` is expanded in ``path_str``.  A relative path is anchored at
+    ``base_dir`` when one is given, otherwise at ``config_root``.
+
+    Args:
+        path_str: Path text from the configuration.
+        base_dir: Optional anchor for relative paths; a ``Path`` or a plain
+            string (``~`` is expanded here as well).
+        config_root: Fallback anchor, used when ``base_dir`` is empty.
+
+    Returns:
+        The resolved absolute path.
+    """
+    path = Path(path_str).expanduser()
+    if not path.is_absolute():
+        # base_dir can be a raw string from the config, so coerce it and
+        # expand "~" just like for path_str; otherwise "~/data" would end
+        # up as a literal "~" directory under the current directory.
+        anchor = Path(base_dir).expanduser() if base_dir else config_root
+        path = anchor / path
+    return path.resolve()
+
+
+def _prepare_data_sources(raw_sources: Optional[List[Dict]], config_root: Path) -> List[Dict]:
+    """
+    Normalise the ``data_sources`` entries of the configuration.
+
+    For every source, ``json_dir`` and ``image_dir`` are rendered as
+    templates (with the source's ``name`` and ``base_dir`` as variables) and
+    resolved to absolute paths; ``json_pattern`` / ``image_pattern`` and the
+    optional per-source ``output`` settings are rendered as well.
+
+    Args:
+        raw_sources: Source dicts from the configuration, or None.
+        config_root: Directory used to anchor relative paths when no
+            ``base_dir`` applies.
+
+    Returns:
+        One prepared dict per source, in input order.
+
+    Raises:
+        KeyError: A source has no ``name`` or ``base_dir``.
+        ValueError: ``json_dir`` or ``image_dir`` is missing or renders empty.
+    """
+    prepared = []
+    for src in raw_sources or []:
+        # Variables available to every template of this source.
+        ctx = {
+            'name': src['name'],
+            'base_dir': src['base_dir']
+        }
+        anchor_dir = ctx['base_dir']
+
+        # Both directories are mandatory; json_dir is checked first.
+        resolved_dirs = {}
+        for field in ("json_dir", "image_dir"):
+            raw_value = src.get(field)
+            if raw_value is None:
+                raise ValueError(f"[table_editor] data source '{src.get('name')}' 缺少 {field}")
+            rendered_dir = _render_template(raw_value, ctx)
+            if not rendered_dir:
+                raise ValueError(f"[table_editor] data source '{src.get('name')}' {field} 为空")
+            resolved_dirs[field] = _resolve_path(rendered_dir, anchor_dir, config_root)
+
+        prepared_source = {
+            **src,
+            "json_dir": resolved_dirs["json_dir"],
+            "image_dir": resolved_dirs["image_dir"],
+            "context": ctx,
+        }
+        for pattern_key in ("json_pattern", "image_pattern"):
+            prepared_source[pattern_key] = _render_template(src.get(pattern_key), ctx)
+
+        if "output" in src:
+            # Work on a copy so the raw configuration stays untouched.
+            output_cfg = dict(src["output"])
+            if "directory" in output_cfg:
+                rendered_output_dir = _render_template(output_cfg["directory"], ctx)
+                if rendered_output_dir:
+                    output_cfg["directory"] = str(
+                        _resolve_path(rendered_output_dir, anchor_dir, config_root)
+                    )
+            for suffix_key in ("structure_suffix", "image_suffix"):
+                if suffix_key in output_cfg:
+                    output_cfg[suffix_key] = _render_template(output_cfg[suffix_key], ctx)
+            prepared_source["output"] = output_cfg
+
+        prepared.append(prepared_source)
+    return prepared
+
+
+def build_data_source_catalog(source_cfg: Dict) -> List[Dict]:
+    """
+    Build the ordered page catalog of one data source.
+
+    Files in ``json_dir`` are selected by ``json_pattern`` (a full-name
+    regex) or, without a pattern, by ``json_suffix``; images in
+    ``image_dir`` are selected the same way with ``image_pattern`` /
+    ``image_suffix``.  A JSON file and an image belong together when they
+    share the same key: the ``page`` group captured by the pattern, or the
+    file stem when there is none.
+
+    Args:
+        source_cfg: Data source settings.  ``json_dir`` and ``image_dir``
+            are required; ``sort_key`` may be ``"page"``, ``"mtime"`` or
+            anything else for sorting by stem.
+
+    Returns:
+        One dict per JSON file with ``index`` (1-based), ``display``,
+        ``json``, ``image`` (None when no image matched), ``page`` and
+        ``page_token``.
+    """
+    json_dir = Path(source_cfg["json_dir"]).expanduser().resolve()
+    image_dir = Path(source_cfg["image_dir"]).expanduser().resolve()
+    json_suffix = source_cfg.get("json_suffix", ".json")
+    image_suffix = source_cfg.get("image_suffix", ".png")
+
+    # Template variables: the prepared context if present, otherwise the
+    # source's ``variables`` plus its name.
+    context = dict(source_cfg.get("context") or {})
+    if not context:
+        context = dict(source_cfg.get("variables", {}))
+        context.setdefault("name", source_cfg.get("name", ""))
+
+    json_regex = _compile_pattern(source_cfg.get("json_pattern"), context)
+    image_regex = _compile_pattern(source_cfg.get("image_pattern"), context)
+
+    def _select(directory, regex, suffix):
+        """Yield ``(path, page_token)`` for each regular file that passes the filter."""
+        for candidate in directory.glob("*"):
+            if not candidate.is_file():
+                continue
+            token = None
+            if regex:
+                hit = regex.fullmatch(candidate.name)
+                if not hit:
+                    continue
+                # None when the pattern has no "page" group.
+                token = hit.groupdict().get("page")
+            elif suffix and not candidate.name.endswith(suffix):
+                continue
+            yield candidate, token
+
+    json_files = [
+        {
+            "path": path,
+            "stem": path.stem,
+            "page_token": token,
+            "page": int(token) if token and token.isdigit() else None,
+            "mtime": path.stat().st_mtime,
+        }
+        for path, token in _select(json_dir, json_regex, json_suffix)
+    ]
+
+    sort_key = source_cfg.get("sort_key", "name")
+    if sort_key == "page" and any(item["page"] is not None for item in json_files):
+        # Numbered pages first (by number), then the rest by stem.
+        def _by_page(item):
+            if item["page"] is None:
+                return (True, item["stem"])
+            return (False, item["page"])
+        json_files.sort(key=_by_page)
+    elif sort_key == "mtime":
+        json_files.sort(key=lambda item: item["mtime"])
+    else:
+        json_files.sort(key=lambda item: item["stem"])
+
+    # Later files overwrite earlier ones that map to the same key.
+    image_map: Dict[str, Path] = {
+        (token or path.stem): path
+        for path, token in _select(image_dir, image_regex, image_suffix)
+    }
+
+    catalog = []
+    for idx, item in enumerate(json_files, start=1):
+        key = item["page_token"] or item["stem"]
+        catalog.append({
+            "index": idx,
+            "display": f"{idx:03d} · {key}",
+            "json": item["path"],
+            "image": image_map.get(key),
+            "page": item["page"],
+            "page_token": item["page_token"],
+        })
+    return catalog
+
+
+def load_structure_from_config(config_path: Path) -> dict:
+    """
+    Load a table structure from a JSON config file.
+
+    Files written by older versions are upgraded in memory: missing line
+    coordinate lists are derived from ``rows`` / ``columns``, and the
+    "modified" markers are always returned as sets.
+
+    Args:
+        config_path: Path of the structure JSON file.
+
+    Returns:
+        The table structure dictionary.
+    """
+    with open(config_path, 'r', encoding='utf-8') as fp:
+        structure = json.load(fp)
+
+    # Legacy configs: rebuild line coordinates from the cell definitions
+    # (every start coordinate plus the end coordinate of the last cell).
+    legacy_axes = (
+        ('horizontal_lines', 'rows', 'y_start', 'y_end'),
+        ('vertical_lines', 'columns', 'x_start', 'x_end'),
+    )
+    for lines_key, cells_key, start_key, end_key in legacy_axes:
+        if lines_key in structure:
+            continue
+        cells = structure.get(cells_key, [])
+        coords = [cell[start_key] for cell in cells]
+        if cells:
+            coords.append(cells[-1][end_key])
+        structure[lines_key] = coords
+
+    # Modified markers are stored as lists in JSON but used as sets; the old
+    # modified_rows / modified_cols fields only apply when the new ones are empty.
+    marker_fields = (
+        ('modified_h_lines', 'modified_rows'),
+        ('modified_v_lines', 'modified_cols'),
+    )
+    for marker_key, legacy_key in marker_fields:
+        marked = set(structure[marker_key]) if marker_key in structure else set()
+        if legacy_key in structure and not marked:
+            marked = set(structure[legacy_key])
+        structure[marker_key] = marked
+
+    return structure
+
+
+def save_structure_to_config(structure: dict, output_path: Path):
+    """
+    Save a table structure to a JSON config file.
+
+    The modified-line markers are written as sorted lists so that saving the
+    same structure twice always produces identical files.
+
+    Args:
+        structure: Table structure dictionary. ``rows``, ``columns``,
+            ``row_height``, ``col_widths`` and ``table_bbox`` are required;
+            the line lists and modified markers default to empty.
+        output_path: Destination file path.
+
+    Raises:
+        KeyError: If one of the required keys is missing.
+    """
+    save_data = {
+        'rows': structure['rows'],
+        'columns': structure['columns'],
+        'horizontal_lines': structure.get('horizontal_lines', []),
+        'vertical_lines': structure.get('vertical_lines', []),
+        'row_height': structure['row_height'],
+        'col_widths': structure['col_widths'],
+        'table_bbox': structure['table_bbox'],
+        # Sets have no defined order; sort for stable, diff-friendly output.
+        'modified_h_lines': sorted(structure.get('modified_h_lines', set())),
+        'modified_v_lines': sorted(structure.get('modified_v_lines', set())),
+    }
+
+    with open(output_path, 'w', encoding='utf-8') as f:
+        json.dump(save_data, f, indent=2, ensure_ascii=False)

+ 53 - 0
table_line_generator/editor/data_processor.py

@@ -0,0 +1,53 @@
+import streamlit as st
+import json
+
+# 当直接运行时
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from table_line_generator import TableLineGenerator  # 上级目录
+
+def parse_ocr_data(ocr_data):
+    """
+    Normalize OCR input into a list of text-box dicts.
+
+    Accepts a JSON string, a PPStructure V3 result dict (recognized by its
+    ``parsing_res_list`` and ``overall_ocr_res`` keys) or a list of dicts
+    whose first item has a ``bbox`` field. Problems are reported through
+    Streamlit messages and yield an empty list instead of an exception.
+    """
+    # Strings are treated as serialized JSON.
+    if isinstance(ocr_data, str):
+        try:
+            ocr_data = json.loads(ocr_data)
+        except json.JSONDecodeError:
+            st.error("❌ JSON 格式错误,无法解析")
+            return []
+
+    # PPStructure V3 results are delegated to the generator's own parser.
+    is_ppstructure = (
+        isinstance(ocr_data, dict)
+        and 'parsing_res_list' in ocr_data
+        and 'overall_ocr_res' in ocr_data
+    )
+    if is_ppstructure:
+        st.info("🔍 检测到 PPStructure V3 格式")
+
+        try:
+            table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(ocr_data)
+            st.success(f"✅ 表格区域: {table_bbox}")
+            st.success(f"✅ 表格内文本框: {len(text_boxes)} 个")
+            return text_boxes
+        except Exception as e:
+            st.error(f"❌ 解析 PPStructure 结果失败: {e}")
+            return []
+
+    # Anything else must be a non-empty list of dicts.
+    if not isinstance(ocr_data, list):
+        st.error(f"❌ OCR 数据应该是列表,实际类型: {type(ocr_data)}")
+        return []
+
+    if len(ocr_data) == 0:
+        st.warning("⚠️ OCR 数据为空")
+        return []
+
+    # Only the first item is inspected as a format sample.
+    sample = ocr_data[0]
+    if not isinstance(sample, dict):
+        st.error(f"❌ OCR 数据项应该是字典,实际类型: {type(sample)}")
+        return []
+
+    if 'bbox' in sample:
+        return ocr_data
+
+    st.error("❌ OCR 数据缺少 'bbox' 字段")
+    st.info("💡 支持的格式示例:\n```json\n[\n  {\n    \"text\": \"文本\",\n    \"bbox\": [x1, y1, x2, y2]\n  }\n]\n```")
+    return []
+

+ 164 - 0
table_line_generator/editor/directory_selector.py

@@ -0,0 +1,164 @@
+"""
+目录模式选择器
+"""
+import streamlit as st
+import json
+from pathlib import Path
+from PIL import Image
+from typing import Dict, List
+
+from .config_loader import load_structure_from_config, build_data_source_catalog
+from .data_processor import parse_ocr_data
+from .drawing import clear_table_image_cache
+
+
+def create_directory_selector(
+    data_sources: List[Dict], 
+    global_output_config: Dict
+) -> str:
+    """
+    Render the directory-mode sidebar and load the selected catalog entry.
+
+    The selected data source, its output config, the catalog and the selected
+    index are published in ``st.session_state`` for other sections. An entry
+    is only (re)loaded when the selection differs from the last loaded one.
+
+    Args:
+        data_sources: List of data source configurations.
+        global_output_config: Global output config, used when the data source
+            has no ``output`` section of its own.
+
+    Returns:
+        str: Current mode, ``"new"`` or ``"edit"``. ``"new"`` is also returned
+        when the catalog is empty.
+    """
+    st.sidebar.subheader("目录模式")
+    
+    source_names = [src["name"] for src in data_sources]
+    selected_name = st.sidebar.selectbox(
+        "选择数据源", 
+        source_names, 
+        key="dir_mode_source"
+    )
+    source_cfg = next(src for src in data_sources if src["name"] == selected_name)
+    
+    # Publish the active data source for other sections (e.g. saving).
+    st.session_state.current_data_source = source_cfg
+    
+    # The data source's own output section takes priority over the global one.
+    output_cfg = source_cfg.get("output", global_output_config)
+    st.session_state.current_output_config = output_cfg
+    
+    output_dir = Path(output_cfg.get("directory", "output/table_structures"))
+    structure_suffix = output_cfg.get("structure_suffix", "_structure.json")
+    
+    # Build the catalog once per data source and keep it in session state.
+    catalog_key = f"catalog::{selected_name}"
+    if catalog_key not in st.session_state:
+        st.session_state[catalog_key] = build_data_source_catalog(source_cfg)
+    catalog = st.session_state[catalog_key]
+
+    if not catalog:
+        st.sidebar.warning("目录中没有 JSON 文件")
+        return "new"
+
+    # Initialize or repair the remembered index. After switching to a data
+    # source with fewer files the old index may be out of range, and
+    # st.selectbox rejects an index outside the options.
+    remembered_index = st.session_state.get('dir_selected_index', 0)
+    if not isinstance(remembered_index, int) or not 0 <= remembered_index < len(catalog):
+        remembered_index = 0
+    st.session_state.dir_selected_index = remembered_index
+
+    # File picker
+    selected = st.sidebar.selectbox(
+        "选择文件",
+        range(len(catalog)),
+        format_func=lambda i: catalog[i]["display"],
+        index=st.session_state.dir_selected_index,
+        key="dir_select_box"
+    )
+
+    # Page jump input
+    page_input = st.sidebar.number_input(
+        "页码跳转",
+        min_value=1,
+        max_value=len(catalog),
+        value=catalog[selected]["index"],
+        step=1,
+        key="dir_page_input"
+    )
+    
+    # Publish the catalog and selection (used by batch template application).
+    st.session_state.current_catalog = catalog
+    st.session_state.current_catalog_index = selected
+    
+    # Key optimization: only reload when the selected file actually changed.
+    current_entry_key = f"{selected_name}::{catalog[selected]['json']}"
+    
+    if ('last_loaded_entry' not in st.session_state or 
+        st.session_state.last_loaded_entry != current_entry_key):
+        
+        _load_catalog_entry(
+            catalog[selected], 
+            output_dir, 
+            structure_suffix, 
+            current_entry_key
+        )
+    
+    # Page jump: move the selection to the requested entry and rerun.
+    if page_input != catalog[selected]["index"]:
+        target = next(
+            (i for i, item in enumerate(catalog) if item["index"] == page_input), 
+            None
+        )
+        if target is not None:
+            st.session_state.dir_selected_index = target
+            st.rerun()
+
+    return st.session_state.get('dir_auto_mode', 'new')
+
+
+def _load_catalog_entry(entry: Dict, output_dir: Path, structure_suffix: str, entry_key: str):
+    """
+    Load one catalog entry (OCR JSON, image and saved structure) into session state.
+
+    The mode is chosen automatically: "edit" when a structure file named
+    ``<json stem><structure_suffix>`` exists in ``output_dir`` and loads
+    successfully, otherwise "new". Undo/redo history and the cached rendering
+    are reset for every newly loaded entry so that state from the previously
+    opened file cannot leak into this one.
+
+    Args:
+        entry: Catalog entry with ``json`` and ``image`` paths.
+        output_dir: Directory that holds saved structure files.
+        structure_suffix: File name suffix of saved structure files.
+        entry_key: Identifier stored as ``last_loaded_entry`` once loaded.
+    """
+    base_name = entry["json"].stem
+    structure_file = output_dir / f"{base_name}{structure_suffix}"
+    has_structure = structure_file.exists()
+
+    def discard_stale_structure():
+        """Drop the structure/generator left over from a previous entry."""
+        for stale_key in ('structure', 'generator'):
+            if stale_key in st.session_state:
+                del st.session_state[stale_key]
+    
+    # Load the OCR JSON. On failure the entry is not marked as loaded, so the
+    # next rerun tries again.
+    try:
+        with open(entry["json"], "r", encoding="utf-8") as fp:
+            raw = json.load(fp)
+        st.session_state.ocr_data = parse_ocr_data(raw)
+        st.session_state.loaded_json_name = entry["json"].name
+    except Exception as e:
+        st.error(f"❌ 加载 JSON 失败: {e}")
+        return
+
+    # History and the cached image describe the previously opened file.
+    st.session_state.undo_stack = []
+    st.session_state.redo_stack = []
+    clear_table_image_cache()
+
+    # Load the image (optional: an entry may have no matching image).
+    try:
+        if entry["image"] and entry["image"].exists():
+            st.session_state.image = Image.open(entry["image"])
+            st.session_state.loaded_image_name = entry["image"].name
+        else:
+            st.session_state.image = None
+    except Exception as e:
+        st.error(f"❌ 加载图片失败: {e}")
+        st.session_state.image = None
+
+    # Automatic mode selection
+    if has_structure:
+        st.session_state.dir_auto_mode = "edit"
+        st.session_state.loaded_config_name = structure_file.name
+        
+        try:
+            st.session_state.structure = load_structure_from_config(structure_file)
+            st.sidebar.success("✅ 编辑模式")
+        except Exception as e:
+            st.error(f"❌ 加载标注失败: {e}")
+            # Fall back to "new" mode without keeping another file's structure.
+            st.session_state.dir_auto_mode = "new"
+            discard_stale_structure()
+    else:
+        st.session_state.dir_auto_mode = "new"
+        discard_stale_structure()
+        st.sidebar.info("🆕 新建模式")
+    
+    # Mark as loaded so reruns do not reload the same entry.
+    st.session_state.last_loaded_entry = entry_key
+    st.info(f"📂 已加载: {entry['json'].name}")

+ 74 - 0
table_line_generator/editor/display_controls.py

@@ -0,0 +1,74 @@
+"""
+显示设置控件
+"""
+import streamlit as st
+from typing import Dict, Tuple
+
+
+def create_display_settings_section(display_config: Dict) -> Tuple[int, str, float, bool]:
+    """
+    Render the config-driven display settings in the sidebar.
+
+    Args:
+        display_config: Display configuration; supplies slider ranges and the
+            default values (built-in defaults are used for missing keys).
+
+    Returns:
+        tuple: (line_width, display_mode, zoom_level, show_line_numbers)
+    """
+    sidebar = st.sidebar
+    cfg = display_config.get
+
+    sidebar.divider()
+    sidebar.subheader("🖼️ 显示设置")
+
+    line_width = sidebar.slider(
+        "线条宽度",
+        min_value=int(cfg("line_width_min", 1)),
+        max_value=int(cfg("line_width_max", 5)),
+        value=int(cfg("default_line_width", 2)),
+    )
+
+    mode_choices = ["对比显示", "仅显示划线图", "仅显示原图"]
+    # The second choice (lined image only) is preselected.
+    display_mode = sidebar.radio("显示模式", mode_choices, index=1)
+
+    zoom_level = sidebar.slider(
+        "图片缩放",
+        min_value=float(cfg("zoom_min", 0.25)),
+        max_value=float(cfg("zoom_max", 2.0)),
+        value=float(cfg("default_zoom", 1.0)),
+        step=float(cfg("zoom_step", 0.25)),
+    )
+
+    numbers_default = bool(cfg("show_line_numbers", True))
+    show_line_numbers = sidebar.checkbox("显示线条编号", value=numbers_default)
+
+    return line_width, display_mode, zoom_level, show_line_numbers
+
+
+def create_undo_redo_section():
+    """Render the undo/redo buttons and the history counter in the sidebar."""
+    from .state_manager import undo_last_action, redo_last_action
+    from .drawing import clear_table_image_cache
+
+    st.sidebar.divider()
+    st.sidebar.subheader("↩️ 撤销/重做")
+
+    undo_col, redo_col = st.sidebar.columns(2)
+
+    # (column, button label, backing stack, action, confirmation message)
+    controls = (
+        (undo_col, "↩️ 撤销", "undo_stack", undo_last_action, "✅ 已撤销"),
+        (redo_col, "↪️ 重做", "redo_stack", redo_last_action, "✅ 已重做"),
+    )
+    for column, label, stack_name, action, done_message in controls:
+        with column:
+            # A button is disabled while its stack is empty.
+            nothing_to_do = len(getattr(st.session_state, stack_name)) == 0
+            if st.button(label, disabled=nothing_to_do) and action():
+                # The structure changed, so the cached rendering is stale.
+                clear_table_image_cache()
+                st.success(done_message)
+                st.rerun()
+
+    st.sidebar.info(f"📚 历史记录: {len(st.session_state.undo_stack)} 条")

+ 178 - 0
table_line_generator/editor/drawing.py

@@ -0,0 +1,178 @@
+"""
+表格线绘制功能
+"""
+
+import streamlit as st
+from PIL import Image, ImageDraw, ImageFont
+import json
+import hashlib
+
+
+def draw_table_lines_with_numbers(image, structure, line_width=2, show_numbers=True):
+    """
+    Draw numbered table lines using the structure's line coordinate lists.
+
+    Lines whose index is in ``modified_h_lines`` / ``modified_v_lines`` are
+    drawn in red, all others in blue. Row labels (``R1``, ``R2``, ...) are
+    placed left of the table; column labels (``C1``, ...) above and below it.
+
+    Args:
+        image: PIL Image object (not modified; a copy is drawn on).
+        structure: Table structure dict (with ``horizontal_lines`` and
+            ``vertical_lines``).
+        line_width: Line width in pixels.
+        show_numbers: Whether to draw the row/column labels.
+
+    Returns:
+        A copy of the image with table lines and labels drawn.
+    """
+    img_with_lines = image.copy()
+    draw = ImageDraw.Draw(img_with_lines)
+    
+    # Try the preferred font; fall back to Pillow's built-in font when the
+    # file cannot be read. Only OSError is handled so unrelated errors
+    # (and KeyboardInterrupt) are no longer swallowed.
+    try:
+        font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 20)
+    except OSError:
+        font = ImageFont.load_default()
+    
+    horizontal_lines = structure.get('horizontal_lines', [])
+    vertical_lines = structure.get('vertical_lines', [])
+    modified_h_lines = structure.get('modified_h_lines', set())
+    modified_v_lines = structure.get('modified_v_lines', set())
+    
+    # Drawing extent: the outermost lines, or the whole image when a
+    # direction has no lines.
+    x_start = vertical_lines[0] if vertical_lines else 0
+    x_end = vertical_lines[-1] if vertical_lines else img_with_lines.width
+    y_start = horizontal_lines[0] if horizontal_lines else 0
+    y_end = horizontal_lines[-1] if horizontal_lines else img_with_lines.height
+
+    def draw_label(position, text, color):
+        """Draw ``text`` at ``position`` on a white box with a black outline."""
+        bbox = draw.textbbox(position, text, font=font)
+        draw.rectangle(bbox, fill='white', outline='black')
+        draw.text(position, text, fill=color, font=font)
+    
+    # Horizontal lines
+    for idx, y in enumerate(horizontal_lines):
+        color = (255, 0, 0) if idx in modified_h_lines else (0, 0, 255)
+        draw.line([(x_start, y), (x_end, y)], fill=color, width=line_width)
+        
+        if show_numbers:
+            draw_label((x_start - 35, y - 10), f"R{idx+1}", color)
+    
+    # Vertical lines
+    for idx, x in enumerate(vertical_lines):
+        color = (255, 0, 0) if idx in modified_v_lines else (0, 0, 255)
+        draw.line([(x, y_start), (x, y_end)], fill=color, width=line_width)
+        
+        if show_numbers:
+            text = f"C{idx+1}"
+            # Column labels appear both above and below the table.
+            draw_label((x - 10, y_start - 25), text, color)
+            draw_label((x - 10, y_end + 25), text, color)
+    
+    return img_with_lines
+
+
+def draw_clean_table_lines(image, structure, line_width=2, line_color=(0, 0, 0)):
+    """
+    Draw plain table lines (the variant used for saving).
+
+    All lines share one color and no labels are drawn. When either line list
+    is empty, an unmodified copy of the image is returned.
+
+    Args:
+        image: PIL Image object (not modified; a copy is drawn on).
+        structure: Table structure dict.
+        line_width: Line width in pixels.
+        line_color: Line color, black ``(0, 0, 0)`` by default.
+
+    Returns:
+        A copy of the image with the plain table lines drawn.
+    """
+    canvas = image.copy()
+    pen = ImageDraw.Draw(canvas)
+
+    h_lines = structure.get('horizontal_lines', [])
+    v_lines = structure.get('vertical_lines', [])
+    if not (h_lines and v_lines):
+        return canvas
+
+    # The outermost lines bound every segment.
+    left, right = v_lines[0], v_lines[-1]
+    top, bottom = h_lines[0], h_lines[-1]
+
+    # Horizontal segments first, then vertical ones.
+    segments = [[(left, y), (right, y)] for y in h_lines]
+    segments += [[(x, top), (x, bottom)] for x in v_lines]
+    for segment in segments:
+        pen.line(segment, fill=line_color, width=line_width)
+
+    return canvas
+
+
+def get_structure_hash(structure, line_width, show_numbers):
+    """
+    Return an MD5 fingerprint of everything that affects the rendered lines.
+
+    Covers the line coordinates, the (sorted) modified markers, the line
+    width and the label flag; used to decide whether a redraw is needed.
+    """
+    fingerprint_source = {
+        'line_width': line_width,
+        'show_numbers': show_numbers,
+    }
+    for coords_key in ('horizontal_lines', 'vertical_lines'):
+        fingerprint_source[coords_key] = structure.get(coords_key, [])
+    # Sets are unordered, so sort the markers for a stable serialization.
+    for marker_key in ('modified_h_lines', 'modified_v_lines'):
+        fingerprint_source[marker_key] = sorted(structure.get(marker_key, set()))
+
+    serialized = json.dumps(fingerprint_source, sort_keys=True)
+    digest = hashlib.md5(serialized.encode())
+    return digest.hexdigest()
+
+
+def get_cached_table_lines_image(image, structure, line_width, show_numbers):
+    """
+    Return the numbered-lines image, redrawing only when the cache is stale.
+
+    The cache lives in ``st.session_state`` and is keyed by the structure
+    hash (line coordinates, modified markers, line width, label flag).
+
+    Args:
+        image: PIL Image object.
+        structure: Table structure dict.
+        line_width: Line width in pixels.
+        show_numbers: Whether to draw the row/column labels.
+
+    Returns:
+        The image with table lines and labels drawn.
+    """
+    state = st.session_state
+
+    # Make sure both cache slots exist.
+    for slot in ('cached_table_image', 'cached_table_hash'):
+        if slot not in state:
+            state[slot] = None
+
+    fingerprint = get_structure_hash(structure, line_width, show_numbers)
+
+    # Cache hit: same fingerprint and an image is actually stored.
+    cached = state.cached_table_image
+    if cached is not None and state.cached_table_hash == fingerprint:
+        return cached
+
+    # Cache miss: redraw and remember the result.
+    rendered = draw_table_lines_with_numbers(
+        image,
+        structure,
+        line_width=line_width,
+        show_numbers=show_numbers,
+    )
+    state.cached_table_image = rendered
+    state.cached_table_hash = fingerprint
+    return rendered
+
+
+def clear_table_image_cache():
+    """Reset the cached table image and its hash (only slots that exist)."""
+    for slot in ('cached_table_image', 'cached_table_hash'):
+        if slot in st.session_state:
+            st.session_state[slot] = None

+ 201 - 0
table_line_generator/editor/file_handlers.py

@@ -0,0 +1,201 @@
+"""
+文件上传和加载处理
+"""
+import streamlit as st
+import json
+import tempfile
+from pathlib import Path
+from PIL import Image
+
+from .data_processor import parse_ocr_data
+from .config_loader import load_structure_from_config
+from .drawing import clear_table_image_cache
+
+
+def handle_json_upload(uploaded_json):
+    """
+    Handle an uploaded OCR JSON file.
+
+    Does nothing when no file is given or when a file with the same name is
+    already loaded. On success the parsed OCR data replaces the session's
+    data and all derived state (structure, generator, history, image cache)
+    is reset; on failure an error is shown and the script run is stopped.
+    """
+    if uploaded_json is None or st.session_state.loaded_json_name == uploaded_json.name:
+        return
+
+    scalar_types = (str, int, float, bool, type(None))
+
+    try:
+        raw_data = json.load(uploaded_json)
+
+        # Short preview of the raw payload: the first five keys of a dict
+        # (non-scalar values shown as a type tag) or the first three items.
+        with st.expander("🔍 原始数据结构"):
+            if isinstance(raw_data, dict):
+                preview = {}
+                for field, value in list(raw_data.items())[:5]:
+                    if isinstance(value, scalar_types):
+                        preview[field] = value
+                    else:
+                        preview[field] = f"<{type(value).__name__}>"
+                st.json(preview)
+            else:
+                st.json(raw_data[:3] if len(raw_data) > 3 else raw_data)
+
+        ocr_data = parse_ocr_data(raw_data)
+        if not ocr_data:
+            st.error("❌ 无法解析 OCR 数据,请检查 JSON 格式")
+            st.stop()
+
+        st.session_state.ocr_data = ocr_data
+        st.session_state.loaded_json_name = uploaded_json.name
+        st.session_state.loaded_config_name = None
+
+        # Everything derived from the previous data is now stale.
+        for stale_key in ('structure', 'generator'):
+            if stale_key in st.session_state:
+                del st.session_state[stale_key]
+        st.session_state.undo_stack = []
+        st.session_state.redo_stack = []
+        clear_table_image_cache()
+
+        st.success(f"✅ 成功加载 {len(ocr_data)} 条 OCR 记录")
+
+    except Exception as e:
+        st.error(f"❌ 加载数据失败: {e}")
+        st.stop()
+
+
+def handle_image_upload(uploaded_image):
+    """
+    Handle an uploaded image file.
+
+    Does nothing when no file is given or when an image with the same name
+    is already loaded. On success the image replaces the session's image and
+    derived state (structure, generator, history, image cache) is reset; on
+    failure an error is shown and the script run is stopped.
+    """
+    if uploaded_image is None or st.session_state.loaded_image_name == uploaded_image.name:
+        return
+
+    try:
+        st.session_state.image = Image.open(uploaded_image)
+        st.session_state.loaded_image_name = uploaded_image.name
+
+        # State built for the previous image no longer applies.
+        for stale_key in ('structure', 'generator'):
+            if stale_key in st.session_state:
+                del st.session_state[stale_key]
+        st.session_state.undo_stack = []
+        st.session_state.redo_stack = []
+        clear_table_image_cache()
+
+        st.success(f"✅ 成功加载图片: {uploaded_image.name}")
+
+    except Exception as e:
+        st.error(f"❌ 加载图片失败: {e}")
+        st.stop()
+
+
+def handle_config_upload(uploaded_config):
+    """
+    Handle an uploaded structure config file.
+
+    Does nothing when no file is given or when a config with the same name
+    is already loaded. The upload is written to a temporary file so it can
+    go through ``load_structure_from_config``; the temporary file is removed
+    even when decoding or loading fails. On failure the error and traceback
+    are shown and the script run is stopped.
+    """
+    if uploaded_config is None:
+        return
+    
+    if st.session_state.loaded_config_name == uploaded_config.name:
+        return
+    
+    try:
+        tmp_path = None
+        try:
+            # Spool the upload to disk because the loader expects a path.
+            with tempfile.NamedTemporaryFile(
+                mode='w', 
+                suffix='.json', 
+                delete=False, 
+                encoding='utf-8'
+            ) as tmp:
+                tmp_path = Path(tmp.name)
+                tmp.write(uploaded_config.getvalue().decode('utf-8'))
+            
+            structure = load_structure_from_config(tmp_path)
+        finally:
+            # Always clean up, otherwise a malformed upload leaks a temp file.
+            if tmp_path is not None and tmp_path.exists():
+                tmp_path.unlink()
+        
+        st.session_state.structure = structure
+        st.session_state.loaded_config_name = uploaded_config.name
+        
+        # History and cached rendering belong to the previous structure.
+        st.session_state.undo_stack = []
+        st.session_state.redo_stack = []
+        clear_table_image_cache()
+        
+        st.success(f"✅ 成功加载配置: {uploaded_config.name}")
+        st.info(
+            f"📊 表格结构: {len(structure['rows'])}行 x {len(structure['columns'])}列\n\n"
+            f"📏 横线数: {len(structure.get('horizontal_lines', []))}\n\n"
+            f"📏 竖线数: {len(structure.get('vertical_lines', []))}"
+        )
+        
+        # Details of the loaded config
+        with st.expander("📋 配置详情"):
+            st.json({
+                "行数": len(structure['rows']),
+                "列数": len(structure['columns']),
+                "横线数": len(structure.get('horizontal_lines', [])),
+                "竖线数": len(structure.get('vertical_lines', [])),
+                "行高": structure.get('row_height'),
+                "列宽": structure.get('col_widths'),
+                "已修改的横线": list(structure.get('modified_h_lines', set())),
+                "已修改的竖线": list(structure.get('modified_v_lines', set()))
+            })
+        
+    except Exception as e:
+        st.error(f"❌ 加载配置失败: {e}")
+        import traceback
+        st.code(traceback.format_exc())
+        st.stop()
+
+
+def create_file_uploader_section(work_mode: str):
+    """
+    Render the sidebar upload widgets for the given work mode.
+
+    Args:
+        work_mode: Work mode label. ``"🆕 新建标注"`` shows the OCR JSON and
+            image uploaders; any other value shows the saved-config and
+            optional image uploaders.
+    """
+    image_types = ['jpg', 'png']
+
+    if work_mode == "🆕 新建标注":
+        st.sidebar.subheader("上传文件")
+
+        json_file = st.sidebar.file_uploader(
+            "上传OCR结果JSON", type=['json'], key="new_json"
+        )
+        image_file = st.sidebar.file_uploader(
+            "上传对应图片", type=image_types, key="new_image"
+        )
+
+        handle_json_upload(json_file)
+        handle_image_upload(image_file)
+        return
+
+    # Load an existing annotation
+    st.sidebar.subheader("加载已保存的标注")
+
+    config_file = st.sidebar.file_uploader(
+        "上传配置文件 (*_structure.json)", type=['json'], key="load_config"
+    )
+    image_file = st.sidebar.file_uploader(
+        "上传对应图片(可选)", type=image_types, key="load_image"
+    )
+
+    handle_config_upload(config_file)
+    handle_image_upload(image_file)
+
+    # Hint when a config is loaded but there is no image to draw on.
+    config_without_image = (
+        'structure' in st.session_state and st.session_state.image is None
+    )
+    if config_without_image:
+        st.warning("⚠️ 已加载配置,但未加载对应图片。请上传图片以查看效果。")
+        st.info(
+            "💡 提示:配置文件已加载,您可以:\n"
+            "1. 上传对应图片查看效果\n"
+            "2. 直接编辑配置并保存"
+        )

+ 98 - 0
table_line_generator/editor/mode_setup.py

@@ -0,0 +1,98 @@
+"""
+模式设置(新建/编辑)
+"""
+import streamlit as st
+from PIL import Image
+from typing import Dict, Tuple
+
+try:
+    from ..table_line_generator import TableLineGenerator
+except ImportError:
+    from table_line_generator import TableLineGenerator
+
+from .display_controls import create_display_settings_section, create_undo_redo_section
+from .analysis_controls import create_analysis_section
+
+
+def setup_new_annotation_mode(ocr_data, image, config: Dict) -> Tuple:
+    """
+    Shared setup for the "new annotation" mode.
+
+    Renders the analysis parameter sliders, the display settings and the
+    undo/redo section, creates the ``TableLineGenerator`` in session state
+    if it is missing, and renders the analysis section.
+
+    Args:
+        ocr_data: OCR data.
+        image: Image object.
+        config: Display configuration.
+
+    Returns:
+        tuple: (y_tolerance, x_tolerance, min_row_height, line_width, 
+                display_mode, zoom_level, show_line_numbers)
+    """
+    sidebar = st.sidebar
+
+    # Analysis parameters
+    sidebar.header("🔧 参数调整")
+    y_tolerance = sidebar.slider(
+        "Y轴聚类容差(像素)", min_value=1, max_value=20, value=5, key="new_y_tol"
+    )
+    x_tolerance = sidebar.slider(
+        "X轴聚类容差(像素)", min_value=5, max_value=50, value=10, key="new_x_tol"
+    )
+    min_row_height = sidebar.slider(
+        "最小行高(像素)", min_value=10, max_value=100, value=20, key="new_min_h"
+    )
+
+    # Display settings and history controls
+    display_values = create_display_settings_section(config)
+    line_width, display_mode, zoom_level, show_line_numbers = display_values
+    create_undo_redo_section()
+
+    # Create the generator once; stop the script run if that fails.
+    generator_missing = (
+        'generator' not in st.session_state or st.session_state.generator is None
+    )
+    if generator_missing:
+        try:
+            st.session_state.generator = TableLineGenerator(image, ocr_data)
+        except Exception as e:
+            st.error(f"❌ 初始化生成器失败: {e}")
+            st.stop()
+
+    # Analysis button
+    create_analysis_section(y_tolerance, x_tolerance, min_row_height)
+
+    return (
+        y_tolerance,
+        x_tolerance,
+        min_row_height,
+        line_width,
+        display_mode,
+        zoom_level,
+        show_line_numbers,
+    )
+
+
+def setup_edit_annotation_mode(structure: Dict, image, config: Dict) -> Tuple:
+    """
+    Shared setup for the "edit annotation" mode.
+
+    When no image is available a white placeholder canvas is created: the
+    table bounding box plus a 100 px margin, or 2000x2000 when the structure
+    has no ``table_bbox``. Then the display settings and the undo/redo
+    section are rendered.
+
+    Args:
+        structure: Table structure.
+        image: Image object (may be None).
+        config: Display configuration.
+
+    Returns:
+        tuple: (image, line_width, display_mode, zoom_level, show_line_numbers)
+    """
+    # No image: create a placeholder canvas.
+    if image is None:
+        if 'table_bbox' in structure:
+            bbox = structure['table_bbox']
+            # Coordinates loaded from JSON may be floats, but the canvas size
+            # must be integral.
+            dummy_width = int(bbox[2]) + 100
+            dummy_height = int(bbox[3]) + 100
+        else:
+            dummy_width = 2000
+            dummy_height = 2000
+        image = Image.new('RGB', (dummy_width, dummy_height), color='white')
+        st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height})")
+    
+    # Display settings and history controls
+    line_width, display_mode, zoom_level, show_line_numbers = \
+        create_display_settings_section(config)
+    create_undo_redo_section()
+    
+    return image, line_width, display_mode, zoom_level, show_line_numbers

+ 190 - 0
table_line_generator/editor/save_controls.py

@@ -0,0 +1,190 @@
+"""
+保存功能控件
+"""
+import streamlit as st
+import io
+from pathlib import Path
+from typing import Dict
+
+from .config_loader import save_structure_to_config
+from .drawing import draw_clean_table_lines
+
+
+def create_save_section(work_mode: str, structure: Dict, image, line_width: int, output_config: Dict):
+    """
+    Render the save controls; directory and file naming come from the config.
+
+    The output config of the currently selected data source (stored in
+    session state) takes priority over ``output_config``.
+
+    Args:
+        work_mode: Work mode (decides how the base file name is derived).
+        structure: Table structure.
+        image: Image object.
+        line_width: Line width in pixels.
+        output_config: Output configuration, used as a fallback.
+    """
+    st.divider()
+
+    # Prefer the active data source's output configuration.
+    uses_source_config = 'current_output_config' in st.session_state
+    if uses_source_config:
+        active_output_config = st.session_state.current_output_config
+        st.info(f"📂 保存位置:{active_output_config.get('directory', 'N/A')}")
+    else:
+        active_output_config = output_config
+
+    defaults = active_output_config.get("defaults", {})
+    builtin_colors = [
+        {"name": "黑色", "rgb": [0, 0, 0]},
+        {"name": "蓝色", "rgb": [0, 0, 255]},
+        {"name": "红色", "rgb": [255, 0, 0]},
+    ]
+    line_colors = active_output_config.get("line_colors") or builtin_colors
+
+    structure_col, image_col, color_col = st.columns(3)
+
+    with structure_col:
+        save_structure = st.checkbox(
+            "保存表格结构配置",
+            value=bool(defaults.get("save_structure", True)),
+        )
+
+    with image_col:
+        save_image = st.checkbox(
+            "保存表格线图片",
+            value=bool(defaults.get("save_image", True)),
+        )
+
+    # Preselect the configured default color, or the first color otherwise.
+    color_names = [color["name"] for color in line_colors]
+    preferred_color = defaults.get("line_color", color_names[0])
+    if preferred_color in color_names:
+        preferred_index = color_names.index(preferred_color)
+    else:
+        preferred_index = 0
+
+    with color_col:
+        line_color_option = st.selectbox(
+            "线条颜色",
+            color_names,
+            index=preferred_index,
+            label_visibility="collapsed",
+            key="save_line_color",
+        )
+
+    if not st.button("💾 保存", type="primary"):
+        return
+
+    output_dir = Path(active_output_config.get("directory", "output/table_structures"))
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    structure_suffix = active_output_config.get("structure_suffix", "_structure.json")
+    image_suffix = active_output_config.get("image_suffix", "_with_lines.png")
+
+    base_name = _determine_base_name(work_mode)
+
+    # Filled by the helpers with (file type, path) tuples.
+    saved_files = []
+
+    if save_structure:
+        _save_structure_file(
+            structure, output_dir, base_name, structure_suffix, saved_files
+        )
+
+    if save_image:
+        _save_image_file(
+            image,
+            structure,
+            line_width,
+            line_color_option,
+            line_colors,
+            output_dir,
+            base_name,
+            image_suffix,
+            saved_files,
+        )
+
+    if not saved_files:
+        return
+
+    st.success(f"✅ 已保存 {len(saved_files)} 个文件:")
+    for file_type, file_path in saved_files:
+        st.info(f"  • {file_type}: {file_path}")
+
+    # Show which data source the files belong to.
+    if 'current_data_source' in st.session_state:
+        ds = st.session_state.current_data_source
+        with st.expander("📋 数据源信息"):
+            st.json({
+                "名称": ds.get("name"),
+                "JSON目录": str(ds.get("json_dir")),
+                "图片目录": str(ds.get("image_dir")),
+                "输出目录": str(output_dir),
+            })
+
+
+def _determine_base_name(work_mode: str) -> str:
+    """
+    Determine the base name (without suffix) for saved files.
+
+    New-annotation modes use the loaded JSON file's stem. Other modes use the
+    loaded config's stem with a trailing ``_structure`` removed, then the
+    loaded image's stem. ``"table_structure"`` is the final fallback.
+    """
+    fallback = "table_structure"
+    state = st.session_state
+
+    if work_mode in ("🆕 新建标注", "new"):
+        json_name = state.loaded_json_name
+        return Path(json_name).stem if json_name else fallback
+
+    config_name = state.loaded_config_name
+    if config_name:
+        stem = Path(config_name).stem
+        marker = '_structure'
+        if stem.endswith(marker):
+            stem = stem[:-len(marker)]
+        return stem
+
+    image_name = state.loaded_image_name
+    if image_name:
+        return Path(image_name).stem
+    return fallback
+
+
+def _save_structure_file(structure, output_dir, base_name, suffix, saved_files):
+    """
+    Save the structure config file and offer it as a download.
+
+    Args:
+        structure: Table structure to save.
+        output_dir: Target directory (a ``Path``).
+        base_name: File name without suffix.
+        suffix: Suffix appended to ``base_name`` for the file on disk.
+        saved_files: List that receives a ``(file type, path)`` tuple.
+    """
+    structure_filename = f"{base_name}{suffix}"
+    structure_path = output_dir / structure_filename
+    save_structure_to_config(structure, structure_path)
+    saved_files.append(("配置文件", structure_path))
+    
+    # The file is written as UTF-8 with non-ASCII characters kept, so it must
+    # be read back as UTF-8 rather than with the platform default encoding.
+    with open(structure_path, 'r', encoding='utf-8') as f:
+        st.download_button(
+            "📥 下载配置文件",
+            f.read(),
+            file_name=f"{base_name}_structure.json",
+            mime="application/json"
+        )
+
+
+def _save_image_file(image, structure, line_width, color_option, line_colors, 
+                     output_dir, base_name, suffix, saved_files):
+    """
+    Save the image with plain table lines and offer it as a PNG download.
+
+    Shows a warning and saves nothing when ``image`` is None. The color is
+    looked up by name in ``line_colors`` (black when the name is unknown).
+    ``saved_files`` receives a ``(file type, path)`` tuple on success.
+    """
+    if image is None:
+        st.warning("⚠️ 无法保存图片:未加载图片文件")
+        return
+
+    # Resolve the chosen color name to an RGB tuple; default to black.
+    rgb = (0, 0, 0)
+    for color in line_colors:
+        if color["name"] == color_option:
+            rgb = tuple(color["rgb"])
+            break
+
+    rendered = draw_clean_table_lines(
+        image, structure, line_width=line_width, line_color=rgb
+    )
+
+    target_path = output_dir / f"{base_name}{suffix}"
+    rendered.save(target_path)
+    saved_files.append(("表格线图片", target_path))
+
+    # Encode once more in memory as PNG for the download button.
+    png_buffer = io.BytesIO()
+    rendered.save(png_buffer, format='PNG')
+    png_buffer.seek(0)
+
+    st.download_button(
+        "📥 下载表格线图片",
+        png_buffer,
+        file_name=f"{base_name}_with_lines.png",
+        mime="image/png"
+    )

+ 69 - 0
table_line_generator/editor/state_manager.py

@@ -0,0 +1,69 @@
+"""
+状态管理(撤销/重做)
+"""
+
+import streamlit as st
+import copy
+
+
+def init_undo_stack():
+    """初始化撤销/重做栈"""
+    if 'undo_stack' not in st.session_state:
+        st.session_state.undo_stack = []
+    if 'redo_stack' not in st.session_state:
+        st.session_state.redo_stack = []
+
+
+def save_state_for_undo(structure):
+    """
+    保存当前状态到撤销栈
+    
+    Args:
+        structure: 当前表格结构
+    """
+    # 深拷贝当前结构
+    state = copy.deepcopy(structure)
+    st.session_state.undo_stack.append(state)
+    
+    # 清空重做栈
+    st.session_state.redo_stack = []
+    
+    # 限制栈深度(最多保存20个历史状态)
+    if len(st.session_state.undo_stack) > 20:
+        st.session_state.undo_stack.pop(0)
+
+
+def undo_last_action():
+    """
+    撤销上一个操作
+    
+    Returns:
+        是否成功撤销
+    """
+    if st.session_state.undo_stack:
+        # 保存当前状态到重做栈
+        current_state = copy.deepcopy(st.session_state.structure)
+        st.session_state.redo_stack.append(current_state)
+        
+        # 恢复上一个状态
+        st.session_state.structure = st.session_state.undo_stack.pop()
+        return True
+    return False
+
+
+def redo_last_action():
+    """
+    重做上一个操作
+    
+    Returns:
+        是否成功重做
+    """
+    if st.session_state.redo_stack:
+        # 保存当前状态到撤销栈
+        current_state = copy.deepcopy(st.session_state.structure)
+        st.session_state.undo_stack.append(current_state)
+        
+        # 恢复重做的状态
+        st.session_state.structure = st.session_state.redo_stack.pop()
+        return True
+    return False

+ 88 - 0
table_line_generator/editor/table_viewer.py

@@ -0,0 +1,88 @@
+"""
+表格结构渲染视图
+"""
+import streamlit as st
+from typing import Dict
+
+from .drawing import get_cached_table_lines_image
+from .viewer import show_image_with_scroll
+from .adjustments import create_adjustment_section
+
+
+def render_table_structure_view(
+    structure: Dict, 
+    image, 
+    line_width: int, 
+    display_mode: str, 
+    zoom_level: float, 
+    show_line_numbers: bool, 
+    viewport_width: int, 
+    viewport_height: int
+):
+    """
+    渲染表格结构视图(统一三种模式的显示逻辑)
+    
+    Args:
+        structure: 表格结构
+        image: 图片对象
+        line_width: 线条宽度
+        display_mode: 显示模式
+        zoom_level: 缩放级别
+        show_line_numbers: 是否显示线条编号
+        viewport_width: 视口宽度
+        viewport_height: 视口高度
+    """
+    # 绘制表格线
+    img_with_lines = get_cached_table_lines_image(
+        image, structure, line_width=line_width, show_numbers=show_line_numbers
+    )
+    
+    # 根据显示模式显示图片
+    if display_mode == "对比显示":
+        col1, col2 = st.columns(2)
+        with col1:
+            show_image_with_scroll(
+                image, "原图", 
+                viewport_width, viewport_height, zoom_level
+            )
+        with col2:
+            show_image_with_scroll(
+                img_with_lines, "表格线", 
+                viewport_width, viewport_height, zoom_level
+            )
+    
+    elif display_mode == "仅显示划线图":
+        show_image_with_scroll(
+            img_with_lines, 
+            f"表格线图 (缩放: {zoom_level:.0%})", 
+            viewport_width, 
+            viewport_height, 
+            zoom_level
+        )
+    
+    else:  # 仅显示原图
+        show_image_with_scroll(
+            image, 
+            f"原图 (缩放: {zoom_level:.0%})", 
+            viewport_width, 
+            viewport_height, 
+            zoom_level
+        )
+    
+    # 手动调整区域
+    create_adjustment_section(structure)
+    
+    # 显示详细信息
+    with st.expander("📊 表格结构详情"):
+        st.json({
+            "行数": len(structure['rows']),
+            "列数": len(structure['columns']),
+            "横线数": len(structure.get('horizontal_lines', [])),
+            "竖线数": len(structure.get('vertical_lines', [])),
+            "横线坐标": structure.get('horizontal_lines', []),
+            "竖线坐标": structure.get('vertical_lines', []),
+            "标准行高": structure.get('row_height'),
+            "列宽度": structure.get('col_widths'),
+            "修改的横线": list(structure.get('modified_h_lines', set())),
+            "修改的竖线": list(structure.get('modified_v_lines', set()))
+        })

+ 634 - 0
table_line_generator/editor/ui_components_v1.py

@@ -0,0 +1,634 @@
+"""
+UI 组件
+"""
+
+import streamlit as st
+import json
+from pathlib import Path
+from PIL import Image
+import tempfile
+from typing import Dict, List
+
+try:
+    from ..table_line_generator import TableLineGenerator
+except ImportError:
+    from table_line_generator import TableLineGenerator
+
+from .config_loader import load_structure_from_config, build_data_source_catalog
+from .drawing import clear_table_image_cache
+
+def create_file_uploader_section(work_mode: str):
+    """
+    创建文件上传区域
+    
+    Args:
+        work_mode: 工作模式("🆕 新建标注" 或 "📂 加载已有标注")
+    """
+    if work_mode == "🆕 新建标注":
+        st.sidebar.subheader("上传文件")
+        uploaded_json = st.sidebar.file_uploader("上传OCR结果JSON", type=['json'], key="new_json")
+        uploaded_image = st.sidebar.file_uploader("上传对应图片", type=['jpg', 'png'], key="new_image")
+        
+        # 处理 JSON 上传
+        if uploaded_json is not None:
+            if st.session_state.loaded_json_name != uploaded_json.name:
+                try:
+                    raw_data = json.load(uploaded_json)
+                    
+                    with st.expander("🔍 原始数据结构"):
+                        if isinstance(raw_data, dict):
+                            st.json({k: f"<{type(v).__name__}>" if not isinstance(v, (str, int, float, bool, type(None))) else v 
+                                    for k, v in list(raw_data.items())[:5]})
+                        else:
+                            st.json(raw_data[:3] if len(raw_data) > 3 else raw_data)
+                    
+                    ocr_data = parse_ocr_data(raw_data)
+                    
+                    if not ocr_data:
+                        st.error("❌ 无法解析 OCR 数据,请检查 JSON 格式")
+                        st.stop()
+                    
+                    st.session_state.ocr_data = ocr_data
+                    st.session_state.loaded_json_name = uploaded_json.name
+                    st.session_state.loaded_config_name = None
+                    
+                    # 清除旧数据
+                    if 'structure' in st.session_state:
+                        del st.session_state.structure
+                    if 'generator' in st.session_state:
+                        del st.session_state.generator
+                    st.session_state.undo_stack = []
+                    st.session_state.redo_stack = []
+                    clear_table_image_cache()
+                    
+                    st.success(f"✅ 成功加载 {len(ocr_data)} 条 OCR 记录")
+                    
+                except Exception as e:
+                    st.error(f"❌ 加载数据失败: {e}")
+                    st.stop()
+        
+        # 处理图片上传
+        if uploaded_image is not None:
+            if st.session_state.loaded_image_name != uploaded_image.name:
+                try:
+                    image = Image.open(uploaded_image)
+                    st.session_state.image = image
+                    st.session_state.loaded_image_name = uploaded_image.name
+                    
+                    # 清除旧数据
+                    if 'structure' in st.session_state:
+                        del st.session_state.structure
+                    if 'generator' in st.session_state:
+                        del st.session_state.generator
+                    st.session_state.undo_stack = []
+                    st.session_state.redo_stack = []
+                    clear_table_image_cache()
+                    
+                    st.success(f"✅ 成功加载图片: {uploaded_image.name}")
+                    
+                except Exception as e:
+                    st.error(f"❌ 加载图片失败: {e}")
+                    st.stop()
+    
+    else:  # 加载已有标注
+        st.sidebar.subheader("加载已保存的标注")
+        
+        uploaded_config = st.sidebar.file_uploader(
+            "上传配置文件 (*_structure.json)",
+            type=['json'],
+            key="load_config"
+        )
+        
+        uploaded_image_for_config = st.sidebar.file_uploader(
+            "上传对应图片(可选)",
+            type=['jpg', 'png'],
+            key="load_image"
+        )
+        
+        # 处理配置文件加载
+        if uploaded_config is not None:
+            if st.session_state.loaded_config_name != uploaded_config.name:
+                try:
+                    # 创建临时文件
+                    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as tmp:
+                        tmp.write(uploaded_config.getvalue().decode('utf-8'))
+                        tmp_path = tmp.name
+                    
+                    # 加载结构
+                    structure = load_structure_from_config(Path(tmp_path))
+                    
+                    # 清理临时文件
+                    Path(tmp_path).unlink()
+                    
+                    st.session_state.structure = structure
+                    st.session_state.loaded_config_name = uploaded_config.name
+                    
+                    # 清除历史记录和缓存
+                    st.session_state.undo_stack = []
+                    st.session_state.redo_stack = []
+                    clear_table_image_cache()
+                    
+                    st.success(f"✅ 成功加载配置: {uploaded_config.name}")
+                    st.info(
+                        f"📊 表格结构: {len(structure['rows'])}行 x {len(structure['columns'])}列\n\n"
+                        f"📏 横线数: {len(structure.get('horizontal_lines', []))}\n\n"
+                        f"📏 竖线数: {len(structure.get('vertical_lines', []))}"
+                    )
+                    
+                    # 显示配置文件详情
+                    with st.expander("📋 配置详情"):
+                        st.json({
+                            "行数": len(structure['rows']),
+                            "列数": len(structure['columns']),
+                            "横线数": len(structure.get('horizontal_lines', [])),
+                            "竖线数": len(structure.get('vertical_lines', [])),
+                            "行高": structure.get('row_height'),
+                            "列宽": structure.get('col_widths'),
+                            "已修改的横线": list(structure.get('modified_h_lines', set())),
+                            "已修改的竖线": list(structure.get('modified_v_lines', set()))
+                        })
+                    
+                except Exception as e:
+                    st.error(f"❌ 加载配置失败: {e}")
+                    import traceback
+                    st.code(traceback.format_exc())
+                    st.stop()
+        
+        # 处理图片加载
+        if uploaded_image_for_config is not None:
+            if st.session_state.loaded_image_name != uploaded_image_for_config.name:
+                try:
+                    image = Image.open(uploaded_image_for_config)
+                    st.session_state.image = image
+                    st.session_state.loaded_image_name = uploaded_image_for_config.name
+                    
+                    clear_table_image_cache()
+                    
+                    st.success(f"✅ 成功加载图片: {uploaded_image_for_config.name}")
+                    
+                except Exception as e:
+                    st.error(f"❌ 加载图片失败: {e}")
+                    st.stop()
+        
+        # 提示信息
+        if 'structure' in st.session_state and st.session_state.image is None:
+            st.warning("⚠️ 已加载配置,但未加载对应图片。请上传图片以查看效果。")
+            st.info("💡 提示:配置文件已加载,您可以:\n1. 上传对应图片查看效果\n2. 直接编辑配置并保存")
+
+
+def create_display_settings_section(display_config: Dict):
+    """显示设置(由配置驱动)"""
+    st.sidebar.divider()
+    st.sidebar.subheader("🖼️ 显示设置")
+
+    line_width = st.sidebar.slider(
+        "线条宽度",
+        int(display_config.get("line_width_min", 1)),
+        int(display_config.get("line_width_max", 5)),
+        int(display_config.get("default_line_width", 2)),
+    )
+    display_mode = st.sidebar.radio(
+        "显示模式",
+        ["对比显示", "仅显示划线图", "仅显示原图"],
+        index=1,
+    )
+    zoom_level = st.sidebar.slider(
+        "图片缩放",
+        float(display_config.get("zoom_min", 0.25)),
+        float(display_config.get("zoom_max", 2.0)),
+        float(display_config.get("default_zoom", 1.0)),
+        float(display_config.get("zoom_step", 0.25)),
+    )
+    show_line_numbers = st.sidebar.checkbox(
+        "显示线条编号",
+        value=bool(display_config.get("show_line_numbers", True)),
+    )
+
+    return line_width, display_mode, zoom_level, show_line_numbers
+
+
+def create_undo_redo_section():
+    """创建撤销/重做区域"""
+    from .state_manager import undo_last_action, redo_last_action
+    from .drawing import clear_table_image_cache
+    
+    st.sidebar.divider()
+    st.sidebar.subheader("↩️ 撤销/重做")
+    
+    col1, col2 = st.sidebar.columns(2)
+    with col1:
+        if st.button("↩️ 撤销", disabled=len(st.session_state.undo_stack) == 0):
+            if undo_last_action():
+                clear_table_image_cache()
+                st.success("✅ 已撤销")
+                st.rerun()
+    
+    with col2:
+        if st.button("↪️ 重做", disabled=len(st.session_state.redo_stack) == 0):
+            if redo_last_action():
+                clear_table_image_cache()
+                st.success("✅ 已重做")
+                st.rerun()
+    
+    st.sidebar.info(f"📚 历史记录: {len(st.session_state.undo_stack)} 条")
+
+
+def create_analysis_section(y_tolerance, x_tolerance, min_row_height):
+    """
+    创建分析区域
+    
+    Args:
+        y_tolerance: Y轴聚类容差
+        x_tolerance: X轴聚类容差
+        min_row_height: 最小行高
+    """
+    if st.button("🔍 分析表格结构"):
+        with st.spinner("分析中..."):
+            try:
+                generator = st.session_state.generator
+                structure = generator.analyze_table_structure(
+                    y_tolerance=y_tolerance,
+                    x_tolerance=x_tolerance,
+                    min_row_height=min_row_height
+                )
+                
+                if not structure:
+                    st.warning("⚠️ 未检测到表格结构")
+                    st.stop()
+                
+                structure['modified_h_lines'] = set()
+                structure['modified_v_lines'] = set()
+                
+                st.session_state.structure = structure
+                st.session_state.undo_stack = []
+                st.session_state.redo_stack = []
+                clear_table_image_cache()
+                
+                st.success(
+                    f"✅ 检测到 {len(structure['rows'])} 行({len(structure['horizontal_lines'])} 条横线),"
+                    f"{len(structure['columns'])} 列({len(structure['vertical_lines'])} 条竖线)"
+                )
+                
+                col1, col2, col3, col4 = st.columns(4)
+                with col1:
+                    st.metric("行数", len(structure['rows']))
+                with col2:
+                    st.metric("横线数", len(structure['horizontal_lines']))
+                with col3:
+                    st.metric("列数", len(structure['columns']))
+                with col4:
+                    st.metric("竖线数", len(structure['vertical_lines']))
+            
+            except Exception as e:
+                st.error(f"❌ 分析失败: {e}")
+                import traceback
+                st.code(traceback.format_exc())
+                st.stop()
+
+
+def create_save_section(work_mode, structure, image, line_width, output_config: Dict):
+    """
+    保存设置(目录/命名来自配置)
+    """
+    from .config_loader import save_structure_to_config
+    from .drawing import draw_clean_table_lines
+    import io
+
+    st.divider()
+
+    defaults = output_config.get("defaults", {})
+    line_colors = output_config.get("line_colors") or [
+        {"name": "黑色", "rgb": [0, 0, 0]},
+        {"name": "蓝色", "rgb": [0, 0, 255]},
+        {"name": "红色", "rgb": [255, 0, 0]},
+    ]
+
+    save_col1, save_col2, save_col3 = st.columns(3)
+
+    with save_col1:
+        save_structure = st.checkbox(
+            "保存表格结构配置",
+            value=bool(defaults.get("save_structure", True)),
+        )
+
+    with save_col2:
+        save_image = st.checkbox(
+            "保存表格线图片",
+            value=bool(defaults.get("save_image", True)),
+        )
+
+    color_names = [c["name"] for c in line_colors]
+    default_color = defaults.get("line_color", color_names[0])
+    default_index = color_names.index(default_color) if default_color in color_names else 0
+
+    with save_col3:
+        line_color_option = st.selectbox(
+            "保存时线条颜色",
+            color_names,
+            label_visibility="collapsed",
+            index=default_index,
+        )
+
+    if st.button("💾 保存", type="primary"):
+        output_dir = Path(output_config.get("directory", "output/table_structures"))
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        structure_suffix = output_config.get("structure_suffix", "_structure.json")
+        image_suffix = output_config.get("image_suffix", "_with_lines.png")
+
+        # 确定文件名
+        if work_mode == "🆕 新建标注":
+            if st.session_state.loaded_json_name:
+                base_name = Path(st.session_state.loaded_json_name).stem
+            else:
+                base_name = "table_structure"
+        else:
+            if st.session_state.loaded_config_name:
+                base_name = Path(st.session_state.loaded_config_name).stem
+                if base_name.endswith('_structure'):
+                    base_name = base_name[:-10]
+            elif st.session_state.loaded_image_name:
+                base_name = Path(st.session_state.loaded_image_name).stem
+            else:
+                base_name = "table_structure"
+        
+        saved_files = []
+        
+        if save_structure:
+            structure_filename = f"{base_name}{structure_suffix}"
+            structure_path = output_dir / structure_filename
+            save_structure_to_config(structure, structure_path)
+            saved_files.append(("配置文件", structure_path))
+            
+            with open(structure_path, 'r') as f:
+                st.download_button(
+                    "📥 下载配置文件",
+                    f.read(),
+                    file_name=f"{base_name}_structure.json",
+                    mime="application/json"
+                )
+        
+        if save_image:
+            if st.session_state.image is None:
+                st.warning("⚠️ 无法保存图片:未加载图片文件")
+            else:
+                selected_color_rgb = next(
+                    (tuple(c["rgb"]) for c in line_colors if c["name"] == line_color_option),
+                    (0, 0, 0),
+                )
+                clean_img = draw_clean_table_lines(
+                    st.session_state.image,
+                    structure,
+                    line_width=line_width,
+                    line_color=selected_color_rgb,
+                )
+                image_filename = f"{base_name}{image_suffix}"
+                output_image_path = output_dir / image_filename
+                clean_img.save(output_image_path)
+                saved_files.append(("表格线图片", output_image_path))
+                
+                buf = io.BytesIO()
+                clean_img.save(buf, format='PNG')
+                buf.seek(0)
+                
+                st.download_button(
+                    "📥 下载表格线图片",
+                    buf,
+                    file_name=f"{base_name}_with_lines.png",
+                    mime="image/png"
+                )
+        
+        if saved_files:
+            st.success(f"✅ 已保存 {len(saved_files)} 个文件:")
+            for file_type, file_path in saved_files:
+                st.info(f"  • {file_type}: {file_path}")
+
+def setup_new_annotation_mode(ocr_data, image, config: Dict):
+    """
+    设置新建标注模式的通用逻辑
+    
+    Args:
+        ocr_data: OCR 数据
+        image: 图片对象
+        config: 显示配置
+    
+    Returns:
+        tuple: (y_tolerance, x_tolerance, min_row_height, line_width, display_mode, zoom_level, show_line_numbers)
+    """
+    # 参数调整
+    st.sidebar.header("🔧 参数调整")
+    y_tolerance = st.sidebar.slider("Y轴聚类容差(像素)", 1, 20, 5, key="new_y_tol")
+    x_tolerance = st.sidebar.slider("X轴聚类容差(像素)", 5, 50, 10, key="new_x_tol")
+    min_row_height = st.sidebar.slider("最小行高(像素)", 10, 100, 20, key="new_min_h")
+    
+    # 显示设置
+    line_width, display_mode, zoom_level, show_line_numbers = create_display_settings_section(config)
+    create_undo_redo_section()
+    
+    # 初始化生成器
+    if 'generator' not in st.session_state or st.session_state.generator is None:
+        try:
+            generator = TableLineGenerator(image, ocr_data)
+            st.session_state.generator = generator
+        except Exception as e:
+            st.error(f"❌ 初始化生成器失败: {e}")
+            st.stop()
+    
+    # 分析按钮
+    create_analysis_section(y_tolerance, x_tolerance, min_row_height)
+    
+    return y_tolerance, x_tolerance, min_row_height, line_width, display_mode, zoom_level, show_line_numbers
+
+
+def setup_edit_annotation_mode(structure, image, config: Dict):
+    """
+    设置编辑标注模式的通用逻辑
+    
+    Args:
+        structure: 表格结构
+        image: 图片对象(可为 None)
+        config: 显示配置
+    
+    Returns:
+        tuple: (image, line_width, display_mode, zoom_level, show_line_numbers)
+    """
+    # 如果没有图片,创建虚拟画布
+    if image is None:
+        if 'table_bbox' in structure:
+            bbox = structure['table_bbox']
+            dummy_width = bbox[2] + 100
+            dummy_height = bbox[3] + 100
+        else:
+            dummy_width = 2000
+            dummy_height = 2000
+        image = Image.new('RGB', (dummy_width, dummy_height), color='white')
+        st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height})")
+    
+    # 显示设置
+    line_width, display_mode, zoom_level, show_line_numbers = create_display_settings_section(config)
+    create_undo_redo_section()
+    
+    return image, line_width, display_mode, zoom_level, show_line_numbers
+
+
+def render_table_structure_view(structure, image, line_width, display_mode, zoom_level, show_line_numbers, 
+                                viewport_width, viewport_height):
+    """
+    渲染表格结构视图(统一三种模式的显示逻辑)
+    
+    Args:
+        structure: 表格结构
+        image: 图片对象
+        line_width: 线条宽度
+        display_mode: 显示模式
+        zoom_level: 缩放级别
+        show_line_numbers: 是否显示线条编号
+        viewport_width: 视口宽度
+        viewport_height: 视口高度
+    """
+    # 绘制表格线
+    img_with_lines = get_cached_table_lines_image(
+        image, structure, line_width=line_width, show_numbers=show_line_numbers
+    )
+    
+    # 根据显示模式显示图片
+    if display_mode == "对比显示":
+        col1, col2 = st.columns(2)
+        with col1:
+            show_image_with_scroll(image, "原图", viewport_width, viewport_height, zoom_level)
+        with col2:
+            show_image_with_scroll(img_with_lines, "表格线", viewport_width, viewport_height, zoom_level)
+    elif display_mode == "仅显示划线图":
+        show_image_with_scroll(
+            img_with_lines, 
+            f"表格线图 (缩放: {zoom_level:.0%})", 
+            viewport_width, 
+            viewport_height, 
+            zoom_level
+        )
+    else:
+        show_image_with_scroll(
+            image, 
+            f"原图 (缩放: {zoom_level:.0%})", 
+            viewport_width, 
+            viewport_height, 
+            zoom_level
+        )
+    
+    # 手动调整区域
+    create_adjustment_section(structure)
+    
+    # 显示详细信息
+    with st.expander("📊 表格结构详情"):
+        st.json({
+            "行数": len(structure['rows']),
+            "列数": len(structure['columns']),
+            "横线数": len(structure.get('horizontal_lines', [])),
+            "竖线数": len(structure.get('vertical_lines', [])),
+            "横线坐标": structure.get('horizontal_lines', []),
+            "竖线坐标": structure.get('vertical_lines', []),
+            "标准行高": structure.get('row_height'),
+            "列宽度": structure.get('col_widths'),
+            "修改的横线": list(structure.get('modified_h_lines', set())),
+            "修改的竖线": list(structure.get('modified_v_lines', set()))
+        })
+
+
+def create_directory_selector(data_sources: List[Dict], global_output_config: Dict):
+    """目录模式选择器(优化:避免重复加载)"""
+    st.sidebar.subheader("目录模式")
+    source_names = [src["name"] for src in data_sources]
+    selected_name = st.sidebar.selectbox("选择数据源", source_names, key="dir_mode_source")
+    source_cfg = next(src for src in data_sources if src["name"] == selected_name)
+    
+    output_cfg = source_cfg.get("output", global_output_config)
+    output_dir = Path(output_cfg.get("directory", "output/table_structures"))
+    structure_suffix = output_cfg.get("structure_suffix", "_structure.json")
+    
+    catalog_key = f"catalog::{selected_name}"
+    if catalog_key not in st.session_state:
+        st.session_state[catalog_key] = build_data_source_catalog(source_cfg)
+    catalog = st.session_state[catalog_key]
+
+    if not catalog:
+        st.sidebar.warning("目录中没有 JSON 文件")
+        return
+
+    if 'dir_selected_index' not in st.session_state:
+        st.session_state.dir_selected_index = 0
+
+    selected = st.sidebar.selectbox(
+        "选择文件",
+        range(len(catalog)),
+        format_func=lambda i: catalog[i]["display"],
+        index=st.session_state.dir_selected_index,
+        key="dir_select_box"
+    )
+
+    page_input = st.sidebar.number_input(
+        "页码跳转",
+        min_value=1,
+        max_value=len(catalog),
+        value=catalog[selected]["index"],
+        step=1,
+        key="dir_page_input"
+    )
+    
+    # 🔑 关键优化:只在切换文件时才重新加载
+    current_entry_key = f"{selected_name}::{catalog[selected]['json']}"
+    
+    if 'last_loaded_entry' not in st.session_state or st.session_state.last_loaded_entry != current_entry_key:
+        # 文件切换,重新加载
+        entry = catalog[selected]
+        base_name = entry["json"].stem
+        structure_file = output_dir / f"{base_name}{structure_suffix}"
+        has_structure = structure_file.exists()
+        
+        # 📂 加载 JSON
+        with open(entry["json"], "r", encoding="utf-8") as fp:
+            raw = json.load(fp)
+        st.session_state.ocr_data = parse_ocr_data(raw)
+        st.session_state.loaded_json_name = entry["json"].name
+
+        # 🖼️ 加载图片
+        if entry["image"] and entry["image"].exists():
+            st.session_state.image = Image.open(entry["image"])
+            st.session_state.loaded_image_name = entry["image"].name
+        else:
+            st.session_state.image = None
+
+        # 🎯 自动模式判断
+        if has_structure:
+            st.session_state.dir_auto_mode = "edit"
+            st.session_state.loaded_config_name = structure_file.name
+            
+            try:
+                structure = load_structure_from_config(structure_file)
+                st.session_state.structure = structure
+                st.session_state.undo_stack = []
+                st.session_state.redo_stack = []
+                clear_table_image_cache()
+                st.sidebar.success(f"✅ 编辑模式")
+            except Exception as e:
+                st.error(f"❌ 加载标注失败: {e}")
+                st.session_state.dir_auto_mode = "new"
+        else:
+            st.session_state.dir_auto_mode = "new"
+            if 'structure' in st.session_state:
+                del st.session_state.structure
+            if 'generator' in st.session_state:
+                del st.session_state.generator
+            st.sidebar.info(f"🆕 新建模式")
+        
+        # 标记已加载
+        st.session_state.last_loaded_entry = current_entry_key
+        st.info(f"📂 已加载: {entry['json'].name}")
+    
+    # 页码跳转处理
+    if page_input != catalog[selected]["index"]:
+        target = next((i for i, item in enumerate(catalog) if item["index"] == page_input), None)
+        if target is not None:
+            st.session_state.dir_selected_index = target
+            st.rerun()
+
+    return st.session_state.get('dir_auto_mode', 'new')

+ 29 - 0
table_line_generator/editor/viewer.py

@@ -0,0 +1,29 @@
+from io import BytesIO
+import base64
+import streamlit as st
+
+def show_image_with_scroll(
+    image,
+    caption="",
+    viewport_width=800,
+    viewport_height=1200,
+    zoom=1.0
+):
+    buf = BytesIO()
+    image.save(buf, format="PNG")
+    img_base64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+
+    width = image.width
+    height = image.height
+
+    html = f"""
+    <div style="border:1px solid #e0e0e0;border-radius:6px;width:{viewport_width}px;height:{viewport_height}px;
+                overflow:auto;background:#fff;margin-bottom:0.5rem;">
+        <div style="transform:scale({zoom});transform-origin:top left;width:{width}px;height:{height}px;">
+            <img src="data:image/png;base64,{img_base64}" style="width:{width}px;max-width:none;display:block;" />
+        </div>
+    </div>
+    """
+    st.markdown(html, unsafe_allow_html=True)
+    if caption:
+        st.caption(caption)

+ 202 - 879
table_line_generator/streamlit_table_line_editor.py

@@ -5,339 +5,70 @@
 
 import streamlit as st
 from pathlib import Path
-import json
-from PIL import Image, ImageDraw, ImageFont
-import numpy as np
-import copy
+from PIL import Image
+import yaml
+from typing import Dict, List, Optional, Tuple
+import argparse
+import sys
 
 try:
-    from .table_line_generator import TableLineGenerator
-except ImportError:
     from table_line_generator import TableLineGenerator
+except ImportError:
+    from .table_line_generator import TableLineGenerator
 
+# 导入编辑器模块
+from editor import (
+    # UI 组件
+    create_file_uploader_section,
+    create_display_settings_section,
+    create_undo_redo_section,
+    create_analysis_section,
+    create_save_section,
+    create_directory_selector,
+    # 新增的模块功能
+    setup_new_annotation_mode,
+    setup_edit_annotation_mode,
+    render_table_structure_view,
+    
+    # 绘图
+    get_cached_table_lines_image,
+    
+    # 状态管理
+    init_undo_stack,
+    
+    # 调整
+    create_adjustment_section,
+    show_image_with_scroll,
 
-def parse_ocr_data(ocr_data):
-    """解析OCR数据,支持多种格式"""
-    # 如果是字符串,尝试解析
-    if isinstance(ocr_data, str):
-        try:
-            ocr_data = json.loads(ocr_data)
-        except json.JSONDecodeError:
-            st.error("❌ JSON 格式错误,无法解析")
-            return []
-    
-    # 检查是否为 PPStructure V3 格式
-    if isinstance(ocr_data, dict) and 'parsing_res_list' in ocr_data and 'overall_ocr_res' in ocr_data:
-        st.info("🔍 检测到 PPStructure V3 格式")
-        
-        try:
-            table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(ocr_data)
-            st.success(f"✅ 表格区域: {table_bbox}")
-            st.success(f"✅ 表格内文本框: {len(text_boxes)} 个")
-            return text_boxes
-        except Exception as e:
-            st.error(f"❌ 解析 PPStructure 结果失败: {e}")
-            return []
-    
-    # 确保是列表
-    if not isinstance(ocr_data, list):
-        st.error(f"❌ OCR 数据应该是列表,实际类型: {type(ocr_data)}")
-        return []
-    
-    if not ocr_data:
-        st.warning("⚠️ OCR 数据为空")
-        return []
-    
-    first_item = ocr_data[0]
-    if not isinstance(first_item, dict):
-        st.error(f"❌ OCR 数据项应该是字典,实际类型: {type(first_item)}")
-        return []
-    
-    if 'bbox' not in first_item:
-        st.error("❌ OCR 数据缺少 'bbox' 字段")
-        st.info("💡 支持的格式示例:\n```json\n[\n  {\n    \"text\": \"文本\",\n    \"bbox\": [x1, y1, x2, y2]\n  }\n]\n```")
-        return []
-    
-    return ocr_data
-
-
-def draw_table_lines_with_numbers(image, structure, line_width=2, show_numbers=True):
-    """
-    绘制带编号的表格线(使用线坐标列表)
-    
-    Args:
-        image: PIL Image 对象
-        structure: 表格结构字典(包含 horizontal_lines 和 vertical_lines)
-        line_width: 线条宽度
-        show_numbers: 是否显示编号
-    
-    Returns:
-        绘制了表格线和编号的图片
-    """
-    img_with_lines = image.copy()
-    draw = ImageDraw.Draw(img_with_lines)
-    
-    # 尝试加载字体
-    try:
-        font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 20)
-    except:
-        font = ImageFont.load_default()
-    
-    # 🆕 使用线坐标列表
-    horizontal_lines = structure.get('horizontal_lines', [])
-    vertical_lines = structure.get('vertical_lines', [])
-    modified_h_lines = structure.get('modified_h_lines', set())
-    modified_v_lines = structure.get('modified_v_lines', set())
-    
-    # 计算绘制范围
-    x_start = vertical_lines[0] if vertical_lines else 0
-    x_end = vertical_lines[-1] if vertical_lines else img_with_lines.width
-    y_start = horizontal_lines[0] if horizontal_lines else 0
-    y_end = horizontal_lines[-1] if horizontal_lines else img_with_lines.height
-    
-    # 🎨 绘制横线
-    for idx, y in enumerate(horizontal_lines):
-        color = (255, 0, 0) if idx in modified_h_lines else (0, 0, 255)
-        draw.line([(x_start, y), (x_end, y)], fill=color, width=line_width)
-        
-        # 🔢 绘制行编号
-        if show_numbers:
-            text = f"R{idx+1}"
-            bbox = draw.textbbox((x_start - 35, y - 10), text, font=font)
-            draw.rectangle(bbox, fill='white', outline='black')
-            draw.text((x_start - 35, y - 10), text, fill=color, font=font)
-    
-    # 🎨 绘制竖线
-    for idx, x in enumerate(vertical_lines):
-        color = (255, 0, 0) if idx in modified_v_lines else (0, 0, 255)
-        draw.line([(x, y_start), (x, y_end)], fill=color, width=line_width)
-        
-        # 🔢 绘制列编号
-        if show_numbers:
-            text = f"C{idx+1}"
-            bbox = draw.textbbox((x - 10, y_start - 25), text, font=font)
-            draw.rectangle(bbox, fill='white', outline='black')
-            draw.text((x - 10, y_start - 25), text, fill=color, font=font)
-            bbox = draw.textbbox((x - 10, y_end + 25), text, font=font)
-            draw.rectangle(bbox, fill='white', outline='black')
-            draw.text((x - 10, y_end + 25), text, fill=color, font=font)
-    
-    return img_with_lines
-
-
-# 🆕 新增:用于保存的纯净表格线绘制函数
-def draw_clean_table_lines(image, structure, line_width=2, line_color=(0, 0, 0)):
-    """
-    绘制纯净的表格线(用于保存)
-    - 所有线用黑色
-    - 不显示编号
-    
-    Args:
-        image: PIL Image 对象
-        structure: 表格结构字典
-        line_width: 线条宽度
-        line_color: 线条颜色,默认黑色 (0, 0, 0)
-    
-    Returns:
-        绘制了纯净表格线的图片
-    """
-    img_with_lines = image.copy()
-    draw = ImageDraw.Draw(img_with_lines)
-    
-    horizontal_lines = structure.get('horizontal_lines', [])
-    vertical_lines = structure.get('vertical_lines', [])
-    
-    if not horizontal_lines or not vertical_lines:
-        return img_with_lines
-    
-    # 计算绘制范围
-    x_start = vertical_lines[0]
-    x_end = vertical_lines[-1]
-    y_start = horizontal_lines[0]
-    y_end = horizontal_lines[-1]
-    
-    # 🖤 绘制横线(统一黑色)
-    for y in horizontal_lines:
-        draw.line([(x_start, y), (x_end, y)], fill=line_color, width=line_width)
-    
-    # 🖤 绘制竖线(统一黑色)
-    for x in vertical_lines:
-        draw.line([(x, y_start), (x, y_end)], fill=line_color, width=line_width)
-    
-    return img_with_lines
-
-
-def init_undo_stack():
-    """初始化撤销/重做栈"""
-    if 'undo_stack' not in st.session_state:
-        st.session_state.undo_stack = []
-    if 'redo_stack' not in st.session_state:
-        st.session_state.redo_stack = []
-
-
-def save_state_for_undo(structure):
-    """保存当前状态到撤销栈"""
-    # 深拷贝当前结构
-    state = copy.deepcopy(structure)
-    st.session_state.undo_stack.append(state)
-    # 清空重做栈
-    st.session_state.redo_stack = []
-    
-    # 限制栈深度(最多保存20个历史状态)
-    if len(st.session_state.undo_stack) > 20:
-        st.session_state.undo_stack.pop(0)
-
-
-def undo_last_action():
-    """撤销上一个操作"""
-    if st.session_state.undo_stack:
-        # 保存当前状态到重做栈
-        current_state = copy.deepcopy(st.session_state.structure)
-        st.session_state.redo_stack.append(current_state)
-        
-        # 恢复上一个状态
-        st.session_state.structure = st.session_state.undo_stack.pop()
-        return True
-    return False
-
-
-def redo_last_action():
-    """重做上一个操作"""
-    if st.session_state.redo_stack:
-        # 保存当前状态到撤销栈
-        current_state = copy.deepcopy(st.session_state.structure)
-        st.session_state.undo_stack.append(current_state)
-        
-        # 恢复重做的状态
-        st.session_state.structure = st.session_state.redo_stack.pop()
-        return True
-    return False
-
+    # 配置
+    load_table_editor_config,
+    build_data_source_catalog,
+    parse_table_editor_cli_args,
+    # 🆕 批量应用模板
+    create_batch_template_section,
+)
 
-def get_structure_hash(structure, line_width, show_numbers):
-    """生成结构的哈希值,用于判断是否需要重新绘制"""
-    import hashlib
-    
-    # 🔧 使用线坐标列表生成哈希
-    key_data = {
-        'horizontal_lines': structure.get('horizontal_lines', []),
-        'vertical_lines': structure.get('vertical_lines', []),
-        'modified_h_lines': sorted(list(structure.get('modified_h_lines', set()))),
-        'modified_v_lines': sorted(list(structure.get('modified_v_lines', set()))),
-        'line_width': line_width,
-        'show_numbers': show_numbers
-    }
-    
-    key_str = json.dumps(key_data, sort_keys=True)
-    return hashlib.md5(key_str.encode()).hexdigest()
+DEFAULT_CONFIG_PATH = Path(__file__).with_name("table_line_generator.yaml")
 
+@st.cache_resource  # cache the result so the CLI parser is not invoked again on later script reruns
+def get_cli_args():  # Return the editor's parsed command-line arguments.
+    return parse_table_editor_cli_args()  # parsing itself is delegated to the editor package helper
 
-def get_cached_table_lines_image(image, structure, line_width, show_numbers):
-    """
-    获取缓存的表格线图片,如果缓存不存在或失效则重新绘制
-    
-    Args:
-        image: PIL Image 对象
-        structure: 表格结构字典
-        line_width: 线条宽度
-        show_numbers: 是否显示编号
-    
-    Returns:
-        绘制了表格线和编号的图片
-    """
-    # 初始化缓存
-    if 'cached_table_image' not in st.session_state:
-        st.session_state.cached_table_image = None
-    if 'cached_table_hash' not in st.session_state:
-        st.session_state.cached_table_hash = None
-    
-    # 计算当前结构的哈希
-    current_hash = get_structure_hash(structure, line_width, show_numbers)
-    
-    # 检查缓存是否有效
-    if (st.session_state.cached_table_hash == current_hash and 
-        st.session_state.cached_table_image is not None):
-        # 缓存有效,直接返回
-        return st.session_state.cached_table_image
-    
-    # 缓存失效,重新绘制
-    img_with_lines = draw_table_lines_with_numbers(
-        image, 
-        structure, 
-        line_width=line_width,
-        show_numbers=show_numbers
+@st.cache_resource  # the loaded config is cached by Streamlit instead of being re-read on every rerun
+def get_table_editor_config():
+    """Load the table editor configuration once; st.cache_resource shares the result across reruns and sessions."""
+    cli_args = get_cli_args()
+    config_path = (
+        Path(cli_args.config).expanduser()  # explicit config path from the CLI, with "~" expanded
+        if cli_args.config
+        else DEFAULT_CONFIG_PATH  # fallback: table_line_generator.yaml located next to this script
     )
-    
-    # 更新缓存
-    st.session_state.cached_table_image = img_with_lines
-    st.session_state.cached_table_hash = current_hash
-    
-    return img_with_lines
-
-
-def clear_table_image_cache():
-    """清除表格图片缓存"""
-    if 'cached_table_image' in st.session_state:
-        st.session_state.cached_table_image = None
-    if 'cached_table_hash' in st.session_state:
-        st.session_state.cached_table_hash = None
-
-
-def load_structure_from_config(config_path: Path) -> dict:
-    """
-    从配置文件加载表格结构
-    
-    Args:
-        config_path: 配置文件路径
-    
-    Returns:
-        表格结构字典
-    """
-    with open(config_path, 'r', encoding='utf-8') as f:
-        structure = json.load(f)
-    
-    # 🔧 兼容旧版配置(补充缺失字段)
-    if 'horizontal_lines' not in structure:
-        # 从 rows 生成横线坐标
-        horizontal_lines = []
-        for row in structure.get('rows', []):
-            horizontal_lines.append(row['y_start'])
-        if structure.get('rows'):
-            horizontal_lines.append(structure['rows'][-1]['y_end'])
-        structure['horizontal_lines'] = horizontal_lines
-    
-    if 'vertical_lines' not in structure:
-        # 从 columns 生成竖线坐标
-        vertical_lines = []
-        for col in structure.get('columns', []):
-            vertical_lines.append(col['x_start'])
-        if structure.get('columns'):
-            vertical_lines.append(structure['columns'][-1]['x_end'])
-        structure['vertical_lines'] = vertical_lines
-    
-    # 🔧 转换修改标记(从列表转为集合)
-    if 'modified_h_lines' in structure:
-        structure['modified_h_lines'] = set(structure['modified_h_lines'])
-    else:
-        structure['modified_h_lines'] = set()
-    
-    if 'modified_v_lines' in structure:
-        structure['modified_v_lines'] = set(structure['modified_v_lines'])
-    else:
-        structure['modified_v_lines'] = set()
-    
-    # 🔧 转换旧版的 modified_rows/modified_cols(如果存在)
-    if 'modified_rows' in structure and not structure['modified_h_lines']:
-        structure['modified_h_lines'] = set(structure.get('modified_rows', []))
-    if 'modified_cols' in structure and not structure['modified_v_lines']:
-        structure['modified_v_lines'] = set(structure.get('modified_cols', []))
-    
-    return structure
+    return load_table_editor_config(config_path)
 
 
 def create_table_line_editor():
     """创建表格线编辑器界面"""
-    # 🆕 配置页面为宽屏模式
+    # 配置页面
     st.set_page_config(
         page_title="表格线编辑器",
         page_icon="📏",
@@ -347,7 +78,13 @@ def create_table_line_editor():
     
     st.title("📏 表格线编辑器")
     
-    # 初始化 session_state
+    # 🎯 从缓存获取配置
+    TABLE_EDITOR_CONFIG = get_table_editor_config()
+    VIEWPORT_WIDTH = TABLE_EDITOR_CONFIG["viewport"]["width"]
+    VIEWPORT_HEIGHT = TABLE_EDITOR_CONFIG["viewport"]["height"]
+    DATA_SOURCES = TABLE_EDITOR_CONFIG.get("data_sources", [])
+    
+    # 初始化 session_state(集中管理)
     if 'loaded_json_name' not in st.session_state:
         st.session_state.loaded_json_name = None
     if 'loaded_image_name' not in st.session_state:
@@ -359,179 +96,105 @@ def create_table_line_editor():
     if 'image' not in st.session_state:
         st.session_state.image = None
     
+    # 🆕 目录模式专用状态
+    if 'dir_selected_index' not in st.session_state:
+        st.session_state.dir_selected_index = 0
+    if 'last_loaded_entry' not in st.session_state:
+        st.session_state.last_loaded_entry = None
+    if 'dir_auto_mode' not in st.session_state:
+        st.session_state.dir_auto_mode = None
+    if 'current_data_source' not in st.session_state:  # 🔑 新增
+        st.session_state.current_data_source = None
+    if 'current_output_config' not in st.session_state:  # 🔑 新增
+        st.session_state.current_output_config = None
+    
     # 初始化撤销/重做栈
     init_undo_stack()
     
-    # 🆕 添加工作模式选择
+    # 🆕 工作模式选择
     st.sidebar.header("📂 工作模式")
     work_mode = st.sidebar.radio(
         "选择模式",
-        ["🆕 新建标注", "📂 加载已有标注"],
+        ["🆕 新建标注", "📂 加载已有标注", "📁 目录模式"],
         index=0
     )
     
-    if work_mode == "🆕 新建标注":
-        # 原有的上传流程
-        st.sidebar.subheader("上传文件")
-        uploaded_json = st.sidebar.file_uploader("上传OCR结果JSON", type=['json'], key="new_json")
-        uploaded_image = st.sidebar.file_uploader("上传对应图片", type=['jpg', 'png'], key="new_image")
-        
-        # 检查是否需要重新加载 JSON
-        if uploaded_json is not None:
-            if st.session_state.loaded_json_name != uploaded_json.name:
-                try:
-                    raw_data = json.load(uploaded_json)
-                    
-                    with st.expander("🔍 原始数据结构"):
-                        if isinstance(raw_data, dict):
-                            st.json({k: f"<{type(v).__name__}>" if not isinstance(v, (str, int, float, bool, type(None))) else v 
-                                    for k, v in list(raw_data.items())[:5]})
-                        else:
-                            st.json(raw_data[:3] if len(raw_data) > 3 else raw_data)
-                    
-                    ocr_data = parse_ocr_data(raw_data)
-                    
-                    if not ocr_data:
-                        st.error("❌ 无法解析 OCR 数据,请检查 JSON 格式")
-                        st.stop()
-                    
-                    st.session_state.ocr_data = ocr_data
-                    st.session_state.loaded_json_name = uploaded_json.name
-                    st.session_state.loaded_config_name = None  # 清除配置文件标记
-                    
-                    # 清除旧的分析结果、历史记录和缓存
-                    if 'structure' in st.session_state:
-                        del st.session_state.structure
-                    if 'generator' in st.session_state:
-                        del st.session_state.generator
-                    st.session_state.undo_stack = []
-                    st.session_state.redo_stack = []
-                    clear_table_image_cache()
-                    
-                    st.success(f"✅ 成功加载 {len(ocr_data)} 条 OCR 记录")
-                    
-                except Exception as e:
-                    st.error(f"❌ 加载数据失败: {e}")
-                    st.stop()
-        
-        # 检查是否需要重新加载图片
-        if uploaded_image is not None:
-            if st.session_state.loaded_image_name != uploaded_image.name:
-                try:
-                    image = Image.open(uploaded_image)
-                    
-                    st.session_state.image = image
-                    st.session_state.loaded_image_name = uploaded_image.name
-                    
-                    if 'structure' in st.session_state:
-                        del st.session_state.structure
-                    if 'generator' in st.session_state:
-                        del st.session_state.generator
-                    st.session_state.undo_stack = []
-                    st.session_state.redo_stack = []
-                    clear_table_image_cache()
-                    
-                    st.success(f"✅ 成功加载图片: {uploaded_image.name}")
-                    
-                except Exception as e:
-                    st.error(f"❌ 加载图片失败: {e}")
-                    st.stop()
-    
-    else:  # 📂 加载已有标注
-        st.sidebar.subheader("加载已保存的标注")
-        
-        # 🆕 上传配置文件
-        uploaded_config = st.sidebar.file_uploader(
-            "上传配置文件 (*_structure.json)",
-            type=['json'],
-            key="load_config"
-        )
-        
-        # 🆕 上传对应的图片(可选,用于重新标注)
-        uploaded_image_for_config = st.sidebar.file_uploader(
-            "上传对应图片(可选)",
-            type=['jpg', 'png'],
-            key="load_image"
-        )
+    # 📁 目录模式
+    if work_mode == "📁 目录模式":
+        if not DATA_SOURCES:
+            st.sidebar.warning("未配置 data_sources")
+            return
         
-        # 处理配置文件加载
-        if uploaded_config is not None:
-            if st.session_state.loaded_config_name != uploaded_config.name:
-                try:
-                    # 🔧 直接从配置文件路径加载
-                    import tempfile
-                    
-                    # 创建临时文件
-                    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as tmp:
-                        tmp.write(uploaded_config.getvalue().decode('utf-8'))
-                        tmp_path = tmp.name
-                    
-                    # 加载结构
-                    structure = load_structure_from_config(Path(tmp_path))
-                    
-                    # 清理临时文件
-                    Path(tmp_path).unlink()
-                    
-                    st.session_state.structure = structure
-                    st.session_state.loaded_config_name = uploaded_config.name
-                    
-                    # 清除历史记录和缓存
-                    st.session_state.undo_stack = []
-                    st.session_state.redo_stack = []
-                    clear_table_image_cache()
-                    
-                    st.success(f"✅ 成功加载配置: {uploaded_config.name}")
-                    st.info(
-                        f"📊 表格结构: {len(structure['rows'])}行 x {len(structure['columns'])}列\n\n"
-                        f"📏 横线数: {len(structure.get('horizontal_lines', []))}\n\n"
-                        f"📏 竖线数: {len(structure.get('vertical_lines', []))}"
-                    )
-                    
-                    # 🆕 显示配置文件详情
-                    with st.expander("📋 配置详情"):
-                        st.json({
-                            "行数": len(structure['rows']),
-                            "列数": len(structure['columns']),
-                            "横线数": len(structure.get('horizontal_lines', [])),
-                            "竖线数": len(structure.get('vertical_lines', [])),
-                            "行高": structure.get('row_height'),
-                            "列宽": structure.get('col_widths'),
-                            "已修改的横线": list(structure.get('modified_h_lines', set())),
-                            "已修改的竖线": list(structure.get('modified_v_lines', set()))
-                        })
-                    
-                except Exception as e:
-                    st.error(f"❌ 加载配置失败: {e}")
-                    import traceback
-                    st.code(traceback.format_exc())
-                    st.stop()
+        auto_mode = create_directory_selector(DATA_SOURCES, TABLE_EDITOR_CONFIG["output"])
+        
+        # 显示当前数据源
+        if st.session_state.current_data_source:
+            ds_name = st.session_state.current_data_source.get("name", "未知")
+            st.sidebar.success(f"✅ 数据源: {ds_name}")
+        
+        if auto_mode == "new":
+            if not (st.session_state.ocr_data and st.session_state.image):
+                st.warning("⚠️ 缺少必要数据")
+                return
+            _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
+                st.session_state.ocr_data,
+                st.session_state.image,
+                TABLE_EDITOR_CONFIG["display"]
+            )
+        else:  # edit
+            if 'structure' not in st.session_state:
+                st.warning("⚠️ 结构加载失败")
+                return
+            image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
+                st.session_state.structure,
+                st.session_state.image,
+                TABLE_EDITOR_CONFIG["display"]
+            )
         
-        # 处理图片加载(用于显示)
-        if uploaded_image_for_config is not None:
-            if st.session_state.loaded_image_name != uploaded_image_for_config.name:
-                try:
-                    image = Image.open(uploaded_image_for_config)
-                    st.session_state.image = image
-                    st.session_state.loaded_image_name = uploaded_image_for_config.name
-                    
-                    clear_table_image_cache()
-                    
-                    st.success(f"✅ 成功加载图片: {uploaded_image_for_config.name}")
-                    
-                except Exception as e:
-                    st.error(f"❌ 加载图片失败: {e}")
-                    st.stop()
+        # 统一渲染
+        if 'structure' in st.session_state and st.session_state.structure:
+            render_table_structure_view(
+                st.session_state.structure,
+                st.session_state.image or Image.new('RGB', (2000, 2000), 'white'),
+                line_width,  # 🔑 统一使用这个 line_width
+                display_mode,
+                zoom_level,
+                show_line_numbers,
+                VIEWPORT_WIDTH,
+                VIEWPORT_HEIGHT
+            )
+            
+            # 保存区域
+            create_save_section(
+                auto_mode,
+                st.session_state.structure,
+                st.session_state.image,
+                line_width,  # 🔑 传递给保存区域
+                st.session_state.current_output_config or TABLE_EDITOR_CONFIG["output"]
+            )
+            
+            # 🆕 批量应用模板区域(仅在 edit 模式显示)
+            if auto_mode == "edit":
+                # 🔑 获取当前的线条颜色名称(从保存区域的选择)
+                output_cfg = TABLE_EDITOR_CONFIG["output"]
+                line_colors = output_cfg.get("line_colors")
+                defaults = output_cfg.get("defaults", {})
+                default_color = defaults.get("line_color", line_colors[0]["name"])
+                
+                # 🔑 传递当前页的设置
+                create_batch_template_section(
+                    current_line_width=line_width,
+                    current_line_color=st.session_state.get('save_line_color', default_color)
+                )
         
-        # 🆕 如果配置已加载但没有图片,提示用户
-        if 'structure' in st.session_state and st.session_state.image is None:
-            st.warning("⚠️ 已加载配置,但未加载对应图片。请上传图片以查看效果。")
-            st.info("💡 提示:配置文件已加载,您可以:\n1. 上传对应图片查看效果\n2. 直接编辑配置并保存")
+        return
     
-    # 检查必要条件
+    # 🆕 新建标注模式
     if work_mode == "🆕 新建标注":
-        if st.session_state.ocr_data is None or st.session_state.image is None:
+        create_file_uploader_section(work_mode)
+        
+        if not (st.session_state.ocr_data and st.session_state.image):
             st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
-            
             with st.expander("📖 使用说明"):
                 st.markdown("""
                 ### 🆕 新建标注模式
@@ -568,23 +231,40 @@ def create_table_line_editor():
                 """)
             return
         
-        ocr_data = st.session_state.ocr_data
-        image = st.session_state.image
-        
         st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")
         
-        if 'generator' not in st.session_state or st.session_state.generator is None:
-            try:
-                generator = TableLineGenerator(image, ocr_data)
-                st.session_state.generator = generator
-            except Exception as e:
-                st.error(f"❌ 初始化失败: {e}")
-                st.stop()
+        _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
+            st.session_state.ocr_data,
+            st.session_state.image,
+            TABLE_EDITOR_CONFIG["display"]
+        )
+        
+        if 'structure' in st.session_state and st.session_state.structure:
+            render_table_structure_view(
+                st.session_state.structure,
+                st.session_state.image,
+                line_width,
+                display_mode,
+                zoom_level,
+                show_line_numbers,
+                VIEWPORT_WIDTH,
+                VIEWPORT_HEIGHT
+            )
+            create_save_section(
+                work_mode,
+                st.session_state.structure,
+                st.session_state.image,
+                line_width,
+                TABLE_EDITOR_CONFIG["output"]
+            )
+        return
     
-    else:  # 加载已有标注模式
+    # 📂 加载已有标注模式
+    if work_mode == "📂 加载已有标注":
+        create_file_uploader_section(work_mode)
+        
         if 'structure' not in st.session_state:
             st.info("👆 请在左侧上传配置文件 (*_structure.json)")
-            
             with st.expander("📖 使用说明"):
                 st.markdown("""
                 ### 📂 加载已有标注
@@ -606,396 +286,39 @@ def create_table_line_editor():
         if st.session_state.image is None:
             st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")
         
-        # 🆕 使用配置中的信息
-        structure = st.session_state.structure
-        image = st.session_state.image
-        
-        if image is None:
-            # 如果没有图片,创建一个虚拟的空白图片用于显示坐标信息
-            if 'table_bbox' in structure:
-                bbox = structure['table_bbox']
-                dummy_width = bbox[2] + 100
-                dummy_height = bbox[3] + 100
-            else:
-                dummy_width = 2000
-                dummy_height = 2000
-            
-            image = Image.new('RGB', (dummy_width, dummy_height), color='white')
-            st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height}) 显示表格结构")
-    
-    # 显示设置
-    st.sidebar.divider()
-    st.sidebar.subheader("🖼️ 显示设置")
-    
-    line_width = st.sidebar.slider("线条宽度", 1, 5, 2)
-    display_mode = st.sidebar.radio("显示模式", ["对比显示", "仅显示划线图", "仅显示原图"], index=1)
-    zoom_level = st.sidebar.slider("图片缩放", 0.25, 2.0, 1.0, 0.25)
-    show_line_numbers = st.sidebar.checkbox("显示线条编号", value=True)
-    
-    # 撤销/重做按钮
-    st.sidebar.divider()
-    st.sidebar.subheader("↩️ 撤销/重做")
-    
-    col1, col2 = st.sidebar.columns(2)
-    with col1:
-        if st.button("↩️ 撤销", disabled=len(st.session_state.undo_stack) == 0):
-            if undo_last_action():
-                clear_table_image_cache()
-                st.success("✅ 已撤销")
-                st.rerun()
-    
-    with col2:
-        if st.button("↪️ 重做", disabled=len(st.session_state.redo_stack) == 0):
-            if redo_last_action():
-                clear_table_image_cache()
-                st.success("✅ 已重做")
-                st.rerun()
-    
-    st.sidebar.info(f"📚 历史记录: {len(st.session_state.undo_stack)} 条")
-    
-    # 分析表格结构(仅在新建模式显示)
-    if work_mode == "🆕 新建标注" and st.button("🔍 分析表格结构"):
-        with st.spinner("分析中..."):
-            try:
-                generator = st.session_state.generator
-                structure = generator.analyze_table_structure(
-                    y_tolerance=y_tolerance,
-                    x_tolerance=x_tolerance,
-                    min_row_height=min_row_height
-                )
-                
-                if not structure:
-                    st.warning("⚠️ 未检测到表格结构")
-                    st.stop()
-                
-                structure['modified_h_lines'] = set()
-                structure['modified_v_lines'] = set()
-                
-                st.session_state.structure = structure
-                
-                st.session_state.undo_stack = []
-                st.session_state.redo_stack = []
-                clear_table_image_cache()
-                
-                st.success(
-                    f"✅ 检测到 {len(structure['rows'])} 行({len(structure['horizontal_lines'])} 条横线),"
-                    f"{len(structure['columns'])} 列({len(structure['vertical_lines'])} 条竖线)"
-                )
-                
-                col1, col2, col3, col4 = st.columns(4)
-                with col1:
-                    st.metric("行数", len(structure['rows']))
-                with col2:
-                    st.metric("横线数", len(structure['horizontal_lines']))
-                with col3:
-                    st.metric("列数", len(structure['columns']))
-                with col4:
-                    st.metric("竖线数", len(structure['vertical_lines']))
-            
-            except Exception as e:
-                st.error(f"❌ 分析失败: {e}")
-                import traceback
-                st.code(traceback.format_exc())
-                st.stop()
-    
-    # 显示结果(两种模式通用)
-    if 'structure' in st.session_state and st.session_state.structure:
-        structure = st.session_state.structure
-        
-        # 使用缓存机制绘制表格线
-        img_with_lines = get_cached_table_lines_image(
-            image, 
-            structure, 
-            line_width=line_width,
-            show_numbers=show_line_numbers
+        image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
+            st.session_state.structure,
+            st.session_state.image,
+            TABLE_EDITOR_CONFIG["display"]
         )
         
-        # 根据显示模式显示图片
-        if display_mode == "对比显示":
-            col1, col2 = st.columns(2)
-            with col1:
-                st.subheader("原图")
-                st.image(image, use_container_width=True)
-            
-            with col2:
-                st.subheader("添加表格线")
-                st.image(img_with_lines, use_container_width=True)
-                
-        elif display_mode == "仅显示划线图":
-            display_width = int(img_with_lines.width * zoom_level)
-            
-            st.subheader(f"表格线图 (缩放: {zoom_level:.0%})")
-            st.image(img_with_lines, width=display_width)
-            
-        else:
-            display_width = int(image.width * zoom_level)
-            
-            st.subheader(f"原图 (缩放: {zoom_level:.0%})")
-            st.image(image, width=display_width)
-        
-        # 显示详细信息
-        with st.expander("📊 表格结构详情"):
-            st.json({
-                "行数": len(structure['rows']),
-                "列数": len(structure['columns']),
-                "横线数": len(structure.get('horizontal_lines', [])),
-                "竖线数": len(structure.get('vertical_lines', [])),
-                "横线坐标": structure.get('horizontal_lines', []),
-                "竖线坐标": structure.get('vertical_lines', []),
-                "标准行高": structure.get('row_height'),
-                "列宽度": structure.get('col_widths'),
-                "修改的横线": list(structure.get('modified_h_lines', set())),
-                "修改的竖线": list(structure.get('modified_v_lines', set()))
-            })
-        
-        # 🆕 手动调整 - 使用线坐标列表
-        st.subheader("🛠️ 手动调整")
-        
-        adjust_type = st.radio(
-            "调整类型",
-            ["调整横线", "调整竖线", "添加横线", "删除横线", "添加竖线", "删除竖线"],
-            horizontal=True
+        render_table_structure_view(
+            st.session_state.structure,
+            image,
+            line_width,
+            display_mode,
+            zoom_level,
+            show_line_numbers,
+            VIEWPORT_WIDTH,
+            VIEWPORT_HEIGHT
+        )
+        create_save_section(
+            work_mode,
+            st.session_state.structure,
+            image,
+            line_width,
+            TABLE_EDITOR_CONFIG["output"]
         )
-        
-        if adjust_type == "调整横线":
-            horizontal_lines = structure.get('horizontal_lines', [])
-            if len(horizontal_lines) > 0:
-                line_index = st.selectbox(
-                    "选择横线",
-                    range(len(horizontal_lines)),
-                    format_func=lambda x: f"第 {x+1} 条横线 (Y: {horizontal_lines[x]}) {'🔴已修改' if x in structure.get('modified_h_lines', set()) else ''}"
-                )
-                
-                new_y = st.number_input(
-                    "新的Y坐标",
-                    value=int(horizontal_lines[line_index]),
-                    step=1
-                )
-                
-                if st.button("应用调整"):
-                    save_state_for_undo(structure)
-                    
-                    structure['horizontal_lines'][line_index] = new_y
-                    structure['modified_h_lines'].add(line_index)
-                    
-                    # 🔧 同步更新 rows
-                    if line_index < len(structure['rows']):
-                        structure['rows'][line_index]['y_start'] = new_y
-                    if line_index > 0:
-                        structure['rows'][line_index - 1]['y_end'] = new_y
-                    
-                    clear_table_image_cache()
-                    st.success("✅ 已调整")
-                    st.rerun()
-            else:
-                st.warning("⚠️ 没有检测到横线")
-        
-        elif adjust_type == "调整竖线":
-            vertical_lines = structure.get('vertical_lines', [])
-            if len(vertical_lines) > 0:
-                line_index = st.selectbox(
-                    "选择竖线",
-                    range(len(vertical_lines)),
-                    format_func=lambda x: f"第 {x+1} 条竖线 (X: {vertical_lines[x]}) {'🔴已修改' if x in structure.get('modified_v_lines', set()) else ''}"
-                )
-                
-                new_x = st.number_input(
-                    "新的X坐标",
-                    value=int(vertical_lines[line_index]),
-                    step=1
-                )
-                
-                if st.button("应用调整"):
-                    save_state_for_undo(structure)
-                    
-                    structure['vertical_lines'][line_index] = new_x
-                    structure['modified_v_lines'].add(line_index)
-                    
-                    # 🔧 同步更新 columns
-                    if line_index < len(structure['columns']):
-                        structure['columns'][line_index]['x_start'] = new_x
-                    if line_index > 0:
-                        structure['columns'][line_index - 1]['x_end'] = new_x
-                    
-                    clear_table_image_cache()
-                    st.success("✅ 已调整")
-                    st.rerun()
-            else:
-                st.warning("⚠️ 没有检测到竖线")
-        
-        elif adjust_type == "删除横线":
-            horizontal_lines = structure.get('horizontal_lines', [])
-            if len(horizontal_lines) > 0:
-                lines_to_delete = st.multiselect(
-                    "选择要删除的横线(可多选)",
-                    range(len(horizontal_lines)),
-                    format_func=lambda x: f"第 {x+1} 条横线 (Y: {horizontal_lines[x]}) {'🔴已修改' if x in structure.get('modified_h_lines', set()) else ''}"
-                )
-                
-                if lines_to_delete and st.button("🗑️ 批量删除", type="primary"):
-                    save_state_for_undo(structure)
-                    
-                    # 🔧 删除线坐标
-                    for idx in sorted(lines_to_delete, reverse=True):
-                        del structure['horizontal_lines'][idx]
-                    
-                    # 🔧 重新计算 rows(删除线后重建行区间)
-                    new_rows = []
-                    for i in range(len(structure['horizontal_lines']) - 1):
-                        new_rows.append({
-                            'y_start': structure['horizontal_lines'][i],
-                            'y_end': structure['horizontal_lines'][i + 1],
-                            # 'bboxes': []
-                        })
-                    structure['rows'] = new_rows
-                    
-                    # 更新修改标记
-                    structure['modified_h_lines'] = set()
-                    
-                    clear_table_image_cache()
-                    st.success(f"✅ 已删除 {len(lines_to_delete)} 条横线")
-                    st.rerun()
-                
-                st.info(f"💡 当前有 {len(horizontal_lines)} 条横线,已选择 {len(lines_to_delete)} 条")
-            else:
-                st.warning("⚠️ 没有可删除的横线")
-        
-        elif adjust_type == "删除竖线":
-            vertical_lines = structure.get('vertical_lines', [])
-            if len(vertical_lines) > 0:
-                lines_to_delete = st.multiselect(
-                    "选择要删除的竖线(可多选)",
-                    range(len(vertical_lines)),
-                    format_func=lambda x: f"第 {x+1} 条竖线 (X: {vertical_lines[x]}) {'🔴已修改' if x in structure.get('modified_v_lines', set()) else ''}"
-                )
-                
-                if lines_to_delete and st.button("🗑️ 批量删除", type="primary"):
-                    save_state_for_undo(structure)
-                    
-                    # 🔧 删除线坐标
-                    for idx in sorted(lines_to_delete, reverse=True):
-                        del structure['vertical_lines'][idx]
-                    
-                    # 🔧 重新计算 columns
-                    new_columns = []
-                    for i in range(len(structure['vertical_lines']) - 1):
-                        new_columns.append({
-                            'x_start': structure['vertical_lines'][i],
-                            'x_end': structure['vertical_lines'][i + 1]
-                        })
-                    structure['columns'] = new_columns
-                    
-                    # 重新计算列宽
-                    structure['col_widths'] = [
-                        col['x_end'] - col['x_start'] 
-                        for col in new_columns
-                    ]
-                    
-                    # 更新修改标记
-                    structure['modified_v_lines'] = set()
-                    
-                    clear_table_image_cache()
-                    st.success(f"✅ 已删除 {len(lines_to_delete)} 条竖线")
-                    st.rerun()
-                
-                st.info(f"💡 当前有 {len(vertical_lines)} 条竖线,已选择 {len(lines_to_delete)} 条")
-            else:
-                st.warning("⚠️ 没有可删除的列")
-        
-        # 保存配置
-        st.divider()
-        
-        save_col1, save_col2, save_col3 = st.columns(3)
-        
-        with save_col1:
-            save_structure = st.checkbox("保存表格结构配置", value=True)
-        
-        with save_col2:
-            save_image = st.checkbox("保存表格线图片", value=True)
-        
-        with save_col3:
-            # 🆕 线条颜色选择
-            line_color_option = st.selectbox(
-                "保存时线条颜色",
-                ["黑色", "蓝色", "红色"],
-                index=0
-            )
-        
-        if st.button("💾 保存", type="primary"):
-            output_dir = Path("output/table_structures")
-            output_dir.mkdir(parents=True, exist_ok=True)
-            
-            base_name = Path(st.session_state.loaded_image_name).stem
-            saved_files = []
-            
-            if save_structure:
-                structure_path = output_dir / f"{base_name}_structure.json"
-                
-                # 🔧 保存线坐标列表
-                save_structure_data = {
-                    'rows': structure['rows'],
-                    'columns': structure['columns'],
-                    'horizontal_lines': structure.get('horizontal_lines', []),
-                    'vertical_lines': structure.get('vertical_lines', []),
-                    'row_height': structure['row_height'],
-                    'col_widths': structure['col_widths'],
-                    'table_bbox': structure['table_bbox'],
-                    'modified_h_lines': list(structure.get('modified_h_lines', set())),
-                    'modified_v_lines': list(structure.get('modified_v_lines', set()))
-                }
-                
-                with open(structure_path, 'w', encoding='utf-8') as f:
-                    json.dump(save_structure_data, f, indent=2, ensure_ascii=False)
-                
-                saved_files.append(("配置文件", structure_path))
-                
-                with open(structure_path, 'r') as f:
-                    st.download_button(
-                        "📥 下载配置文件",
-                        f.read(),
-                        file_name=f"{base_name}_structure.json",
-                        mime="application/json"
-                    )
-            
-            if save_image:
-                # 🆕 根据选择的颜色绘制纯净表格线
-                color_map = {
-                    "黑色": (0, 0, 0),
-                    "蓝色": (0, 0, 255),
-                    "红色": (255, 0, 0)
-                }
-                selected_color = color_map[line_color_option]
-                
-                # 🎯 使用纯净绘制函数
-                clean_img = draw_clean_table_lines(
-                    image,
-                    structure,
-                    line_width=line_width,
-                    line_color=selected_color
-                )
-                
-                output_image_path = output_dir / f"{base_name}_with_lines.png"
-                clean_img.save(output_image_path)
-                saved_files.append(("表格线图片", output_image_path))
-                
-                # 🆕 提供下载按钮
-                import io
-                buf = io.BytesIO()
-                clean_img.save(buf, format='PNG')
-                buf.seek(0)
-                
-                st.download_button(
-                    "📥 下载表格线图片",
-                    buf,
-                    file_name=f"{base_name}_with_lines.png",
-                    mime="image/png"
-                )
-            
-            if saved_files:
-                st.success(f"✅ 已保存 {len(saved_files)} 个文件:")
-                for file_type, file_path in saved_files:
-                    st.info(f"  • {file_type}: {file_path}")
-
 
 if __name__ == "__main__":
-    create_table_line_editor()
+    try:
+        create_table_line_editor()
+    except GeneratorExit:
+        pass  # Streamlit internal cleanup; ignore
+    except KeyboardInterrupt:
+        st.info("👋 程序已停止")
+    except Exception as e:  # last-resort handler: report the failure in the UI with its traceback
+        st.error(f"❌ 程序崩溃: {e}")
+        import traceback
+        with st.expander("🔍 详细错误信息"):
+            st.code(traceback.format_exc())

+ 0 - 149
table_line_generator/table_line_generator.py

@@ -390,152 +390,3 @@ class TableLineGenerator:
         # 保存
         target_img.save(output_path)
         return output_path
-
-
-def generate_table_lines_from_ppstructure(
-    json_path: str,
-    output_dir: str,
-    config: Dict
-) -> Dict:
-    """
-    从 PPStructure V3 结果生成表格线
-    
-    Args:
-        json_path: PPStructure V3 结果 JSON 路径
-        output_dir: 输出目录
-        config: 配置字典
-    
-    Returns:
-        生成结果信息
-    """
-    # 1. 加载 PPStructure V3 结果
-    with open(json_path, 'r', encoding='utf-8') as f:
-        ppstructure_result = json.load(f)
-    
-    # 2. 解析表格区域和文本框
-    table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(ppstructure_result)
-    
-    print(f"✅ 表格区域: {table_bbox}")
-    print(f"✅ 表格内文本框数量: {len(text_boxes)}")
-    
-    # 3. 查找对应图片
-    json_file = Path(json_path)
-    
-    # 从 PPStructure 结果中获取原图路径
-    input_path = ppstructure_result.get('input_path')
-    if input_path and Path(input_path).exists():
-        image_path = Path(input_path)
-    else:
-        # 尝试根据 JSON 文件名查找图片
-        image_path = json_file.with_suffix('.png')
-        if not image_path.exists():
-            image_path = json_file.with_suffix('.jpg')
-    
-    if not image_path.exists():
-        raise FileNotFoundError(f"找不到图片: {image_path}")
-    
-    print(f"✅ 图片路径: {image_path}")
-    
-    # 4. 初始化表格线生成器
-    generator = TableLineGenerator(str(image_path), text_boxes)
-    
-    # 5. 分析表格结构
-    structure = generator.analyze_table_structure(
-        y_tolerance=config.get('y_tolerance', 5),
-        x_tolerance=config.get('x_tolerance', 10),
-        min_row_height=config.get('min_row_height', 20)
-    )
-    
-    print(f"✅ 检测到 {len(structure['rows'])} 行,{len(structure['columns'])} 列")
-    print(f"✅ 标准行高: {structure['row_height']}px")
-    
-    # 6. 生成表格线图片
-    img_with_lines = generator.generate_table_lines(
-        line_color=tuple(config.get('line_color', [0, 0, 255])),
-        line_width=config.get('line_width', 2)
-    )
-    
-    # 7. 保存结果
-    output_path = Path(output_dir)
-    output_path.mkdir(parents=True, exist_ok=True)
-    
-    output_image_path = output_path / f"{json_file.stem}_with_lines.jpg"
-    img_with_lines.save(output_image_path)
-    
-    # 保存表格结构配置
-    structure_path = output_path / f"{json_file.stem}_structure.json"
-    generator.save_table_structure(str(structure_path))
-    
-    return {
-        'image_with_lines': str(output_image_path),
-        'structure_config': str(structure_path),
-        'structure': structure,
-        'table_bbox': table_bbox,
-        'text_boxes_count': len(text_boxes)
-    }
-
-
-def generate_table_lines_for_page(json_path: str, 
-                                  output_dir: str,
-                                  config: Dict) -> Dict:
-    """
-    为单页生成表格线(兼容旧版接口)
-    
-    Args:
-        json_path: OCR结果JSON路径
-        output_dir: 输出目录
-        config: 配置字典
-    
-    Returns:
-        生成结果信息
-    """
-    # 加载OCR数据
-    with open(json_path, 'r', encoding='utf-8') as f:
-        ocr_data = json.load(f)
-    
-    # 判断是否为 PPStructure 结果
-    if 'parsing_res_list' in ocr_data and 'overall_ocr_res' in ocr_data:
-        # 使用新的 PPStructure 解析函数
-        return generate_table_lines_from_ppstructure(json_path, output_dir, config)
-    
-    # 查找对应图片
-    json_file = Path(json_path)
-    image_path = json_file.with_suffix('.jpg')
-    if not image_path.exists():
-        image_path = json_file.with_suffix('.png')
-    
-    if not image_path.exists():
-        raise FileNotFoundError(f"找不到图片: {image_path}")
-    
-    # 初始化表格线生成器
-    generator = TableLineGenerator(str(image_path), ocr_data)
-    
-    # 分析表格结构
-    structure = generator.analyze_table_structure(
-        y_tolerance=config.get('y_tolerance', 5),
-        x_tolerance=config.get('x_tolerance', 10),
-        min_row_height=config.get('min_row_height', 20)
-    )
-    
-    # 生成表格线图片
-    img_with_lines = generator.generate_table_lines(
-        line_color=tuple(config.get('line_color', [0, 0, 255])),
-        line_width=config.get('line_width', 2)
-    )
-    
-    # 保存
-    output_path = Path(output_dir)
-    output_path.mkdir(parents=True, exist_ok=True)
-    
-    output_image_path = output_path / f"{json_file.stem}_with_lines.jpg"
-    img_with_lines.save(output_image_path)
-    
-    # 保存表格结构配置
-    structure_path = output_path / f"{json_file.stem}_structure.json"
-    generator.save_table_structure(str(structure_path))
-    
-    return {
-        'image_with_lines': str(output_image_path),
-        'structure_config': str(structure_path),
-        'structure': structure
-    }

+ 45 - 0
table_line_generator/table_line_generator.yaml

@@ -0,0 +1,45 @@
+table_editor:
+  viewport:
+    width: 1200
+    height: 600
+  display:
+    default_zoom: 0.70
+    zoom_min: 0.20
+    zoom_max: 2.0
+    zoom_step: 0.1
+    default_line_width: 1
+    line_width_min: 1
+    line_width_max: 5
+    show_line_numbers: true
+  output:
+    directory: "output/table_structures"
+    structure_suffix: "_structure.json"
+    image_suffix: ".png"
+    defaults:
+      save_structure: true
+      save_image: true
+      line_color: "黑色"
+    line_colors:
+      - name: "黑色"
+        rgb: [0, 0, 0]
+      - name: "蓝色"
+        rgb: [0, 0, 255]
+      - name: "红色"
+        rgb: [255, 0, 0]
+
+  data_sources:
+    - name: "康强_北京农村商业银行"
+      base_dir: "/Users/zhch158/workspace/data/流水分析"
+      json_dir: "{{name}}/ppstructurev3_client_results"
+      image_dir: "{{name}}/ppstructurev3_client_results/{{name}}"
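+      # json_pattern is a regex matched against JSON file names, e.g. {{ name }}_page_(?P<page>\d{3})\.json: the template variable {{ name }} is replaced at runtime with the actual prefix, "_page_" is a literal, and the named group (?P<page>\d{3}) requires exactly three digits and captures them as "page". Backslashes are doubled below because the value is a double-quoted YAML string.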
+      json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
+      image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
+      sort_key: "page"
+      output:
+        directory: "{{base_dir}}/{{ name }}.wiredtable"
+        structure_suffix: "_structure.json"
+        image_suffix: ".png"
+    # - name: "示例文档"
+    #   json_dir: "../demo/json"
+    #   image_dir: "../demo/img"