Selaa lähdekoodia

feat: Add display controls, file handlers, and save functionality for table annotation tool

- Implemented display settings and undo/redo controls in `display_controls.py`.
- Added file upload handling for JSON, images, and configuration files in `file_handlers.py`.
- Created mode setup functions for new and edit annotation modes in `mode_setup.py`.
- Developed save controls for saving table structure and images in `save_controls.py`.
- Introduced table structure rendering logic in `table_viewer.py`.
- Created UI components for file uploading and display settings in `ui_components_v1.py`.
- Implemented a viewer for displaying images with scroll functionality in `viewer.py`.
zhch158_admin 3 päivää sitten
vanhempi
commit
d1ed886bfa

+ 69 - 16
table_line_generator/editor/__init__.py

@@ -2,47 +2,91 @@
 表格线编辑器核心模块
 """
 
-from .ui_components import (
-    parse_ocr_data,
-    create_file_uploader_section,
+# 文件处理
+from .file_handlers import create_file_uploader_section
+
+# 显示控件
+from .display_controls import (
     create_display_settings_section,
     create_undo_redo_section,
-    create_analysis_section,
-    create_save_section
 )
 
+# 分析控件
+from .analysis_controls import create_analysis_section
+
+# 保存控件
+from .save_controls import create_save_section
+
+# 模式设置
+from .mode_setup import (
+    setup_new_annotation_mode,
+    setup_edit_annotation_mode,
+)
+
+# 目录选择器
+from .directory_selector import create_directory_selector
+
+# 表格视图
+from .table_viewer import render_table_structure_view
+
+# 绘图
 from .drawing import (
     draw_table_lines_with_numbers,
     draw_clean_table_lines,
     get_cached_table_lines_image,
-    clear_table_image_cache
+    clear_table_image_cache,
 )
 
+# 状态管理
 from .state_manager import (
     init_undo_stack,
     save_state_for_undo,
     undo_last_action,
-    redo_last_action
+    redo_last_action,
 )
 
+# 调整
+from .adjustments import create_adjustment_section
+
+# 配置加载
 from .config_loader import (
     load_structure_from_config,
-    save_structure_to_config
+    save_structure_to_config,
+    load_table_editor_config,
+    parse_table_editor_cli_args,
+    build_data_source_catalog,
 )
 
-from .adjustments import (
-    create_adjustment_section
-)
+# 数据处理
+from .data_processor import parse_ocr_data
+
+# 图片查看器
+from .viewer import show_image_with_scroll
 
 __all__ = [
-    # UI 组件
-    'parse_ocr_data',
+    # 文件处理
     'create_file_uploader_section',
+    
+    # 显示控件
     'create_display_settings_section',
     'create_undo_redo_section',
+    
+    # 分析控件
     'create_analysis_section',
+    
+    # 保存控件
     'create_save_section',
     
+    # 模式设置
+    'setup_new_annotation_mode',
+    'setup_edit_annotation_mode',
+    
+    # 目录选择器
+    'create_directory_selector',
+    
+    # 表格视图
+    'render_table_structure_view',
+    
     # 绘图
     'draw_table_lines_with_numbers',
     'draw_clean_table_lines',
@@ -55,10 +99,19 @@ __all__ = [
     'undo_last_action',
     'redo_last_action',
     
-    # 配置
+    # 调整
+    'create_adjustment_section',
+    
+    # 配置加载
     'load_structure_from_config',
     'save_structure_to_config',
+    'load_table_editor_config',
+    'parse_table_editor_cli_args',
+    'build_data_source_catalog',
     
-    # 调整
-    'create_adjustment_section',
+    # 数据处理
+    'parse_ocr_data',
+    
+    # 图片查看器
+    'show_image_with_scroll',
 ]

+ 124 - 249
table_line_generator/editor/adjustments.py

@@ -2,7 +2,6 @@
 手动调整功能
 """
 
-import streamlit as st
 from .state_manager import save_state_for_undo
 from .drawing import clear_table_image_cache
 
@@ -10,266 +9,142 @@ from .drawing import clear_table_image_cache
 def create_adjustment_section(structure):
     """
     创建手动调整区域
-    
-    Args:
-        structure: 表格结构字典
-    
-    Returns:
-        是否进行了调整(用于判断是否需要重新渲染)
     """
+    import streamlit as st
+
     st.divider()
     st.header("🛠️ 手动调整")
-    
+
+    horizontal_lines = structure.get('horizontal_lines', [])
+    vertical_lines = structure.get('vertical_lines', [])
     adjusted = False
-    
-    # 横线调整
-    with st.expander("📏 调整横线位置", expanded=False):
-        horizontal_lines = structure.get('horizontal_lines', [])
-        
-        if not horizontal_lines:
-            st.warning("⚠️ 没有检测到横线")
-        else:
-            st.info(f"当前有 {len(horizontal_lines)} 条横线")
-            
-            # 选择要调整的横线
-            line_index = st.selectbox(
-                "选择横线",
-                range(len(horizontal_lines)),
-                format_func=lambda i: f"R{i+1} (Y={horizontal_lines[i]})"
-            )
-            
-            # 显示当前Y坐标
-            current_y = horizontal_lines[line_index]
-            st.text(f"当前Y坐标: {current_y}")
-            
-            # 输入新的Y坐标
-            col1, col2 = st.columns([3, 1])
-            
-            with col1:
-                new_y = st.number_input(
-                    "新的Y坐标",
-                    min_value=0,
-                    value=current_y,
-                    step=1,
-                    key=f"h_line_{line_index}"
-                )
-            
-            with col2:
-                if st.button("✅ 应用", key=f"apply_h_{line_index}"):
-                    if new_y != current_y:
-                        # 保存状态
-                        save_state_for_undo(structure)
-                        
-                        # 更新横线
-                        structure['horizontal_lines'][line_index] = new_y
-                        
-                        # 标记为已修改
-                        structure.setdefault('modified_h_lines', set()).add(line_index)
-                        
-                        # 重新计算行区间
-                        _update_row_intervals(structure)
-                        
-                        clear_table_image_cache()
-                        adjusted = True
-                        st.success(f"✅ 已更新 R{line_index+1} 到 Y={new_y}")
-                        st.rerun()
-    
-    # 竖线调整
-    with st.expander("📏 调整竖线位置", expanded=False):
-        vertical_lines = structure.get('vertical_lines', [])
-        
-        if not vertical_lines:
-            st.warning("⚠️ 没有检测到竖线")
-        else:
-            st.info(f"当前有 {len(vertical_lines)} 条竖线")
-            
-            # 选择要调整的竖线
-            line_index = st.selectbox(
-                "选择竖线",
-                range(len(vertical_lines)),
-                format_func=lambda i: f"C{i+1} (X={vertical_lines[i]})"
-            )
-            
-            # 显示当前X坐标
-            current_x = vertical_lines[line_index]
-            st.text(f"当前X坐标: {current_x}")
-            
-            # 输入新的X坐标
-            col1, col2 = st.columns([3, 1])
-            
-            with col1:
-                new_x = st.number_input(
-                    "新的X坐标",
-                    min_value=0,
-                    value=current_x,
-                    step=1,
-                    key=f"v_line_{line_index}"
-                )
-            
-            with col2:
-                if st.button("✅ 应用", key=f"apply_v_{line_index}"):
-                    if new_x != current_x:
-                        # 保存状态
-                        save_state_for_undo(structure)
-                        
-                        # 更新竖线
-                        structure['vertical_lines'][line_index] = new_x
-                        
-                        # 标记为已修改
-                        structure.setdefault('modified_v_lines', set()).add(line_index)
-                        
-                        # 重新计算列区间
-                        _update_column_intervals(structure)
-                        
-                        clear_table_image_cache()
-                        adjusted = True
-                        st.success(f"✅ 已更新 C{line_index+1} 到 X={new_x}")
-                        st.rerun()
-    
-    # 添加横线
-    with st.expander("➕ 添加横线", expanded=False):
-        horizontal_lines = structure.get('horizontal_lines', [])
-        
-        col1, col2 = st.columns([3, 1])
-        
-        with col1:
-            new_h_y = st.number_input(
-                "新横线的Y坐标",
-                min_value=0,
-                value=horizontal_lines[-1] + 50 if horizontal_lines else 100,
-                step=1,
-                key="new_h_line"
-            )
-        
-        with col2:
-            if st.button("➕ 添加", key="add_h_line"):
-                # 保存状态
-                save_state_for_undo(structure)
-                
-                # 插入新横线(保持排序)
-                horizontal_lines.append(new_h_y)
-                horizontal_lines.sort()
-                
-                # 找到新线的索引
-                new_index = horizontal_lines.index(new_h_y)
-                
-                # 标记为已修改
-                structure.setdefault('modified_h_lines', set()).add(new_index)
-                
-                # 重新计算行区间
-                _update_row_intervals(structure)
-                
-                clear_table_image_cache()
-                adjusted = True
-                st.success(f"✅ 已添加横线 Y={new_h_y}")
-                st.rerun()
-    
-    # 删除横线
-    with st.expander("🗑️ 删除横线", expanded=False):
-        horizontal_lines = structure.get('horizontal_lines', [])
-        
-        if len(horizontal_lines) <= 2:
-            st.warning("⚠️ 至少需要保留2条横线(表格顶部和底部)")
-        else:
-            # 多选要删除的横线
-            to_delete = st.multiselect(
-                "选择要删除的横线",
-                range(len(horizontal_lines)),
-                format_func=lambda i: f"R{i+1} (Y={horizontal_lines[i]})",
-                key="delete_h_lines"
-            )
-            
-            if to_delete and st.button("🗑️ 删除选中", key="confirm_delete_h"):
-                # 保存状态
+
+    # 行操作, 列操作
+    adjustment_action = st.radio(
+        "行&列操作",
+        ["调整横线", "添加横线", "删除横线", "调整竖线", "添加竖线", "删除竖线"],
+        horizontal=True,
+        index=None,
+        label_visibility="collapsed",
+        key="adjustment_action_radio"
+    )
+
+    if adjustment_action == "调整横线" and horizontal_lines:
+        line_index = st.selectbox(
+            "选择横线",
+            range(len(horizontal_lines)),
+            format_func=lambda i: f"R{i+1} (Y={horizontal_lines[i]})",
+            key="adjust_h_select"
+        )
+        new_y = st.number_input(
+            "新的Y坐标",
+            min_value=0,
+            value=horizontal_lines[line_index],
+            step=1,
+            key="adjust_h_value"
+        )
+        if st.button("✅ 应用横线调整"):
+            if new_y != horizontal_lines[line_index]:
                 save_state_for_undo(structure)
-                
-                # 删除选中的横线(从后往前删)
-                for idx in sorted(to_delete, reverse=True):
-                    del horizontal_lines[idx]
-                
-                # 重新计算修改标记
-                structure['modified_h_lines'] = set()
-                
-                # 重新计算行区间
+                structure['horizontal_lines'][line_index] = new_y
+                structure.setdefault('modified_h_lines', set()).add(line_index)
                 _update_row_intervals(structure)
-                
-                clear_table_image_cache()
-                adjusted = True
-                st.success(f"✅ 已删除 {len(to_delete)} 条横线")
-                st.rerun()
-    
-    # 添加竖线
-    with st.expander("➕ 添加竖线", expanded=False):
-        vertical_lines = structure.get('vertical_lines', [])
-        
-        col1, col2 = st.columns([3, 1])
-        
-        with col1:
-            new_v_x = st.number_input(
-                "新竖线的X坐标",
-                min_value=0,
-                value=vertical_lines[-1] + 100 if vertical_lines else 100,
-                step=1,
-                key="new_v_line"
-            )
-        
-        with col2:
-            if st.button("➕ 添加", key="add_v_line"):
-                # 保存状态
-                save_state_for_undo(structure)
-                
-                # 插入新竖线(保持排序)
-                vertical_lines.append(new_v_x)
-                vertical_lines.sort()
-                
-                # 找到新线的索引
-                new_index = vertical_lines.index(new_v_x)
-                
-                # 标记为已修改
-                structure.setdefault('modified_v_lines', set()).add(new_index)
-                
-                # 重新计算列区间
-                _update_column_intervals(structure)
-                
                 clear_table_image_cache()
                 adjusted = True
-                st.success(f"✅ 已添加竖线 X={new_v_x}")
-                st.rerun()
-    
-    # 删除竖线
-    with st.expander("🗑️ 删除竖线", expanded=False):
-        vertical_lines = structure.get('vertical_lines', [])
-        
-        if len(vertical_lines) <= 2:
-            st.warning("⚠️ 至少需要保留2条竖线(表格左侧和右侧)")
-        else:
-            # 多选要删除的竖线
-            to_delete = st.multiselect(
-                "选择要删除的竖线",
-                range(len(vertical_lines)),
-                format_func=lambda i: f"C{i+1} (X={vertical_lines[i]})",
-                key="delete_v_lines"
-            )
-            
-            if to_delete and st.button("🗑️ 删除选中", key="confirm_delete_v"):
-                # 保存状态
+                st.success(f"✅ R{line_index+1} 已更新")
+    elif adjustment_action == "添加横线":
+        new_h_y = st.number_input(
+            "新横线的Y坐标",
+            min_value=0,
+            value=horizontal_lines[-1] + 50 if horizontal_lines else 100,
+            step=1,
+            key="add_h_value"
+        )
+        if st.button("➕ 确认添加横线"):
+            save_state_for_undo(structure)
+            structure['horizontal_lines'].append(new_h_y)
+            structure['horizontal_lines'].sort()
+            idx = structure['horizontal_lines'].index(new_h_y)
+            structure.setdefault('modified_h_lines', set()).add(idx)
+            _update_row_intervals(structure)
+            clear_table_image_cache()
+            adjusted = True
+            st.success(f"✅ 新增横线 Y={new_h_y}")
+    elif adjustment_action == "删除横线" and len(horizontal_lines) > 2:
+        to_delete = st.multiselect(
+            "选择要删除的横线",
+            range(len(horizontal_lines)),
+            format_func=lambda i: f"R{i+1} (Y={horizontal_lines[i]})",
+            key="del_h_select"
+        )
+        if to_delete and st.button("🗑️ 确认删除横线"):
+            save_state_for_undo(structure)
+            for idx in sorted(to_delete, reverse=True):
+                del structure['horizontal_lines'][idx]
+            structure['modified_h_lines'] = set()
+            _update_row_intervals(structure)
+            clear_table_image_cache()
+            adjusted = True
+            st.success(f"✅ 已删除 {len(to_delete)} 条横线")
+
+    elif adjustment_action == "调整竖线" and vertical_lines:
+        line_index = st.selectbox(
+            "选择竖线",
+            range(len(vertical_lines)),
+            format_func=lambda i: f"C{i+1} (X={vertical_lines[i]})",
+            key="adjust_v_select"
+        )
+        new_x = st.number_input(
+            "新的X坐标",
+            min_value=0,
+            value=vertical_lines[line_index],
+            step=1,
+            key="adjust_v_value"
+        )
+        if st.button("✅ 应用竖线调整"):
+            if new_x != vertical_lines[line_index]:
                 save_state_for_undo(structure)
-                
-                # 删除选中的竖线(从后往前删)
-                for idx in sorted(to_delete, reverse=True):
-                    del vertical_lines[idx]
-                
-                # 重新计算修改标记
-                structure['modified_v_lines'] = set()
-                
-                # 重新计算列区间
+                structure['vertical_lines'][line_index] = new_x
+                structure.setdefault('modified_v_lines', set()).add(line_index)
                 _update_column_intervals(structure)
-                
                 clear_table_image_cache()
                 adjusted = True
-                st.success(f"✅ 已删除 {len(to_delete)} 条竖线")
-                st.rerun()
-    
+                st.success(f"✅ C{line_index+1} 已更新")
+    elif adjustment_action == "添加竖线":
+        new_v_x = st.number_input(
+            "新竖线的X坐标",
+            min_value=0,
+            value=vertical_lines[-1] + 100 if vertical_lines else 100,
+            step=1,
+            key="add_v_value"
+        )
+        if st.button("➕ 确认添加竖线"):
+            save_state_for_undo(structure)
+            structure['vertical_lines'].append(new_v_x)
+            structure['vertical_lines'].sort()
+            idx = structure['vertical_lines'].index(new_v_x)
+            structure.setdefault('modified_v_lines', set()).add(idx)
+            _update_column_intervals(structure)
+            clear_table_image_cache()
+            adjusted = True
+            st.success(f"✅ 新增竖线 X={new_v_x}")
+    elif adjustment_action == "删除竖线" and len(vertical_lines) > 2:
+        to_delete = st.multiselect(
+            "选择要删除的竖线",
+            range(len(vertical_lines)),
+            format_func=lambda i: f"C{i+1} (X={vertical_lines[i]})",
+            key="del_v_select"
+        )
+        if to_delete and st.button("🗑️ 确认删除竖线"):
+            save_state_for_undo(structure)
+            for idx in sorted(to_delete, reverse=True):
+                del structure['vertical_lines'][idx]
+            structure['modified_v_lines'] = set()
+            _update_column_intervals(structure)
+            clear_table_image_cache()
+            adjusted = True
+            st.success(f"✅ 已删除 {len(to_delete)} 条竖线")
+
     return adjusted
 
 

+ 60 - 0
table_line_generator/editor/analysis_controls.py

@@ -0,0 +1,60 @@
+"""
+表格结构分析控件
+"""
+import streamlit as st
+from .drawing import clear_table_image_cache
+
+
+def create_analysis_section(y_tolerance: int, x_tolerance: int, min_row_height: int):
+    """
+    Render the "analyze table structure" button and run the analysis on click.
+
+    On click, the generator held in ``st.session_state.generator`` analyzes the
+    table. The result is stored in ``st.session_state.structure``, the undo and
+    redo stacks are emptied, the cached table-line image is cleared, and a
+    summary (success banner plus four metrics) is displayed. Any exception is
+    reported with its traceback and the script run is stopped.
+
+    Args:
+        y_tolerance: Clustering tolerance along the Y axis.
+        x_tolerance: Clustering tolerance along the X axis.
+        min_row_height: Minimum row height.
+    """
+    if not st.button("🔍 分析表格结构"):
+        return
+
+    with st.spinner("分析中..."):
+        try:
+            structure = st.session_state.generator.analyze_table_structure(
+                y_tolerance=y_tolerance,
+                x_tolerance=x_tolerance,
+                min_row_height=min_row_height,
+            )
+
+            if not structure:
+                st.warning("⚠️ 未检测到表格结构")
+                st.stop()
+
+            # Reset both modified-line marker sets to empty.
+            for marker in ('modified_h_lines', 'modified_v_lines'):
+                structure[marker] = set()
+
+            st.session_state.structure = structure
+            st.session_state.undo_stack = []
+            st.session_state.redo_stack = []
+            clear_table_image_cache()
+
+            row_count = len(structure['rows'])
+            h_line_count = len(structure['horizontal_lines'])
+            col_count = len(structure['columns'])
+            v_line_count = len(structure['vertical_lines'])
+
+            st.success(
+                f"✅ 检测到 {row_count} 行"
+                f"({h_line_count} 条横线),"
+                f"{col_count} 列"
+                f"({v_line_count} 条竖线)"
+            )
+
+            # One metric per column, in the same left-to-right order as the banner.
+            summary = (
+                ("行数", row_count),
+                ("横线数", h_line_count),
+                ("列数", col_count),
+                ("竖线数", v_line_count),
+            )
+            for column, (label, value) in zip(st.columns(4), summary):
+                with column:
+                    st.metric(label, value)
+
+        except Exception as e:
+            st.error(f"❌ 分析失败: {e}")
+            import traceback
+            st.code(traceback.format_exc())
+            st.stop()

+ 217 - 0
table_line_generator/editor/config_loader.py

@@ -2,8 +2,225 @@
 配置文件加载/保存
 """
 
+import argparse
 import json
+import sys
+import yaml
+import re
 from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+from jinja2 import Template
+
+# Built-in defaults for the table editor. load_table_editor_config() overlays the
+# "table_editor" section of the YAML config file on top of these values, one
+# top-level section ("viewport", "display", "output") at a time.
+TABLE_EDITOR_DEFAULTS = {
+    # Viewer width/height.
+    "viewport": {"width": 1200, "height": 600},
+    # Default, bounds and step for zoom; default and bounds for line width;
+    # whether line numbers are shown.
+    "display": {
+        "default_zoom": 1.0,
+        "zoom_min": 0.25,
+        "zoom_max": 2.0,
+        "zoom_step": 0.25,
+        "default_line_width": 2,
+        "line_width_min": 1,
+        "line_width_max": 5,
+        "show_line_numbers": True,
+    },
+    # Output directory and file-name suffixes for saved results.
+    "output": {
+        "directory": "output/table_structures",
+        "structure_suffix": "_structure.json",
+        "image_suffix": "_with_lines.png",
+        # Default save options; "line_color" names one of the "line_colors" entries below.
+        "defaults": {
+            "save_structure": True,
+            "save_image": True,
+            "line_color": "黑色",
+        },
+        # Selectable line colors as name + RGB triple.
+        "line_colors": [
+            {"name": "黑色", "rgb": [0, 0, 0]},
+            {"name": "蓝色", "rgb": [0, 0, 255]},
+            {"name": "红色", "rgb": [255, 0, 0]},
+        ],
+    },
+}
+
+
+def parse_table_editor_cli_args(argv: Optional[List[str]] = None):
+    """
+    Parse the table editor's command-line options, ignoring unknown arguments.
+
+    Args:
+        argv: Argument list to parse; ``sys.argv[1:]`` is used when None.
+
+    Returns:
+        The argparse namespace; ``config`` is the ``--config`` value or None.
+    """
+    cli_args = sys.argv[1:] if argv is None else argv
+
+    # add_help=False: no -h/--help option is registered on this parser.
+    parser = argparse.ArgumentParser(add_help=False)
+    parser.add_argument("--config", type=str, default=None, help="table_line_generator 配置文件路径")
+
+    known_args, _unknown = parser.parse_known_args(cli_args)
+    return known_args
+
+
+def load_table_editor_config(config_path: Path) -> Dict:
+    """
+    Load the ``table_editor`` section of a YAML config and merge it over the defaults.
+
+    A missing file is not an error: a notice is printed and the defaults are used.
+    YAML keys that are present but empty (parsed as None) are treated as absent.
+    The returned dict never shares mutable objects with ``TABLE_EDITOR_DEFAULTS``,
+    so callers may modify it freely.
+
+    Args:
+        config_path: Path to the YAML configuration file.
+
+    Returns:
+        Dict with the keys ``viewport``, ``display``, ``output`` and ``data_sources``.
+    """
+    import copy
+
+    config_path = Path(config_path)
+    table_cfg = {}
+
+    if config_path.exists():
+        with open(config_path, "r", encoding="utf-8") as fp:
+            data = yaml.safe_load(fp) or {}
+        # `table_editor:` with no value parses as None; fall back to an empty mapping.
+        table_cfg = data.get("table_editor") or {}
+    else:
+        print(f"[table_editor] config not found: {config_path}, using defaults")
+
+    def merge(section):
+        # Deep copy so nested defaults (e.g. line_colors) are not shared with the module constant.
+        merged = copy.deepcopy(TABLE_EDITOR_DEFAULTS[section])
+        merged.update(table_cfg.get(section) or {})
+        return merged
+
+    output_overrides = table_cfg.get("output") or {}
+
+    result = {
+        "viewport": merge("viewport"),
+        "display": merge("display"),
+        "output": merge("output"),
+        "data_sources": _prepare_data_sources(table_cfg.get("data_sources"), config_path.parent),
+    }
+
+    # An explicit list (even an empty one) wins; a missing or null value keeps the defaults.
+    line_colors = output_overrides.get("line_colors")
+    if line_colors is None:
+        line_colors = copy.deepcopy(TABLE_EDITOR_DEFAULTS["output"]["line_colors"])
+    result["output"]["line_colors"] = line_colors
+
+    # "defaults" is merged key by key so a partial override keeps the remaining defaults.
+    result["output"]["defaults"] = {
+        **TABLE_EDITOR_DEFAULTS["output"]["defaults"],
+        **(output_overrides.get("defaults") or {}),
+    }
+    return result
+
+
+def _compile_pattern(pattern: Optional[str], context: Dict) -> Optional[re.Pattern]:
+    """
+    Render *pattern* as a Jinja2 template with *context* and compile the result as a regex.
+
+    Returns None when *pattern* is None or empty.
+    """
+    return re.compile(Template(pattern).render(**context)) if pattern else None
+
+
+def _render_template(value, context):
+    """
+    Render *value* as a Jinja2 template using *context*.
+
+    None and numeric/boolean values are returned unchanged; anything else is
+    converted to ``str`` and rendered.
+    """
+    if value is None or isinstance(value, (bool, int, float)):
+        return value
+    template = Template(str(value))
+    return template.render(**context)
+
+
+def _resolve_path(path_str: str, base_dir: Optional[Path], config_root: Path) -> Path:
+    """
+    Turn a configured path into an absolute, resolved ``Path``.
+
+    Args:
+        path_str: Path from the configuration; ``~`` is expanded.
+        base_dir: Directory that relative paths are anchored to. May be a ``Path``
+            or a plain string; ``~`` is expanded here as well. When empty or None,
+            *config_root* is used instead.
+        config_root: Fallback anchor for relative paths.
+
+    Returns:
+        The resolved absolute path. An absolute *path_str* ignores both anchors.
+    """
+    path = Path(path_str).expanduser()
+    if not path.is_absolute():
+        # base_dir can be a raw config string such as "~/data"; normalise it so the
+        # tilde is expanded instead of becoming a literal "~" directory.
+        anchor = Path(base_dir).expanduser() if base_dir else config_root
+        path = anchor / path
+    return path.resolve()
+
+
+def _prepare_data_sources(raw_sources: Optional[List[Dict]], config_root: Path) -> List[Dict]:
+    """
+    Expand templates and resolve directories for every configured data source.
+
+    Each source must provide ``name`` and ``base_dir`` (both are exposed to the
+    templates) as well as ``json_dir`` and ``image_dir``. The returned entries
+    keep all original keys and add resolved ``json_dir``/``image_dir`` paths,
+    the template ``context``, and rendered ``json_pattern``/``image_pattern``.
+    An ``output`` section, if present, has its directory resolved and its
+    suffixes rendered.
+
+    Raises:
+        ValueError: When ``json_dir`` or ``image_dir`` is missing or renders empty.
+    """
+    prepared = []
+    for src in raw_sources or []:
+        # Template context shared by every rendered field of this source.
+        ctx = {'name': src['name'], 'base_dir': src['base_dir']}
+        base_dir_path = ctx['base_dir']
+
+        # Resolve the two mandatory directories, json_dir first.
+        resolved_dirs = {}
+        for field in ("json_dir", "image_dir"):
+            raw_value = src.get(field)
+            if raw_value is None:
+                raise ValueError(f"[table_editor] data source '{src.get('name')}' 缺少 {field}")
+            rendered = _render_template(raw_value, ctx)
+            if not rendered:
+                raise ValueError(f"[table_editor] data source '{src.get('name')}' {field} 为空")
+            resolved_dirs[field] = _resolve_path(rendered, base_dir_path, config_root)
+
+        prepared_source = {**src, **resolved_dirs, "context": ctx}
+        for pattern_key in ("json_pattern", "image_pattern"):
+            prepared_source[pattern_key] = _render_template(src.get(pattern_key), ctx)
+
+        if "output" in src:
+            # Work on a copy so the raw configuration entry stays untouched.
+            output_cfg = dict(src["output"])
+            if "directory" in output_cfg:
+                rendered_dir = _render_template(output_cfg["directory"], ctx)
+                if rendered_dir:
+                    resolved = _resolve_path(rendered_dir, base_dir_path, config_root)
+                    output_cfg["directory"] = str(resolved)
+            for suffix_key in ("structure_suffix", "image_suffix"):
+                if suffix_key in output_cfg:
+                    output_cfg[suffix_key] = _render_template(output_cfg[suffix_key], ctx)
+            prepared_source["output"] = output_cfg
+
+        prepared.append(prepared_source)
+    return prepared
+
+
+def _scan_catalog_dir(directory: Path, regex: Optional[re.Pattern], suffix: Optional[str]):
+    """Yield ``(path, page_token)`` for each regular file in *directory* that passes the name filter."""
+    for file in directory.glob("*"):
+        if not file.is_file():
+            continue
+        page_token = None
+        if regex:
+            # A regex, when given, replaces the suffix filter entirely.
+            match = regex.fullmatch(file.name)
+            if not match:
+                continue
+            # None when the pattern has no "page" group or the group did not participate.
+            page_token = match.groupdict().get("page")
+        elif suffix and not file.name.endswith(suffix):
+            continue
+        yield file, page_token
+
+
+def build_data_source_catalog(source_cfg: Dict) -> List[Dict]:
+    """
+    List the JSON files of a data source and pair each with its image.
+
+    Files are filtered by ``json_pattern``/``image_pattern`` (regexes rendered with
+    the source's template context) or, when no pattern is set, by
+    ``json_suffix``/``image_suffix``. A JSON file and an image are paired when they
+    share the same key: the regex's ``page`` group if captured, else the file stem.
+
+    Args:
+        source_cfg: Data-source configuration with at least ``json_dir`` and ``image_dir``.
+            ``sort_key`` may be ``"page"``, ``"mtime"`` or anything else (sort by stem).
+
+    Returns:
+        One dict per JSON file with ``index`` (1-based), ``display``, ``json``,
+        ``image`` (None when unmatched), ``page`` and ``page_token``.
+    """
+    json_dir = Path(source_cfg["json_dir"]).expanduser().resolve()
+    image_dir = Path(source_cfg["image_dir"]).expanduser().resolve()
+    json_suffix = source_cfg.get("json_suffix", ".json")
+    image_suffix = source_cfg.get("image_suffix", ".png")
+
+    # Prefer the prepared context; otherwise build one from "variables" plus the name.
+    context = dict(source_cfg.get("context") or {})
+    if not context:
+        context = dict(source_cfg.get("variables", {}))
+        context.setdefault("name", source_cfg.get("name", ""))
+
+    json_regex = _compile_pattern(source_cfg.get("json_pattern"), context)
+    image_regex = _compile_pattern(source_cfg.get("image_pattern"), context)
+
+    json_files = [
+        {
+            "path": file,
+            "stem": file.stem,
+            "page_token": page_token,
+            # isdecimal(), not isdigit(): isdigit() accepts characters such as "²"
+            # that int() rejects with ValueError.
+            "page": int(page_token) if page_token and page_token.isdecimal() else None,
+            "mtime": file.stat().st_mtime,
+        }
+        for file, page_token in _scan_catalog_dir(json_dir, json_regex, json_suffix)
+    ]
+
+    sort_key = source_cfg.get("sort_key", "name")
+    if sort_key == "page" and any(item["page"] is not None for item in json_files):
+        # Entries without a page number go last; the leading bool keeps ints and
+        # strings from ever being compared with each other.
+        json_files.sort(key=lambda x: (x["page"] is None, x["page"] if x["page"] is not None else x["stem"]))
+    elif sort_key == "mtime":
+        json_files.sort(key=lambda x: x["mtime"])
+    else:
+        json_files.sort(key=lambda x: x["stem"])
+
+    image_map: Dict[str, Path] = {}
+    for img, page_token in _scan_catalog_dir(image_dir, image_regex, image_suffix):
+        image_map[page_token or img.stem] = img
+
+    catalog = []
+    for idx, item in enumerate(json_files, start=1):
+        key = item["page_token"] or item["stem"]
+        catalog.append({
+            "index": idx,
+            "display": f"{idx:03d} · {key}",
+            "json": item["path"],
+            "image": image_map.get(key),
+            "page": item["page"],
+            "page_token": item["page_token"],
+        })
+    return catalog
 
 
 def load_structure_from_config(config_path: Path) -> dict:

+ 53 - 0
table_line_generator/editor/data_processor.py

@@ -0,0 +1,53 @@
+import streamlit as st
+import json
+
+# 当直接运行时
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from table_line_generator import TableLineGenerator  # 上级目录
+
+def parse_ocr_data(ocr_data):
+    """
+    Normalize OCR data into a list of text-box dicts.
+
+    Accepts a JSON string, a PPStructure V3 result dict (recognized by the keys
+    ``parsing_res_list`` and ``overall_ocr_res``), or a list of dicts whose first
+    item carries a ``bbox`` key. Problems are reported through Streamlit messages
+    and an empty list is returned.
+    """
+    # Strings are decoded first so the checks below see the parsed object.
+    if isinstance(ocr_data, str):
+        try:
+            ocr_data = json.loads(ocr_data)
+        except json.JSONDecodeError:
+            st.error("❌ JSON 格式错误,无法解析")
+            return []
+
+    is_ppstructure = isinstance(ocr_data, dict) and all(
+        key in ocr_data for key in ('parsing_res_list', 'overall_ocr_res')
+    )
+    if is_ppstructure:
+        st.info("🔍 检测到 PPStructure V3 格式")
+
+        try:
+            table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(ocr_data)
+            st.success(f"✅ 表格区域: {table_bbox}")
+            st.success(f"✅ 表格内文本框: {len(text_boxes)} 个")
+            return text_boxes
+        except Exception as e:
+            st.error(f"❌ 解析 PPStructure 结果失败: {e}")
+            return []
+
+    # Plain-list format: only the first item is inspected.
+    if not isinstance(ocr_data, list):
+        st.error(f"❌ OCR 数据应该是列表,实际类型: {type(ocr_data)}")
+    elif not ocr_data:
+        st.warning("⚠️ OCR 数据为空")
+    elif not isinstance(ocr_data[0], dict):
+        st.error(f"❌ OCR 数据项应该是字典,实际类型: {type(ocr_data[0])}")
+    elif 'bbox' not in ocr_data[0]:
+        st.error("❌ OCR 数据缺少 'bbox' 字段")
+        st.info("💡 支持的格式示例:\n```json\n[\n  {\n    \"text\": \"文本\",\n    \"bbox\": [x1, y1, x2, y2]\n  }\n]\n```")
+    else:
+        return ocr_data
+
+    return []
+

+ 147 - 0
table_line_generator/editor/directory_selector.py

@@ -0,0 +1,147 @@
+"""
+目录模式选择器
+"""
+import streamlit as st
+import json
+from pathlib import Path
+from PIL import Image
+from typing import Dict, List
+
+from .config_loader import load_structure_from_config, build_data_source_catalog
+from .data_processor import parse_ocr_data
+from .drawing import clear_table_image_cache
+
+
+def create_directory_selector(
+    data_sources: List[Dict], 
+    global_output_config: Dict
+) -> str:
+    """
+    Sidebar selector for directory mode: choose a data source, then a file in it.
+
+    The file catalog of each data source is built once and cached in
+    ``st.session_state`` under ``catalog::<source name>``. The selected entry is
+    (re)loaded only when it differs from ``st.session_state.last_loaded_entry``.
+    A page-jump input stores the target position in
+    ``st.session_state.dir_selected_index`` and reruns the script.
+
+    Args:
+        data_sources: Prepared data-source configurations (each with a ``name``).
+        global_output_config: Output settings used when a source has no ``output`` of its own.
+
+    Returns:
+        str: The current mode, ``"new"`` or ``"edit"``.
+    """
+    st.sidebar.subheader("目录模式")
+
+    # Without any data source the selectbox returns None and the lookup below
+    # would raise StopIteration.
+    if not data_sources:
+        st.sidebar.warning("未配置数据源")
+        return "new"
+
+    source_names = [src["name"] for src in data_sources]
+    selected_name = st.sidebar.selectbox(
+        "选择数据源", 
+        source_names, 
+        key="dir_mode_source"
+    )
+    source_cfg = next(src for src in data_sources if src["name"] == selected_name)
+
+    # Output settings: per-source override, else the global configuration.
+    output_cfg = source_cfg.get("output", global_output_config)
+    output_dir = Path(output_cfg.get("directory", "output/table_structures"))
+    structure_suffix = output_cfg.get("structure_suffix", "_structure.json")
+
+    # Build the catalog once per data source and cache it.
+    catalog_key = f"catalog::{selected_name}"
+    if catalog_key not in st.session_state:
+        st.session_state[catalog_key] = build_data_source_catalog(source_cfg)
+    catalog = st.session_state[catalog_key]
+
+    if not catalog:
+        st.sidebar.warning("目录中没有 JSON 文件")
+        return "new"
+
+    # Initialise the selection index. It is shared by all data sources, so it is
+    # also reset when it falls outside the current catalog (e.g. after switching
+    # to a source with fewer files); an out-of-range index makes selectbox raise.
+    if ('dir_selected_index' not in st.session_state
+            or not 0 <= st.session_state.dir_selected_index < len(catalog)):
+        st.session_state.dir_selected_index = 0
+
+    # File selection
+    selected = st.sidebar.selectbox(
+        "选择文件",
+        range(len(catalog)),
+        format_func=lambda i: catalog[i]["display"],
+        index=st.session_state.dir_selected_index,
+        key="dir_select_box"
+    )
+
+    # Page jump
+    page_input = st.sidebar.number_input(
+        "页码跳转",
+        min_value=1,
+        max_value=len(catalog),
+        value=catalog[selected]["index"],
+        step=1,
+        key="dir_page_input"
+    )
+
+    # Reload only when the selected file actually changed.
+    current_entry_key = f"{selected_name}::{catalog[selected]['json']}"
+
+    if ('last_loaded_entry' not in st.session_state or 
+        st.session_state.last_loaded_entry != current_entry_key):
+
+        _load_catalog_entry(
+            catalog[selected], 
+            output_dir, 
+            structure_suffix, 
+            current_entry_key
+        )
+
+    # Handle a page jump: find the catalog position with the requested index.
+    if page_input != catalog[selected]["index"]:
+        target = next(
+            (i for i, item in enumerate(catalog) if item["index"] == page_input), 
+            None
+        )
+        if target is not None:
+            st.session_state.dir_selected_index = target
+            st.rerun()
+
+    return st.session_state.get('dir_auto_mode', 'new')
+
+
+def _load_catalog_entry(entry: Dict, output_dir: Path, structure_suffix: str, entry_key: str):
+    """
+    Load one catalog entry (JSON + image + saved structure) into the session state.
+
+    Args:
+        entry: Catalog entry; its ``json`` and ``image`` values are used as paths
+            (``image`` may be None).
+        output_dir: Directory searched for a previously saved structure file.
+        structure_suffix: Suffix appended to the JSON stem to form the structure file name.
+        entry_key: Identifier stored in ``st.session_state.last_loaded_entry`` once loading is done.
+    """
+    base_name = entry["json"].stem
+    structure_file = output_dir / f"{base_name}{structure_suffix}"
+    has_structure = structure_file.exists()
+    
+    # Load the JSON; a read or parse error propagates to the caller.
+    with open(entry["json"], "r", encoding="utf-8") as fp:
+        raw = json.load(fp)
+    st.session_state.ocr_data = parse_ocr_data(raw)
+    st.session_state.loaded_json_name = entry["json"].name
+
+    # Load the image
+    if entry["image"] and entry["image"].exists():
+        st.session_state.image = Image.open(entry["image"])
+        st.session_state.loaded_image_name = entry["image"].name
+    else:
+        # NOTE(review): loaded_image_name is left unchanged here, so it may still
+        # name the previous entry's image — confirm whether that is intended.
+        st.session_state.image = None
+
+    # Choose the mode automatically: "edit" when a saved structure file exists.
+    if has_structure:
+        st.session_state.dir_auto_mode = "edit"
+        st.session_state.loaded_config_name = structure_file.name
+        
+        try:
+            structure = load_structure_from_config(structure_file)
+            st.session_state.structure = structure
+            st.session_state.undo_stack = []
+            st.session_state.redo_stack = []
+            clear_table_image_cache()
+            st.sidebar.success("✅ 编辑模式")
+        except Exception as e:
+            # Fall back to "new" mode when the saved structure cannot be loaded.
+            st.error(f"❌ 加载标注失败: {e}")
+            st.session_state.dir_auto_mode = "new"
+    else:
+        st.session_state.dir_auto_mode = "new"
+        # Drop the structure and generator left over from a previously loaded entry.
+        if 'structure' in st.session_state:
+            del st.session_state.structure
+        if 'generator' in st.session_state:
+            del st.session_state.generator
+        st.sidebar.info("🆕 新建模式")
+    
+    # Mark the entry as loaded
+    st.session_state.last_loaded_entry = entry_key
+    st.info(f"📂 已加载: {entry['json'].name}")

+ 74 - 0
table_line_generator/editor/display_controls.py

@@ -0,0 +1,74 @@
+"""
+显示设置控件
+"""
+import streamlit as st
+from typing import Dict, Tuple
+
+
+def create_display_settings_section(display_config: Dict) -> Tuple[int, str, float, bool]:
+    """
+    Render the sidebar display settings; ranges and defaults come from config.
+
+    Args:
+        display_config: Mapping of display options. Missing keys fall back to
+            the defaults written in the ``.get`` calls below.
+
+    Returns:
+        tuple: (line_width, display_mode, zoom_level, show_line_numbers)
+    """
+    sidebar = st.sidebar
+    sidebar.divider()
+    sidebar.subheader("🖼️ 显示设置")
+
+    setting = display_config.get
+
+    line_width = sidebar.slider(
+        "线条宽度",
+        min_value=int(setting("line_width_min", 1)),
+        max_value=int(setting("line_width_max", 5)),
+        value=int(setting("default_line_width", 2)),
+    )
+
+    mode_choices = ["对比显示", "仅显示划线图", "仅显示原图"]
+    display_mode = sidebar.radio("显示模式", mode_choices, index=1)
+
+    zoom_level = sidebar.slider(
+        "图片缩放",
+        min_value=float(setting("zoom_min", 0.25)),
+        max_value=float(setting("zoom_max", 2.0)),
+        value=float(setting("default_zoom", 1.0)),
+        step=float(setting("zoom_step", 0.25)),
+    )
+
+    numbers_default = bool(setting("show_line_numbers", True))
+    show_line_numbers = sidebar.checkbox("显示线条编号", value=numbers_default)
+
+    return line_width, display_mode, zoom_level, show_line_numbers
+
+
+def create_undo_redo_section():
+    """Render the sidebar undo/redo buttons and the history counter.
+
+    A button is disabled while its stack in session state is empty. After a
+    successful undo/redo the table image cache is cleared and the script reruns.
+    """
+    # Imported here, as in the original layout of this module.
+    from .state_manager import undo_last_action, redo_last_action
+    from .drawing import clear_table_image_cache
+
+    st.sidebar.divider()
+    st.sidebar.subheader("↩️ 撤销/重做")
+
+    undo_col, redo_col = st.sidebar.columns(2)
+
+    with undo_col:
+        undo_clicked = st.button("↩️ 撤销", disabled=not st.session_state.undo_stack)
+        if undo_clicked and undo_last_action():
+            clear_table_image_cache()
+            st.success("✅ 已撤销")
+            st.rerun()
+
+    with redo_col:
+        redo_clicked = st.button("↪️ 重做", disabled=not st.session_state.redo_stack)
+        if redo_clicked and redo_last_action():
+            clear_table_image_cache()
+            st.success("✅ 已重做")
+            st.rerun()
+
+    history_size = len(st.session_state.undo_stack)
+    st.sidebar.info(f"📚 历史记录: {history_size} 条")

+ 201 - 0
table_line_generator/editor/file_handlers.py

@@ -0,0 +1,201 @@
+"""
+文件上传和加载处理
+"""
+import streamlit as st
+import json
+import tempfile
+from pathlib import Path
+from PIL import Image
+
+from .data_processor import parse_ocr_data
+from .config_loader import load_structure_from_config
+from .drawing import clear_table_image_cache
+
+
+def handle_json_upload(uploaded_json):
+    """Parse an uploaded OCR JSON file and store the result in session state.
+
+    Does nothing when no file is given or when a file with the same name is
+    already loaded. On success the previous structure, generator, undo/redo
+    history and cached table image are discarded. On failure an error is shown
+    and the script run is stopped.
+    """
+    if uploaded_json is None or st.session_state.loaded_json_name == uploaded_json.name:
+        return
+
+    try:
+        raw_data = json.load(uploaded_json)
+
+        # Show a shallow preview of the raw payload.
+        with st.expander("🔍 原始数据结构"):
+            if isinstance(raw_data, dict):
+                scalar_types = (str, int, float, bool, type(None))
+                preview = {}
+                for key, value in list(raw_data.items())[:5]:
+                    if isinstance(value, scalar_types):
+                        preview[key] = value
+                    else:
+                        # Non-scalar values are summarised by their type name.
+                        preview[key] = f"<{type(value).__name__}>"
+                st.json(preview)
+            else:
+                st.json(raw_data if len(raw_data) <= 3 else raw_data[:3])
+
+        ocr_data = parse_ocr_data(raw_data)
+        if not ocr_data:
+            st.error("❌ 无法解析 OCR 数据,请检查 JSON 格式")
+            st.stop()
+
+        state = st.session_state
+        state.ocr_data = ocr_data
+        state.loaded_json_name = uploaded_json.name
+        state.loaded_config_name = None
+
+        # Discard everything derived from the previously loaded data.
+        for stale_key in ('structure', 'generator'):
+            if stale_key in state:
+                del state[stale_key]
+        state.undo_stack = []
+        state.redo_stack = []
+        clear_table_image_cache()
+
+        st.success(f"✅ 成功加载 {len(ocr_data)} 条 OCR 记录")
+
+    except Exception as e:
+        st.error(f"❌ 加载数据失败: {e}")
+        st.stop()
+
+
+def handle_image_upload(uploaded_image):
+    """Open an uploaded image and store it in session state.
+
+    Does nothing when no file is given or when a file with the same name is
+    already loaded. On success the previous structure, generator, undo/redo
+    history and cached table image are discarded. On failure an error is shown
+    and the script run is stopped.
+    """
+    if uploaded_image is None or st.session_state.loaded_image_name == uploaded_image.name:
+        return
+
+    try:
+        state = st.session_state
+        state.image = Image.open(uploaded_image)
+        state.loaded_image_name = uploaded_image.name
+
+        # Discard everything derived from the previously loaded image.
+        for stale_key in ('structure', 'generator'):
+            if stale_key in state:
+                del state[stale_key]
+        state.undo_stack = []
+        state.redo_stack = []
+        clear_table_image_cache()
+
+        st.success(f"✅ 成功加载图片: {uploaded_image.name}")
+
+    except Exception as e:
+        st.error(f"❌ 加载图片失败: {e}")
+        st.stop()
+
+
+def handle_config_upload(uploaded_config):
+    """Load an uploaded structure config file into session state.
+
+    Does nothing when no file is given or when a file with the same name is
+    already loaded. The upload is written to a temporary ``.json`` file because
+    ``load_structure_from_config`` is called with a path; that file is removed
+    again even when loading fails. On failure the error and traceback are shown
+    and the script run is stopped.
+    """
+    if uploaded_config is None:
+        return
+
+    if st.session_state.loaded_config_name == uploaded_config.name:
+        return
+
+    try:
+        tmp_path = None
+        try:
+            # Spool the upload to disk so it can be loaded by path.
+            with tempfile.NamedTemporaryFile(
+                mode='w',
+                suffix='.json',
+                delete=False,
+                encoding='utf-8'
+            ) as tmp:
+                tmp_path = Path(tmp.name)
+                tmp.write(uploaded_config.getvalue().decode('utf-8'))
+
+            structure = load_structure_from_config(tmp_path)
+        finally:
+            # delete=False means nobody else removes the file; clean up on
+            # every path, including a failed write or load.
+            if tmp_path is not None:
+                tmp_path.unlink(missing_ok=True)
+
+        st.session_state.structure = structure
+        st.session_state.loaded_config_name = uploaded_config.name
+
+        # History and cached renders belong to the previous structure.
+        st.session_state.undo_stack = []
+        st.session_state.redo_stack = []
+        clear_table_image_cache()
+
+        st.success(f"✅ 成功加载配置: {uploaded_config.name}")
+        st.info(
+            f"📊 表格结构: {len(structure['rows'])}行 x {len(structure['columns'])}列\n\n"
+            f"📏 横线数: {len(structure.get('horizontal_lines', []))}\n\n"
+            f"📏 竖线数: {len(structure.get('vertical_lines', []))}"
+        )
+
+        # Details of the loaded config.
+        with st.expander("📋 配置详情"):
+            st.json({
+                "行数": len(structure['rows']),
+                "列数": len(structure['columns']),
+                "横线数": len(structure.get('horizontal_lines', [])),
+                "竖线数": len(structure.get('vertical_lines', [])),
+                "行高": structure.get('row_height'),
+                "列宽": structure.get('col_widths'),
+                "已修改的横线": list(structure.get('modified_h_lines', set())),
+                "已修改的竖线": list(structure.get('modified_v_lines', set()))
+            })
+
+    except Exception as e:
+        st.error(f"❌ 加载配置失败: {e}")
+        import traceback
+        st.code(traceback.format_exc())
+        st.stop()
+
+
+def create_file_uploader_section(work_mode: str):
+    """
+    Render the sidebar upload widgets for the given work mode and process uploads.
+
+    Args:
+        work_mode: "🆕 新建标注" shows the OCR JSON + image uploaders; any other
+            value shows the saved-config + optional image uploaders.
+    """
+    sidebar = st.sidebar
+
+    if work_mode == "🆕 新建标注":
+        sidebar.subheader("上传文件")
+
+        json_file = sidebar.file_uploader("上传OCR结果JSON", type=['json'], key="new_json")
+        image_file = sidebar.file_uploader("上传对应图片", type=['jpg', 'png'], key="new_image")
+
+        handle_json_upload(json_file)
+        handle_image_upload(image_file)
+        return
+
+    # Load-existing-annotation mode.
+    sidebar.subheader("加载已保存的标注")
+
+    config_file = sidebar.file_uploader(
+        "上传配置文件 (*_structure.json)", type=['json'], key="load_config"
+    )
+    image_file = sidebar.file_uploader(
+        "上传对应图片(可选)", type=['jpg', 'png'], key="load_image"
+    )
+
+    handle_config_upload(config_file)
+    handle_image_upload(image_file)
+
+    # Hint shown when a config is loaded without an accompanying image.
+    if 'structure' in st.session_state:
+        if st.session_state.image is None:
+            st.warning("⚠️ 已加载配置,但未加载对应图片。请上传图片以查看效果。")
+            hint = (
+                "💡 提示:配置文件已加载,您可以:\n"
+                "1. 上传对应图片查看效果\n"
+                "2. 直接编辑配置并保存"
+            )
+            st.info(hint)

+ 98 - 0
table_line_generator/editor/mode_setup.py

@@ -0,0 +1,98 @@
+"""
+模式设置(新建/编辑)
+"""
+import streamlit as st
+from PIL import Image
+from typing import Dict, Tuple
+
+try:
+    from ..table_line_generator import TableLineGenerator
+except ImportError:
+    from table_line_generator import TableLineGenerator
+
+from .display_controls import create_display_settings_section, create_undo_redo_section
+from .analysis_controls import create_analysis_section
+
+
+def setup_new_annotation_mode(ocr_data, image, config: Dict) -> Tuple:
+    """
+    Build the sidebar for new-annotation mode and make sure a generator exists.
+
+    Args:
+        ocr_data: OCR data passed to ``TableLineGenerator``.
+        image: Image object passed to ``TableLineGenerator``.
+        config: Display configuration forwarded to the display settings section.
+
+    Returns:
+        tuple: (y_tolerance, x_tolerance, min_row_height, line_width,
+                display_mode, zoom_level, show_line_numbers)
+    """
+    sidebar = st.sidebar
+
+    # Clustering parameters.
+    sidebar.header("🔧 参数调整")
+    y_tolerance = sidebar.slider("Y轴聚类容差(像素)", 1, 20, 5, key="new_y_tol")
+    x_tolerance = sidebar.slider("X轴聚类容差(像素)", 5, 50, 10, key="new_x_tol")
+    min_row_height = sidebar.slider("最小行高(像素)", 10, 100, 20, key="new_min_h")
+
+    # Display settings and history controls.
+    display_settings = create_display_settings_section(config)
+    line_width, display_mode, zoom_level, show_line_numbers = display_settings
+    create_undo_redo_section()
+
+    # Create the generator once and keep it in session state.
+    has_generator = (
+        'generator' in st.session_state
+        and st.session_state.generator is not None
+    )
+    if not has_generator:
+        try:
+            st.session_state.generator = TableLineGenerator(image, ocr_data)
+        except Exception as e:
+            st.error(f"❌ 初始化生成器失败: {e}")
+            st.stop()
+
+    # Analysis controls.
+    create_analysis_section(y_tolerance, x_tolerance, min_row_height)
+
+    return (
+        y_tolerance,
+        x_tolerance,
+        min_row_height,
+        line_width,
+        display_mode,
+        zoom_level,
+        show_line_numbers,
+    )
+
+
+def setup_edit_annotation_mode(structure: Dict, image, config: Dict) -> Tuple:
+    """
+    Build the sidebar for edit-annotation mode.
+
+    Args:
+        structure: Table structure; its optional 'table_bbox' sizes the
+            placeholder canvas when no image is available.
+        image: Image object, or None to get a blank white canvas instead.
+        config: Display configuration forwarded to the display settings section.
+
+    Returns:
+        tuple: (image, line_width, display_mode, zoom_level, show_line_numbers)
+    """
+    # Without an image, draw on a blank canvas large enough for the table.
+    if image is None:
+        bbox = structure.get('table_bbox')
+        if bbox:
+            # Image.new needs integer sizes; a bbox read back from JSON may
+            # hold floats. The 100px margin absorbs the truncation.
+            dummy_width = int(bbox[2]) + 100
+            dummy_height = int(bbox[3]) + 100
+        else:
+            dummy_width = 2000
+            dummy_height = 2000
+        image = Image.new('RGB', (dummy_width, dummy_height), color='white')
+        st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height})")
+
+    # Display settings and history controls.
+    line_width, display_mode, zoom_level, show_line_numbers = \
+        create_display_settings_section(config)
+    create_undo_redo_section()
+
+    return image, line_width, display_mode, zoom_level, show_line_numbers

+ 171 - 0
table_line_generator/editor/save_controls.py

@@ -0,0 +1,171 @@
+"""
+保存功能控件
+"""
+import streamlit as st
+import io
+from pathlib import Path
+from typing import Dict
+
+from .config_loader import save_structure_to_config
+from .drawing import draw_clean_table_lines
+
+
+def create_save_section(work_mode: str, structure: Dict, image, line_width: int, output_config: Dict):
+    """
+    Render the save controls and, when the save button is pressed, write the outputs.
+
+    Output directory, file suffixes, checkbox defaults and the selectable line
+    colors are all read from ``output_config``.
+
+    Args:
+        work_mode: Work mode; forwarded to ``_determine_base_name``.
+        structure: Table structure to save.
+        image: Image object the table lines are drawn on.
+        line_width: Line width used for the saved image.
+        output_config: Output configuration mapping.
+    """
+    st.divider()
+
+    defaults = output_config.get("defaults", {})
+    # Fall back to a built-in palette when the config has no (or an empty) color list.
+    line_colors = output_config.get("line_colors") or [
+        {"name": "黑色", "rgb": [0, 0, 0]},
+        {"name": "蓝色", "rgb": [0, 0, 255]},
+        {"name": "红色", "rgb": [255, 0, 0]},
+    ]
+
+    save_col1, save_col2, save_col3 = st.columns(3)
+
+    with save_col1:
+        save_structure = st.checkbox(
+            "保存表格结构配置",
+            value=bool(defaults.get("save_structure", True)),
+        )
+
+    with save_col2:
+        save_image = st.checkbox(
+            "保存表格线图片",
+            value=bool(defaults.get("save_image", True)),
+        )
+
+    # Preselect the configured default color; use the first entry if it is unknown.
+    color_names = [c["name"] for c in line_colors]
+    default_color = defaults.get("line_color", color_names[0])
+    default_index = (
+        color_names.index(default_color) 
+        if default_color in color_names 
+        else 0
+    )
+
+    with save_col3:
+        line_color_option = st.selectbox(
+            "保存时线条颜色",
+            color_names,
+            label_visibility="collapsed",
+            index=default_index,
+        )
+
+    if st.button("💾 保存", type="primary"):
+        output_dir = Path(output_config.get("directory", "output/table_structures"))
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        structure_suffix = output_config.get("structure_suffix", "_structure.json")
+        image_suffix = output_config.get("image_suffix", "_with_lines.png")
+
+        # Determine the base file name
+        base_name = _determine_base_name(work_mode)
+        
+        # Filled in by the helpers below with (label, path) pairs.
+        saved_files = []
+        
+        if save_structure:
+            _save_structure_file(
+                structure, 
+                output_dir, 
+                base_name, 
+                structure_suffix, 
+                saved_files
+            )
+        
+        if save_image:
+            _save_image_file(
+                image,
+                structure,
+                line_width,
+                line_color_option,
+                line_colors,
+                output_dir,
+                base_name,
+                image_suffix,
+                saved_files
+            )
+        
+        if saved_files:
+            st.success(f"✅ 已保存 {len(saved_files)} 个文件:")
+            for file_type, file_path in saved_files:
+                st.info(f"  • {file_type}: {file_path}")
+
+
+def _determine_base_name(work_mode: str) -> str:
+    """Return the base name (no suffix) used for the saved files.
+
+    New-annotation mode uses the stem of the loaded JSON name. Other modes use
+    the loaded config name with a trailing '_structure' removed, then the loaded
+    image name. "table_structure" is the fallback in every case.
+    """
+    fallback = "table_structure"
+    state = st.session_state
+
+    if work_mode in ("🆕 新建标注", "new"):
+        if not state.loaded_json_name:
+            return fallback
+        return Path(state.loaded_json_name).stem
+
+    if state.loaded_config_name:
+        marker = '_structure'
+        stem = Path(state.loaded_config_name).stem
+        return stem[:-len(marker)] if stem.endswith(marker) else stem
+
+    if state.loaded_image_name:
+        return Path(state.loaded_image_name).stem
+
+    return fallback
+
+
+def _save_structure_file(structure, output_dir, base_name, suffix, saved_files):
+    """Save the structure config to ``output_dir`` and offer it for download.
+
+    Args:
+        structure: Table structure passed to ``save_structure_to_config``.
+        output_dir: Target directory (a ``Path``).
+        base_name: File name without suffix.
+        suffix: Suffix appended to ``base_name``.
+        saved_files: List that receives a ("配置文件", path) entry.
+    """
+    structure_filename = f"{base_name}{suffix}"
+    structure_path = output_dir / structure_filename
+    save_structure_to_config(structure, structure_path)
+    saved_files.append(("配置文件", structure_path))
+
+    # Serve the exact bytes written to disk: no locale-dependent decoding, and
+    # the download carries the same name as the saved file.
+    st.download_button(
+        "📥 下载配置文件",
+        structure_path.read_bytes(),
+        file_name=structure_filename,
+        mime="application/json"
+    )
+
+
+def _save_image_file(image, structure, line_width, color_option, line_colors, 
+                     output_dir, base_name, suffix, saved_files):
+    """Draw the table lines on the image, save the result and offer it for download.
+
+    Warns and returns without saving when ``image`` is None. The line color is
+    the "rgb" of the first ``line_colors`` entry whose "name" equals
+    ``color_option``; black is used when none matches. ``saved_files`` receives
+    a ("表格线图片", path) entry.
+    """
+    if image is None:
+        st.warning("⚠️ 无法保存图片:未加载图片文件")
+        return
+
+    # Resolve the chosen color name to an RGB tuple.
+    rgb = (0, 0, 0)
+    for color in line_colors:
+        if color["name"] == color_option:
+            rgb = tuple(color["rgb"])
+            break
+
+    rendered = draw_clean_table_lines(
+        image,
+        structure,
+        line_width=line_width,
+        line_color=rgb,
+    )
+
+    target_path = output_dir / f"{base_name}{suffix}"
+    rendered.save(target_path)
+    saved_files.append(("表格线图片", target_path))
+
+    # Encode again in memory for the download button.
+    png_stream = io.BytesIO()
+    rendered.save(png_stream, format='PNG')
+    png_stream.seek(0)
+
+    st.download_button(
+        "📥 下载表格线图片",
+        png_stream,
+        file_name=f"{base_name}_with_lines.png",
+        mime="image/png"
+    )

+ 88 - 0
table_line_generator/editor/table_viewer.py

@@ -0,0 +1,88 @@
+"""
+表格结构渲染视图
+"""
+import streamlit as st
+from typing import Dict
+
+from .drawing import get_cached_table_lines_image
+from .viewer import show_image_with_scroll
+from .adjustments import create_adjustment_section
+
+
+def render_table_structure_view(
+    structure: Dict, 
+    image, 
+    line_width: int, 
+    display_mode: str, 
+    zoom_level: float, 
+    show_line_numbers: bool, 
+    viewport_width: int, 
+    viewport_height: int
+):
+    """
+    Render the table view: image pane(s), manual adjustment controls and details.
+
+    Args:
+        structure: Table structure.
+        image: Image object the lines are drawn on.
+        line_width: Line width.
+        display_mode: "对比显示" shows original and lined image side by side,
+            "仅显示划线图" shows only the lined image, anything else only the original.
+        zoom_level: Zoom factor.
+        show_line_numbers: Whether line numbers are drawn.
+        viewport_width: Viewport width.
+        viewport_height: Viewport height.
+    """
+    # Image with the table lines drawn on it.
+    lined_image = get_cached_table_lines_image(
+        image, structure, line_width=line_width, show_numbers=show_line_numbers
+    )
+
+    if display_mode == "对比显示":
+        left_pane, right_pane = st.columns(2)
+        panes = (
+            (left_pane, image, "原图"),
+            (right_pane, lined_image, "表格线"),
+        )
+        for pane, picture, title in panes:
+            with pane:
+                show_image_with_scroll(
+                    picture, title, viewport_width, viewport_height, zoom_level
+                )
+    else:
+        if display_mode == "仅显示划线图":
+            picture = lined_image
+            title = f"表格线图 (缩放: {zoom_level:.0%})"
+        else:
+            # Original image only.
+            picture = image
+            title = f"原图 (缩放: {zoom_level:.0%})"
+        show_image_with_scroll(
+            picture, title, viewport_width, viewport_height, zoom_level
+        )
+
+    # Manual adjustment controls.
+    create_adjustment_section(structure)
+
+    # Structure details.
+    horizontal = structure.get('horizontal_lines', [])
+    vertical = structure.get('vertical_lines', [])
+    with st.expander("📊 表格结构详情"):
+        details = {
+            "行数": len(structure['rows']),
+            "列数": len(structure['columns']),
+            "横线数": len(horizontal),
+            "竖线数": len(vertical),
+            "横线坐标": horizontal,
+            "竖线坐标": vertical,
+            "标准行高": structure.get('row_height'),
+            "列宽度": structure.get('col_widths'),
+            "修改的横线": list(structure.get('modified_h_lines', set())),
+            "修改的竖线": list(structure.get('modified_v_lines', set())),
+        }
+        st.json(details)

+ 303 - 87
table_line_generator/editor/ui_components.py → table_line_generator/editor/ui_components_v1.py

@@ -7,61 +7,16 @@ import json
 from pathlib import Path
 from PIL import Image
 import tempfile
+from typing import Dict, List
 
 try:
     from ..table_line_generator import TableLineGenerator
 except ImportError:
     from table_line_generator import TableLineGenerator
 
-from .config_loader import load_structure_from_config
+from .config_loader import load_structure_from_config, build_data_source_catalog
 from .drawing import clear_table_image_cache
 
-
-def parse_ocr_data(ocr_data):
-    """解析OCR数据,支持多种格式"""
-    # 如果是字符串,尝试解析
-    if isinstance(ocr_data, str):
-        try:
-            ocr_data = json.loads(ocr_data)
-        except json.JSONDecodeError:
-            st.error("❌ JSON 格式错误,无法解析")
-            return []
-    
-    # 检查是否为 PPStructure V3 格式
-    if isinstance(ocr_data, dict) and 'parsing_res_list' in ocr_data and 'overall_ocr_res' in ocr_data:
-        st.info("🔍 检测到 PPStructure V3 格式")
-        
-        try:
-            table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(ocr_data)
-            st.success(f"✅ 表格区域: {table_bbox}")
-            st.success(f"✅ 表格内文本框: {len(text_boxes)} 个")
-            return text_boxes
-        except Exception as e:
-            st.error(f"❌ 解析 PPStructure 结果失败: {e}")
-            return []
-    
-    # 确保是列表
-    if not isinstance(ocr_data, list):
-        st.error(f"❌ OCR 数据应该是列表,实际类型: {type(ocr_data)}")
-        return []
-    
-    if not ocr_data:
-        st.warning("⚠️ OCR 数据为空")
-        return []
-    
-    first_item = ocr_data[0]
-    if not isinstance(first_item, dict):
-        st.error(f"❌ OCR 数据项应该是字典,实际类型: {type(first_item)}")
-        return []
-    
-    if 'bbox' not in first_item:
-        st.error("❌ OCR 数据缺少 'bbox' 字段")
-        st.info("💡 支持的格式示例:\n```json\n[\n  {\n    \"text\": \"文本\",\n    \"bbox\": [x1, y1, x2, y2]\n  }\n]\n```")
-        return []
-    
-    return ocr_data
-
-
 def create_file_uploader_section(work_mode: str):
     """
     创建文件上传区域
@@ -221,16 +176,34 @@ def create_file_uploader_section(work_mode: str):
             st.info("💡 提示:配置文件已加载,您可以:\n1. 上传对应图片查看效果\n2. 直接编辑配置并保存")
 
 
-def create_display_settings_section():
-    """创建显示设置区域"""
+def create_display_settings_section(display_config: Dict):
+    """显示设置(由配置驱动)"""
     st.sidebar.divider()
     st.sidebar.subheader("🖼️ 显示设置")
-    
-    line_width = st.sidebar.slider("线条宽度", 1, 5, 2)
-    display_mode = st.sidebar.radio("显示模式", ["对比显示", "仅显示划线图", "仅显示原图"], index=1)
-    zoom_level = st.sidebar.slider("图片缩放", 0.25, 2.0, 1.0, 0.25)
-    show_line_numbers = st.sidebar.checkbox("显示线条编号", value=True)
-    
+
+    line_width = st.sidebar.slider(
+        "线条宽度",
+        int(display_config.get("line_width_min", 1)),
+        int(display_config.get("line_width_max", 5)),
+        int(display_config.get("default_line_width", 2)),
+    )
+    display_mode = st.sidebar.radio(
+        "显示模式",
+        ["对比显示", "仅显示划线图", "仅显示原图"],
+        index=1,
+    )
+    zoom_level = st.sidebar.slider(
+        "图片缩放",
+        float(display_config.get("zoom_min", 0.25)),
+        float(display_config.get("zoom_max", 2.0)),
+        float(display_config.get("default_zoom", 1.0)),
+        float(display_config.get("zoom_step", 0.25)),
+    )
+    show_line_numbers = st.sidebar.checkbox(
+        "显示线条编号",
+        value=bool(display_config.get("show_line_numbers", True)),
+    )
+
     return line_width, display_mode, zoom_level, show_line_numbers
 
 
@@ -313,41 +286,56 @@ def create_analysis_section(y_tolerance, x_tolerance, min_row_height):
                 st.stop()
 
 
-def create_save_section(work_mode, structure, image, line_width):
+def create_save_section(work_mode, structure, image, line_width, output_config: Dict):
     """
-    创建保存区域
-    
-    Args:
-        work_mode: 工作模式
-        structure: 表格结构
-        image: 图片
-        line_width: 线条宽度
+    保存设置(目录/命名来自配置)
     """
     from .config_loader import save_structure_to_config
     from .drawing import draw_clean_table_lines
     import io
-    
+
     st.divider()
-    
+
+    defaults = output_config.get("defaults", {})
+    line_colors = output_config.get("line_colors") or [
+        {"name": "黑色", "rgb": [0, 0, 0]},
+        {"name": "蓝色", "rgb": [0, 0, 255]},
+        {"name": "红色", "rgb": [255, 0, 0]},
+    ]
+
     save_col1, save_col2, save_col3 = st.columns(3)
-    
+
     with save_col1:
-        save_structure = st.checkbox("保存表格结构配置", value=True)
-    
+        save_structure = st.checkbox(
+            "保存表格结构配置",
+            value=bool(defaults.get("save_structure", True)),
+        )
+
     with save_col2:
-        save_image = st.checkbox("保存表格线图片", value=True)
-    
+        save_image = st.checkbox(
+            "保存表格线图片",
+            value=bool(defaults.get("save_image", True)),
+        )
+
+    color_names = [c["name"] for c in line_colors]
+    default_color = defaults.get("line_color", color_names[0])
+    default_index = color_names.index(default_color) if default_color in color_names else 0
+
     with save_col3:
         line_color_option = st.selectbox(
             "保存时线条颜色",
-            ["黑色", "蓝色", "红色"],
-            index=0
+            color_names,
+            label_visibility="collapsed",
+            index=default_index,
         )
-    
+
     if st.button("💾 保存", type="primary"):
-        output_dir = Path("output/table_structures")
+        output_dir = Path(output_config.get("directory", "output/table_structures"))
         output_dir.mkdir(parents=True, exist_ok=True)
-        
+
+        structure_suffix = output_config.get("structure_suffix", "_structure.json")
+        image_suffix = output_config.get("image_suffix", "_with_lines.png")
+
         # 确定文件名
         if work_mode == "🆕 新建标注":
             if st.session_state.loaded_json_name:
@@ -367,7 +355,8 @@ def create_save_section(work_mode, structure, image, line_width):
         saved_files = []
         
         if save_structure:
-            structure_path = output_dir / f"{base_name}_structure.json"
+            structure_filename = f"{base_name}{structure_suffix}"
+            structure_path = output_dir / structure_filename
             save_structure_to_config(structure, structure_path)
             saved_files.append(("配置文件", structure_path))
             
@@ -383,21 +372,18 @@ def create_save_section(work_mode, structure, image, line_width):
             if st.session_state.image is None:
                 st.warning("⚠️ 无法保存图片:未加载图片文件")
             else:
-                color_map = {
-                    "黑色": (0, 0, 0),
-                    "蓝色": (0, 0, 255),
-                    "红色": (255, 0, 0)
-                }
-                selected_color = color_map[line_color_option]
-                
+                selected_color_rgb = next(
+                    (tuple(c["rgb"]) for c in line_colors if c["name"] == line_color_option),
+                    (0, 0, 0),
+                )
                 clean_img = draw_clean_table_lines(
                     st.session_state.image,
                     structure,
                     line_width=line_width,
-                    line_color=selected_color
+                    line_color=selected_color_rgb,
                 )
-                
-                output_image_path = output_dir / f"{base_name}_with_lines.png"
+                image_filename = f"{base_name}{image_suffix}"
+                output_image_path = output_dir / image_filename
                 clean_img.save(output_image_path)
                 saved_files.append(("表格线图片", output_image_path))
                 
@@ -415,4 +401,234 @@ def create_save_section(work_mode, structure, image, line_width):
         if saved_files:
             st.success(f"✅ 已保存 {len(saved_files)} 个文件:")
             for file_type, file_path in saved_files:
-                st.info(f"  • {file_type}: {file_path}")
+                st.info(f"  • {file_type}: {file_path}")
+
+def setup_new_annotation_mode(ocr_data, image, config: Dict):
+    """
+    Build the sidebar for new-annotation mode and make sure a generator exists.
+
+    Args:
+        ocr_data: OCR data passed to ``TableLineGenerator``.
+        image: Image object passed to ``TableLineGenerator``.
+        config: Display configuration forwarded to the display settings section.
+
+    Returns:
+        tuple: (y_tolerance, x_tolerance, min_row_height, line_width, display_mode, zoom_level, show_line_numbers)
+    """
+    sidebar = st.sidebar
+
+    # Clustering parameters.
+    sidebar.header("🔧 参数调整")
+    y_tolerance = sidebar.slider(
+        "Y轴聚类容差(像素)", min_value=1, max_value=20, value=5, key="new_y_tol"
+    )
+    x_tolerance = sidebar.slider(
+        "X轴聚类容差(像素)", min_value=5, max_value=50, value=10, key="new_x_tol"
+    )
+    min_row_height = sidebar.slider(
+        "最小行高(像素)", min_value=10, max_value=100, value=20, key="new_min_h"
+    )
+
+    # Display settings and history controls.
+    display_settings = create_display_settings_section(config)
+    line_width, display_mode, zoom_level, show_line_numbers = display_settings
+    create_undo_redo_section()
+
+    # Create the generator once and keep it in session state.
+    has_generator = (
+        'generator' in st.session_state
+        and st.session_state.generator is not None
+    )
+    if not has_generator:
+        try:
+            st.session_state.generator = TableLineGenerator(image, ocr_data)
+        except Exception as e:
+            st.error(f"❌ 初始化生成器失败: {e}")
+            st.stop()
+
+    # Analysis controls.
+    create_analysis_section(y_tolerance, x_tolerance, min_row_height)
+
+    tolerances = (y_tolerance, x_tolerance, min_row_height)
+    return tolerances + (line_width, display_mode, zoom_level, show_line_numbers)
+
+
+def setup_edit_annotation_mode(structure, image, config: Dict):
+    """
+    Build the sidebar for edit-annotation mode.
+
+    Args:
+        structure: Table structure; its optional 'table_bbox' sizes the
+            placeholder canvas when no image is available.
+        image: Image object, or None to get a blank white canvas instead.
+        config: Display configuration forwarded to the display settings section.
+
+    Returns:
+        tuple: (image, line_width, display_mode, zoom_level, show_line_numbers)
+    """
+    # Without an image, draw on a blank canvas large enough for the table.
+    if image is None:
+        bbox = structure.get('table_bbox')
+        if bbox:
+            # Image.new needs integer sizes; a bbox read back from JSON may
+            # hold floats. The 100px margin absorbs the truncation.
+            dummy_width = int(bbox[2]) + 100
+            dummy_height = int(bbox[3]) + 100
+        else:
+            dummy_width = 2000
+            dummy_height = 2000
+        image = Image.new('RGB', (dummy_width, dummy_height), color='white')
+        st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height})")
+
+    # Display settings and history controls.
+    line_width, display_mode, zoom_level, show_line_numbers = create_display_settings_section(config)
+    create_undo_redo_section()
+
+    return image, line_width, display_mode, zoom_level, show_line_numbers
+
+
+def render_table_structure_view(structure, image, line_width, display_mode, zoom_level, show_line_numbers, 
+                                viewport_width, viewport_height):
+    """
+    Render the table view: image pane(s), manual adjustment controls and details.
+
+    Args:
+        structure: Table structure.
+        image: Image object the lines are drawn on.
+        line_width: Line width.
+        display_mode: "对比显示" shows original and lined image side by side,
+            "仅显示划线图" shows only the lined image, anything else only the original.
+        zoom_level: Zoom factor.
+        show_line_numbers: Whether line numbers are drawn.
+        viewport_width: Viewport width.
+        viewport_height: Viewport height.
+    """
+    # Image with the table lines drawn on it.
+    lined_image = get_cached_table_lines_image(
+        image, structure, line_width=line_width, show_numbers=show_line_numbers
+    )
+
+    # Every pane shares the same viewport and zoom arguments.
+    view_args = (viewport_width, viewport_height, zoom_level)
+
+    if display_mode == "对比显示":
+        left_pane, right_pane = st.columns(2)
+        with left_pane:
+            show_image_with_scroll(image, "原图", *view_args)
+        with right_pane:
+            show_image_with_scroll(lined_image, "表格线", *view_args)
+    elif display_mode == "仅显示划线图":
+        lined_title = f"表格线图 (缩放: {zoom_level:.0%})"
+        show_image_with_scroll(lined_image, lined_title, *view_args)
+    else:
+        original_title = f"原图 (缩放: {zoom_level:.0%})"
+        show_image_with_scroll(image, original_title, *view_args)
+
+    # Manual adjustment controls.
+    create_adjustment_section(structure)
+
+    # Structure details.
+    with st.expander("📊 表格结构详情"):
+        details = {
+            "行数": len(structure['rows']),
+            "列数": len(structure['columns']),
+            "横线数": len(structure.get('horizontal_lines', [])),
+            "竖线数": len(structure.get('vertical_lines', [])),
+            "横线坐标": structure.get('horizontal_lines', []),
+            "竖线坐标": structure.get('vertical_lines', []),
+            "标准行高": structure.get('row_height'),
+            "列宽度": structure.get('col_widths'),
+            "修改的横线": list(structure.get('modified_h_lines', set())),
+            "修改的竖线": list(structure.get('modified_v_lines', set())),
+        }
+        st.json(details)
+
+
+def create_directory_selector(data_sources: List[Dict], global_output_config: Dict):
+    """Directory-mode selector: pick a data source and file, loading it only when it changes.
+
+    Args:
+        data_sources: Data source configs; each needs a "name" and may carry
+            its own "output" config.
+        global_output_config: Output config used when the selected source has
+            no "output" entry.
+
+    Returns:
+        The value of ``dir_auto_mode`` in session state ('new' if unset).
+        NOTE(review): returns None when the catalog is empty — callers
+        presumably have to handle that; confirm.
+    """
+    st.sidebar.subheader("目录模式")
+    source_names = [src["name"] for src in data_sources]
+    selected_name = st.sidebar.selectbox("选择数据源", source_names, key="dir_mode_source")
+    source_cfg = next(src for src in data_sources if src["name"] == selected_name)
+    
+    # A per-source "output" config takes precedence over the global one.
+    output_cfg = source_cfg.get("output", global_output_config)
+    output_dir = Path(output_cfg.get("directory", "output/table_structures"))
+    structure_suffix = output_cfg.get("structure_suffix", "_structure.json")
+    
+    # Build the catalog once per data source and keep it in session state.
+    catalog_key = f"catalog::{selected_name}"
+    if catalog_key not in st.session_state:
+        st.session_state[catalog_key] = build_data_source_catalog(source_cfg)
+    catalog = st.session_state[catalog_key]
+
+    if not catalog:
+        st.sidebar.warning("目录中没有 JSON 文件")
+        return
+
+    if 'dir_selected_index' not in st.session_state:
+        st.session_state.dir_selected_index = 0
+
+    # NOTE(review): dir_selected_index is not reset when the data source
+    # changes, so it may exceed the new catalog's length — verify.
+    selected = st.sidebar.selectbox(
+        "选择文件",
+        range(len(catalog)),
+        format_func=lambda i: catalog[i]["display"],
+        index=st.session_state.dir_selected_index,
+        key="dir_select_box"
+    )
+
+    page_input = st.sidebar.number_input(
+        "页码跳转",
+        min_value=1,
+        max_value=len(catalog),
+        value=catalog[selected]["index"],
+        step=1,
+        key="dir_page_input"
+    )
+    
+    # Key optimization: reload only when the selected file changes
+    current_entry_key = f"{selected_name}::{catalog[selected]['json']}"
+    
+    if 'last_loaded_entry' not in st.session_state or st.session_state.last_loaded_entry != current_entry_key:
+        # The selection changed: reload everything for the new entry
+        entry = catalog[selected]
+        base_name = entry["json"].stem
+        structure_file = output_dir / f"{base_name}{structure_suffix}"
+        has_structure = structure_file.exists()
+        
+        # Load the OCR JSON
+        with open(entry["json"], "r", encoding="utf-8") as fp:
+            raw = json.load(fp)
+        st.session_state.ocr_data = parse_ocr_data(raw)
+        st.session_state.loaded_json_name = entry["json"].name
+
+        # Load the image
+        if entry["image"] and entry["image"].exists():
+            st.session_state.image = Image.open(entry["image"])
+            st.session_state.loaded_image_name = entry["image"].name
+        else:
+            st.session_state.image = None
+
+        # Choose the mode automatically: edit when a saved structure exists
+        if has_structure:
+            st.session_state.dir_auto_mode = "edit"
+            st.session_state.loaded_config_name = structure_file.name
+            
+            try:
+                structure = load_structure_from_config(structure_file)
+                st.session_state.structure = structure
+                st.session_state.undo_stack = []
+                st.session_state.redo_stack = []
+                clear_table_image_cache()
+                st.sidebar.success(f"✅ 编辑模式")
+            except Exception as e:
+                # Fall back to new-annotation mode when the saved structure cannot be loaded.
+                st.error(f"❌ 加载标注失败: {e}")
+                st.session_state.dir_auto_mode = "new"
+        else:
+            st.session_state.dir_auto_mode = "new"
+            if 'structure' in st.session_state:
+                del st.session_state.structure
+            if 'generator' in st.session_state:
+                del st.session_state.generator
+            st.sidebar.info(f"🆕 新建模式")
+        
+        # Mark this entry as loaded
+        st.session_state.last_loaded_entry = current_entry_key
+        st.info(f"📂 已加载: {entry['json'].name}")
+    
+    # Page jump: find the entry whose "index" equals the requested page and rerun
+    if page_input != catalog[selected]["index"]:
+        target = next((i for i, item in enumerate(catalog) if item["index"] == page_input), None)
+        if target is not None:
+            st.session_state.dir_selected_index = target
+            st.rerun()
+
+    return st.session_state.get('dir_auto_mode', 'new')

+ 29 - 0
table_line_generator/editor/viewer.py

@@ -0,0 +1,29 @@
+from io import BytesIO
+import base64
+import streamlit as st
+
+def show_image_with_scroll(
+    image,
+    caption="",
+    viewport_width=800,
+    viewport_height=1200,
+    zoom=1.0
+):
+    """Show an image inside a fixed-size scrollable box, scaled by ``zoom``.
+
+    The image is PNG-encoded, embedded as a base64 data URI and rendered
+    through ``st.markdown`` with raw HTML enabled. A non-empty ``caption`` is
+    shown below the box.
+    """
+    # Encode the image as a base64 PNG so it can be inlined in the HTML.
+    png_buffer = BytesIO()
+    image.save(png_buffer, format="PNG")
+    img_base64 = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
+
+    width, height = image.width, image.height
+
+    html = f"""
+    <div style="border:1px solid #e0e0e0;border-radius:6px;width:{viewport_width}px;height:{viewport_height}px;
+                overflow:auto;background:#fff;margin-bottom:0.5rem;">
+        <div style="transform:scale({zoom});transform-origin:top left;width:{width}px;height:{height}px;">
+            <img src="data:image/png;base64,{img_base64}" style="width:{width}px;max-width:none;display:block;" />
+        </div>
+    </div>
+    """
+    st.markdown(html, unsafe_allow_html=True)
+
+    if not caption:
+        return
+    st.caption(caption)