"""Visual table-line editor.

Streamlit application that lets a user adjust table-line positions by hand.
"""

import warnings

# Silence SyntaxWarning messages raised from the paddlex module. This must run
# before the imports below so the filter is already installed.
warnings.filterwarnings('ignore', category=SyntaxWarning, module='paddlex')

import streamlit as st
from pathlib import Path
from PIL import Image
import yaml
from typing import Dict, List, Optional, Tuple
import argparse
import sys

try:
    from table_line_generator import TableLineGenerator
except ImportError:
    from .table_line_generator import TableLineGenerator

# Editor package helpers.
from editor import (
    # UI components
    create_file_uploader_section,
    create_display_settings_section,
    create_undo_redo_section,
    create_analysis_section,
    create_save_section,
    create_directory_selector,
    # Annotation-mode setup and rendering
    setup_new_annotation_mode,
    setup_edit_annotation_mode,
    render_table_structure_view,
    # Drawing
    get_cached_table_lines_image,
    # State management
    init_undo_stack,
    # Adjustment
    create_adjustment_section,
    show_image_with_scroll,
    # Configuration
    load_table_editor_config,
    build_data_source_catalog,
    parse_table_editor_cli_args,
    # Batch template application
    create_batch_template_section,
)

DEFAULT_CONFIG_PATH = Path(__file__).with_name("table_line_generator.yaml")

# Sidebar work-mode labels. They are user-facing text AND the values the radio
# widget returns, so they are compared verbatim.
_MODE_NEW = "🆕 新建标注"
_MODE_LOAD = "📂 加载已有标注"
_MODE_DIRECTORY = "📁 目录模式"

# Analysis parameters: initial slider values, and the values passed to
# analyze_table_structure whenever the resolved method is not "cluster".
_DEFAULT_Y_TOLERANCE = 5
_DEFAULT_X_TOLERANCE = 10
_DEFAULT_MIN_ROW_HEIGHT = 20

# Session-state keys and the value each one starts with when absent.
_SESSION_DEFAULTS = {
    'loaded_json_name': None,
    'loaded_image_name': None,
    'loaded_config_name': None,
    'ocr_data': None,
    'image': None,
    # Directory-mode state
    'dir_selected_index': 0,
    'last_loaded_entry': None,
    'dir_auto_mode': None,
    'current_data_source': None,
    'current_output_config': None,
}


@st.cache_resource
def get_cli_args():
    """Return the parsed command-line arguments (cached by st.cache_resource)."""
    return parse_table_editor_cli_args()


@st.cache_resource
def get_table_editor_config():
    """Load the editor configuration (cached by st.cache_resource).

    The path comes from the ``config`` CLI argument when it is set, with ``~``
    expanded; otherwise ``DEFAULT_CONFIG_PATH`` is used.
    """
    cli_args = get_cli_args()
    if cli_args.config:
        config_path = Path(cli_args.config).expanduser()
    else:
        config_path = DEFAULT_CONFIG_PATH
    return load_table_editor_config(config_path)


def _init_session_state() -> None:
    """Seed missing session-state keys, then initialise the undo/redo stack."""
    for key, initial in _SESSION_DEFAULTS.items():
        if key not in st.session_state:
            st.session_state[key] = initial
    init_undo_stack()


def _default_line_color(output_cfg):
    """Return the fallback line-colour name from an output config.

    Uses ``defaults.line_color`` when that key is present; otherwise the name
    of the first ``line_colors`` entry. Returns ``None`` when neither is
    available. The first-entry lookup is done lazily so that a config with a
    configured default but no ``line_colors`` list does not raise.
    """
    defaults = output_cfg.get("defaults") or {}
    if "line_color" in defaults:
        return defaults["line_color"]
    line_colors = output_cfg.get("line_colors") or []
    return line_colors[0]["name"] if line_colors else None


def _run_directory_mode(config, viewport_width, viewport_height) -> None:
    """Render the directory work mode: pick an entry, then view/save it."""
    data_sources = config.get("data_sources", [])
    if not data_sources:
        st.sidebar.warning("未配置 data_sources")
        return

    auto_mode = create_directory_selector(data_sources, config["output"])

    # Show which data source is currently active.
    current_source = st.session_state.current_data_source
    if current_source:
        ds_name = current_source.get("name", "未知")
        st.sidebar.success(f"✅ 数据源: {ds_name}")

    if auto_mode == "new":
        if not (st.session_state.ocr_data and st.session_state.image):
            st.warning("⚠️ 缺少必要数据")
            return
        (_, _, _, line_width, display_mode, zoom_level,
         show_line_numbers) = setup_new_annotation_mode(
            st.session_state.ocr_data,
            st.session_state.image,
            config["display"]
        )
    else:  # edit
        if 'structure' not in st.session_state:
            st.warning("⚠️ 结构加载失败")
            return
        # The image returned here is not used; rendering below reads the
        # session-state image instead.
        (_, line_width, display_mode, zoom_level,
         show_line_numbers) = setup_edit_annotation_mode(
            st.session_state.structure,
            st.session_state.image,
            config["display"]
        )

    # Nothing to render or save until a non-empty structure exists.
    if not ('structure' in st.session_state and st.session_state.structure):
        return

    render_table_structure_view(
        st.session_state.structure,
        # Blank white canvas when no image is loaded.
        st.session_state.image or Image.new('RGB', (2000, 2000), 'white'),
        line_width,
        display_mode,
        zoom_level,
        show_line_numbers,
        viewport_width,
        viewport_height
    )

    create_save_section(
        auto_mode,
        st.session_state.structure,
        st.session_state.image,
        line_width,
        st.session_state.current_output_config or config["output"]
    )

    # Batch template application is offered in edit mode only.
    if auto_mode == "edit":
        # NOTE(review): the fallback colour is read from the global output
        # config, while the save section above may use current_output_config
        # — confirm this difference is intended.
        fallback_color = _default_line_color(config["output"])
        create_batch_template_section(
            current_line_width=line_width,
            current_line_color=st.session_state.get('save_line_color', fallback_color)
        )


def _run_new_annotation_mode(work_mode) -> None:
    """Render the new-annotation work mode: upload data, then analyse it."""
    create_file_uploader_section(work_mode)

    if not (st.session_state.ocr_data and st.session_state.image):
        return

    st.info(f"📂 已加载: {st.session_state.loaded_json_name}")

    # Analysis parameter controls.
    st.sidebar.subheader("🔬 分析参数")
    method_labels = {
        "auto": "🤖 自动选择(推荐)",
        "cluster": "📊 聚类算法(通用)",
        "mineru": "🎯 MinerU 索引算法"
    }
    analysis_method = st.sidebar.selectbox(
        "分析算法",
        ["auto", "cluster", "mineru"],
        format_func=lambda x: method_labels[x]
    )

    # Always bind the parameters; the sliders only appear for methods that
    # may end up running the cluster algorithm.
    y_tolerance = _DEFAULT_Y_TOLERANCE
    x_tolerance = _DEFAULT_X_TOLERANCE
    min_row_height = _DEFAULT_MIN_ROW_HEIGHT
    if analysis_method in ("auto", "cluster"):
        y_tolerance = st.sidebar.slider("Y轴容差", 1, 20, _DEFAULT_Y_TOLERANCE)
        x_tolerance = st.sidebar.slider("X轴容差", 1, 30, _DEFAULT_X_TOLERANCE)
        min_row_height = st.sidebar.slider("最小行高", 10, 50, _DEFAULT_MIN_ROW_HEIGHT)

    if not st.button("🔍 分析表格结构"):
        return

    with st.spinner("正在分析..."):
        generator = TableLineGenerator(
            st.session_state.image,
            st.session_state.ocr_data
        )

        if analysis_method == "auto":
            # Pick "mineru" when any OCR item contains 'row', else "cluster".
            has_cell_index = any('row' in item for item in st.session_state.ocr_data)
            method = "mineru" if has_cell_index else "cluster"
        else:
            method = analysis_method

        # Slider values only apply to the cluster algorithm; every other
        # method receives the fixed defaults.
        use_sliders = method == "cluster"
        st.session_state.structure = generator.analyze_table_structure(
            y_tolerance=y_tolerance if use_sliders else _DEFAULT_Y_TOLERANCE,
            x_tolerance=x_tolerance if use_sliders else _DEFAULT_X_TOLERANCE,
            min_row_height=min_row_height if use_sliders else _DEFAULT_MIN_ROW_HEIGHT,
            method=method
        )
        st.success(f"✅ 分析完成(使用 {method} 算法)")
    # NOTE(review): this mode stores the structure but does not render it
    # here — confirm that is intended.


def _run_load_annotation_mode(work_mode, config, viewport_width, viewport_height) -> None:
    """Render the load-existing-annotation work mode: view, adjust and save."""
    create_file_uploader_section(work_mode)

    if 'structure' not in st.session_state:
        st.info("👆 请在左侧上传配置文件 (*_structure.json)")
        with st.expander("📖 使用说明"):
            st.markdown("""
            ### 📂 加载已有标注

            **步骤:**

            1. **上传配置文件**:选择之前保存的 `*_structure.json`
            2. **上传图片**(可选):上传对应的图片以查看效果
            3. **调整表格线**:使用下方的工具调整横线/竖线位置
            4. **保存更新**:保存修改后的配置

            **提示:**
            - 即使没有图片,也可以直接编辑配置文件中的坐标
            - 配置文件包含完整的表格结构信息
            - 可以应用到同类型的其他页面
            """)
        return

    if st.session_state.image is None:
        st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")

    (image, line_width, display_mode, zoom_level,
     show_line_numbers) = setup_edit_annotation_mode(
        st.session_state.structure,
        st.session_state.image,
        config["display"]
    )

    render_table_structure_view(
        st.session_state.structure,
        image,
        line_width,
        display_mode,
        zoom_level,
        show_line_numbers,
        viewport_width,
        viewport_height
    )

    create_save_section(
        work_mode,
        st.session_state.structure,
        image,
        line_width,
        config["output"]
    )


def create_table_line_editor():
    """Build the table-line editor page and dispatch to the chosen work mode."""
    st.set_page_config(
        page_title="表格线编辑器",
        page_icon="📏",
        layout="wide",
        initial_sidebar_state="expanded"
    )
    st.title("📏 表格线编辑器")

    # Configuration comes from the cached loader.
    config = get_table_editor_config()
    viewport_width = config["viewport"]["width"]
    viewport_height = config["viewport"]["height"]

    _init_session_state()

    # Work-mode selection.
    st.sidebar.header("📂 工作模式")
    work_mode = st.sidebar.radio(
        "选择模式",
        [_MODE_NEW, _MODE_LOAD, _MODE_DIRECTORY],
        index=0
    )

    if work_mode == _MODE_DIRECTORY:
        _run_directory_mode(config, viewport_width, viewport_height)
    elif work_mode == _MODE_NEW:
        _run_new_annotation_mode(work_mode)
    elif work_mode == _MODE_LOAD:
        _run_load_annotation_mode(work_mode, config, viewport_width, viewport_height)


if __name__ == "__main__":
    try:
        create_table_line_editor()
    except GeneratorExit:
        pass  # Original author's note: Streamlit internal cleanup, ignored.
    except KeyboardInterrupt:
        st.info("👋 程序已停止")
    except Exception as e:
        # Top-level boundary: show the failure in the page instead of dying.
        st.error(f"❌ 程序崩溃: {e}")
        import traceback
        with st.expander("🔍 详细错误信息"):
            st.code(traceback.format_exc())