""" 表格线可视化编辑器 支持人工调整表格线位置 """ import streamlit as st from pathlib import Path from PIL import Image import yaml from typing import Dict, List, Optional, Tuple import argparse import sys try: from table_line_generator import TableLineGenerator except ImportError: from .table_line_generator import TableLineGenerator # 导入编辑器模块 from editor import ( # UI 组件 create_file_uploader_section, create_display_settings_section, create_undo_redo_section, create_analysis_section, create_save_section, create_directory_selector, # 新增的模块功能 setup_new_annotation_mode, setup_edit_annotation_mode, render_table_structure_view, # 绘图 get_cached_table_lines_image, # 状态管理 init_undo_stack, # 调整 create_adjustment_section, show_image_with_scroll, # 配置 load_table_editor_config, build_data_source_catalog, parse_table_editor_cli_args, ) DEFAULT_CONFIG_PATH = Path(__file__).with_name("table_line_generator.yaml") @st.cache_resource def get_cli_args(): return parse_table_editor_cli_args() @st.cache_resource def get_table_editor_config(): """缓存配置加载(整个 session 共享)""" cli_args = get_cli_args() config_path = ( Path(cli_args.config).expanduser() if cli_args.config else DEFAULT_CONFIG_PATH ) return load_table_editor_config(config_path) def create_table_line_editor(): """创建表格线编辑器界面""" # 配置页面 st.set_page_config( page_title="表格线编辑器", page_icon="📏", layout="wide", initial_sidebar_state="expanded" ) st.title("📏 表格线编辑器") # 🎯 从缓存获取配置 TABLE_EDITOR_CONFIG = get_table_editor_config() VIEWPORT_WIDTH = TABLE_EDITOR_CONFIG["viewport"]["width"] VIEWPORT_HEIGHT = TABLE_EDITOR_CONFIG["viewport"]["height"] DATA_SOURCES = TABLE_EDITOR_CONFIG.get("data_sources", []) # 初始化 session_state(集中管理) if 'loaded_json_name' not in st.session_state: st.session_state.loaded_json_name = None if 'loaded_image_name' not in st.session_state: st.session_state.loaded_image_name = None if 'loaded_config_name' not in st.session_state: st.session_state.loaded_config_name = None if 'ocr_data' not in st.session_state: st.session_state.ocr_data = None if 'image' not in st.session_state: st.session_state.image = None # 🆕 目录模式专用状态 if 'dir_selected_index' not in st.session_state: st.session_state.dir_selected_index = 0 if 'last_loaded_entry' not in st.session_state: st.session_state.last_loaded_entry = None if 'dir_auto_mode' not in st.session_state: st.session_state.dir_auto_mode = None # 初始化撤销/重做栈 init_undo_stack() # 🆕 工作模式选择 st.sidebar.header("📂 工作模式") work_mode = st.sidebar.radio( "选择模式", ["🆕 新建标注", "📂 加载已有标注", "📁 目录模式"], index=0 ) # 📁 目录模式 if work_mode == "📁 目录模式": if not DATA_SOURCES: st.sidebar.warning("未配置 data_sources") return auto_mode = create_directory_selector(DATA_SOURCES, TABLE_EDITOR_CONFIG["output"]) if auto_mode == "new": if not (st.session_state.ocr_data and st.session_state.image): st.warning("⚠️ 缺少必要数据") return setup_new_annotation_mode( st.session_state.ocr_data, st.session_state.image, TABLE_EDITOR_CONFIG["display"] ) else: # edit if 'structure' not in st.session_state: st.warning("⚠️ 结构加载失败") return image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode( st.session_state.structure, st.session_state.image, TABLE_EDITOR_CONFIG["display"] ) # 统一渲染 if 'structure' in st.session_state and st.session_state.structure: render_table_structure_view( st.session_state.structure, st.session_state.image or Image.new('RGB', (2000, 2000), 'white'), line_width if auto_mode == "edit" else st.session_state.get('line_width', 2), display_mode if auto_mode == "edit" else st.session_state.get('display_mode', "仅显示划线图"), zoom_level if auto_mode == "edit" else st.session_state.get('zoom_level', 1.0), show_line_numbers if auto_mode == "edit" else True, VIEWPORT_WIDTH, VIEWPORT_HEIGHT ) create_save_section( auto_mode, st.session_state.structure, st.session_state.image, line_width if auto_mode == "edit" else 2, TABLE_EDITOR_CONFIG["output"] ) return # 🆕 新建标注模式 if work_mode == "🆕 新建标注": create_file_uploader_section(work_mode) if not (st.session_state.ocr_data and st.session_state.image): st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片") with st.expander("📖 使用说明"): st.markdown(""" ### 🆕 新建标注模式 **支持的OCR格式** **1. PPStructure V3 格式 (推荐)** ```json { "parsing_res_list": [...], "overall_ocr_res": { "rec_boxes": [[x1, y1, x2, y2], ...], "rec_texts": ["文本1", "文本2", ...] } } ``` **2. 标准格式** ```json [ { "text": "文本内容", "bbox": [x1, y1, x2, y2] } ] ``` ### 📂 加载已有标注模式 1. 上传之前保存的 `*_structure.json` 配置文件 2. 上传对应的图片(可选) 3. 继续调整表格线位置 4. 保存更新后的配置 """) return st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}") _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode( st.session_state.ocr_data, st.session_state.image, TABLE_EDITOR_CONFIG["display"] ) if 'structure' in st.session_state and st.session_state.structure: render_table_structure_view( st.session_state.structure, st.session_state.image, line_width, display_mode, zoom_level, show_line_numbers, VIEWPORT_WIDTH, VIEWPORT_HEIGHT ) create_save_section( work_mode, st.session_state.structure, st.session_state.image, line_width, TABLE_EDITOR_CONFIG["output"] ) return # 📂 加载已有标注模式 if work_mode == "📂 加载已有标注": create_file_uploader_section(work_mode) if 'structure' not in st.session_state: st.info("👆 请在左侧上传配置文件 (*_structure.json)") with st.expander("📖 使用说明"): st.markdown(""" ### 📂 加载已有标注 **步骤:** 1. **上传配置文件**:选择之前保存的 `*_structure.json` 2. **上传图片**(可选):上传对应的图片以查看效果 3. **调整表格线**:使用下方的工具调整横线/竖线位置 4. **保存更新**:保存修改后的配置 **提示:** - 即使没有图片,也可以直接编辑配置文件中的坐标 - 配置文件包含完整的表格结构信息 - 可以应用到同类型的其他页面 """) return if st.session_state.image is None: st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。") image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode( st.session_state.structure, st.session_state.image, TABLE_EDITOR_CONFIG["display"] ) render_table_structure_view( st.session_state.structure, image, line_width, display_mode, zoom_level, show_line_numbers, VIEWPORT_WIDTH, VIEWPORT_HEIGHT ) create_save_section( work_mode, st.session_state.structure, image, line_width, TABLE_EDITOR_CONFIG["output"] ) if __name__ == "__main__": create_table_line_editor()