| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310 |
- """
- 表格线可视化编辑器
- 支持人工调整表格线位置
- """
- import warnings
- # 过滤 PaddleX 的语法警告
- warnings.filterwarnings('ignore', category=SyntaxWarning, module='paddlex')
- import streamlit as st
- from pathlib import Path
- from PIL import Image
- import yaml
- from typing import Dict, List, Optional, Tuple
- import argparse
- import sys
- try:
- from table_line_generator import TableLineGenerator
- except ImportError:
- from .table_line_generator import TableLineGenerator
- # 导入编辑器模块
- from editor import (
- # UI 组件
- create_file_uploader_section,
- create_display_settings_section,
- create_undo_redo_section,
- create_analysis_section,
- create_save_section,
- create_directory_selector,
- # 新增的模块功能
- setup_new_annotation_mode,
- setup_edit_annotation_mode,
- render_table_structure_view,
-
- # 绘图
- get_cached_table_lines_image,
-
- # 状态管理
- init_undo_stack,
-
- # 调整
- create_adjustment_section,
- show_image_with_scroll,
- # 配置
- load_table_editor_config,
- build_data_source_catalog,
- parse_table_editor_cli_args,
- # 🆕 批量应用模板
- create_batch_template_section,
- )
# Fallback configuration file: "table_line_generator.yaml" in the same
# directory as this module; used when no config path is given on the CLI.
DEFAULT_CONFIG_PATH = Path(__file__).parent / "table_line_generator.yaml"
@st.cache_resource
def get_cli_args():
    """Return the parsed table-editor CLI arguments.

    The result of ``parse_table_editor_cli_args()`` is memoized through
    ``st.cache_resource`` so the command line is not re-parsed on every
    Streamlit rerun.
    """
    parsed_args = parse_table_editor_cli_args()
    return parsed_args
@st.cache_resource
def get_table_editor_config():
    """Load the table editor configuration (memoized via ``st.cache_resource``).

    The file is taken from the ``config`` CLI argument (with ``~`` expanded)
    when that argument is set; otherwise ``DEFAULT_CONFIG_PATH`` is used.

    Returns:
        Whatever ``load_table_editor_config`` returns for the chosen path.
    """
    override = get_cli_args().config
    if override:
        resolved_path = Path(override).expanduser()
    else:
        resolved_path = DEFAULT_CONFIG_PATH
    return load_table_editor_config(resolved_path)
# Labels of the three work modes offered in the sidebar radio. They are
# compared verbatim against the radio's return value, so they must not change.
_MODE_NEW = "🆕 新建标注"
_MODE_LOAD = "📂 加载已有标注"
_MODE_DIRECTORY = "📁 目录模式"

# Session-state keys that must exist before any UI section runs, with the
# value each one is seeded with on first use.
_SESSION_STATE_DEFAULTS = {
    'loaded_json_name': None,
    'loaded_image_name': None,
    'loaded_config_name': None,
    'ocr_data': None,
    'image': None,
    # Directory-mode specific state.
    'dir_selected_index': 0,
    'last_loaded_entry': None,
    'dir_auto_mode': None,
    'current_data_source': None,
    'current_output_config': None,
}


def _init_editor_session_state():
    """Seed ``st.session_state`` with every key in ``_SESSION_STATE_DEFAULTS``.

    Keys that are already present are left untouched so values survive reruns.
    """
    for key, initial in _SESSION_STATE_DEFAULTS.items():
        if key not in st.session_state:
            st.session_state[key] = initial


def _resolve_default_line_color(output_cfg):
    """Return the default line colour name described by an output config.

    Args:
        output_cfg: Mapping that may contain ``defaults`` (a mapping with an
            optional ``line_color`` entry) and ``line_colors`` (a list of
            mappings with a ``name`` entry).

    Returns:
        ``defaults["line_color"]`` when that key is present; otherwise the
        ``name`` of the first entry in ``line_colors``; ``None`` when neither
        is available.
    """
    defaults = output_cfg.get("defaults") or {}
    if "line_color" in defaults:
        return defaults["line_color"]
    # Only look at line_colors when it is actually needed: the list may be
    # missing or empty, and indexing it unconditionally would raise.
    line_colors = output_cfg.get("line_colors") or []
    return line_colors[0]["name"] if line_colors else None


def _run_directory_mode(config, data_sources, viewport_width, viewport_height):
    """Render the directory work mode.

    Args:
        config: The loaded table editor configuration; its ``display`` and
            ``output`` sections are read here.
        data_sources: The configured ``data_sources`` list.
        viewport_width: Viewport width forwarded to the structure view.
        viewport_height: Viewport height forwarded to the structure view.
    """
    if not data_sources:
        st.sidebar.warning("未配置 data_sources")
        return

    auto_mode = create_directory_selector(data_sources, config["output"])

    # Show which data source is currently active.
    if st.session_state.current_data_source:
        ds_name = st.session_state.current_data_source.get("name", "未知")
        st.sidebar.success(f"✅ 数据源: {ds_name}")

    if auto_mode == "new":
        if not (st.session_state.ocr_data and st.session_state.image):
            st.warning("⚠️ 缺少必要数据")
            return
        (_, _, _, line_width, display_mode, zoom_level,
         show_line_numbers) = setup_new_annotation_mode(
            st.session_state.ocr_data,
            st.session_state.image,
            config["display"]
        )
    else:  # every value other than "new" is handled as edit
        if 'structure' not in st.session_state:
            st.warning("⚠️ 结构加载失败")
            return
        (_, line_width, display_mode, zoom_level,
         show_line_numbers) = setup_edit_annotation_mode(
            st.session_state.structure,
            st.session_state.image,
            config["display"]
        )

    # Nothing to render or save until a non-empty structure is available.
    if not ('structure' in st.session_state and st.session_state.structure):
        return

    render_table_structure_view(
        st.session_state.structure,
        # Fall back to a blank canvas when no image is loaded.
        st.session_state.image or Image.new('RGB', (2000, 2000), 'white'),
        line_width,
        display_mode,
        zoom_level,
        show_line_numbers,
        viewport_width,
        viewport_height
    )

    # The output config set in session state for the current data source
    # wins over the global one.
    output_cfg = st.session_state.current_output_config or config["output"]
    create_save_section(
        auto_mode,
        st.session_state.structure,
        st.session_state.image,
        line_width,
        output_cfg
    )

    # Batch template application is only offered in edit mode.
    if auto_mode == "edit":
        # Derive the fallback colour from the same config the save section
        # used, then from the global config if that one names no colour.
        default_color = _resolve_default_line_color(output_cfg)
        if default_color is None:
            default_color = _resolve_default_line_color(config["output"])

        create_batch_template_section(
            current_line_width=line_width,
            current_line_color=st.session_state.get('save_line_color', default_color)
        )


def _run_new_annotation_mode(work_mode):
    """Render the "new annotation" work mode: upload files, then analyze.

    Args:
        work_mode: The selected work-mode label, forwarded to the uploader.
    """
    create_file_uploader_section(work_mode)

    if not (st.session_state.ocr_data and st.session_state.image):
        return

    st.info(f"📂 已加载: {st.session_state.loaded_json_name}")

    st.sidebar.subheader("🔬 分析参数")

    method_labels = {
        "auto": "🤖 自动选择(推荐)",
        "cluster": "📊 聚类算法(通用)",
        "mineru": "🎯 MinerU 索引算法"
    }
    analysis_method = st.sidebar.selectbox(
        "分析算法",
        ["auto", "cluster", "mineru"],
        format_func=lambda x: method_labels[x]
    )

    # Values used whenever the cluster sliders are not shown or the cluster
    # algorithm is not the one that ends up running.
    default_y_tolerance, default_x_tolerance, default_min_row_height = 5, 10, 20
    y_tolerance = default_y_tolerance
    x_tolerance = default_x_tolerance
    min_row_height = default_min_row_height

    if analysis_method in ["auto", "cluster"]:
        y_tolerance = st.sidebar.slider("Y轴容差", 1, 20, 5)
        x_tolerance = st.sidebar.slider("X轴容差", 1, 30, 10)
        min_row_height = st.sidebar.slider("最小行高", 10, 50, 20)

    if not st.button("🔍 分析表格结构"):
        return

    with st.spinner("正在分析..."):
        generator = TableLineGenerator(
            st.session_state.image,
            st.session_state.ocr_data
        )

        if analysis_method == "auto":
            # Pick the algorithm from the data: any item with a 'row' key
            # selects "mineru", otherwise "cluster".
            has_cell_index = any('row' in item for item in st.session_state.ocr_data)
            method = "mineru" if has_cell_index else "cluster"
        else:
            method = analysis_method

        use_sliders = method == "cluster"
        st.session_state.structure = generator.analyze_table_structure(
            y_tolerance=y_tolerance if use_sliders else default_y_tolerance,
            x_tolerance=x_tolerance if use_sliders else default_x_tolerance,
            min_row_height=min_row_height if use_sliders else default_min_row_height,
            method=method
        )

        st.success(f"✅ 分析完成(使用 {method} 算法)")


def _run_load_annotation_mode(work_mode, config, viewport_width, viewport_height):
    """Render the "load existing annotation" work mode.

    Args:
        work_mode: The selected work-mode label, forwarded to the uploader
            and to the save section.
        config: The loaded table editor configuration; its ``display`` and
            ``output`` sections are read here.
        viewport_width: Viewport width forwarded to the structure view.
        viewport_height: Viewport height forwarded to the structure view.
    """
    create_file_uploader_section(work_mode)

    if 'structure' not in st.session_state:
        st.info("👆 请在左侧上传配置文件 (*_structure.json)")
        with st.expander("📖 使用说明"):
            st.markdown("""
            ### 📂 加载已有标注

            **步骤:**

            1. **上传配置文件**:选择之前保存的 `*_structure.json`
            2. **上传图片**(可选):上传对应的图片以查看效果
            3. **调整表格线**:使用下方的工具调整横线/竖线位置
            4. **保存更新**:保存修改后的配置

            **提示:**
            即使没有图片,也可以直接编辑配置文件中的坐标
            配置文件包含完整的表格结构信息
            可以应用到同类型的其他页面
            """)
        return

    if st.session_state.image is None:
        st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")

    (image, line_width, display_mode, zoom_level,
     show_line_numbers) = setup_edit_annotation_mode(
        st.session_state.structure,
        st.session_state.image,
        config["display"]
    )

    render_table_structure_view(
        st.session_state.structure,
        image,
        line_width,
        display_mode,
        zoom_level,
        show_line_numbers,
        viewport_width,
        viewport_height
    )
    create_save_section(
        work_mode,
        st.session_state.structure,
        image,
        line_width,
        config["output"]
    )


def create_table_line_editor():
    """Build the table line editor page.

    Configures the Streamlit page, loads the cached editor configuration,
    seeds the session state and the undo/redo stack, then dispatches to the
    work mode chosen in the sidebar (new annotation, load existing
    annotation, or directory mode).
    """
    st.set_page_config(
        page_title="表格线编辑器",
        page_icon="📏",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    st.title("📏 表格线编辑器")

    # Read the required settings up front so a malformed config fails before
    # any mode-specific UI is drawn.
    config = get_table_editor_config()
    viewport_width = config["viewport"]["width"]
    viewport_height = config["viewport"]["height"]
    data_sources = config.get("data_sources", [])

    _init_editor_session_state()
    init_undo_stack()

    st.sidebar.header("📂 工作模式")
    work_mode = st.sidebar.radio(
        "选择模式",
        [_MODE_NEW, _MODE_LOAD, _MODE_DIRECTORY],
        index=0
    )

    if work_mode == _MODE_DIRECTORY:
        _run_directory_mode(config, data_sources, viewport_width, viewport_height)
    elif work_mode == _MODE_NEW:
        _run_new_annotation_mode(work_mode)
    elif work_mode == _MODE_LOAD:
        _run_load_annotation_mode(work_mode, config, viewport_width, viewport_height)
if __name__ == "__main__":
    import traceback

    try:
        create_table_line_editor()
    except GeneratorExit:
        # Raised during Streamlit's internal cleanup; deliberately ignored.
        pass
    except KeyboardInterrupt:
        st.info("👋 程序已停止")
    except Exception as exc:
        # Top-level boundary: show the failure in the page, with the full
        # traceback available in a collapsible section.
        st.error(f"❌ 程序崩溃: {exc}")
        with st.expander("🔍 详细错误信息"):
            st.code(traceback.format_exc())
|