| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245 |
- """
- 表格线可视化编辑器
- 支持人工调整表格线位置
- """
- import streamlit as st
- from pathlib import Path
- from PIL import Image
- try:
- from table_line_generator import TableLineGenerator
- except ImportError:
- from .table_line_generator import TableLineGenerator
- # 导入编辑器模块
- from editor import (
- # UI 组件
- create_file_uploader_section,
- create_display_settings_section,
- create_undo_redo_section,
- create_analysis_section,
- create_save_section,
-
- # 绘图
- get_cached_table_lines_image,
-
- # 状态管理
- init_undo_stack,
-
- # 调整
- create_adjustment_section,
- )
# Sidebar radio labels for the two work modes; the selected label is compared
# against these and forwarded unchanged to the editor sections.
_MODE_NEW = "🆕 新建标注"
_MODE_LOAD = "📂 加载已有标注"

# Session-state keys that are created (as None) before any section reads them.
_SESSION_DEFAULT_KEYS = (
    'loaded_json_name',
    'loaded_image_name',
    'loaded_config_name',
    'ocr_data',
    'image',
)

# Blank-canvas size used when the structure has no usable 'table_bbox'.
_FALLBACK_CANVAS_SIZE = (2000, 2000)
# Pixels added to the bbox's 3rd/4th values to size the blank canvas.
_CANVAS_MARGIN = 100

# Help text shown in "new annotation" mode until both inputs are uploaded.
_NEW_MODE_HELP = """
### 🆕 新建标注模式

**支持的OCR格式**

**1. PPStructure V3 格式 (推荐)**
```json
{
"parsing_res_list": [...],
"overall_ocr_res": {
"rec_boxes": [[x1, y1, x2, y2], ...],
"rec_texts": ["文本1", "文本2", ...]
}
}
```

**2. 标准格式**
```json
[
{
"text": "文本内容",
"bbox": [x1, y1, x2, y2]
}
]
```

### 📂 加载已有标注模式

1. 上传之前保存的 `*_structure.json` 配置文件
2. 上传对应的图片(可选)
3. 继续调整表格线位置
4. 保存更新后的配置
"""

# Help text shown in "load existing annotation" mode until a config is loaded.
_LOAD_MODE_HELP = """
### 📂 加载已有标注

**步骤:**

1. **上传配置文件**:选择之前保存的 `*_structure.json`
2. **上传图片**(可选):上传对应的图片以查看效果
3. **调整表格线**:使用下方的工具调整横线/竖线位置
4. **保存更新**:保存修改后的配置

**提示:**
- 即使没有图片,也可以直接编辑配置文件中的坐标
- 配置文件包含完整的表格结构信息
- 可以应用到同类型的其他页面
"""


def _init_session_defaults() -> None:
    """Create every key in ``_SESSION_DEFAULT_KEYS`` as None if it is missing."""
    for key in _SESSION_DEFAULT_KEYS:
        if key not in st.session_state:
            st.session_state[key] = None


def _virtual_canvas_size(structure) -> tuple:
    """Return an integer ``(width, height)`` for the blank stand-in canvas.

    The size is the 3rd and 4th values of ``structure['table_bbox']`` plus a
    margin. Values are coerced to ``int`` because the canvas is created with
    ``Image.new``, and a bbox read from JSON may hold floats. A missing or
    malformed bbox falls back to ``_FALLBACK_CANVAS_SIZE`` instead of raising.
    """
    bbox = structure.get('table_bbox')
    try:
        width = int(bbox[2]) + _CANVAS_MARGIN
        height = int(bbox[3]) + _CANVAS_MARGIN
    except (TypeError, ValueError, IndexError, KeyError):
        return _FALLBACK_CANVAS_SIZE
    return width, height


def _prepare_new_mode():
    """Check the inputs for "new annotation" mode and ensure a generator exists.

    Returns:
        The uploaded image when both OCR data and image are present in the
        session state; ``None`` after showing the usage help otherwise.
    """
    ocr_data = st.session_state.ocr_data
    image = st.session_state.image

    if ocr_data is None or image is None:
        st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
        with st.expander("📖 使用说明"):
            st.markdown(_NEW_MODE_HELP)
        return None

    st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")

    # Build the generator only once per session; later reruns reuse it.
    if 'generator' not in st.session_state or st.session_state.generator is None:
        try:
            generator = TableLineGenerator(image, ocr_data)
        except Exception as e:
            # Top-level UI boundary: report the failure and halt this run.
            st.error(f"❌ 初始化失败: {e}")
            st.stop()
        else:
            st.session_state.generator = generator

    return image


def _prepare_load_mode():
    """Check the inputs for "load existing annotation" mode.

    Returns:
        The image to draw on (the uploaded one, or a white stand-in canvas
        when only the config was loaded); ``None`` after showing the usage
        help when no structure is available.
    """
    # A present-but-empty structure is treated like a missing one, matching
    # the truthiness check used before rendering results; otherwise a None
    # value would crash on the 'table_bbox' lookup.
    if 'structure' not in st.session_state or not st.session_state.structure:
        st.info("👆 请在左侧上传配置文件 (*_structure.json)")
        with st.expander("📖 使用说明"):
            st.markdown(_LOAD_MODE_HELP)
        return None

    image = st.session_state.image
    if image is not None:
        return image

    st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")

    # No image uploaded: draw the structure on a blank white canvas instead.
    dummy_width, dummy_height = _virtual_canvas_size(st.session_state.structure)
    canvas = Image.new('RGB', (dummy_width, dummy_height), color='white')
    st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height}) 显示表格结构")
    return canvas


def _render_parameter_sliders() -> tuple:
    """Render the sidebar sliders; return (y_tolerance, x_tolerance, min_row_height)."""
    st.sidebar.header("🔧 参数调整")
    y_tolerance = st.sidebar.slider("Y轴聚类容差(像素)", 1, 20, 5)
    x_tolerance = st.sidebar.slider("X轴聚类容差(像素)", 5, 50, 10)
    min_row_height = st.sidebar.slider("最小行高(像素)", 10, 100, 20)
    return y_tolerance, x_tolerance, min_row_height


def _render_images(image, img_with_lines, display_mode, zoom_level) -> None:
    """Show the original and/or line-annotated image per ``display_mode``."""
    if display_mode == "对比显示":
        # Side-by-side comparison; each image fills its column.
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("原图")
            st.image(image, use_container_width=True)
        with col2:
            st.subheader("添加表格线")
            st.image(img_with_lines, use_container_width=True)
    elif display_mode == "仅显示划线图":
        display_width = int(img_with_lines.width * zoom_level)
        st.subheader(f"表格线图 (缩放: {zoom_level:.0%})")
        st.image(img_with_lines, width=display_width)
    else:
        # Any other display mode shows the original image only.
        display_width = int(image.width * zoom_level)
        st.subheader(f"原图 (缩放: {zoom_level:.0%})")
        st.image(image, width=display_width)


def _render_structure_details(structure) -> None:
    """Show a collapsible JSON summary of the table structure.

    Every field is read with ``.get`` so that a structure lacking a key
    (for example 'rows' or 'columns') yields an empty or None entry instead
    of raising ``KeyError`` after the images have already been rendered.
    """
    with st.expander("📊 表格结构详情"):
        st.json({
            "行数": len(structure.get('rows', [])),
            "列数": len(structure.get('columns', [])),
            "横线数": len(structure.get('horizontal_lines', [])),
            "竖线数": len(structure.get('vertical_lines', [])),
            "横线坐标": structure.get('horizontal_lines', []),
            "竖线坐标": structure.get('vertical_lines', []),
            "标准行高": structure.get('row_height'),
            "列宽度": structure.get('col_widths'),
            # Converted to lists so the values are JSON-serializable.
            "修改的横线": list(structure.get('modified_h_lines', set())),
            "修改的竖线": list(structure.get('modified_v_lines', set()))
        })


def create_table_line_editor():
    """Build the Streamlit page of the table-line editor.

    The page is rendered top to bottom in this order:

    1. page config, title, session-state defaults and undo/redo stack;
    2. work-mode radio and the file-uploader section;
    3. mode-specific prerequisite checks (the function returns early, after
       showing usage help, when the required inputs are not loaded);
    4. sidebar controls: clustering sliders (new mode only), display
       settings, undo/redo, and the analysis section (new mode only);
    5. when ``st.session_state.structure`` is non-empty: the rendered
       images, the manual adjustment section, a structure summary and the
       save section.

    Returns:
        None. All output is produced through Streamlit calls.
    """
    # Page configuration.
    st.set_page_config(
        page_title="表格线编辑器",
        page_icon="📏",
        layout="wide",
        initial_sidebar_state="expanded"
    )
    st.title("📏 表格线编辑器")

    _init_session_defaults()
    init_undo_stack()

    # Work-mode selection.
    st.sidebar.header("📂 工作模式")
    work_mode = st.sidebar.radio(
        "选择模式",
        [_MODE_NEW, _MODE_LOAD],
        index=0
    )
    is_new_mode = work_mode == _MODE_NEW

    create_file_uploader_section(work_mode)

    # Prerequisites: each helper returns None once it has shown the help text.
    image = _prepare_new_mode() if is_new_mode else _prepare_load_mode()
    if image is None:
        return

    # Sidebar controls, kept in their original top-to-bottom order.
    analysis_params = _render_parameter_sliders() if is_new_mode else None
    line_width, display_mode, zoom_level, show_line_numbers = create_display_settings_section()
    create_undo_redo_section()
    if analysis_params is not None:
        create_analysis_section(*analysis_params)

    # Nothing to draw until a structure has been analysed or loaded.
    if 'structure' not in st.session_state or not st.session_state.structure:
        return
    structure = st.session_state.structure

    img_with_lines = get_cached_table_lines_image(
        image,
        structure,
        line_width=line_width,
        show_numbers=show_line_numbers
    )
    _render_images(image, img_with_lines, display_mode, zoom_level)

    # Manual adjustment, details and saving.
    create_adjustment_section(structure)
    _render_structure_details(structure)
    create_save_section(work_mode, structure, image, line_width)
# Build the editor page only when this file is executed as a script,
# not when it is imported as a module.
if "__main__" == __name__:
    create_table_line_editor()
|