| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204 |
- """
- 文件上传和加载处理
- """
- import streamlit as st
- import json
- import tempfile
- from pathlib import Path
- from PIL import Image
- from .config_loader import load_structure_from_config
- from .drawing import clear_table_image_cache
- try:
- from table_line_generator import TableLineGenerator
- except ImportError:
- from ..table_line_generator import TableLineGenerator
def handle_json_upload(uploaded_json):
    """Load an uploaded OCR-result JSON file into the Streamlit session.

    Returns immediately when no file is supplied or when the file's name
    equals ``st.session_state.loaded_json_name`` (already handled on an
    earlier rerun). Otherwise the JSON is decoded, a short preview is shown
    in an expander, and the payload is passed to
    ``TableLineGenerator.parse_ocr_data`` with ``tool="ppstructv3"``.

    On success the parsed records are stored in ``st.session_state.ocr_data``,
    the loaded-config marker is reset to ``None``, existing ``structure`` and
    ``generator`` entries are removed, the undo/redo stacks are emptied and
    the table image cache is cleared. When parsing yields nothing, or any
    exception is raised, an error message is displayed and ``st.stop()`` is
    called.

    Args:
        uploaded_json: File-like object exposing ``name`` (the caller passes
            the value returned by ``st.sidebar.file_uploader``), or ``None``.
    """
    if uploaded_json is None:
        return

    session = st.session_state
    if session.loaded_json_name == uploaded_json.name:
        return

    try:
        payload = json.load(uploaded_json)

        # Show a compact preview: the first five keys of a mapping (nested
        # values replaced by their type name) or the first three items of
        # any other payload.
        with st.expander("🔍 原始数据结构"):
            if isinstance(payload, dict):
                plain_types = (str, int, float, bool, type(None))
                preview = {}
                for key, value in list(payload.items())[:5]:
                    if isinstance(value, plain_types):
                        preview[key] = value
                    else:
                        preview[key] = f"<{type(value).__name__}>"
                st.json(preview)
            elif len(payload) > 3:
                st.json(payload[:3])
            else:
                st.json(payload)

        records = TableLineGenerator.parse_ocr_data(payload, tool="ppstructv3")

        if not records:
            st.error("❌ 无法解析 OCR 数据,请检查 JSON 格式")
            st.stop()

        session.ocr_data = records
        session.loaded_json_name = uploaded_json.name
        session.loaded_config_name = None

        # Drop everything derived from the previously loaded data.
        for stale_key in ('structure', 'generator'):
            if stale_key in session:
                delattr(session, stale_key)
        session.undo_stack = []
        session.redo_stack = []
        clear_table_image_cache()

        st.success(f"✅ 成功加载 {len(records)} 条 OCR 记录")

    except Exception as e:
        st.error(f"❌ 加载数据失败: {e}")
        st.stop()
def handle_image_upload(uploaded_image):
    """Load an uploaded image into the Streamlit session.

    Returns immediately when no file is supplied or when the file's name
    equals ``st.session_state.loaded_image_name`` (already handled on an
    earlier rerun). Otherwise the image is opened and fully decoded, stored
    in ``st.session_state.image``, and all state derived from a previous
    image (``structure``, ``generator``, undo/redo stacks, table image
    cache) is discarded.

    If the file cannot be opened or decoded, an error message is displayed,
    the session state is left untouched and ``st.stop()`` is called.

    Args:
        uploaded_image: File-like object exposing ``name`` (the caller passes
            the value returned by ``st.sidebar.file_uploader``), or ``None``.
    """
    if uploaded_image is None:
        return

    if st.session_state.loaded_image_name == uploaded_image.name:
        return

    try:
        image = Image.open(uploaded_image)
        # Image.open() is lazy: it only reads the header. Decode the pixel
        # data now so a truncated or corrupt upload is reported here instead
        # of failing later, outside this error handler, when it is first drawn.
        image.load()

        st.session_state.image = image
        st.session_state.loaded_image_name = uploaded_image.name

        # Drop everything derived from the previously loaded image.
        if 'structure' in st.session_state:
            del st.session_state.structure
        if 'generator' in st.session_state:
            del st.session_state.generator
        st.session_state.undo_stack = []
        st.session_state.redo_stack = []
        clear_table_image_cache()

        st.success(f"✅ 成功加载图片: {uploaded_image.name}")

    except Exception as e:
        st.error(f"❌ 加载图片失败: {e}")
        st.stop()
def handle_config_upload(uploaded_config):
    """Load a saved table-structure config file into the Streamlit session.

    Returns immediately when no file is supplied or when the file's name
    equals ``st.session_state.loaded_config_name`` (already handled on an
    earlier rerun). Otherwise the upload is written to a temporary ``.json``
    file, parsed with ``load_structure_from_config`` and stored in
    ``st.session_state.structure``. The undo/redo stacks are emptied, the
    table image cache is cleared, and a summary of the structure is shown.

    The temporary file is always removed, including when decoding, writing
    or parsing fails. On any failure the error and its traceback are
    displayed and ``st.stop()`` is called.

    Args:
        uploaded_config: File-like object exposing ``name`` and
            ``getvalue()`` returning UTF-8 encoded bytes (the caller passes
            the value returned by ``st.sidebar.file_uploader``), or ``None``.
    """
    if uploaded_config is None:
        return

    if st.session_state.loaded_config_name == uploaded_config.name:
        return

    try:
        # Decode before creating the temp file so invalid UTF-8 cannot leave
        # an orphaned file behind.
        config_text = uploaded_config.getvalue().decode('utf-8')

        # load_structure_from_config is called with a filesystem path, so the
        # uploaded content is spooled to a temporary file first.
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(
                mode='w',
                suffix='.json',
                delete=False,
                encoding='utf-8',
            ) as tmp:
                tmp_path = Path(tmp.name)
                tmp.write(config_text)

            structure = load_structure_from_config(tmp_path)
        finally:
            # delete=False means nothing removes the file automatically;
            # clean up even when writing or parsing raised.
            if tmp_path is not None:
                tmp_path.unlink()

        st.session_state.structure = structure
        st.session_state.loaded_config_name = uploaded_config.name

        # History and cached renders belong to the previous structure.
        st.session_state.undo_stack = []
        st.session_state.redo_stack = []
        clear_table_image_cache()

        st.success(f"✅ 成功加载配置: {uploaded_config.name}")

        # Compute the summary figures once; both views below show them.
        row_count = len(structure['rows'])
        column_count = len(structure['columns'])
        h_line_count = len(structure.get('horizontal_lines', []))
        v_line_count = len(structure.get('vertical_lines', []))

        st.info(
            f"📊 表格结构: {row_count}行 x {column_count}列\n\n"
            f"📏 横线数: {h_line_count}\n\n"
            f"📏 竖线数: {v_line_count}"
        )

        # Detailed view of the loaded configuration.
        with st.expander("📋 配置详情"):
            st.json({
                "行数": row_count,
                "列数": column_count,
                "横线数": h_line_count,
                "竖线数": v_line_count,
                "行高": structure.get('row_height'),
                "列宽": structure.get('col_widths'),
                "已修改的横线": list(structure.get('modified_h_lines', set())),
                "已修改的竖线": list(structure.get('modified_v_lines', set())),
            })

    except Exception as e:
        st.error(f"❌ 加载配置失败: {e}")
        import traceback
        st.code(traceback.format_exc())
        st.stop()
def create_file_uploader_section(work_mode: str):
    """Render the sidebar upload widgets for the selected work mode.

    In new-annotation mode an OCR JSON uploader and an image uploader are
    shown and their values are passed to ``handle_json_upload`` and
    ``handle_image_upload``. In every other mode a config uploader and an
    optional image uploader are shown and passed to ``handle_config_upload``
    and ``handle_image_upload``; if a structure is present in the session
    but ``st.session_state.image`` is ``None``, a warning and a hint are
    displayed.

    Args:
        work_mode: Work-mode label. ``"🆕 新建标注"`` selects new-annotation
            mode; any other value (expected: ``"📂 加载已有标注"``) selects
            load-existing-annotation mode.
    """
    sidebar = st.sidebar

    # New annotation: OCR result JSON plus the matching image.
    if work_mode == "🆕 新建标注":
        sidebar.subheader("上传文件")

        ocr_json_file = sidebar.file_uploader(
            "上传OCR结果JSON",
            type=['json'],
            key="new_json"
        )
        page_image_file = sidebar.file_uploader(
            "上传对应图片",
            type=['jpg', 'png'],
            key="new_image"
        )

        handle_json_upload(ocr_json_file)
        handle_image_upload(page_image_file)
        return

    # Load an existing annotation: saved config plus an optional image.
    sidebar.subheader("加载已保存的标注")

    config_file = sidebar.file_uploader(
        "上传配置文件 (*_structure.json)",
        type=['json'],
        key="load_config"
    )
    config_image_file = sidebar.file_uploader(
        "上传对应图片(可选)",
        type=['jpg', 'png'],
        key="load_image"
    )

    handle_config_upload(config_file)
    handle_image_upload(config_image_file)

    # Hint only applies when a structure is loaded without an image.
    if 'structure' not in st.session_state:
        return
    if st.session_state.image is not None:
        return

    st.warning("⚠️ 已加载配置,但未加载对应图片。请上传图片以查看效果。")
    st.info(
        "💡 提示:配置文件已加载,您可以:\n"
        "1. 上传对应图片查看效果\n"
        "2. 直接编辑配置并保存"
    )
|