""" UI 组件 """ import streamlit as st import json from pathlib import Path from PIL import Image import tempfile try: from ..table_line_generator import TableLineGenerator except ImportError: from table_line_generator import TableLineGenerator from .config_loader import load_structure_from_config from .drawing import clear_table_image_cache def parse_ocr_data(ocr_data): """解析OCR数据,支持多种格式""" # 如果是字符串,尝试解析 if isinstance(ocr_data, str): try: ocr_data = json.loads(ocr_data) except json.JSONDecodeError: st.error("❌ JSON 格式错误,无法解析") return [] # 检查是否为 PPStructure V3 格式 if isinstance(ocr_data, dict) and 'parsing_res_list' in ocr_data and 'overall_ocr_res' in ocr_data: st.info("🔍 检测到 PPStructure V3 格式") try: table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(ocr_data) st.success(f"✅ 表格区域: {table_bbox}") st.success(f"✅ 表格内文本框: {len(text_boxes)} 个") return text_boxes except Exception as e: st.error(f"❌ 解析 PPStructure 结果失败: {e}") return [] # 确保是列表 if not isinstance(ocr_data, list): st.error(f"❌ OCR 数据应该是列表,实际类型: {type(ocr_data)}") return [] if not ocr_data: st.warning("⚠️ OCR 数据为空") return [] first_item = ocr_data[0] if not isinstance(first_item, dict): st.error(f"❌ OCR 数据项应该是字典,实际类型: {type(first_item)}") return [] if 'bbox' not in first_item: st.error("❌ OCR 数据缺少 'bbox' 字段") st.info("💡 支持的格式示例:\n```json\n[\n {\n \"text\": \"文本\",\n \"bbox\": [x1, y1, x2, y2]\n }\n]\n```") return [] return ocr_data def create_file_uploader_section(work_mode: str): """ 创建文件上传区域 Args: work_mode: 工作模式("🆕 新建标注" 或 "📂 加载已有标注") """ if work_mode == "🆕 新建标注": st.sidebar.subheader("上传文件") uploaded_json = st.sidebar.file_uploader("上传OCR结果JSON", type=['json'], key="new_json") uploaded_image = st.sidebar.file_uploader("上传对应图片", type=['jpg', 'png'], key="new_image") # 处理 JSON 上传 if uploaded_json is not None: if st.session_state.loaded_json_name != uploaded_json.name: try: raw_data = json.load(uploaded_json) with st.expander("🔍 原始数据结构"): if isinstance(raw_data, dict): st.json({k: f"<{type(v).__name__}>" if not isinstance(v, (str, int, float, bool, type(None))) else v for k, v in list(raw_data.items())[:5]}) else: st.json(raw_data[:3] if len(raw_data) > 3 else raw_data) ocr_data = parse_ocr_data(raw_data) if not ocr_data: st.error("❌ 无法解析 OCR 数据,请检查 JSON 格式") st.stop() st.session_state.ocr_data = ocr_data st.session_state.loaded_json_name = uploaded_json.name st.session_state.loaded_config_name = None # 清除旧数据 if 'structure' in st.session_state: del st.session_state.structure if 'generator' in st.session_state: del st.session_state.generator st.session_state.undo_stack = [] st.session_state.redo_stack = [] clear_table_image_cache() st.success(f"✅ 成功加载 {len(ocr_data)} 条 OCR 记录") except Exception as e: st.error(f"❌ 加载数据失败: {e}") st.stop() # 处理图片上传 if uploaded_image is not None: if st.session_state.loaded_image_name != uploaded_image.name: try: image = Image.open(uploaded_image) st.session_state.image = image st.session_state.loaded_image_name = uploaded_image.name # 清除旧数据 if 'structure' in st.session_state: del st.session_state.structure if 'generator' in st.session_state: del st.session_state.generator st.session_state.undo_stack = [] st.session_state.redo_stack = [] clear_table_image_cache() st.success(f"✅ 成功加载图片: {uploaded_image.name}") except Exception as e: st.error(f"❌ 加载图片失败: {e}") st.stop() else: # 加载已有标注 st.sidebar.subheader("加载已保存的标注") uploaded_config = st.sidebar.file_uploader( "上传配置文件 (*_structure.json)", type=['json'], key="load_config" ) uploaded_image_for_config = st.sidebar.file_uploader( "上传对应图片(可选)", type=['jpg', 'png'], key="load_image" ) # 处理配置文件加载 if uploaded_config is not None: if st.session_state.loaded_config_name != uploaded_config.name: try: # 创建临时文件 with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as tmp: tmp.write(uploaded_config.getvalue().decode('utf-8')) tmp_path = tmp.name # 加载结构 structure = load_structure_from_config(Path(tmp_path)) # 清理临时文件 Path(tmp_path).unlink() st.session_state.structure = structure st.session_state.loaded_config_name = uploaded_config.name # 清除历史记录和缓存 st.session_state.undo_stack = [] st.session_state.redo_stack = [] clear_table_image_cache() st.success(f"✅ 成功加载配置: {uploaded_config.name}") st.info( f"📊 表格结构: {len(structure['rows'])}行 x {len(structure['columns'])}列\n\n" f"📏 横线数: {len(structure.get('horizontal_lines', []))}\n\n" f"📏 竖线数: {len(structure.get('vertical_lines', []))}" ) # 显示配置文件详情 with st.expander("📋 配置详情"): st.json({ "行数": len(structure['rows']), "列数": len(structure['columns']), "横线数": len(structure.get('horizontal_lines', [])), "竖线数": len(structure.get('vertical_lines', [])), "行高": structure.get('row_height'), "列宽": structure.get('col_widths'), "已修改的横线": list(structure.get('modified_h_lines', set())), "已修改的竖线": list(structure.get('modified_v_lines', set())) }) except Exception as e: st.error(f"❌ 加载配置失败: {e}") import traceback st.code(traceback.format_exc()) st.stop() # 处理图片加载 if uploaded_image_for_config is not None: if st.session_state.loaded_image_name != uploaded_image_for_config.name: try: image = Image.open(uploaded_image_for_config) st.session_state.image = image st.session_state.loaded_image_name = uploaded_image_for_config.name clear_table_image_cache() st.success(f"✅ 成功加载图片: {uploaded_image_for_config.name}") except Exception as e: st.error(f"❌ 加载图片失败: {e}") st.stop() # 提示信息 if 'structure' in st.session_state and st.session_state.image is None: st.warning("⚠️ 已加载配置,但未加载对应图片。请上传图片以查看效果。") st.info("💡 提示:配置文件已加载,您可以:\n1. 上传对应图片查看效果\n2. 直接编辑配置并保存") def create_display_settings_section(): """创建显示设置区域""" st.sidebar.divider() st.sidebar.subheader("🖼️ 显示设置") line_width = st.sidebar.slider("线条宽度", 1, 5, 2) display_mode = st.sidebar.radio("显示模式", ["对比显示", "仅显示划线图", "仅显示原图"], index=1) zoom_level = st.sidebar.slider("图片缩放", 0.25, 2.0, 1.0, 0.25) show_line_numbers = st.sidebar.checkbox("显示线条编号", value=True) return line_width, display_mode, zoom_level, show_line_numbers def create_undo_redo_section(): """创建撤销/重做区域""" from .state_manager import undo_last_action, redo_last_action from .drawing import clear_table_image_cache st.sidebar.divider() st.sidebar.subheader("↩️ 撤销/重做") col1, col2 = st.sidebar.columns(2) with col1: if st.button("↩️ 撤销", disabled=len(st.session_state.undo_stack) == 0): if undo_last_action(): clear_table_image_cache() st.success("✅ 已撤销") st.rerun() with col2: if st.button("↪️ 重做", disabled=len(st.session_state.redo_stack) == 0): if redo_last_action(): clear_table_image_cache() st.success("✅ 已重做") st.rerun() st.sidebar.info(f"📚 历史记录: {len(st.session_state.undo_stack)} 条") def create_analysis_section(y_tolerance, x_tolerance, min_row_height): """ 创建分析区域 Args: y_tolerance: Y轴聚类容差 x_tolerance: X轴聚类容差 min_row_height: 最小行高 """ if st.button("🔍 分析表格结构"): with st.spinner("分析中..."): try: generator = st.session_state.generator structure = generator.analyze_table_structure( y_tolerance=y_tolerance, x_tolerance=x_tolerance, min_row_height=min_row_height ) if not structure: st.warning("⚠️ 未检测到表格结构") st.stop() structure['modified_h_lines'] = set() structure['modified_v_lines'] = set() st.session_state.structure = structure st.session_state.undo_stack = [] st.session_state.redo_stack = [] clear_table_image_cache() st.success( f"✅ 检测到 {len(structure['rows'])} 行({len(structure['horizontal_lines'])} 条横线)," f"{len(structure['columns'])} 列({len(structure['vertical_lines'])} 条竖线)" ) col1, col2, col3, col4 = st.columns(4) with col1: st.metric("行数", len(structure['rows'])) with col2: st.metric("横线数", len(structure['horizontal_lines'])) with col3: st.metric("列数", len(structure['columns'])) with col4: st.metric("竖线数", len(structure['vertical_lines'])) except Exception as e: st.error(f"❌ 分析失败: {e}") import traceback st.code(traceback.format_exc()) st.stop() def create_save_section(work_mode, structure, image, line_width): """ 创建保存区域 Args: work_mode: 工作模式 structure: 表格结构 image: 图片 line_width: 线条宽度 """ from .config_loader import save_structure_to_config from .drawing import draw_clean_table_lines import io st.divider() save_col1, save_col2, save_col3 = st.columns(3) with save_col1: save_structure = st.checkbox("保存表格结构配置", value=True) with save_col2: save_image = st.checkbox("保存表格线图片", value=True) with save_col3: line_color_option = st.selectbox( "保存时线条颜色", ["黑色", "蓝色", "红色"], index=0 ) if st.button("💾 保存", type="primary"): output_dir = Path("output/table_structures") output_dir.mkdir(parents=True, exist_ok=True) # 确定文件名 if work_mode == "🆕 新建标注": if st.session_state.loaded_json_name: base_name = Path(st.session_state.loaded_json_name).stem else: base_name = "table_structure" else: if st.session_state.loaded_config_name: base_name = Path(st.session_state.loaded_config_name).stem if base_name.endswith('_structure'): base_name = base_name[:-10] elif st.session_state.loaded_image_name: base_name = Path(st.session_state.loaded_image_name).stem else: base_name = "table_structure" saved_files = [] if save_structure: structure_path = output_dir / f"{base_name}_structure.json" save_structure_to_config(structure, structure_path) saved_files.append(("配置文件", structure_path)) with open(structure_path, 'r') as f: st.download_button( "📥 下载配置文件", f.read(), file_name=f"{base_name}_structure.json", mime="application/json" ) if save_image: if st.session_state.image is None: st.warning("⚠️ 无法保存图片:未加载图片文件") else: color_map = { "黑色": (0, 0, 0), "蓝色": (0, 0, 255), "红色": (255, 0, 0) } selected_color = color_map[line_color_option] clean_img = draw_clean_table_lines( st.session_state.image, structure, line_width=line_width, line_color=selected_color ) output_image_path = output_dir / f"{base_name}_with_lines.png" clean_img.save(output_image_path) saved_files.append(("表格线图片", output_image_path)) buf = io.BytesIO() clean_img.save(buf, format='PNG') buf.seek(0) st.download_button( "📥 下载表格线图片", buf, file_name=f"{base_name}_with_lines.png", mime="image/png" ) if saved_files: st.success(f"✅ 已保存 {len(saved_files)} 个文件:") for file_type, file_path in saved_files: st.info(f" • {file_type}: {file_path}")