"""Analysis feature controls."""

import json
import traceback
from typing import Dict, Optional

import streamlit as st


def _commit_structure(structure: Dict) -> None:
    """Store a freshly computed structure in the session and reset derived state.

    Writes ``structure`` to ``st.session_state.structure``, empties the
    undo/redo stacks and clears the cached table images.

    Args:
        structure: The table structure to make current.
    """
    st.session_state.structure = structure
    st.session_state.undo_stack = []
    st.session_state.redo_stack = []

    # Kept as a function-level import, as in the original code
    # (presumably to avoid an import cycle -- TODO confirm).
    from .drawing import clear_table_image_cache

    clear_table_image_cache()


def _report_failure(exc: Exception) -> None:
    """Show an analysis error and its traceback in an expander.

    Must be called from inside an ``except`` block, because the traceback
    is taken from the exception currently being handled.

    Args:
        exc: The exception that aborted the analysis.
    """
    st.error(f"❌ 分析失败: {exc}")
    with st.expander("🔍 详细错误"):
        st.code(traceback.format_exc())


def _generate_mineru_structure(tool: str) -> Optional[Dict]:
    """Rebuild the table structure from the current catalog entry's raw JSON.

    Reads ``current_catalog`` and ``current_catalog_index`` from the session
    state, loads the JSON file referenced by the entry's ``"json"`` key and
    passes it to ``get_structure_from_ocr``.

    Args:
        tool: Tool type, forwarded unchanged to ``get_structure_from_ocr``.

    Returns:
        The generated structure, or ``None`` when no valid current entry
        exists (an error message is shown in that case).
    """
    catalog = st.session_state.get('current_catalog', [])
    index = st.session_state.get('current_catalog_index', 0)

    # Reject negative indices as well: they would otherwise silently select
    # an entry counted from the end of the catalog.
    if not catalog or not 0 <= index < len(catalog):
        st.error("❌ 未找到当前文件")
        return None

    # Load the raw JSON of the current entry.
    with open(catalog[index]["json"], "r", encoding="utf-8") as fp:
        raw = json.load(fp)

    # Re-parse to obtain the complete structure (function-level import kept
    # from the original code).
    from .data_processor import get_structure_from_ocr

    table_bbox, structure = get_structure_from_ocr(raw, tool)

    st.session_state.table_bbox = table_bbox
    _commit_structure(structure)

    st.success(
        f"✅ 表格结构生成成功!\n\n"
        f"检测到 {structure['total_rows']} 行,{structure['total_cols']} 列"
    )
    return structure


def _analyze_with_generator(
    generator,
    y_tolerance: int,
    x_tolerance: int,
    min_row_height: int,
) -> Dict:
    """Run the generator's structure analysis and make the result current.

    Args:
        generator: Object providing ``analyze_table_structure``.
        y_tolerance: Y-axis clustering tolerance chosen in the sidebar.
        x_tolerance: X-axis clustering tolerance chosen in the sidebar.
        min_row_height: Minimum row height chosen in the sidebar.

    Returns:
        The structure returned by ``generator.analyze_table_structure``.
    """
    structure = generator.analyze_table_structure(
        y_tolerance=y_tolerance,
        x_tolerance=x_tolerance,
        min_row_height=min_row_height,
    )
    _commit_structure(structure)

    st.success(
        f"✅ 分析完成!\n\n"
        f"检测到 {len(structure['rows'])} 行,{len(structure['columns'])} 列"
    )
    return structure


def create_analysis_section(generator, tool: str = "ppstructv3") -> Optional[Dict]:
    """Render the sidebar analysis controls and run the analysis on click.

    For the ``"mineru"`` tool (case-insensitive) a single button regenerates
    the structure from the current catalog entry's raw JSON. For any other
    tool, three sliders (Y tolerance, X tolerance, minimum row height) are
    shown and the button runs ``generator.analyze_table_structure``.

    On success the structure is stored in ``st.session_state``, the undo/redo
    stacks are reset and the cached table images are cleared. Any exception
    raised during analysis is shown in the UI instead of propagating.

    Args:
        generator: TableLineGenerator instance; only used for non-MinerU tools.
        tool: Tool type.

    Returns:
        The analysed table structure if the button was clicked and the
        analysis succeeded, otherwise ``None``.
    """
    st.sidebar.subheader("🔍 表格结构分析")

    # The controls shown depend on the tool type.
    if tool.lower() == "mineru":
        st.sidebar.info("📋 MinerU 格式:直接使用 table_cells 生成结构")

        if not st.sidebar.button("🚀 生成表格结构", type="primary"):
            return None

        with st.spinner("正在分析表格结构..."):
            try:
                return _generate_mineru_structure(tool)
            except Exception as exc:
                # UI boundary: surface every failure to the user rather
                # than crashing the app.
                _report_failure(exc)
        return None

    # PPStructure V3 format: expose tunable clustering parameters.
    y_tolerance = st.sidebar.slider(
        "Y轴聚类容差(行检测)",
        min_value=1,
        max_value=20,
        value=5,
        help="相邻文本框Y坐标差小于此值时合并为同一行",
    )
    x_tolerance = st.sidebar.slider(
        "X轴聚类容差(列检测)",
        min_value=5,
        max_value=30,
        value=10,
        help="相邻文本框X坐标差小于此值时合并为同一列",
    )
    min_row_height = st.sidebar.slider(
        "最小行高",
        min_value=10,
        max_value=50,
        value=20,
        help="行高小于此值的将被过滤",
    )

    if not st.sidebar.button("🚀 分析表格结构", type="primary"):
        return None

    with st.spinner("正在分析表格结构..."):
        try:
            return _analyze_with_generator(
                generator, y_tolerance, x_tolerance, min_row_height
            )
        except Exception as exc:
            # UI boundary: surface every failure to the user rather than
            # crashing the app.
            _report_failure(exc)
    return None