#!/usr/bin/env python3
"""
基于Streamlit的OCR可视化校验工具（主入口）
"""

import streamlit as st
from pathlib import Path
import json

from streamlit_validator_core import StreamlitOCRValidator
from streamlit_validator_ui import (
    setup_page_config, create_data_source_selector, message_box
)
from streamlit_validator_table import display_html_table_as_dataframe
from streamlit_validator_cross import (
    cross_validation_dialog, show_batch_cross_validation_results_dialog
)
from streamlit_validator_result import display_single_page_cross_validation
from ocr_validator_utils import get_data_source_display_name


def reset_cross_validation_results():
    """Clear the stored batch cross-validation result.

    Does nothing when the ``cross_validation_batch_result`` key has never
    been placed in ``st.session_state``; otherwise the entry is set to
    ``None`` and a notice is printed to stdout.
    """
    state = st.session_state
    if 'cross_validation_batch_result' not in state:
        return

    state.cross_validation_batch_result = None
    print("🔄 数据源已变更，交叉验证结果已清空")


def _detect_source_changes(validator):
    """Sync the tracked data-source keys with the validator.

    Compares the source keys remembered in ``st.session_state`` with the
    validator's current keys, updates the remembered keys when they differ,
    and prints a notice for each change.

    Returns:
        A ``(ocr_source_changed, verify_source_changed)`` tuple of booleans.
    """
    ocr_source_changed = False
    verify_source_changed = False

    if 'current_ocr_source' in st.session_state:
        if st.session_state.current_ocr_source != validator.current_source_key:
            ocr_source_changed = True
            st.session_state.current_ocr_source = validator.current_source_key
            print(f"🔄 OCR数据源已切换到: {validator.current_source_key}")

    if 'current_verify_source' in st.session_state:
        if st.session_state.current_verify_source != validator.verify_source_key:
            verify_source_changed = True
            st.session_state.current_verify_source = validator.verify_source_key
            print(f"🔄 验证数据源已切换到: {validator.verify_source_key}")

    return ocr_source_changed, verify_source_changed


def _render_statistics(validator):
    """Render the collapsible statistics panel for the current data source."""
    with st.expander("🔧 OCR工具统计信息", expanded=False):
        stats = validator.get_statistics()
        col1, col2, col3, col4, col5 = st.columns(5)

        with col1:
            st.metric("📊 总文本块", stats['total_texts'])
        with col2:
            st.metric("🔗 可点击文本", stats['clickable_texts'])
        with col3:
            st.metric("❌ 标记错误", stats['marked_errors'])
        with col4:
            st.metric("✅ 准确率", f"{stats['accuracy_rate']:.1f}%")
        with col5:
            if validator.current_source_config:
                tool_display = validator.current_source_config['ocr_tool'].upper()
                st.metric("🔧 OCR工具", tool_display)

        if stats['tool_info']:
            st.write("**详细信息:**", stats['tool_info'])


def main():
    """Main application: build the whole page for one Streamlit script run.

    Steps, in order: create or reuse the validator kept in
    ``st.session_state``, configure the page and title, render the
    data-source selector, invalidate cross-validation results when a source
    changed, render the file/page pickers and the cross-validation buttons,
    then the statistics panel and the three content tabs.
    """
    # Create the validator once and keep it in the session; later runs reuse it.
    if 'validator' not in st.session_state:
        validator = StreamlitOCRValidator()
        st.session_state.validator = validator

        # Start tracking which data sources are active.
        st.session_state.current_ocr_source = validator.current_source_key
        st.session_state.current_verify_source = validator.verify_source_key
    else:
        validator = st.session_state.validator
    config = st.session_state.validator.config

    # Streamlit re-executes this script on every interaction and only keeps
    # the elements emitted by the current run, so the title has to be emitted
    # on every run (previously it was rendered on the first run only and
    # vanished on the next rerun). The page setup is issued on every run as
    # well, following the usual pattern of configuring the page at the top of
    # each script run.
    setup_page_config(config)
    st.title(config['ui']['page_title'])

    if 'selected_text' not in st.session_state:
        st.session_state.selected_text = None
        st.session_state.compact_search_query = None
    if 'marked_errors' not in st.session_state:
        st.session_state.marked_errors = set()

    # Data-source selector.
    create_data_source_selector(validator)

    # Detect whether either data source changed since the previous run.
    ocr_source_changed, verify_source_changed = _detect_source_changes(validator)

    # Any change makes the stored cross-validation result stale.
    if ocr_source_changed or verify_source_changed:
        reset_cross_validation_results()

        # Tell the user which source changed.
        if ocr_source_changed and verify_source_changed:
            st.info("ℹ️ OCR数据源和验证数据源已变更，请重新运行交叉验证")
        elif ocr_source_changed:
            st.info("ℹ️ OCR数据源已变更，请重新运行交叉验证")
        elif verify_source_changed:
            st.info("ℹ️ 验证数据源已变更，请重新运行交叉验证")

    # Nothing else can be rendered without at least one data source.
    if not validator.all_sources:
        st.stop()

    # File selection area.
    with st.container(height=75, horizontal=True, horizontal_alignment='left', gap="medium"):
        if 'selected_file_index' not in st.session_state:
            st.session_state.selected_file_index = 0

        if validator.display_options:
            file_count = len(validator.display_options)

            # A remembered index can be out of range for the current file
            # list; st.selectbox raises on an out-of-range index, so fall
            # back to the first file instead.
            if not 0 <= st.session_state.selected_file_index < file_count:
                st.session_state.selected_file_index = 0

            selected_index = st.selectbox(
                "选择OCR结果文件",
                range(file_count),
                format_func=lambda i: validator.display_options[i],
                index=st.session_state.selected_file_index,
                key="selected_selectbox",
                label_visibility="collapsed"
            )
            st.session_state.selected_file_index = selected_index

            selected_file = validator.file_paths[selected_index]
            current_page = validator.file_info[selected_index]['page']

            # Page numbers need not be 1..file_count. The upper bound must
            # cover the largest known page, otherwise number_input raises
            # when `value` exceeds `max_value`. Never go below the original
            # bound (the file count).
            known_pages = [
                info['page'] for info in validator.file_info
                if isinstance(info['page'], int)
            ]
            max_page = max([file_count, *known_pages])

            page_input = st.number_input(
                "输入页码",
                placeholder="输入页码",
                label_visibility="collapsed",
                min_value=1,
                max_value=max_page,
                value=current_page,
                step=1,
                key="page_input"
            )

            if page_input != current_page:
                # Jump to the first file whose page number matches the input;
                # an unknown page number is silently ignored.
                target_index = next(
                    (i for i, info in enumerate(validator.file_info)
                     if info['page'] == page_input),
                    None,
                )
                if target_index is not None:
                    st.session_state.selected_file_index = target_index
                    # st.rerun() halts this run, so nothing after it executes.
                    st.rerun()

            # Load the OCR data only when the selection actually changed.
            if (st.session_state.selected_file_index >= 0
                    and validator.selected_file_index != st.session_state.selected_file_index
                    and selected_file):
                validator.selected_file_index = st.session_state.selected_file_index
                st.session_state.validator.load_ocr_data(selected_file)

                current_source_name = get_data_source_display_name(validator.current_source_config)
                # NOTE(review): the rerun below immediately follows, so this
                # message is probably replaced before it can be read - confirm.
                st.success(f"✅ 已加载 {current_source_name} - 第{validator.file_info[st.session_state.selected_file_index]['page']}页")
                st.rerun()
        else:
            st.warning("当前数据源中未找到OCR结果文件")

        # Cross-validation button: needs two different sources and loaded data.
        cross_validation_enabled = (
            validator.current_source_key != validator.verify_source_key
            and validator.image_path
            and validator.md_content
        )

        if st.button(
            "交叉验证",
            type="primary",
            icon=":material/compare_arrows:",
            disabled=not cross_validation_enabled,
            help="需要选择不同的OCR数据源和验证数据源" if not cross_validation_enabled else "开始批量交叉验证"
        ):
            cross_validation_dialog(validator)

        # "View results" button: enabled only when a batch result is stored.
        has_validation_results = (
            'cross_validation_batch_result' in st.session_state
            and st.session_state.cross_validation_batch_result is not None
        )

        if st.button(
            "查看验证结果",
            type="secondary",
            icon=":material/quick_reference_all:",
            disabled=not has_validation_results,
            help="暂无验证结果，请先运行交叉验证" if not has_validation_results else "查看批量验证结果"
        ):
            show_batch_cross_validation_results_dialog()

    # Statistics for the current data source.
    _render_statistics(validator)

    tab1, tab2, tab3 = st.tabs(["📄 内容人工检查", "🔍 交叉验证结果", "📊 表格分析"])

    with tab1:
        validator.create_compact_layout(config)

    with tab2:
        # Single-page cross-validation result, rendered by the shared helper.
        display_single_page_cross_validation(validator, config)

    with tab3:
        st.header("📊 表格数据分析")

        if validator.md_content and '<table' in validator.md_content.lower():
            st.subheader("🔍 表格数据预览")
            display_html_table_as_dataframe(validator.md_content)
        else:
            st.info("当前OCR结果中没有检测到表格数据")

# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()