2 tháng trước · faa33c7919
--- a/streamlit_ocr_validator.py
+++ b/streamlit_ocr_validator.py
@@ -369,20 +369,130 @@ class StreamlitOCRValidator:
 
				         
			
 
				         return wrapped_content
			
 
				     
			
 
    def display_html_table_as_dataframe(self, html_content: str, enable_editing: bool = False,
                                        key_prefix: str = ""):
        """Parse the HTML tables in ``html_content`` and render each one as a DataFrame.

        Every table found by ``pandas.read_html`` gets its own panel with optional
        info / statistics / filter / sort controls and CSV / Excel export buttons.
        If anything fails while parsing or rendering, an error is shown and the
        raw HTML is rendered instead.

        Args:
            html_content: Markup that may contain one or more ``<table>`` elements.
            enable_editing: When True, tables are shown in an editable grid and the
                export uses the edited data.
            key_prefix: Prepended to every Streamlit widget key created here. Pass a
                distinct value at each call site when this method is invoked more
                than once in the same script run, so widget keys stay unique. The
                default keeps the historical key names.
        """
        import pandas as pd
        from io import StringIO

        try:
            # Let pandas extract every table directly from the markup.
            tables = pd.read_html(StringIO(html_content))
            if tables:
                for i, table in enumerate(tables):
                    self._render_table_panel(table, i, enable_editing, key_prefix)
            else:
                st.warning("未找到可解析的表格")

        except Exception as e:
            # Deliberate best-effort boundary: any failure falls back to raw HTML.
            st.error(f"表格解析失败: {e}")
            st.info("尝试使用HTML渲染模式查看表格")
            st.markdown(html_content, unsafe_allow_html=True)

    def _render_table_panel(self, table, i: int, enable_editing: bool, key_prefix: str = ""):
        """Render controls, grid, summaries and export buttons for one parsed table.

        Args:
            table: The DataFrame produced by ``pandas.read_html`` for this table.
            i: Zero-based position of the table; used in labels and widget keys.
            enable_editing: Show an editable grid instead of a read-only one.
            key_prefix: Prefix applied to every widget key in this panel.
        """
        from io import BytesIO

        st.subheader(f"📊 表格 {i+1}")

        # Row of toggles controlling the optional sections below.
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            show_info = st.checkbox("显示表格信息", key=f"{key_prefix}info_{i}")
        with col2:
            show_stats = st.checkbox("显示统计信息", key=f"{key_prefix}stats_{i}")
        with col3:
            enable_filter = st.checkbox("启用过滤", key=f"{key_prefix}filter_{i}")
        with col4:
            enable_sort = st.checkbox("启用排序", key=f"{key_prefix}sort_{i}")

        # Filtering: keep rows whose chosen column contains the entered text.
        filtered_table = table.copy()
        if enable_filter and not table.empty:
            filter_col = st.selectbox(
                f"选择过滤列 (表格 {i+1})",
                options=['无'] + list(table.columns),
                key=f"{key_prefix}filter_col_{i}"
            )

            if filter_col != '无':
                filter_value = st.text_input(
                    f"过滤值 (表格 {i+1})",
                    key=f"{key_prefix}filter_value_{i}"
                )
                if filter_value:
                    # regex=False: the input is free text typed by the user, so
                    # characters such as "(" or "*" must match literally instead
                    # of raising a regex compilation error.
                    mask = table[filter_col].astype(str).str.contains(
                        filter_value, regex=False, na=False
                    )
                    filtered_table = table[mask]

        # Sorting is applied on top of the filtered rows.
        if enable_sort and not filtered_table.empty:
            sort_col = st.selectbox(
                f"选择排序列 (表格 {i+1})",
                options=['无'] + list(filtered_table.columns),
                key=f"{key_prefix}sort_col_{i}"
            )

            if sort_col != '无':
                sort_order = st.radio(
                    f"排序方式 (表格 {i+1})",
                    options=['升序', '降序'],
                    horizontal=True,
                    key=f"{key_prefix}sort_order_{i}"
                )
                ascending = (sort_order == '升序')
                filtered_table = filtered_table.sort_values(sort_col, ascending=ascending)

        # Grid display. `export_table` tracks what the export buttons should emit.
        export_table = filtered_table
        if enable_editing:
            edited_table = st.data_editor(
                filtered_table,
                use_container_width=True,
                key=f"{key_prefix}editor_{i}"
            )

            if not edited_table.equals(filtered_table):
                st.success("✏️ 表格已编辑，可以导出修改后的数据")

            # Export the edited data, as the message above promises.
            export_table = edited_table
        else:
            st.dataframe(filtered_table, use_container_width=True)

        # Basic shape information.
        if show_info:
            st.write("**表格信息:**")
            st.write(f"- 原始行数: {len(table)}")
            st.write(f"- 过滤后行数: {len(filtered_table)}")
            st.write(f"- 列数: {len(table.columns)}")
            # Column labels may be ints or tuples (header-less or multi-row
            # headers), so stringify them before joining.
            st.write(f"- 列名: {', '.join(map(str, table.columns))}")

        # Descriptive statistics for numeric columns only.
        if show_stats:
            st.write("**统计信息:**")
            # "number" selects all numeric dtypes without needing numpy in scope.
            numeric_cols = filtered_table.select_dtypes(include="number").columns
            if len(numeric_cols) > 0:
                st.dataframe(filtered_table[numeric_cols].describe())
            else:
                st.info("表格中没有数值列")

        # Export section.
        # NOTE(review): the download buttons live inside an st.button branch, so
        # they are only rendered on the rerun triggered by that click and vanish
        # on the next interaction — confirm this is the intended UX.
        if st.button(f"📥 导出表格 {i+1}", key=f"{key_prefix}export_{i}"):
            csv_data = export_table.to_csv(index=False)
            st.download_button(
                label=f"下载CSV (表格 {i+1})",
                data=csv_data,
                file_name=f"table_{i+1}.csv",
                mime="text/csv",
                key=f"{key_prefix}download_csv_{i}"
            )

            # Build the workbook in memory so nothing is written to disk.
            excel_buffer = BytesIO()
            export_table.to_excel(excel_buffer, index=False)
            st.download_button(
                label=f"下载Excel (表格 {i+1})",
                data=excel_buffer.getvalue(),
                file_name=f"table_{i+1}.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                key=f"{key_prefix}download_excel_{i}"
            )

        st.markdown("---")
			
 
				 
			
@@ -486,154 +596,221 @@ def main():
 
				         st.error(f"❌ 统计信息计算失败: {e}")
			
 
				         return
			
 
				     
			
 
				-    # 主要布局 - 左右分栏
			
 
				-    left_col, right_col = st.columns([1, 1])
			
 
				+    # 创建标签页
			
 
				+    tab1, tab2, tab3 = st.tabs(["📄 文本校验", "📊 表格分析", "📈 数据统计"])
			
 
				     
			
 
				-    # 左侧 - OCR文本内容
			
 
				-    with left_col:
			
 
				-        st.header("📄 OCR识别内容")
			
 
				-        
			
 
				-        # 文本选择器
			
 
				-        if st.session_state.validator.text_bbox_mapping:
			
 
				-            text_options = ["请选择文本..."] + list(st.session_state.validator.text_bbox_mapping.keys())
			
 
				-            selected_index = st.selectbox(
			
 
				-                "选择要校验的文本",
			
 
				-                range(len(text_options)),
			
 
				-                format_func=lambda x: text_options[x],
			
 
				-                key="text_selector"
			
 
				-            )
			
 
				-            
			
 
				-            if selected_index > 0:
			
 
				-                st.session_state.selected_text = text_options[selected_index]
			
 
				-        else:
			
 
				-            st.warning("没有找到可点击的文本")
			
 
				+    with tab1:
			
 
				+        # 原有的左右分栏内容
			
 
				+        left_col, right_col = st.columns([1, 1])
			
 
				         
			
 
				-        # 显示MD内容（可搜索和过滤）
			
 
				-        if st.session_state.validator.md_content:
			
 
				-            search_term = st.text_input("🔍 搜索文本内容", placeholder="输入关键词搜索...")
			
 
				+        # 左侧 - OCR文本内容
			
 
				+        with left_col:
			
 
				+            st.header("📄 OCR识别内容")
			
 
				             
			
 
				-            display_content = st.session_state.validator.md_content
			
 
				-            if search_term:
			
 
				-                lines = display_content.split('\n')
			
 
				-                filtered_lines = [line for line in lines if search_term.lower() in line.lower()]
			
 
				-                display_content = '\n'.join(filtered_lines)
			
 
				-                if filtered_lines:
			
 
				-                    st.success(f"找到 {len(filtered_lines)} 行包含 '{search_term}'")
			
 
				-                else:
			
 
				-                    st.warning(f"未找到包含 '{search_term}' 的内容")
			
 
				-            
			
 
				-            # 渲染方式选择
			
 
				-            render_mode = st.radio(
			
 
				-                "选择渲染方式",
			
 
				-                ["HTML渲染", "Markdown渲染", "DataFrame表格", "原始文本"],  # 添加DataFrame选项
			
 
				-                horizontal=True
			
 
				-            )
			
 
				-
			
 
				-            if render_mode == "HTML渲染":
			
 
				-                # 使用unsafe_allow_html=True来渲染HTML表格
			
 
				-                st.markdown(display_content, unsafe_allow_html=True)
			
 
				-            elif render_mode == "Markdown渲染":
			
 
				-                # 转换HTML表格为Markdown格式
			
 
				-                converted_content = st.session_state.validator.convert_html_table_to_markdown(display_content)
			
 
				-                st.markdown(converted_content)
			
 
				-            elif render_mode == "DataFrame表格":
			
 
				-                # 新增：使用DataFrame显示表格
			
 
				-                if '<table>' in display_content.lower():
			
 
				-                    st.session_state.validator.display_html_table_as_dataframe(display_content)
			
 
				-                else:
			
 
				-                    st.info("当前内容中没有检测到HTML表格")
			
 
				-                    st.markdown(display_content)
			
 
				-            else:
			
 
				-                # 原始文本显示
			
 
				-                st.text_area(
			
 
				-                    "MD内容预览",
			
 
				-                    display_content,
			
 
				-                    height=300,
			
 
				-                    help="OCR识别的文本内容"
			
 
				+            # 文本选择器
			
 
				+            if st.session_state.validator.text_bbox_mapping:
			
 
				+                text_options = ["请选择文本..."] + list(st.session_state.validator.text_bbox_mapping.keys())
			
 
				+                selected_index = st.selectbox(
			
 
				+                    "选择要校验的文本",
			
 
				+                    range(len(text_options)),
			
 
				+                    format_func=lambda x: text_options[x],
			
 
				+                    key="text_selector"
			
 
				                 )
			
 
				-        
			
 
				-        # 可点击文本列表
			
 
				-        st.subheader("🎯 可点击文本列表")
			
 
				-        
			
 
				-        if st.session_state.validator.text_bbox_mapping:
			
 
				-            for text, info_list in st.session_state.validator.text_bbox_mapping.items():
			
 
				-                info = info_list[0]  # 使用第一个bbox信息
			
 
				-                
			
 
				-                # 确定显示样式
			
 
				-                is_selected = (text == st.session_state.selected_text)
			
 
				-                is_error = (text in st.session_state.marked_errors)
			
 
				                 
			
 
				-                # 创建按钮行
			
 
				-                button_col, error_col = st.columns([4, 1])
			
 
				+                if selected_index > 0:
			
 
				+                    st.session_state.selected_text = text_options[selected_index]
			
 
				+            else:
			
 
				+                st.warning("没有找到可点击的文本")
			
 
				+            
			
 
				+            # 显示MD内容（可搜索和过滤）
			
 
				+            if st.session_state.validator.md_content:
			
 
				+                search_term = st.text_input("🔍 搜索文本内容", placeholder="输入关键词搜索...")
			
 
				                 
			
 
				-                with button_col:
			
 
				-                    button_type = "primary" if is_selected else "secondary"
			
 
				-                    if st.button(f"📍 {text}", key=f"btn_{text}", type=button_type):
			
 
				-                        st.session_state.selected_text = text
			
 
				-                        st.rerun()
			
 
				+                display_content = st.session_state.validator.md_content
			
 
				+                if search_term:
			
 
				+                    lines = display_content.split('\n')
			
 
				+                    filtered_lines = [line for line in lines if search_term.lower() in line.lower()]
			
 
				+                    display_content = '\n'.join(filtered_lines)
			
 
				+                    if filtered_lines:
			
 
				+                        st.success(f"找到 {len(filtered_lines)} 行包含 '{search_term}'")
			
 
				+                    else:
			
 
				+                        st.warning(f"未找到包含 '{search_term}' 的内容")
			
 
				                 
			
 
				-                with error_col:
			
 
				-                    if is_error:
			
 
				-                        if st.button("✅", key=f"fix_{text}", help="取消错误标记"):
			
 
				-                            st.session_state.marked_errors.discard(text)
			
 
				-                            st.rerun()
			
 
				+                # 渲染方式选择
			
 
				+                render_mode = st.radio(
			
 
				+                    "选择渲染方式",
			
 
				+                    ["HTML渲染", "Markdown渲染", "DataFrame表格", "原始文本"],  # 添加DataFrame选项
			
 
				+                    horizontal=True
			
 
				+                )
			
 
				+
			
 
				+                if render_mode == "HTML渲染":
			
 
				+                    # 使用unsafe_allow_html=True来渲染HTML表格
			
 
				+                    st.markdown(display_content, unsafe_allow_html=True)
			
 
				+                elif render_mode == "Markdown渲染":
			
 
				+                    # 转换HTML表格为Markdown格式
			
 
				+                    converted_content = st.session_state.validator.convert_html_table_to_markdown(display_content)
			
 
				+                    st.markdown(converted_content)
			
 
				+                elif render_mode == "DataFrame表格":
			
 
				+                    # 新增：使用DataFrame显示表格
			
 
				+                    if '<table>' in display_content.lower():
			
 
				+                        st.session_state.validator.display_html_table_as_dataframe(display_content)
			
 
				                     else:
			
 
				-                        if st.button("❌", key=f"error_{text}", help="标记为错误"):
			
 
				-                            st.session_state.marked_errors.add(text)
			
 
				+                        st.info("当前内容中没有检测到HTML表格")
			
 
				+                        st.markdown(display_content)
			
 
				+                else:
			
 
				+                    # 原始文本显示
			
 
				+                    st.text_area(
			
 
				+                        "MD内容预览",
			
 
				+                        display_content,
			
 
				+                        height=300,
			
 
				+                        help="OCR识别的文本内容"
			
 
				+                    )
			
 
				+            
			
 
				+            # 可点击文本列表
			
 
				+            st.subheader("🎯 可点击文本列表")
			
 
				+            
			
 
				+            if st.session_state.validator.text_bbox_mapping:
			
 
				+                for text, info_list in st.session_state.validator.text_bbox_mapping.items():
			
 
				+                    info = info_list[0] # 使用第一个bbox信息
			
 
				+                    
			
 
				+                    # 确定显示样式
			
 
				+                    is_selected = (text == st.session_state.selected_text)
			
 
				+                    is_error = (text in st.session_state.marked_errors)
			
 
				+                    
			
 
				+                    # 创建按钮行
			
 
				+                    button_col, error_col = st.columns([4, 1])
			
 
				+                    
			
 
				+                    with button_col:
			
 
				+                        button_type = "primary" if is_selected else "secondary"
			
 
				+                        if st.button(f"📍 {text}", key=f"btn_{text}", type=button_type):
			
 
				+                            st.session_state.selected_text = text
			
 
				                             st.rerun()
			
 
				-        else:
			
 
				-            st.info("没有可点击的文本项目")
			
 
				+                    
			
 
				+                    with error_col:
			
 
				+                        if is_error:
			
 
				+                            if st.button("✅", key=f"fix_{text}", help="取消错误标记"):
			
 
				+                                st.session_state.marked_errors.discard(text)
			
 
				+                                st.rerun()
			
 
				+                        else:
			
 
				+                            if st.button("❌", key=f"error_{text}", help="标记为错误"):
			
 
				+                                st.session_state.marked_errors.add(text)
			
 
				+                                st.rerun()
			
 
				+            else:
			
 
				+                st.info("没有可点击的文本项目")
			
 
				+        
			
 
				+        # 右侧 - 图像显示
			
 
				+        with right_col:
			
 
				+            st.header("🖼️ 原图标注")
			
 
				+            
			
 
				+            if st.session_state.validator.image_path and Path(st.session_state.validator.image_path).exists():
			
 
				+                try:
			
 
				+                    # 加载图片
			
 
				+                    image = Image.open(st.session_state.validator.image_path)
			
 
				+                    
			
 
				+                    # 创建交互式图片
			
 
				+                    selected_bbox = None
			
 
				+                    if st.session_state.selected_text and st.session_state.selected_text in st.session_state.validator.text_bbox_mapping:
			
 
				+                        info = st.session_state.validator.text_bbox_mapping[st.session_state.selected_text][0]
			
 
				+                        selected_bbox = info['bbox']
			
 
				+                    
			
 
				+                    fig = st.session_state.validator.create_interactive_plot(image, selected_bbox)
			
 
				+                    st.plotly_chart(fig, use_container_width=True)
			
 
				+                    
			
 
				+                    # 显示选中文本的详细信息
			
 
				+                    if st.session_state.selected_text:
			
 
				+                        st.subheader("📍 选中文本详情")
			
 
				+                        
			
 
				+                        if st.session_state.selected_text in st.session_state.validator.text_bbox_mapping:
			
 
				+                            info = st.session_state.validator.text_bbox_mapping[st.session_state.selected_text][0]
			
 
				+                            bbox = info['bbox']
			
 
				+                            
			
 
				+                            info_col1, info_col2 = st.columns(2)
			
 
				+                            with info_col1:
			
 
				+                                st.write(f"**文本内容:** {st.session_state.selected_text}")
			
 
				+                                st.write(f"**类别:** {info['category']}")
			
 
				+                                st.write(f"**置信度:** {info.get('confidence', 'N/A')}")
			
 
				+                            
			
 
				+                            with info_col2:
			
 
				+                                st.write(f"**位置:** [{', '.join(map(str, bbox))}]")
			
 
				+                                if len(bbox) >= 4:
			
 
				+                                    st.write(f"**宽度:** {bbox[2] - bbox[0]} px")
			
 
				+                                    st.write(f"**高度:** {bbox[3] - bbox[1]} px")
			
 
				+                            
			
 
				+                            # 标记状态
			
 
				+                            is_error = st.session_state.selected_text in st.session_state.marked_errors
			
 
				+                            if is_error:
			
 
				+                                st.error("⚠️ 此文本已标记为错误")
			
 
				+                            else:
			
 
				+                                st.success("✅ 此文本未标记错误")
			
 
				+                except Exception as e:
			
 
				+                    st.error(f"❌ 图片处理失败: {e}")
			
 
				+            else:
			
 
				+                st.error("未找到对应的图片文件")
			
 
				+                if st.session_state.validator.image_path:
			
 
				+                    st.write(f"期望路径: {st.session_state.validator.image_path}")
			
 
				     
			
 
				-    # 右侧 - 图像显示
			
 
				-    with right_col:
			
 
				-        st.header("🖼️ 原图标注")
			
 
				-        
			
 
				-        if st.session_state.validator.image_path and Path(st.session_state.validator.image_path).exists():
			
 
				-            try:
			
 
				-                # 加载图片
			
 
				-                image = Image.open(st.session_state.validator.image_path)
			
 
				-                
			
 
				-                # 创建交互式图片
			
 
				-                selected_bbox = None
			
 
				-                if st.session_state.selected_text and st.session_state.selected_text in st.session_state.validator.text_bbox_mapping:
			
 
				-                    info = st.session_state.validator.text_bbox_mapping[st.session_state.selected_text][0]
			
 
				-                    selected_bbox = info['bbox']
			
 
				+    with tab2:
			
 
				+        # 新增：专门的表格分析页面
			
 
				+        st.header("📊 表格数据分析")
			
 
				+        
			
 
				+        if st.session_state.validator.md_content:
			
 
				+            # 检查是否包含表格
			
 
				+            if '<table' in st.session_state.validator.md_content.lower():
			
 
				+                col1, col2 = st.columns([2, 1])
			
 
				                 
			
 
				-                fig = st.session_state.validator.create_interactive_plot(image, selected_bbox)
			
 
				-                st.plotly_chart(fig, use_container_width=True)
			
 
				+                with col1:
			
 
				+                    st.subheader("🔍 表格数据预览")
			
 
				+                    st.session_state.validator.display_html_table_as_dataframe(
			
 
				+                        st.session_state.validator.md_content
			
 
				+                    )
			
 
				                 
			
 
				-                # 显示选中文本的详细信息
			
 
				-                if st.session_state.selected_text:
			
 
				-                    st.subheader("📍 选中文本详情")
			
 
				+                with col2:
			
 
				+                    st.subheader("⚙️ 表格操作")
			
 
				                     
			
 
				-                    if st.session_state.selected_text in st.session_state.validator.text_bbox_mapping:
			
 
				-                        info = st.session_state.validator.text_bbox_mapping[st.session_state.selected_text][0]
			
 
				-                        bbox = info['bbox']
			
 
				-                        
			
 
				-                        info_col1, info_col2 = st.columns(2)
			
 
				-                        with info_col1:
			
 
				-                            st.write(f"**文本内容:** {st.session_state.selected_text}")
			
 
				-                            st.write(f"**类别:** {info['category']}")
			
 
				-                            st.write(f"**置信度:** {info.get('confidence', 'N/A')}")
			
 
				-                        
			
 
				-                        with info_col2:
			
 
				-                            st.write(f"**位置:** [{', '.join(map(str, bbox))}]")
			
 
				-                            if len(bbox) >= 4:
			
 
				-                                st.write(f"**宽度:** {bbox[2] - bbox[0]} px")
			
 
				-                                st.write(f"**高度:** {bbox[3] - bbox[1]} px")
			
 
				-                        
			
 
				-                        # 标记状态
			
 
				-                        is_error = st.session_state.selected_text in st.session_state.marked_errors
			
 
				-                        if is_error:
			
 
				-                            st.error("⚠️ 此文本已标记为错误")
			
 
				-                        else:
			
 
				-                            st.success("✅ 此文本未标记错误")
			
 
				-            except Exception as e:
			
 
				-                st.error(f"❌ 图片处理失败: {e}")
			
 
				+                    if st.button("📥 导出表格数据", type="primary"):
			
 
				+                        try:
			
 
				+                            import pandas as pd
			
 
				+                            from io import StringIO
			
 
				+                            tables = pd.read_html(StringIO(st.session_state.validator.md_content))
			
 
				+                            if tables:
			
 
				+                                # 创建Excel文件
			
 
				+                                output = BytesIO()
			
 
				+                                with pd.ExcelWriter(output, engine='openpyxl') as writer:
			
 
				+                                    for i, table in enumerate(tables):
			
 
				+                                        table.to_excel(writer, sheet_name=f'Table_{i+1}', index=False)
			
 
				+                                
			
 
				+                                st.download_button(
			
 
				+                                    label="📥 下载Excel文件",
			
 
				+                                    data=output.getvalue(),
			
 
				+                                    file_name="ocr_tables.xlsx",
			
 
				+                                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
			
 
				+                                )
			
 
				+                        except Exception as e:
			
 
				+                            st.error(f"导出失败: {e}")
			
 
				+                    
			
 
				+                    if st.button("🔍 表格统计分析"):
			
 
				+                        try:
			
 
				+                            import pandas as pd
			
 
				+                            from io import StringIO
			
 
				+                            tables = pd.read_html(StringIO(st.session_state.validator.md_content))
			
 
				+                            if tables:
			
 
				+                                st.write("**表格统计信息:**")
			
 
				+                                for i, table in enumerate(tables):
			
 
				+                                    st.write(f"表格 {i+1}:")
			
 
				+                                    st.write(f"- 行数: {len(table)}")
			
 
				+                                    st.write(f"- 列数: {len(table.columns)}")
			
 
				+                                    st.write(f"- 数值列数: {len(table.select_dtypes(include=[np.number]).columns)}")
			
 
				+                        except Exception as e:
			
 
				+                            st.error(f"统计分析失败: {e}")
			
 
				+            else:
			
 
				+                st.info("当前OCR结果中没有检测到表格数据")
			
 
				         else:
			
 
				-            st.error("未找到对应的图片文件")
			
 
				-            if st.session_state.validator.image_path:
			
 
				-                st.write(f"期望路径: {st.session_state.validator.image_path}")
			
 
				+            st.warning("请先加载OCR数据")
			
 
				+    
			
 
				+    with tab3:
			
 
				+        # 数据统计页面
			
 
				+        st.header("📈 OCR数据统计")
			
 
				+        # ...现有的统计代码...
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     main()