Browse Source

新增VLM预校验识别结果标签页,优化OCR结果与VLM结果的对比展示逻辑

zhch158_admin cách đây 1 tháng
mục cha
commit
d148122e35
1 tập tin đã thay đổi với 30 bổ sung và 25 xóa
  1. 30 25
      streamlit_ocr_validator.py

+ 30 - 25
streamlit_ocr_validator.py

@@ -1025,12 +1025,40 @@ def main():
             st.write("**详细信息:**", stats['tool_info'])
     
     # 其余标签页保持不变...
-    tab1, tab2, tab3, tab4 = st.tabs(["📄 内容校验", "📊 表格分析", "📈 数据统计", "🚀 快速导航"])
+    tab1, tab2, tab3, tab4 = st.tabs(["📄 内容校验", "📄 VLM预校验识别结果", "📊 表格分析", "📈 数据统计"])
     
     with tab1:
         validator.create_compact_layout(config)
 
     with tab2:
+        # st.header("📄 VLM预校验识别结果")
+        current_md_path = Path(validator.file_paths[validator.selected_file_index]).with_suffix('.md')
+        pre_validation_dir = Path(validator.config['pre_validation'].get('out_dir', './output/pre_validation/')).resolve()
+        comparison_result_path = pre_validation_dir / f"{current_md_path.stem}_comparison_result.json"
+        pre_validation_path = pre_validation_dir / f"{current_md_path.stem}.md"
+        if comparison_result_path.exists():
+            # 左边显示OCR结果,右边显示VLM结果
+            col1, col2 = st.columns([1,1])
+            with col1:
+                st.subheader("🤖 原OCR识别结果")
+                with open(current_md_path, "r", encoding="utf-8") as f:
+                    original_md_content = f.read()
+                font_size = config['styles'].get('font_size', 10)
+                height = config['styles']['layout'].get('default_height', 800)
+                layout_type = "compact"
+                validator.layout_manager.render_content_by_mode(original_md_content, "HTML渲染", font_size, height, layout_type)
+            with col2:
+                st.subheader("🤖 VLM识别结果")
+                with open(pre_validation_path, "r", encoding="utf-8") as f:
+                    pre_validation_md_content = f.read()
+                font_size = config['styles'].get('font_size', 10)
+                height = config['styles']['layout'].get('default_height', 800)
+                layout_type = "compact"
+                validator.layout_manager.render_content_by_mode(pre_validation_md_content, "HTML渲染", font_size, height, layout_type)
+        else:
+            st.info("暂无预校验结果,请先运行VLM预校验")
+
+    with tab3:
         # 表格分析页面 - 保持原有逻辑
         st.header("📊 表格数据分析")
         
@@ -1057,7 +1085,7 @@ def main():
         else:
             st.info("当前OCR结果中没有检测到表格数据")
     
-    with tab3:
+    with tab4:
         # 数据统计页面 - 保持原有逻辑
         st.header("📈 OCR数据统计")
         
@@ -1087,28 +1115,5 @@ def main():
         )
         st.plotly_chart(fig_bar, use_container_width=True)
     
-    with tab4:
-        # 快速导航功能 - 保持原有逻辑
-        st.header("🚀 快速导航")
-        
-        if not validator.text_bbox_mapping:
-            st.info("没有可用的文本项进行导航")
-        else:
-            # 按类别分组
-            categories = group_texts_by_category(validator.text_bbox_mapping)
-            
-            # 创建导航按钮
-            for category, texts in categories.items():
-                with st.expander(f"{category} ({len(texts)}项)", expanded=False):
-                    cols = st.columns(3)
-                    for i, text in enumerate(texts):
-                        col_idx = i % 3
-                        with cols[col_idx]:
-                            display_text = text[:15] + "..." if len(text) > 15 else text
-                            if st.button(display_text, key=f"nav_{category}_{i}"):
-                                st.session_state.selected_text = text
-                                st.rerun()
-
-
 if __name__ == "__main__":
     main()