|
|
@@ -1025,12 +1025,40 @@ def main():
|
|
|
st.write("**详细信息:**", stats['tool_info'])
|
|
|
|
|
|
# 其余标签页保持不变...
|
|
|
- tab1, tab2, tab3, tab4 = st.tabs(["📄 内容校验", "📊 表格分析", "📈 数据统计", "🚀 快速导航"])
|
|
|
+ tab1, tab2, tab3, tab4 = st.tabs(["📄 内容校验", "📄 VLM预校验识别结果", "📊 表格分析", "📈 数据统计"])
|
|
|
|
|
|
with tab1:
|
|
|
validator.create_compact_layout(config)
|
|
|
|
|
|
with tab2:
|
|
|
+ # st.header("📄 VLM预校验识别结果")
|
|
|
+ current_md_path = Path(validator.file_paths[validator.selected_file_index]).with_suffix('.md')
|
|
|
+ pre_validation_dir = Path(validator.config['pre_validation'].get('out_dir', './output/pre_validation/')).resolve()
|
|
|
+ comparison_result_path = pre_validation_dir / f"{current_md_path.stem}_comparison_result.json"
|
|
|
+ pre_validation_path = pre_validation_dir / f"{current_md_path.stem}.md"
|
|
|
+ if comparison_result_path.exists():
|
|
|
+ # 左边显示OCR结果,右边显示VLM结果
|
|
|
+ col1, col2 = st.columns([1,1])
|
|
|
+ with col1:
|
|
|
+ st.subheader("🤖 原OCR识别结果")
|
|
|
+ with open(current_md_path, "r", encoding="utf-8") as f:
|
|
|
+ original_md_content = f.read()
|
|
|
+ font_size = config['styles'].get('font_size', 10)
|
|
|
+ height = config['styles']['layout'].get('default_height', 800)
|
|
|
+ layout_type = "compact"
|
|
|
+ validator.layout_manager.render_content_by_mode(original_md_content, "HTML渲染", font_size, height, layout_type)
|
|
|
+ with col2:
|
|
|
+ st.subheader("🤖 VLM识别结果")
|
|
|
+ with open(pre_validation_path, "r", encoding="utf-8") as f:
|
|
|
+ pre_validation_md_content = f.read()
|
|
|
+ font_size = config['styles'].get('font_size', 10)
|
|
|
+ height = config['styles']['layout'].get('default_height', 800)
|
|
|
+ layout_type = "compact"
|
|
|
+ validator.layout_manager.render_content_by_mode(pre_validation_md_content, "HTML渲染", font_size, height, layout_type)
|
|
|
+ else:
|
|
|
+ st.info("暂无预校验结果,请先运行VLM预校验")
|
|
|
+
|
|
|
+ with tab3:
|
|
|
# 表格分析页面 - 保持原有逻辑
|
|
|
st.header("📊 表格数据分析")
|
|
|
|
|
|
@@ -1057,7 +1085,7 @@ def main():
|
|
|
else:
|
|
|
st.info("当前OCR结果中没有检测到表格数据")
|
|
|
|
|
|
- with tab3:
|
|
|
+ with tab4:
|
|
|
# 数据统计页面 - 保持原有逻辑
|
|
|
st.header("📈 OCR数据统计")
|
|
|
|
|
|
@@ -1087,28 +1115,5 @@ def main():
|
|
|
)
|
|
|
st.plotly_chart(fig_bar, use_container_width=True)
|
|
|
|
|
|
- with tab4:
|
|
|
- # 快速导航功能 - 保持原有逻辑
|
|
|
- st.header("🚀 快速导航")
|
|
|
-
|
|
|
- if not validator.text_bbox_mapping:
|
|
|
- st.info("没有可用的文本项进行导航")
|
|
|
- else:
|
|
|
- # 按类别分组
|
|
|
- categories = group_texts_by_category(validator.text_bbox_mapping)
|
|
|
-
|
|
|
- # 创建导航按钮
|
|
|
- for category, texts in categories.items():
|
|
|
- with st.expander(f"{category} ({len(texts)}项)", expanded=False):
|
|
|
- cols = st.columns(3)
|
|
|
- for i, text in enumerate(texts):
|
|
|
- col_idx = i % 3
|
|
|
- with cols[col_idx]:
|
|
|
- display_text = text[:15] + "..." if len(text) > 15 else text
|
|
|
- if st.button(display_text, key=f"nav_{category}_{i}"):
|
|
|
- st.session_state.selected_text = text
|
|
|
- st.rerun()
|
|
|
-
|
|
|
-
|
|
|
if __name__ == "__main__":
|
|
|
main()
|