|
|
@@ -17,6 +17,7 @@ from streamlit_validator_cross import (
|
|
|
)
|
|
|
from streamlit_validator_result import display_single_page_cross_validation
|
|
|
from ocr_validator_utils import get_data_source_display_name
|
|
|
+from config_manager import load_config # 🎯 使用新配置管理器
|
|
|
|
|
|
|
|
|
def reset_cross_validation_results():
|
|
|
@@ -28,22 +29,37 @@ def reset_cross_validation_results():
|
|
|
|
|
|
def main():
|
|
|
"""主应用"""
|
|
|
+ # 🎯 初始化配置管理器
|
|
|
+ if 'config_manager' not in st.session_state:
|
|
|
+ try:
|
|
|
+ st.session_state.config_manager = load_config(config_dir="config")
|
|
|
+ # 🎯 生成 OCRValidator 所需的配置
|
|
|
+ st.session_state.validator_config = st.session_state.config_manager.to_validator_config()
|
|
|
+ print("✅ 配置管理器初始化成功")
|
|
|
+ print(f"📄 发现 {len(st.session_state.config_manager.list_documents())} 个文档配置")
|
|
|
+ print(f"🔧 发现 {len(st.session_state.config_manager.list_ocr_tools())} 个 OCR 工具")
|
|
|
+ except Exception as e:
|
|
|
+ st.error(f"❌ 配置加载失败: {e}")
|
|
|
+ st.stop()
|
|
|
+
|
|
|
+ config_manager = st.session_state.config_manager
|
|
|
+ validator_config = config_manager.to_validator_config()
|
|
|
+
|
|
|
# 初始化应用
|
|
|
if 'validator' not in st.session_state:
|
|
|
- validator = StreamlitOCRValidator()
|
|
|
+ # 🎯 直接传递配置字典给 OCRValidator
|
|
|
+ validator = StreamlitOCRValidator(config_dict=validator_config)
|
|
|
st.session_state.validator = validator
|
|
|
- setup_page_config(validator.config)
|
|
|
+ setup_page_config(validator_config)
|
|
|
|
|
|
# 页面标题
|
|
|
- config = st.session_state.validator.config
|
|
|
- st.title(config['ui']['page_title'])
|
|
|
+ st.title(validator_config['ui']['page_title'])
|
|
|
|
|
|
# 初始化数据源追踪
|
|
|
st.session_state.current_ocr_source = validator.current_source_key
|
|
|
st.session_state.current_verify_source = validator.verify_source_key
|
|
|
else:
|
|
|
validator = st.session_state.validator
|
|
|
- config = st.session_state.validator.config
|
|
|
|
|
|
if 'selected_text' not in st.session_state:
|
|
|
st.session_state.selected_text = None
|
|
|
@@ -84,6 +100,44 @@ def main():
|
|
|
|
|
|
# 如果没有可用的数据源,提前返回
|
|
|
if not validator.all_sources:
|
|
|
+ st.warning("⚠️ 未找到任何数据源,请检查配置文件")
|
|
|
+
|
|
|
+ # 🎯 显示配置信息帮助调试
|
|
|
+ with st.expander("🔍 配置信息", expanded=True):
|
|
|
+ st.write("**已加载的文档:**")
|
|
|
+ docs = config_manager.list_documents()
|
|
|
+ if docs:
|
|
|
+ for doc in docs:
|
|
|
+ doc_config = config_manager.get_document(doc)
|
|
|
+ st.write(f"- **{doc}**")
|
|
|
+ st.write(f" - 基础目录: `{doc_config.base_dir}`")
|
|
|
+ st.write(f" - OCR 结果: {len([r for r in doc_config.ocr_results if r.enabled])} 个已启用")
|
|
|
+ else:
|
|
|
+ st.write("无")
|
|
|
+
|
|
|
+ st.write("**已加载的 OCR 工具:**")
|
|
|
+ tools = config_manager.list_ocr_tools()
|
|
|
+ if tools:
|
|
|
+ for tool in tools:
|
|
|
+ tool_config = config_manager.get_ocr_tool(tool)
|
|
|
+ st.write(f"- **{tool_config.name}** (`{tool}`)")
|
|
|
+ else:
|
|
|
+ st.write("无")
|
|
|
+
|
|
|
+ st.write("**配置文件路径:**")
|
|
|
+ st.code(str(config_manager.config_dir / "global.yaml"))
|
|
|
+
|
|
|
+ st.write("**生成的数据源:**")
|
|
|
+ data_sources = config_manager.get_data_sources()
|
|
|
+ if data_sources:
|
|
|
+ for ds in data_sources:
|
|
|
+ st.write(f"- `{ds.name}`")
|
|
|
+ st.write(f" - 工具: {ds.ocr_tool}")
|
|
|
+ st.write(f" - 结果目录: {ds.ocr_out_dir}")
|
|
|
+ st.write(f" - 图片目录: {ds.src_img_dir}")
|
|
|
+ else:
|
|
|
+ st.write("无")
|
|
|
+
|
|
|
st.stop()
|
|
|
|
|
|
# 文件选择区域
|
|
|
@@ -170,7 +224,7 @@ def main():
|
|
|
show_batch_cross_validation_results_dialog()
|
|
|
|
|
|
# 显示当前数据源统计信息
|
|
|
- with st.expander("🔧 OCR工具统计信息", expanded=False):
|
|
|
+ with st.expander("🔧 OCR工具统计信息", expanded=False):
|
|
|
stats = validator.get_statistics()
|
|
|
col1, col2, col3, col4, col5 = st.columns(5)
|
|
|
|
|
|
@@ -184,20 +238,70 @@ def main():
|
|
|
st.metric("✅ 准确率", f"{stats['accuracy_rate']:.1f}%")
|
|
|
with col5:
|
|
|
if validator.current_source_config:
|
|
|
- tool_display = validator.current_source_config['ocr_tool'].upper()
|
|
|
+ tool_id = validator.current_source_config['ocr_tool']
|
|
|
+ # 🎯 从配置管理器获取工具名称
|
|
|
+ tool_config = config_manager.get_ocr_tool(tool_id)
|
|
|
+ tool_display = tool_config.name if tool_config else tool_id.upper()
|
|
|
st.metric("🔧 OCR工具", tool_display)
|
|
|
|
|
|
if stats['tool_info']:
|
|
|
st.write("**详细信息:**", stats['tool_info'])
|
|
|
+
|
|
|
+ # 🎯 显示当前文档和 OCR 结果信息
|
|
|
+ if validator.current_source_config:
|
|
|
+ source_name = validator.current_source_config['name']
|
|
|
+ # 解析数据源名称,提取文档名(更精确的解析)
|
|
|
+ parts = source_name.split('_', 1)
|
|
|
+ doc_name = parts[0] if parts else source_name
|
|
|
+
|
|
|
+ doc_config = config_manager.get_document(doc_name)
|
|
|
+ if doc_config:
|
|
|
+ st.write("**文档信息:**")
|
|
|
+ st.write(f"- 文档名称: {doc_config.name}")
|
|
|
+ st.write(f"- 基础目录: {doc_config.base_dir}")
|
|
|
+ st.write(f"- 可用 OCR 工具: {len([r for r in doc_config.ocr_results if r.enabled])} 个")
|
|
|
+
|
|
|
+ # 🎯 添加配置管理面板
|
|
|
+ with st.expander("⚙️ 配置管理", expanded=False):
|
|
|
+ col1, col2 = st.columns(2)
|
|
|
+
|
|
|
+ with col1:
|
|
|
+ st.subheader("📄 已加载文档")
|
|
|
+ docs = config_manager.list_documents()
|
|
|
+ for doc_name in docs:
|
|
|
+ doc_config = config_manager.get_document(doc_name)
|
|
|
+ enabled_count = len([r for r in doc_config.ocr_results if r.enabled])
|
|
|
+ total_count = len(doc_config.ocr_results)
|
|
|
+
|
|
|
+ with st.container():
|
|
|
+ st.write(f"✅ **{doc_name}**")
|
|
|
+ st.caption(f"📊 {enabled_count}/{total_count} 工具已启用")
|
|
|
+
|
|
|
+ # 显示每个 OCR 工具的状态
|
|
|
+ for ocr_result in doc_config.ocr_results:
|
|
|
+ status_icon = "🟢" if ocr_result.enabled else "⚪"
|
|
|
+ tool_config = config_manager.get_ocr_tool(ocr_result.tool)
|
|
|
+ tool_name = tool_config.name if tool_config else ocr_result.tool
|
|
|
+ st.caption(f" {status_icon} {tool_name} - {ocr_result.description or ocr_result.result_dir}")
|
|
|
+
|
|
|
+ with col2:
|
|
|
+ st.subheader("🔧 已加载 OCR 工具")
|
|
|
+ tools = config_manager.list_ocr_tools()
|
|
|
+ for tool_id in tools:
|
|
|
+ tool_config = config_manager.get_ocr_tool(tool_id)
|
|
|
+ with st.container():
|
|
|
+ st.write(f"🔧 **{tool_config.name}**")
|
|
|
+ st.caption(f"ID: `{tool_id}`")
|
|
|
+ st.caption(f"描述: {tool_config.description}")
|
|
|
|
|
|
tab1, tab2, tab3 = st.tabs(["📄 内容人工检查", "🔍 交叉验证结果", "📊 表格分析"])
|
|
|
|
|
|
with tab1:
|
|
|
- validator.create_compact_layout(config)
|
|
|
+ validator.create_compact_layout(validator_config)
|
|
|
|
|
|
with tab2:
|
|
|
# ✅ 使用封装的函数显示单页交叉验证结果
|
|
|
- display_single_page_cross_validation(validator, config)
|
|
|
+ display_single_page_cross_validation(validator, validator_config)
|
|
|
|
|
|
with tab3:
|
|
|
st.header("📊 表格数据分析")
|
|
|
@@ -207,7 +311,7 @@ def main():
|
|
|
display_html_table_as_dataframe(validator.md_content)
|
|
|
else:
|
|
|
st.info("当前OCR结果中没有检测到表格数据")
|
|
|
-
|
|
|
+
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
main()
|