|
@@ -193,74 +193,56 @@ def create_table_line_editor():
|
|
|
|
|
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
- # 🆕 新建标注模式
|
|
|
|
|
|
|
+ # 🎯 新建标注模式
|
|
|
if work_mode == "🆕 新建标注":
|
|
if work_mode == "🆕 新建标注":
|
|
|
create_file_uploader_section(work_mode)
|
|
create_file_uploader_section(work_mode)
|
|
|
|
|
|
|
|
- if not (st.session_state.ocr_data and st.session_state.image):
|
|
|
|
|
- st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
|
|
|
|
|
- with st.expander("📖 使用说明"):
|
|
|
|
|
- st.markdown("""
|
|
|
|
|
- ### 🆕 新建标注模式
|
|
|
|
|
-
|
|
|
|
|
- **支持的OCR格式**
|
|
|
|
|
-
|
|
|
|
|
- **1. PPStructure V3 格式 (推荐)**
|
|
|
|
|
- ```json
|
|
|
|
|
- {
|
|
|
|
|
- "parsing_res_list": [...],
|
|
|
|
|
- "overall_ocr_res": {
|
|
|
|
|
- "rec_boxes": [[x1, y1, x2, y2], ...],
|
|
|
|
|
- "rec_texts": ["文本1", "文本2", ...]
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- ```
|
|
|
|
|
-
|
|
|
|
|
- **2. 标准格式**
|
|
|
|
|
- ```json
|
|
|
|
|
- [
|
|
|
|
|
- {
|
|
|
|
|
- "text": "文本内容",
|
|
|
|
|
- "bbox": [x1, y1, x2, y2]
|
|
|
|
|
- }
|
|
|
|
|
- ]
|
|
|
|
|
- ```
|
|
|
|
|
-
|
|
|
|
|
- ### 📂 加载已有标注模式
|
|
|
|
|
-
|
|
|
|
|
- 1. 上传之前保存的 `*_structure.json` 配置文件
|
|
|
|
|
- 2. 上传对应的图片(可选)
|
|
|
|
|
- 3. 继续调整表格线位置
|
|
|
|
|
- 4. 保存更新后的配置
|
|
|
|
|
- """)
|
|
|
|
|
- return
|
|
|
|
|
-
|
|
|
|
|
- st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")
|
|
|
|
|
-
|
|
|
|
|
- _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
|
|
|
|
|
- st.session_state.ocr_data,
|
|
|
|
|
- st.session_state.image,
|
|
|
|
|
- TABLE_EDITOR_CONFIG["display"]
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if 'structure' in st.session_state and st.session_state.structure:
|
|
|
|
|
- render_table_structure_view(
|
|
|
|
|
- st.session_state.structure,
|
|
|
|
|
- st.session_state.image,
|
|
|
|
|
- line_width,
|
|
|
|
|
- display_mode,
|
|
|
|
|
- zoom_level,
|
|
|
|
|
- show_line_numbers,
|
|
|
|
|
- VIEWPORT_WIDTH,
|
|
|
|
|
- VIEWPORT_HEIGHT
|
|
|
|
|
- )
|
|
|
|
|
- create_save_section(
|
|
|
|
|
- work_mode,
|
|
|
|
|
- st.session_state.structure,
|
|
|
|
|
- st.session_state.image,
|
|
|
|
|
- line_width,
|
|
|
|
|
- TABLE_EDITOR_CONFIG["output"]
|
|
|
|
|
|
|
+ if st.session_state.ocr_data and st.session_state.image:
|
|
|
|
|
+ st.info(f"📂 已加载: {st.session_state.loaded_json_name}")
|
|
|
|
|
+
|
|
|
|
|
+ # 🔧 显示分析参数设置(统一处理)
|
|
|
|
|
+ st.sidebar.subheader("🔬 分析参数")
|
|
|
|
|
+
|
|
|
|
|
+ analysis_method = st.sidebar.selectbox(
|
|
|
|
|
+ "分析算法",
|
|
|
|
|
+ ["auto", "cluster", "mineru"],
|
|
|
|
|
+ format_func=lambda x: {
|
|
|
|
|
+ "auto": "🤖 自动选择(推荐)",
|
|
|
|
|
+ "cluster": "📊 聚类算法(通用)",
|
|
|
|
|
+ "mineru": "🎯 MinerU 索引算法"
|
|
|
|
|
+ }[x]
|
|
|
)
|
|
)
|
|
|
|
|
+
|
|
|
|
|
+ if analysis_method in ["auto", "cluster"]:
|
|
|
|
|
+ y_tolerance = st.sidebar.slider("Y轴容差", 1, 20, 5)
|
|
|
|
|
+ x_tolerance = st.sidebar.slider("X轴容差", 1, 30, 10)
|
|
|
|
|
+ min_row_height = st.sidebar.slider("最小行高", 10, 50, 20)
|
|
|
|
|
+
|
|
|
|
|
+ # 🎯 分析按钮
|
|
|
|
|
+ if st.button("🔍 分析表格结构"):
|
|
|
|
|
+ with st.spinner("正在分析..."):
|
|
|
|
|
+ # 统一的分析流程
|
|
|
|
|
+ generator = TableLineGenerator(
|
|
|
|
|
+ st.session_state.image,
|
|
|
|
|
+ st.session_state.ocr_data
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if analysis_method == "auto":
|
|
|
|
|
+ # 根据数据特征自动选择
|
|
|
|
|
+ has_cell_index = any('row' in item for item in st.session_state.ocr_data)
|
|
|
|
|
+ method = "mineru" if has_cell_index else "cluster"
|
|
|
|
|
+ else:
|
|
|
|
|
+ method = analysis_method
|
|
|
|
|
+
|
|
|
|
|
+ st.session_state.structure = generator.analyze_table_structure(
|
|
|
|
|
+ y_tolerance=y_tolerance if method == "cluster" else 5,
|
|
|
|
|
+ x_tolerance=x_tolerance if method == "cluster" else 10,
|
|
|
|
|
+ min_row_height=min_row_height if method == "cluster" else 20,
|
|
|
|
|
+ method=method
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ st.success(f"✅ 分析完成(使用 {method} 算法)")
|
|
|
|
|
+
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
# 📂 加载已有标注模式
|
|
# 📂 加载已有标注模式
|