|
@@ -5,339 +5,70 @@
|
|
|
|
|
|
|
|
import streamlit as st
|
|
import streamlit as st
|
|
|
from pathlib import Path
|
|
from pathlib import Path
|
|
|
-import json
|
|
|
|
|
-from PIL import Image, ImageDraw, ImageFont
|
|
|
|
|
-import numpy as np
|
|
|
|
|
-import copy
|
|
|
|
|
|
|
+from PIL import Image
|
|
|
|
|
+import yaml
|
|
|
|
|
+from typing import Dict, List, Optional, Tuple
|
|
|
|
|
+import argparse
|
|
|
|
|
+import sys
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
- from .table_line_generator import TableLineGenerator
|
|
|
|
|
-except ImportError:
|
|
|
|
|
from table_line_generator import TableLineGenerator
|
|
from table_line_generator import TableLineGenerator
|
|
|
|
|
+except ImportError:
|
|
|
|
|
+ from .table_line_generator import TableLineGenerator
|
|
|
|
|
|
|
|
|
|
+# 导入编辑器模块
|
|
|
|
|
+from editor import (
|
|
|
|
|
+ # UI 组件
|
|
|
|
|
+ create_file_uploader_section,
|
|
|
|
|
+ create_display_settings_section,
|
|
|
|
|
+ create_undo_redo_section,
|
|
|
|
|
+ create_analysis_section,
|
|
|
|
|
+ create_save_section,
|
|
|
|
|
+ create_directory_selector,
|
|
|
|
|
+ # 新增的模块功能
|
|
|
|
|
+ setup_new_annotation_mode,
|
|
|
|
|
+ setup_edit_annotation_mode,
|
|
|
|
|
+ render_table_structure_view,
|
|
|
|
|
+
|
|
|
|
|
+ # 绘图
|
|
|
|
|
+ get_cached_table_lines_image,
|
|
|
|
|
+
|
|
|
|
|
+ # 状态管理
|
|
|
|
|
+ init_undo_stack,
|
|
|
|
|
+
|
|
|
|
|
+ # 调整
|
|
|
|
|
+ create_adjustment_section,
|
|
|
|
|
+ show_image_with_scroll,
|
|
|
|
|
|
|
|
-def parse_ocr_data(ocr_data):
|
|
|
|
|
- """解析OCR数据,支持多种格式"""
|
|
|
|
|
- # 如果是字符串,尝试解析
|
|
|
|
|
- if isinstance(ocr_data, str):
|
|
|
|
|
- try:
|
|
|
|
|
- ocr_data = json.loads(ocr_data)
|
|
|
|
|
- except json.JSONDecodeError:
|
|
|
|
|
- st.error("❌ JSON 格式错误,无法解析")
|
|
|
|
|
- return []
|
|
|
|
|
-
|
|
|
|
|
- # 检查是否为 PPStructure V3 格式
|
|
|
|
|
- if isinstance(ocr_data, dict) and 'parsing_res_list' in ocr_data and 'overall_ocr_res' in ocr_data:
|
|
|
|
|
- st.info("🔍 检测到 PPStructure V3 格式")
|
|
|
|
|
-
|
|
|
|
|
- try:
|
|
|
|
|
- table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(ocr_data)
|
|
|
|
|
- st.success(f"✅ 表格区域: {table_bbox}")
|
|
|
|
|
- st.success(f"✅ 表格内文本框: {len(text_boxes)} 个")
|
|
|
|
|
- return text_boxes
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- st.error(f"❌ 解析 PPStructure 结果失败: {e}")
|
|
|
|
|
- return []
|
|
|
|
|
-
|
|
|
|
|
- # 确保是列表
|
|
|
|
|
- if not isinstance(ocr_data, list):
|
|
|
|
|
- st.error(f"❌ OCR 数据应该是列表,实际类型: {type(ocr_data)}")
|
|
|
|
|
- return []
|
|
|
|
|
-
|
|
|
|
|
- if not ocr_data:
|
|
|
|
|
- st.warning("⚠️ OCR 数据为空")
|
|
|
|
|
- return []
|
|
|
|
|
-
|
|
|
|
|
- first_item = ocr_data[0]
|
|
|
|
|
- if not isinstance(first_item, dict):
|
|
|
|
|
- st.error(f"❌ OCR 数据项应该是字典,实际类型: {type(first_item)}")
|
|
|
|
|
- return []
|
|
|
|
|
-
|
|
|
|
|
- if 'bbox' not in first_item:
|
|
|
|
|
- st.error("❌ OCR 数据缺少 'bbox' 字段")
|
|
|
|
|
- st.info("💡 支持的格式示例:\n```json\n[\n {\n \"text\": \"文本\",\n \"bbox\": [x1, y1, x2, y2]\n }\n]\n```")
|
|
|
|
|
- return []
|
|
|
|
|
-
|
|
|
|
|
- return ocr_data
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def draw_table_lines_with_numbers(image, structure, line_width=2, show_numbers=True):
|
|
|
|
|
- """
|
|
|
|
|
- 绘制带编号的表格线(使用线坐标列表)
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- image: PIL Image 对象
|
|
|
|
|
- structure: 表格结构字典(包含 horizontal_lines 和 vertical_lines)
|
|
|
|
|
- line_width: 线条宽度
|
|
|
|
|
- show_numbers: 是否显示编号
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- 绘制了表格线和编号的图片
|
|
|
|
|
- """
|
|
|
|
|
- img_with_lines = image.copy()
|
|
|
|
|
- draw = ImageDraw.Draw(img_with_lines)
|
|
|
|
|
-
|
|
|
|
|
- # 尝试加载字体
|
|
|
|
|
- try:
|
|
|
|
|
- font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 20)
|
|
|
|
|
- except:
|
|
|
|
|
- font = ImageFont.load_default()
|
|
|
|
|
-
|
|
|
|
|
- # 🆕 使用线坐标列表
|
|
|
|
|
- horizontal_lines = structure.get('horizontal_lines', [])
|
|
|
|
|
- vertical_lines = structure.get('vertical_lines', [])
|
|
|
|
|
- modified_h_lines = structure.get('modified_h_lines', set())
|
|
|
|
|
- modified_v_lines = structure.get('modified_v_lines', set())
|
|
|
|
|
-
|
|
|
|
|
- # 计算绘制范围
|
|
|
|
|
- x_start = vertical_lines[0] if vertical_lines else 0
|
|
|
|
|
- x_end = vertical_lines[-1] if vertical_lines else img_with_lines.width
|
|
|
|
|
- y_start = horizontal_lines[0] if horizontal_lines else 0
|
|
|
|
|
- y_end = horizontal_lines[-1] if horizontal_lines else img_with_lines.height
|
|
|
|
|
-
|
|
|
|
|
- # 🎨 绘制横线
|
|
|
|
|
- for idx, y in enumerate(horizontal_lines):
|
|
|
|
|
- color = (255, 0, 0) if idx in modified_h_lines else (0, 0, 255)
|
|
|
|
|
- draw.line([(x_start, y), (x_end, y)], fill=color, width=line_width)
|
|
|
|
|
-
|
|
|
|
|
- # 🔢 绘制行编号
|
|
|
|
|
- if show_numbers:
|
|
|
|
|
- text = f"R{idx+1}"
|
|
|
|
|
- bbox = draw.textbbox((x_start - 35, y - 10), text, font=font)
|
|
|
|
|
- draw.rectangle(bbox, fill='white', outline='black')
|
|
|
|
|
- draw.text((x_start - 35, y - 10), text, fill=color, font=font)
|
|
|
|
|
-
|
|
|
|
|
- # 🎨 绘制竖线
|
|
|
|
|
- for idx, x in enumerate(vertical_lines):
|
|
|
|
|
- color = (255, 0, 0) if idx in modified_v_lines else (0, 0, 255)
|
|
|
|
|
- draw.line([(x, y_start), (x, y_end)], fill=color, width=line_width)
|
|
|
|
|
-
|
|
|
|
|
- # 🔢 绘制列编号
|
|
|
|
|
- if show_numbers:
|
|
|
|
|
- text = f"C{idx+1}"
|
|
|
|
|
- bbox = draw.textbbox((x - 10, y_start - 25), text, font=font)
|
|
|
|
|
- draw.rectangle(bbox, fill='white', outline='black')
|
|
|
|
|
- draw.text((x - 10, y_start - 25), text, fill=color, font=font)
|
|
|
|
|
- bbox = draw.textbbox((x - 10, y_end + 25), text, font=font)
|
|
|
|
|
- draw.rectangle(bbox, fill='white', outline='black')
|
|
|
|
|
- draw.text((x - 10, y_end + 25), text, fill=color, font=font)
|
|
|
|
|
-
|
|
|
|
|
- return img_with_lines
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-# 🆕 新增:用于保存的纯净表格线绘制函数
|
|
|
|
|
-def draw_clean_table_lines(image, structure, line_width=2, line_color=(0, 0, 0)):
|
|
|
|
|
- """
|
|
|
|
|
- 绘制纯净的表格线(用于保存)
|
|
|
|
|
- - 所有线用黑色
|
|
|
|
|
- - 不显示编号
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- image: PIL Image 对象
|
|
|
|
|
- structure: 表格结构字典
|
|
|
|
|
- line_width: 线条宽度
|
|
|
|
|
- line_color: 线条颜色,默认黑色 (0, 0, 0)
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- 绘制了纯净表格线的图片
|
|
|
|
|
- """
|
|
|
|
|
- img_with_lines = image.copy()
|
|
|
|
|
- draw = ImageDraw.Draw(img_with_lines)
|
|
|
|
|
-
|
|
|
|
|
- horizontal_lines = structure.get('horizontal_lines', [])
|
|
|
|
|
- vertical_lines = structure.get('vertical_lines', [])
|
|
|
|
|
-
|
|
|
|
|
- if not horizontal_lines or not vertical_lines:
|
|
|
|
|
- return img_with_lines
|
|
|
|
|
-
|
|
|
|
|
- # 计算绘制范围
|
|
|
|
|
- x_start = vertical_lines[0]
|
|
|
|
|
- x_end = vertical_lines[-1]
|
|
|
|
|
- y_start = horizontal_lines[0]
|
|
|
|
|
- y_end = horizontal_lines[-1]
|
|
|
|
|
-
|
|
|
|
|
- # 🖤 绘制横线(统一黑色)
|
|
|
|
|
- for y in horizontal_lines:
|
|
|
|
|
- draw.line([(x_start, y), (x_end, y)], fill=line_color, width=line_width)
|
|
|
|
|
-
|
|
|
|
|
- # 🖤 绘制竖线(统一黑色)
|
|
|
|
|
- for x in vertical_lines:
|
|
|
|
|
- draw.line([(x, y_start), (x, y_end)], fill=line_color, width=line_width)
|
|
|
|
|
-
|
|
|
|
|
- return img_with_lines
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def init_undo_stack():
|
|
|
|
|
- """初始化撤销/重做栈"""
|
|
|
|
|
- if 'undo_stack' not in st.session_state:
|
|
|
|
|
- st.session_state.undo_stack = []
|
|
|
|
|
- if 'redo_stack' not in st.session_state:
|
|
|
|
|
- st.session_state.redo_stack = []
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def save_state_for_undo(structure):
|
|
|
|
|
- """保存当前状态到撤销栈"""
|
|
|
|
|
- # 深拷贝当前结构
|
|
|
|
|
- state = copy.deepcopy(structure)
|
|
|
|
|
- st.session_state.undo_stack.append(state)
|
|
|
|
|
- # 清空重做栈
|
|
|
|
|
- st.session_state.redo_stack = []
|
|
|
|
|
-
|
|
|
|
|
- # 限制栈深度(最多保存20个历史状态)
|
|
|
|
|
- if len(st.session_state.undo_stack) > 20:
|
|
|
|
|
- st.session_state.undo_stack.pop(0)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def undo_last_action():
|
|
|
|
|
- """撤销上一个操作"""
|
|
|
|
|
- if st.session_state.undo_stack:
|
|
|
|
|
- # 保存当前状态到重做栈
|
|
|
|
|
- current_state = copy.deepcopy(st.session_state.structure)
|
|
|
|
|
- st.session_state.redo_stack.append(current_state)
|
|
|
|
|
-
|
|
|
|
|
- # 恢复上一个状态
|
|
|
|
|
- st.session_state.structure = st.session_state.undo_stack.pop()
|
|
|
|
|
- return True
|
|
|
|
|
- return False
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def redo_last_action():
|
|
|
|
|
- """重做上一个操作"""
|
|
|
|
|
- if st.session_state.redo_stack:
|
|
|
|
|
- # 保存当前状态到撤销栈
|
|
|
|
|
- current_state = copy.deepcopy(st.session_state.structure)
|
|
|
|
|
- st.session_state.undo_stack.append(current_state)
|
|
|
|
|
-
|
|
|
|
|
- # 恢复重做的状态
|
|
|
|
|
- st.session_state.structure = st.session_state.redo_stack.pop()
|
|
|
|
|
- return True
|
|
|
|
|
- return False
|
|
|
|
|
-
|
|
|
|
|
|
|
+ # 配置
|
|
|
|
|
+ load_table_editor_config,
|
|
|
|
|
+ build_data_source_catalog,
|
|
|
|
|
+ parse_table_editor_cli_args,
|
|
|
|
|
+ # 🆕 批量应用模板
|
|
|
|
|
+ create_batch_template_section,
|
|
|
|
|
+)
|
|
|
|
|
|
|
|
-def get_structure_hash(structure, line_width, show_numbers):
|
|
|
|
|
- """生成结构的哈希值,用于判断是否需要重新绘制"""
|
|
|
|
|
- import hashlib
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 使用线坐标列表生成哈希
|
|
|
|
|
- key_data = {
|
|
|
|
|
- 'horizontal_lines': structure.get('horizontal_lines', []),
|
|
|
|
|
- 'vertical_lines': structure.get('vertical_lines', []),
|
|
|
|
|
- 'modified_h_lines': sorted(list(structure.get('modified_h_lines', set()))),
|
|
|
|
|
- 'modified_v_lines': sorted(list(structure.get('modified_v_lines', set()))),
|
|
|
|
|
- 'line_width': line_width,
|
|
|
|
|
- 'show_numbers': show_numbers
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- key_str = json.dumps(key_data, sort_keys=True)
|
|
|
|
|
- return hashlib.md5(key_str.encode()).hexdigest()
|
|
|
|
|
|
|
+DEFAULT_CONFIG_PATH = Path(__file__).with_name("table_line_generator.yaml")
|
|
|
|
|
|
|
|
|
|
+@st.cache_resource
|
|
|
|
|
+def get_cli_args():
|
|
|
|
|
+ return parse_table_editor_cli_args()
|
|
|
|
|
|
|
|
-def get_cached_table_lines_image(image, structure, line_width, show_numbers):
|
|
|
|
|
- """
|
|
|
|
|
- 获取缓存的表格线图片,如果缓存不存在或失效则重新绘制
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- image: PIL Image 对象
|
|
|
|
|
- structure: 表格结构字典
|
|
|
|
|
- line_width: 线条宽度
|
|
|
|
|
- show_numbers: 是否显示编号
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- 绘制了表格线和编号的图片
|
|
|
|
|
- """
|
|
|
|
|
- # 初始化缓存
|
|
|
|
|
- if 'cached_table_image' not in st.session_state:
|
|
|
|
|
- st.session_state.cached_table_image = None
|
|
|
|
|
- if 'cached_table_hash' not in st.session_state:
|
|
|
|
|
- st.session_state.cached_table_hash = None
|
|
|
|
|
-
|
|
|
|
|
- # 计算当前结构的哈希
|
|
|
|
|
- current_hash = get_structure_hash(structure, line_width, show_numbers)
|
|
|
|
|
-
|
|
|
|
|
- # 检查缓存是否有效
|
|
|
|
|
- if (st.session_state.cached_table_hash == current_hash and
|
|
|
|
|
- st.session_state.cached_table_image is not None):
|
|
|
|
|
- # 缓存有效,直接返回
|
|
|
|
|
- return st.session_state.cached_table_image
|
|
|
|
|
-
|
|
|
|
|
- # 缓存失效,重新绘制
|
|
|
|
|
- img_with_lines = draw_table_lines_with_numbers(
|
|
|
|
|
- image,
|
|
|
|
|
- structure,
|
|
|
|
|
- line_width=line_width,
|
|
|
|
|
- show_numbers=show_numbers
|
|
|
|
|
|
|
+@st.cache_resource
|
|
|
|
|
+def get_table_editor_config():
|
|
|
|
|
+ """缓存配置加载(整个 session 共享)"""
|
|
|
|
|
+ cli_args = get_cli_args()
|
|
|
|
|
+ config_path = (
|
|
|
|
|
+ Path(cli_args.config).expanduser()
|
|
|
|
|
+ if cli_args.config
|
|
|
|
|
+ else DEFAULT_CONFIG_PATH
|
|
|
)
|
|
)
|
|
|
-
|
|
|
|
|
- # 更新缓存
|
|
|
|
|
- st.session_state.cached_table_image = img_with_lines
|
|
|
|
|
- st.session_state.cached_table_hash = current_hash
|
|
|
|
|
-
|
|
|
|
|
- return img_with_lines
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def clear_table_image_cache():
|
|
|
|
|
- """清除表格图片缓存"""
|
|
|
|
|
- if 'cached_table_image' in st.session_state:
|
|
|
|
|
- st.session_state.cached_table_image = None
|
|
|
|
|
- if 'cached_table_hash' in st.session_state:
|
|
|
|
|
- st.session_state.cached_table_hash = None
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def load_structure_from_config(config_path: Path) -> dict:
|
|
|
|
|
- """
|
|
|
|
|
- 从配置文件加载表格结构
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- config_path: 配置文件路径
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- 表格结构字典
|
|
|
|
|
- """
|
|
|
|
|
- with open(config_path, 'r', encoding='utf-8') as f:
|
|
|
|
|
- structure = json.load(f)
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 兼容旧版配置(补充缺失字段)
|
|
|
|
|
- if 'horizontal_lines' not in structure:
|
|
|
|
|
- # 从 rows 生成横线坐标
|
|
|
|
|
- horizontal_lines = []
|
|
|
|
|
- for row in structure.get('rows', []):
|
|
|
|
|
- horizontal_lines.append(row['y_start'])
|
|
|
|
|
- if structure.get('rows'):
|
|
|
|
|
- horizontal_lines.append(structure['rows'][-1]['y_end'])
|
|
|
|
|
- structure['horizontal_lines'] = horizontal_lines
|
|
|
|
|
-
|
|
|
|
|
- if 'vertical_lines' not in structure:
|
|
|
|
|
- # 从 columns 生成竖线坐标
|
|
|
|
|
- vertical_lines = []
|
|
|
|
|
- for col in structure.get('columns', []):
|
|
|
|
|
- vertical_lines.append(col['x_start'])
|
|
|
|
|
- if structure.get('columns'):
|
|
|
|
|
- vertical_lines.append(structure['columns'][-1]['x_end'])
|
|
|
|
|
- structure['vertical_lines'] = vertical_lines
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 转换修改标记(从列表转为集合)
|
|
|
|
|
- if 'modified_h_lines' in structure:
|
|
|
|
|
- structure['modified_h_lines'] = set(structure['modified_h_lines'])
|
|
|
|
|
- else:
|
|
|
|
|
- structure['modified_h_lines'] = set()
|
|
|
|
|
-
|
|
|
|
|
- if 'modified_v_lines' in structure:
|
|
|
|
|
- structure['modified_v_lines'] = set(structure['modified_v_lines'])
|
|
|
|
|
- else:
|
|
|
|
|
- structure['modified_v_lines'] = set()
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 转换旧版的 modified_rows/modified_cols(如果存在)
|
|
|
|
|
- if 'modified_rows' in structure and not structure['modified_h_lines']:
|
|
|
|
|
- structure['modified_h_lines'] = set(structure.get('modified_rows', []))
|
|
|
|
|
- if 'modified_cols' in structure and not structure['modified_v_lines']:
|
|
|
|
|
- structure['modified_v_lines'] = set(structure.get('modified_cols', []))
|
|
|
|
|
-
|
|
|
|
|
- return structure
|
|
|
|
|
|
|
+ return load_table_editor_config(config_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_table_line_editor():
|
|
def create_table_line_editor():
|
|
|
"""创建表格线编辑器界面"""
|
|
"""创建表格线编辑器界面"""
|
|
|
- # 🆕 配置页面为宽屏模式
|
|
|
|
|
|
|
+ # 配置页面
|
|
|
st.set_page_config(
|
|
st.set_page_config(
|
|
|
page_title="表格线编辑器",
|
|
page_title="表格线编辑器",
|
|
|
page_icon="📏",
|
|
page_icon="📏",
|
|
@@ -347,7 +78,13 @@ def create_table_line_editor():
|
|
|
|
|
|
|
|
st.title("📏 表格线编辑器")
|
|
st.title("📏 表格线编辑器")
|
|
|
|
|
|
|
|
- # 初始化 session_state
|
|
|
|
|
|
|
+ # 🎯 从缓存获取配置
|
|
|
|
|
+ TABLE_EDITOR_CONFIG = get_table_editor_config()
|
|
|
|
|
+ VIEWPORT_WIDTH = TABLE_EDITOR_CONFIG["viewport"]["width"]
|
|
|
|
|
+ VIEWPORT_HEIGHT = TABLE_EDITOR_CONFIG["viewport"]["height"]
|
|
|
|
|
+ DATA_SOURCES = TABLE_EDITOR_CONFIG.get("data_sources", [])
|
|
|
|
|
+
|
|
|
|
|
+ # 初始化 session_state(集中管理)
|
|
|
if 'loaded_json_name' not in st.session_state:
|
|
if 'loaded_json_name' not in st.session_state:
|
|
|
st.session_state.loaded_json_name = None
|
|
st.session_state.loaded_json_name = None
|
|
|
if 'loaded_image_name' not in st.session_state:
|
|
if 'loaded_image_name' not in st.session_state:
|
|
@@ -359,179 +96,105 @@ def create_table_line_editor():
|
|
|
if 'image' not in st.session_state:
|
|
if 'image' not in st.session_state:
|
|
|
st.session_state.image = None
|
|
st.session_state.image = None
|
|
|
|
|
|
|
|
|
|
+ # 🆕 目录模式专用状态
|
|
|
|
|
+ if 'dir_selected_index' not in st.session_state:
|
|
|
|
|
+ st.session_state.dir_selected_index = 0
|
|
|
|
|
+ if 'last_loaded_entry' not in st.session_state:
|
|
|
|
|
+ st.session_state.last_loaded_entry = None
|
|
|
|
|
+ if 'dir_auto_mode' not in st.session_state:
|
|
|
|
|
+ st.session_state.dir_auto_mode = None
|
|
|
|
|
+ if 'current_data_source' not in st.session_state: # 🔑 新增
|
|
|
|
|
+ st.session_state.current_data_source = None
|
|
|
|
|
+ if 'current_output_config' not in st.session_state: # 🔑 新增
|
|
|
|
|
+ st.session_state.current_output_config = None
|
|
|
|
|
+
|
|
|
# 初始化撤销/重做栈
|
|
# 初始化撤销/重做栈
|
|
|
init_undo_stack()
|
|
init_undo_stack()
|
|
|
|
|
|
|
|
- # 🆕 添加工作模式选择
|
|
|
|
|
|
|
+ # 🆕 工作模式选择
|
|
|
st.sidebar.header("📂 工作模式")
|
|
st.sidebar.header("📂 工作模式")
|
|
|
work_mode = st.sidebar.radio(
|
|
work_mode = st.sidebar.radio(
|
|
|
"选择模式",
|
|
"选择模式",
|
|
|
- ["🆕 新建标注", "📂 加载已有标注"],
|
|
|
|
|
|
|
+ ["🆕 新建标注", "📂 加载已有标注", "📁 目录模式"],
|
|
|
index=0
|
|
index=0
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- if work_mode == "🆕 新建标注":
|
|
|
|
|
- # 原有的上传流程
|
|
|
|
|
- st.sidebar.subheader("上传文件")
|
|
|
|
|
- uploaded_json = st.sidebar.file_uploader("上传OCR结果JSON", type=['json'], key="new_json")
|
|
|
|
|
- uploaded_image = st.sidebar.file_uploader("上传对应图片", type=['jpg', 'png'], key="new_image")
|
|
|
|
|
-
|
|
|
|
|
- # 检查是否需要重新加载 JSON
|
|
|
|
|
- if uploaded_json is not None:
|
|
|
|
|
- if st.session_state.loaded_json_name != uploaded_json.name:
|
|
|
|
|
- try:
|
|
|
|
|
- raw_data = json.load(uploaded_json)
|
|
|
|
|
-
|
|
|
|
|
- with st.expander("🔍 原始数据结构"):
|
|
|
|
|
- if isinstance(raw_data, dict):
|
|
|
|
|
- st.json({k: f"<{type(v).__name__}>" if not isinstance(v, (str, int, float, bool, type(None))) else v
|
|
|
|
|
- for k, v in list(raw_data.items())[:5]})
|
|
|
|
|
- else:
|
|
|
|
|
- st.json(raw_data[:3] if len(raw_data) > 3 else raw_data)
|
|
|
|
|
-
|
|
|
|
|
- ocr_data = parse_ocr_data(raw_data)
|
|
|
|
|
-
|
|
|
|
|
- if not ocr_data:
|
|
|
|
|
- st.error("❌ 无法解析 OCR 数据,请检查 JSON 格式")
|
|
|
|
|
- st.stop()
|
|
|
|
|
-
|
|
|
|
|
- st.session_state.ocr_data = ocr_data
|
|
|
|
|
- st.session_state.loaded_json_name = uploaded_json.name
|
|
|
|
|
- st.session_state.loaded_config_name = None # 清除配置文件标记
|
|
|
|
|
-
|
|
|
|
|
- # 清除旧的分析结果、历史记录和缓存
|
|
|
|
|
- if 'structure' in st.session_state:
|
|
|
|
|
- del st.session_state.structure
|
|
|
|
|
- if 'generator' in st.session_state:
|
|
|
|
|
- del st.session_state.generator
|
|
|
|
|
- st.session_state.undo_stack = []
|
|
|
|
|
- st.session_state.redo_stack = []
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
-
|
|
|
|
|
- st.success(f"✅ 成功加载 {len(ocr_data)} 条 OCR 记录")
|
|
|
|
|
-
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- st.error(f"❌ 加载数据失败: {e}")
|
|
|
|
|
- st.stop()
|
|
|
|
|
-
|
|
|
|
|
- # 检查是否需要重新加载图片
|
|
|
|
|
- if uploaded_image is not None:
|
|
|
|
|
- if st.session_state.loaded_image_name != uploaded_image.name:
|
|
|
|
|
- try:
|
|
|
|
|
- image = Image.open(uploaded_image)
|
|
|
|
|
-
|
|
|
|
|
- st.session_state.image = image
|
|
|
|
|
- st.session_state.loaded_image_name = uploaded_image.name
|
|
|
|
|
-
|
|
|
|
|
- if 'structure' in st.session_state:
|
|
|
|
|
- del st.session_state.structure
|
|
|
|
|
- if 'generator' in st.session_state:
|
|
|
|
|
- del st.session_state.generator
|
|
|
|
|
- st.session_state.undo_stack = []
|
|
|
|
|
- st.session_state.redo_stack = []
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
-
|
|
|
|
|
- st.success(f"✅ 成功加载图片: {uploaded_image.name}")
|
|
|
|
|
-
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- st.error(f"❌ 加载图片失败: {e}")
|
|
|
|
|
- st.stop()
|
|
|
|
|
-
|
|
|
|
|
- else: # 📂 加载已有标注
|
|
|
|
|
- st.sidebar.subheader("加载已保存的标注")
|
|
|
|
|
-
|
|
|
|
|
- # 🆕 上传配置文件
|
|
|
|
|
- uploaded_config = st.sidebar.file_uploader(
|
|
|
|
|
- "上传配置文件 (*_structure.json)",
|
|
|
|
|
- type=['json'],
|
|
|
|
|
- key="load_config"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- # 🆕 上传对应的图片(可选,用于重新标注)
|
|
|
|
|
- uploaded_image_for_config = st.sidebar.file_uploader(
|
|
|
|
|
- "上传对应图片(可选)",
|
|
|
|
|
- type=['jpg', 'png'],
|
|
|
|
|
- key="load_image"
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ # 📁 目录模式
|
|
|
|
|
+ if work_mode == "📁 目录模式":
|
|
|
|
|
+ if not DATA_SOURCES:
|
|
|
|
|
+ st.sidebar.warning("未配置 data_sources")
|
|
|
|
|
+ return
|
|
|
|
|
|
|
|
- # 处理配置文件加载
|
|
|
|
|
- if uploaded_config is not None:
|
|
|
|
|
- if st.session_state.loaded_config_name != uploaded_config.name:
|
|
|
|
|
- try:
|
|
|
|
|
- # 🔧 直接从配置文件路径加载
|
|
|
|
|
- import tempfile
|
|
|
|
|
-
|
|
|
|
|
- # 创建临时文件
|
|
|
|
|
- with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as tmp:
|
|
|
|
|
- tmp.write(uploaded_config.getvalue().decode('utf-8'))
|
|
|
|
|
- tmp_path = tmp.name
|
|
|
|
|
-
|
|
|
|
|
- # 加载结构
|
|
|
|
|
- structure = load_structure_from_config(Path(tmp_path))
|
|
|
|
|
-
|
|
|
|
|
- # 清理临时文件
|
|
|
|
|
- Path(tmp_path).unlink()
|
|
|
|
|
-
|
|
|
|
|
- st.session_state.structure = structure
|
|
|
|
|
- st.session_state.loaded_config_name = uploaded_config.name
|
|
|
|
|
-
|
|
|
|
|
- # 清除历史记录和缓存
|
|
|
|
|
- st.session_state.undo_stack = []
|
|
|
|
|
- st.session_state.redo_stack = []
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
-
|
|
|
|
|
- st.success(f"✅ 成功加载配置: {uploaded_config.name}")
|
|
|
|
|
- st.info(
|
|
|
|
|
- f"📊 表格结构: {len(structure['rows'])}行 x {len(structure['columns'])}列\n\n"
|
|
|
|
|
- f"📏 横线数: {len(structure.get('horizontal_lines', []))}\n\n"
|
|
|
|
|
- f"📏 竖线数: {len(structure.get('vertical_lines', []))}"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- # 🆕 显示配置文件详情
|
|
|
|
|
- with st.expander("📋 配置详情"):
|
|
|
|
|
- st.json({
|
|
|
|
|
- "行数": len(structure['rows']),
|
|
|
|
|
- "列数": len(structure['columns']),
|
|
|
|
|
- "横线数": len(structure.get('horizontal_lines', [])),
|
|
|
|
|
- "竖线数": len(structure.get('vertical_lines', [])),
|
|
|
|
|
- "行高": structure.get('row_height'),
|
|
|
|
|
- "列宽": structure.get('col_widths'),
|
|
|
|
|
- "已修改的横线": list(structure.get('modified_h_lines', set())),
|
|
|
|
|
- "已修改的竖线": list(structure.get('modified_v_lines', set()))
|
|
|
|
|
- })
|
|
|
|
|
-
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- st.error(f"❌ 加载配置失败: {e}")
|
|
|
|
|
- import traceback
|
|
|
|
|
- st.code(traceback.format_exc())
|
|
|
|
|
- st.stop()
|
|
|
|
|
|
|
+ auto_mode = create_directory_selector(DATA_SOURCES, TABLE_EDITOR_CONFIG["output"])
|
|
|
|
|
+
|
|
|
|
|
+ # 显示当前数据源
|
|
|
|
|
+ if st.session_state.current_data_source:
|
|
|
|
|
+ ds_name = st.session_state.current_data_source.get("name", "未知")
|
|
|
|
|
+ st.sidebar.success(f"✅ 数据源: {ds_name}")
|
|
|
|
|
+
|
|
|
|
|
+ if auto_mode == "new":
|
|
|
|
|
+ if not (st.session_state.ocr_data and st.session_state.image):
|
|
|
|
|
+ st.warning("⚠️ 缺少必要数据")
|
|
|
|
|
+ return
|
|
|
|
|
+ _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
|
|
|
|
|
+ st.session_state.ocr_data,
|
|
|
|
|
+ st.session_state.image,
|
|
|
|
|
+ TABLE_EDITOR_CONFIG["display"]
|
|
|
|
|
+ )
|
|
|
|
|
+ else: # edit
|
|
|
|
|
+ if 'structure' not in st.session_state:
|
|
|
|
|
+ st.warning("⚠️ 结构加载失败")
|
|
|
|
|
+ return
|
|
|
|
|
+ image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
|
|
|
|
|
+ st.session_state.structure,
|
|
|
|
|
+ st.session_state.image,
|
|
|
|
|
+ TABLE_EDITOR_CONFIG["display"]
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
- # 处理图片加载(用于显示)
|
|
|
|
|
- if uploaded_image_for_config is not None:
|
|
|
|
|
- if st.session_state.loaded_image_name != uploaded_image_for_config.name:
|
|
|
|
|
- try:
|
|
|
|
|
- image = Image.open(uploaded_image_for_config)
|
|
|
|
|
- st.session_state.image = image
|
|
|
|
|
- st.session_state.loaded_image_name = uploaded_image_for_config.name
|
|
|
|
|
-
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
-
|
|
|
|
|
- st.success(f"✅ 成功加载图片: {uploaded_image_for_config.name}")
|
|
|
|
|
-
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- st.error(f"❌ 加载图片失败: {e}")
|
|
|
|
|
- st.stop()
|
|
|
|
|
|
|
+ # 统一渲染
|
|
|
|
|
+ if 'structure' in st.session_state and st.session_state.structure:
|
|
|
|
|
+ render_table_structure_view(
|
|
|
|
|
+ st.session_state.structure,
|
|
|
|
|
+ st.session_state.image or Image.new('RGB', (2000, 2000), 'white'),
|
|
|
|
|
+ line_width, # 🔑 统一使用这个 line_width
|
|
|
|
|
+ display_mode,
|
|
|
|
|
+ zoom_level,
|
|
|
|
|
+ show_line_numbers,
|
|
|
|
|
+ VIEWPORT_WIDTH,
|
|
|
|
|
+ VIEWPORT_HEIGHT
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 保存区域
|
|
|
|
|
+ create_save_section(
|
|
|
|
|
+ auto_mode,
|
|
|
|
|
+ st.session_state.structure,
|
|
|
|
|
+ st.session_state.image,
|
|
|
|
|
+ line_width, # 🔑 传递给保存区域
|
|
|
|
|
+ st.session_state.current_output_config or TABLE_EDITOR_CONFIG["output"]
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 🆕 批量应用模板区域(仅在 edit 模式显示)
|
|
|
|
|
+ if auto_mode == "edit":
|
|
|
|
|
+ # 🔑 获取当前的线条颜色名称(从保存区域的选择)
|
|
|
|
|
+ output_cfg = TABLE_EDITOR_CONFIG["output"]
|
|
|
|
|
+ line_colors = output_cfg.get("line_colors")
|
|
|
|
|
+ defaults = output_cfg.get("defaults", {})
|
|
|
|
|
+ default_color = defaults.get("line_color", line_colors[0]["name"])
|
|
|
|
|
+
|
|
|
|
|
+ # 🔑 传递当前页的设置
|
|
|
|
|
+ create_batch_template_section(
|
|
|
|
|
+ current_line_width=line_width,
|
|
|
|
|
+ current_line_color=st.session_state.get('save_line_color', default_color)
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
- # 🆕 如果配置已加载但没有图片,提示用户
|
|
|
|
|
- if 'structure' in st.session_state and st.session_state.image is None:
|
|
|
|
|
- st.warning("⚠️ 已加载配置,但未加载对应图片。请上传图片以查看效果。")
|
|
|
|
|
- st.info("💡 提示:配置文件已加载,您可以:\n1. 上传对应图片查看效果\n2. 直接编辑配置并保存")
|
|
|
|
|
|
|
+ return
|
|
|
|
|
|
|
|
- # 检查必要条件
|
|
|
|
|
|
|
+ # 🆕 新建标注模式
|
|
|
if work_mode == "🆕 新建标注":
|
|
if work_mode == "🆕 新建标注":
|
|
|
- if st.session_state.ocr_data is None or st.session_state.image is None:
|
|
|
|
|
|
|
+ create_file_uploader_section(work_mode)
|
|
|
|
|
+
|
|
|
|
|
+ if not (st.session_state.ocr_data and st.session_state.image):
|
|
|
st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
|
|
st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
|
|
|
-
|
|
|
|
|
with st.expander("📖 使用说明"):
|
|
with st.expander("📖 使用说明"):
|
|
|
st.markdown("""
|
|
st.markdown("""
|
|
|
### 🆕 新建标注模式
|
|
### 🆕 新建标注模式
|
|
@@ -568,23 +231,40 @@ def create_table_line_editor():
|
|
|
""")
|
|
""")
|
|
|
return
|
|
return
|
|
|
|
|
|
|
|
- ocr_data = st.session_state.ocr_data
|
|
|
|
|
- image = st.session_state.image
|
|
|
|
|
-
|
|
|
|
|
st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")
|
|
st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")
|
|
|
|
|
|
|
|
- if 'generator' not in st.session_state or st.session_state.generator is None:
|
|
|
|
|
- try:
|
|
|
|
|
- generator = TableLineGenerator(image, ocr_data)
|
|
|
|
|
- st.session_state.generator = generator
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- st.error(f"❌ 初始化失败: {e}")
|
|
|
|
|
- st.stop()
|
|
|
|
|
|
|
+ _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
|
|
|
|
|
+ st.session_state.ocr_data,
|
|
|
|
|
+ st.session_state.image,
|
|
|
|
|
+ TABLE_EDITOR_CONFIG["display"]
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if 'structure' in st.session_state and st.session_state.structure:
|
|
|
|
|
+ render_table_structure_view(
|
|
|
|
|
+ st.session_state.structure,
|
|
|
|
|
+ st.session_state.image,
|
|
|
|
|
+ line_width,
|
|
|
|
|
+ display_mode,
|
|
|
|
|
+ zoom_level,
|
|
|
|
|
+ show_line_numbers,
|
|
|
|
|
+ VIEWPORT_WIDTH,
|
|
|
|
|
+ VIEWPORT_HEIGHT
|
|
|
|
|
+ )
|
|
|
|
|
+ create_save_section(
|
|
|
|
|
+ work_mode,
|
|
|
|
|
+ st.session_state.structure,
|
|
|
|
|
+ st.session_state.image,
|
|
|
|
|
+ line_width,
|
|
|
|
|
+ TABLE_EDITOR_CONFIG["output"]
|
|
|
|
|
+ )
|
|
|
|
|
+ return
|
|
|
|
|
|
|
|
- else: # 加载已有标注模式
|
|
|
|
|
|
|
+ # 📂 加载已有标注模式
|
|
|
|
|
+ if work_mode == "📂 加载已有标注":
|
|
|
|
|
+ create_file_uploader_section(work_mode)
|
|
|
|
|
+
|
|
|
if 'structure' not in st.session_state:
|
|
if 'structure' not in st.session_state:
|
|
|
st.info("👆 请在左侧上传配置文件 (*_structure.json)")
|
|
st.info("👆 请在左侧上传配置文件 (*_structure.json)")
|
|
|
-
|
|
|
|
|
with st.expander("📖 使用说明"):
|
|
with st.expander("📖 使用说明"):
|
|
|
st.markdown("""
|
|
st.markdown("""
|
|
|
### 📂 加载已有标注
|
|
### 📂 加载已有标注
|
|
@@ -606,396 +286,39 @@ def create_table_line_editor():
|
|
|
if st.session_state.image is None:
|
|
if st.session_state.image is None:
|
|
|
st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")
|
|
st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")
|
|
|
|
|
|
|
|
- # 🆕 使用配置中的信息
|
|
|
|
|
- structure = st.session_state.structure
|
|
|
|
|
- image = st.session_state.image
|
|
|
|
|
-
|
|
|
|
|
- if image is None:
|
|
|
|
|
- # 如果没有图片,创建一个虚拟的空白图片用于显示坐标信息
|
|
|
|
|
- if 'table_bbox' in structure:
|
|
|
|
|
- bbox = structure['table_bbox']
|
|
|
|
|
- dummy_width = bbox[2] + 100
|
|
|
|
|
- dummy_height = bbox[3] + 100
|
|
|
|
|
- else:
|
|
|
|
|
- dummy_width = 2000
|
|
|
|
|
- dummy_height = 2000
|
|
|
|
|
-
|
|
|
|
|
- image = Image.new('RGB', (dummy_width, dummy_height), color='white')
|
|
|
|
|
- st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height}) 显示表格结构")
|
|
|
|
|
-
|
|
|
|
|
- # 显示设置
|
|
|
|
|
- st.sidebar.divider()
|
|
|
|
|
- st.sidebar.subheader("🖼️ 显示设置")
|
|
|
|
|
-
|
|
|
|
|
- line_width = st.sidebar.slider("线条宽度", 1, 5, 2)
|
|
|
|
|
- display_mode = st.sidebar.radio("显示模式", ["对比显示", "仅显示划线图", "仅显示原图"], index=1)
|
|
|
|
|
- zoom_level = st.sidebar.slider("图片缩放", 0.25, 2.0, 1.0, 0.25)
|
|
|
|
|
- show_line_numbers = st.sidebar.checkbox("显示线条编号", value=True)
|
|
|
|
|
-
|
|
|
|
|
- # 撤销/重做按钮
|
|
|
|
|
- st.sidebar.divider()
|
|
|
|
|
- st.sidebar.subheader("↩️ 撤销/重做")
|
|
|
|
|
-
|
|
|
|
|
- col1, col2 = st.sidebar.columns(2)
|
|
|
|
|
- with col1:
|
|
|
|
|
- if st.button("↩️ 撤销", disabled=len(st.session_state.undo_stack) == 0):
|
|
|
|
|
- if undo_last_action():
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
- st.success("✅ 已撤销")
|
|
|
|
|
- st.rerun()
|
|
|
|
|
-
|
|
|
|
|
- with col2:
|
|
|
|
|
- if st.button("↪️ 重做", disabled=len(st.session_state.redo_stack) == 0):
|
|
|
|
|
- if redo_last_action():
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
- st.success("✅ 已重做")
|
|
|
|
|
- st.rerun()
|
|
|
|
|
-
|
|
|
|
|
- st.sidebar.info(f"📚 历史记录: {len(st.session_state.undo_stack)} 条")
|
|
|
|
|
-
|
|
|
|
|
- # 分析表格结构(仅在新建模式显示)
|
|
|
|
|
- if work_mode == "🆕 新建标注" and st.button("🔍 分析表格结构"):
|
|
|
|
|
- with st.spinner("分析中..."):
|
|
|
|
|
- try:
|
|
|
|
|
- generator = st.session_state.generator
|
|
|
|
|
- structure = generator.analyze_table_structure(
|
|
|
|
|
- y_tolerance=y_tolerance,
|
|
|
|
|
- x_tolerance=x_tolerance,
|
|
|
|
|
- min_row_height=min_row_height
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if not structure:
|
|
|
|
|
- st.warning("⚠️ 未检测到表格结构")
|
|
|
|
|
- st.stop()
|
|
|
|
|
-
|
|
|
|
|
- structure['modified_h_lines'] = set()
|
|
|
|
|
- structure['modified_v_lines'] = set()
|
|
|
|
|
-
|
|
|
|
|
- st.session_state.structure = structure
|
|
|
|
|
-
|
|
|
|
|
- st.session_state.undo_stack = []
|
|
|
|
|
- st.session_state.redo_stack = []
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
-
|
|
|
|
|
- st.success(
|
|
|
|
|
- f"✅ 检测到 {len(structure['rows'])} 行({len(structure['horizontal_lines'])} 条横线),"
|
|
|
|
|
- f"{len(structure['columns'])} 列({len(structure['vertical_lines'])} 条竖线)"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- col1, col2, col3, col4 = st.columns(4)
|
|
|
|
|
- with col1:
|
|
|
|
|
- st.metric("行数", len(structure['rows']))
|
|
|
|
|
- with col2:
|
|
|
|
|
- st.metric("横线数", len(structure['horizontal_lines']))
|
|
|
|
|
- with col3:
|
|
|
|
|
- st.metric("列数", len(structure['columns']))
|
|
|
|
|
- with col4:
|
|
|
|
|
- st.metric("竖线数", len(structure['vertical_lines']))
|
|
|
|
|
-
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- st.error(f"❌ 分析失败: {e}")
|
|
|
|
|
- import traceback
|
|
|
|
|
- st.code(traceback.format_exc())
|
|
|
|
|
- st.stop()
|
|
|
|
|
-
|
|
|
|
|
- # 显示结果(两种模式通用)
|
|
|
|
|
- if 'structure' in st.session_state and st.session_state.structure:
|
|
|
|
|
- structure = st.session_state.structure
|
|
|
|
|
-
|
|
|
|
|
- # 使用缓存机制绘制表格线
|
|
|
|
|
- img_with_lines = get_cached_table_lines_image(
|
|
|
|
|
- image,
|
|
|
|
|
- structure,
|
|
|
|
|
- line_width=line_width,
|
|
|
|
|
- show_numbers=show_line_numbers
|
|
|
|
|
|
|
+ image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
|
|
|
|
|
+ st.session_state.structure,
|
|
|
|
|
+ st.session_state.image,
|
|
|
|
|
+ TABLE_EDITOR_CONFIG["display"]
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- # 根据显示模式显示图片
|
|
|
|
|
- if display_mode == "对比显示":
|
|
|
|
|
- col1, col2 = st.columns(2)
|
|
|
|
|
- with col1:
|
|
|
|
|
- st.subheader("原图")
|
|
|
|
|
- st.image(image, use_container_width=True)
|
|
|
|
|
-
|
|
|
|
|
- with col2:
|
|
|
|
|
- st.subheader("添加表格线")
|
|
|
|
|
- st.image(img_with_lines, use_container_width=True)
|
|
|
|
|
-
|
|
|
|
|
- elif display_mode == "仅显示划线图":
|
|
|
|
|
- display_width = int(img_with_lines.width * zoom_level)
|
|
|
|
|
-
|
|
|
|
|
- st.subheader(f"表格线图 (缩放: {zoom_level:.0%})")
|
|
|
|
|
- st.image(img_with_lines, width=display_width)
|
|
|
|
|
-
|
|
|
|
|
- else:
|
|
|
|
|
- display_width = int(image.width * zoom_level)
|
|
|
|
|
-
|
|
|
|
|
- st.subheader(f"原图 (缩放: {zoom_level:.0%})")
|
|
|
|
|
- st.image(image, width=display_width)
|
|
|
|
|
-
|
|
|
|
|
- # 显示详细信息
|
|
|
|
|
- with st.expander("📊 表格结构详情"):
|
|
|
|
|
- st.json({
|
|
|
|
|
- "行数": len(structure['rows']),
|
|
|
|
|
- "列数": len(structure['columns']),
|
|
|
|
|
- "横线数": len(structure.get('horizontal_lines', [])),
|
|
|
|
|
- "竖线数": len(structure.get('vertical_lines', [])),
|
|
|
|
|
- "横线坐标": structure.get('horizontal_lines', []),
|
|
|
|
|
- "竖线坐标": structure.get('vertical_lines', []),
|
|
|
|
|
- "标准行高": structure.get('row_height'),
|
|
|
|
|
- "列宽度": structure.get('col_widths'),
|
|
|
|
|
- "修改的横线": list(structure.get('modified_h_lines', set())),
|
|
|
|
|
- "修改的竖线": list(structure.get('modified_v_lines', set()))
|
|
|
|
|
- })
|
|
|
|
|
-
|
|
|
|
|
- # 🆕 手动调整 - 使用线坐标列表
|
|
|
|
|
- st.subheader("🛠️ 手动调整")
|
|
|
|
|
-
|
|
|
|
|
- adjust_type = st.radio(
|
|
|
|
|
- "调整类型",
|
|
|
|
|
- ["调整横线", "调整竖线", "添加横线", "删除横线", "添加竖线", "删除竖线"],
|
|
|
|
|
- horizontal=True
|
|
|
|
|
|
|
+ render_table_structure_view(
|
|
|
|
|
+ st.session_state.structure,
|
|
|
|
|
+ image,
|
|
|
|
|
+ line_width,
|
|
|
|
|
+ display_mode,
|
|
|
|
|
+ zoom_level,
|
|
|
|
|
+ show_line_numbers,
|
|
|
|
|
+ VIEWPORT_WIDTH,
|
|
|
|
|
+ VIEWPORT_HEIGHT
|
|
|
|
|
+ )
|
|
|
|
|
+ create_save_section(
|
|
|
|
|
+ work_mode,
|
|
|
|
|
+ st.session_state.structure,
|
|
|
|
|
+ image,
|
|
|
|
|
+ line_width,
|
|
|
|
|
+ TABLE_EDITOR_CONFIG["output"]
|
|
|
)
|
|
)
|
|
|
-
|
|
|
|
|
- if adjust_type == "调整横线":
|
|
|
|
|
- horizontal_lines = structure.get('horizontal_lines', [])
|
|
|
|
|
- if len(horizontal_lines) > 0:
|
|
|
|
|
- line_index = st.selectbox(
|
|
|
|
|
- "选择横线",
|
|
|
|
|
- range(len(horizontal_lines)),
|
|
|
|
|
- format_func=lambda x: f"第 {x+1} 条横线 (Y: {horizontal_lines[x]}) {'🔴已修改' if x in structure.get('modified_h_lines', set()) else ''}"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- new_y = st.number_input(
|
|
|
|
|
- "新的Y坐标",
|
|
|
|
|
- value=int(horizontal_lines[line_index]),
|
|
|
|
|
- step=1
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if st.button("应用调整"):
|
|
|
|
|
- save_state_for_undo(structure)
|
|
|
|
|
-
|
|
|
|
|
- structure['horizontal_lines'][line_index] = new_y
|
|
|
|
|
- structure['modified_h_lines'].add(line_index)
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 同步更新 rows
|
|
|
|
|
- if line_index < len(structure['rows']):
|
|
|
|
|
- structure['rows'][line_index]['y_start'] = new_y
|
|
|
|
|
- if line_index > 0:
|
|
|
|
|
- structure['rows'][line_index - 1]['y_end'] = new_y
|
|
|
|
|
-
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
- st.success("✅ 已调整")
|
|
|
|
|
- st.rerun()
|
|
|
|
|
- else:
|
|
|
|
|
- st.warning("⚠️ 没有检测到横线")
|
|
|
|
|
-
|
|
|
|
|
- elif adjust_type == "调整竖线":
|
|
|
|
|
- vertical_lines = structure.get('vertical_lines', [])
|
|
|
|
|
- if len(vertical_lines) > 0:
|
|
|
|
|
- line_index = st.selectbox(
|
|
|
|
|
- "选择竖线",
|
|
|
|
|
- range(len(vertical_lines)),
|
|
|
|
|
- format_func=lambda x: f"第 {x+1} 条竖线 (X: {vertical_lines[x]}) {'🔴已修改' if x in structure.get('modified_v_lines', set()) else ''}"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- new_x = st.number_input(
|
|
|
|
|
- "新的X坐标",
|
|
|
|
|
- value=int(vertical_lines[line_index]),
|
|
|
|
|
- step=1
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if st.button("应用调整"):
|
|
|
|
|
- save_state_for_undo(structure)
|
|
|
|
|
-
|
|
|
|
|
- structure['vertical_lines'][line_index] = new_x
|
|
|
|
|
- structure['modified_v_lines'].add(line_index)
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 同步更新 columns
|
|
|
|
|
- if line_index < len(structure['columns']):
|
|
|
|
|
- structure['columns'][line_index]['x_start'] = new_x
|
|
|
|
|
- if line_index > 0:
|
|
|
|
|
- structure['columns'][line_index - 1]['x_end'] = new_x
|
|
|
|
|
-
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
- st.success("✅ 已调整")
|
|
|
|
|
- st.rerun()
|
|
|
|
|
- else:
|
|
|
|
|
- st.warning("⚠️ 没有检测到竖线")
|
|
|
|
|
-
|
|
|
|
|
- elif adjust_type == "删除横线":
|
|
|
|
|
- horizontal_lines = structure.get('horizontal_lines', [])
|
|
|
|
|
- if len(horizontal_lines) > 0:
|
|
|
|
|
- lines_to_delete = st.multiselect(
|
|
|
|
|
- "选择要删除的横线(可多选)",
|
|
|
|
|
- range(len(horizontal_lines)),
|
|
|
|
|
- format_func=lambda x: f"第 {x+1} 条横线 (Y: {horizontal_lines[x]}) {'🔴已修改' if x in structure.get('modified_h_lines', set()) else ''}"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if lines_to_delete and st.button("🗑️ 批量删除", type="primary"):
|
|
|
|
|
- save_state_for_undo(structure)
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 删除线坐标
|
|
|
|
|
- for idx in sorted(lines_to_delete, reverse=True):
|
|
|
|
|
- del structure['horizontal_lines'][idx]
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 重新计算 rows(删除线后重建行区间)
|
|
|
|
|
- new_rows = []
|
|
|
|
|
- for i in range(len(structure['horizontal_lines']) - 1):
|
|
|
|
|
- new_rows.append({
|
|
|
|
|
- 'y_start': structure['horizontal_lines'][i],
|
|
|
|
|
- 'y_end': structure['horizontal_lines'][i + 1],
|
|
|
|
|
- # 'bboxes': []
|
|
|
|
|
- })
|
|
|
|
|
- structure['rows'] = new_rows
|
|
|
|
|
-
|
|
|
|
|
- # 更新修改标记
|
|
|
|
|
- structure['modified_h_lines'] = set()
|
|
|
|
|
-
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
- st.success(f"✅ 已删除 {len(lines_to_delete)} 条横线")
|
|
|
|
|
- st.rerun()
|
|
|
|
|
-
|
|
|
|
|
- st.info(f"💡 当前有 {len(horizontal_lines)} 条横线,已选择 {len(lines_to_delete)} 条")
|
|
|
|
|
- else:
|
|
|
|
|
- st.warning("⚠️ 没有可删除的横线")
|
|
|
|
|
-
|
|
|
|
|
- elif adjust_type == "删除竖线":
|
|
|
|
|
- vertical_lines = structure.get('vertical_lines', [])
|
|
|
|
|
- if len(vertical_lines) > 0:
|
|
|
|
|
- lines_to_delete = st.multiselect(
|
|
|
|
|
- "选择要删除的竖线(可多选)",
|
|
|
|
|
- range(len(vertical_lines)),
|
|
|
|
|
- format_func=lambda x: f"第 {x+1} 条竖线 (X: {vertical_lines[x]}) {'🔴已修改' if x in structure.get('modified_v_lines', set()) else ''}"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if lines_to_delete and st.button("🗑️ 批量删除", type="primary"):
|
|
|
|
|
- save_state_for_undo(structure)
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 删除线坐标
|
|
|
|
|
- for idx in sorted(lines_to_delete, reverse=True):
|
|
|
|
|
- del structure['vertical_lines'][idx]
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 重新计算 columns
|
|
|
|
|
- new_columns = []
|
|
|
|
|
- for i in range(len(structure['vertical_lines']) - 1):
|
|
|
|
|
- new_columns.append({
|
|
|
|
|
- 'x_start': structure['vertical_lines'][i],
|
|
|
|
|
- 'x_end': structure['vertical_lines'][i + 1]
|
|
|
|
|
- })
|
|
|
|
|
- structure['columns'] = new_columns
|
|
|
|
|
-
|
|
|
|
|
- # 重新计算列宽
|
|
|
|
|
- structure['col_widths'] = [
|
|
|
|
|
- col['x_end'] - col['x_start']
|
|
|
|
|
- for col in new_columns
|
|
|
|
|
- ]
|
|
|
|
|
-
|
|
|
|
|
- # 更新修改标记
|
|
|
|
|
- structure['modified_v_lines'] = set()
|
|
|
|
|
-
|
|
|
|
|
- clear_table_image_cache()
|
|
|
|
|
- st.success(f"✅ 已删除 {len(lines_to_delete)} 条竖线")
|
|
|
|
|
- st.rerun()
|
|
|
|
|
-
|
|
|
|
|
- st.info(f"💡 当前有 {len(vertical_lines)} 条竖线,已选择 {len(lines_to_delete)} 条")
|
|
|
|
|
- else:
|
|
|
|
|
- st.warning("⚠️ 没有可删除的列")
|
|
|
|
|
-
|
|
|
|
|
- # 保存配置
|
|
|
|
|
- st.divider()
|
|
|
|
|
-
|
|
|
|
|
- save_col1, save_col2, save_col3 = st.columns(3)
|
|
|
|
|
-
|
|
|
|
|
- with save_col1:
|
|
|
|
|
- save_structure = st.checkbox("保存表格结构配置", value=True)
|
|
|
|
|
-
|
|
|
|
|
- with save_col2:
|
|
|
|
|
- save_image = st.checkbox("保存表格线图片", value=True)
|
|
|
|
|
-
|
|
|
|
|
- with save_col3:
|
|
|
|
|
- # 🆕 线条颜色选择
|
|
|
|
|
- line_color_option = st.selectbox(
|
|
|
|
|
- "保存时线条颜色",
|
|
|
|
|
- ["黑色", "蓝色", "红色"],
|
|
|
|
|
- index=0
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if st.button("💾 保存", type="primary"):
|
|
|
|
|
- output_dir = Path("output/table_structures")
|
|
|
|
|
- output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
-
|
|
|
|
|
- base_name = Path(st.session_state.loaded_image_name).stem
|
|
|
|
|
- saved_files = []
|
|
|
|
|
-
|
|
|
|
|
- if save_structure:
|
|
|
|
|
- structure_path = output_dir / f"{base_name}_structure.json"
|
|
|
|
|
-
|
|
|
|
|
- # 🔧 保存线坐标列表
|
|
|
|
|
- save_structure_data = {
|
|
|
|
|
- 'rows': structure['rows'],
|
|
|
|
|
- 'columns': structure['columns'],
|
|
|
|
|
- 'horizontal_lines': structure.get('horizontal_lines', []),
|
|
|
|
|
- 'vertical_lines': structure.get('vertical_lines', []),
|
|
|
|
|
- 'row_height': structure['row_height'],
|
|
|
|
|
- 'col_widths': structure['col_widths'],
|
|
|
|
|
- 'table_bbox': structure['table_bbox'],
|
|
|
|
|
- 'modified_h_lines': list(structure.get('modified_h_lines', set())),
|
|
|
|
|
- 'modified_v_lines': list(structure.get('modified_v_lines', set()))
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- with open(structure_path, 'w', encoding='utf-8') as f:
|
|
|
|
|
- json.dump(save_structure_data, f, indent=2, ensure_ascii=False)
|
|
|
|
|
-
|
|
|
|
|
- saved_files.append(("配置文件", structure_path))
|
|
|
|
|
-
|
|
|
|
|
- with open(structure_path, 'r') as f:
|
|
|
|
|
- st.download_button(
|
|
|
|
|
- "📥 下载配置文件",
|
|
|
|
|
- f.read(),
|
|
|
|
|
- file_name=f"{base_name}_structure.json",
|
|
|
|
|
- mime="application/json"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if save_image:
|
|
|
|
|
- # 🆕 根据选择的颜色绘制纯净表格线
|
|
|
|
|
- color_map = {
|
|
|
|
|
- "黑色": (0, 0, 0),
|
|
|
|
|
- "蓝色": (0, 0, 255),
|
|
|
|
|
- "红色": (255, 0, 0)
|
|
|
|
|
- }
|
|
|
|
|
- selected_color = color_map[line_color_option]
|
|
|
|
|
-
|
|
|
|
|
- # 🎯 使用纯净绘制函数
|
|
|
|
|
- clean_img = draw_clean_table_lines(
|
|
|
|
|
- image,
|
|
|
|
|
- structure,
|
|
|
|
|
- line_width=line_width,
|
|
|
|
|
- line_color=selected_color
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- output_image_path = output_dir / f"{base_name}_with_lines.png"
|
|
|
|
|
- clean_img.save(output_image_path)
|
|
|
|
|
- saved_files.append(("表格线图片", output_image_path))
|
|
|
|
|
-
|
|
|
|
|
- # 🆕 提供下载按钮
|
|
|
|
|
- import io
|
|
|
|
|
- buf = io.BytesIO()
|
|
|
|
|
- clean_img.save(buf, format='PNG')
|
|
|
|
|
- buf.seek(0)
|
|
|
|
|
-
|
|
|
|
|
- st.download_button(
|
|
|
|
|
- "📥 下载表格线图片",
|
|
|
|
|
- buf,
|
|
|
|
|
- file_name=f"{base_name}_with_lines.png",
|
|
|
|
|
- mime="image/png"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- if saved_files:
|
|
|
|
|
- st.success(f"✅ 已保存 {len(saved_files)} 个文件:")
|
|
|
|
|
- for file_type, file_path in saved_files:
|
|
|
|
|
- st.info(f" • {file_type}: {file_path}")
|
|
|
|
|
-
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
|
- create_table_line_editor()
|
|
|
|
|
|
|
+ try:
|
|
|
|
|
+ create_table_line_editor()
|
|
|
|
|
+ except GeneratorExit:
|
|
|
|
|
+ pass # Streamlit 内部清理,忽略
|
|
|
|
|
+ except KeyboardInterrupt:
|
|
|
|
|
+ st.info("👋 程序已停止")
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ st.error(f"❌ 程序崩溃: {e}")
|
|
|
|
|
+ import traceback
|
|
|
|
|
+ with st.expander("🔍 详细错误信息"):
|
|
|
|
|
+ st.code(traceback.format_exc())
|