|
|
@@ -1,1001 +0,0 @@
|
|
|
-"""
|
|
|
-表格线可视化编辑器
|
|
|
-支持人工调整表格线位置
|
|
|
-"""
|
|
|
-
|
|
|
-import streamlit as st
|
|
|
-from pathlib import Path
|
|
|
-import json
|
|
|
-from PIL import Image, ImageDraw, ImageFont
|
|
|
-import numpy as np
|
|
|
-import copy
|
|
|
-
|
|
|
-try:
|
|
|
- from .table_line_generator import TableLineGenerator
|
|
|
-except ImportError:
|
|
|
- from table_line_generator import TableLineGenerator
|
|
|
-
|
|
|
-
|
|
|
def parse_ocr_data(ocr_data):
    """Normalise raw OCR output into a list of text-box dictionaries.

    Three input shapes are recognised:

    * a JSON string, which is decoded first;
    * a dict containing both ``parsing_res_list`` and ``overall_ocr_res``
      (treated as a PPStructure V3 result and delegated to
      ``TableLineGenerator.parse_ppstructure_result``);
    * a non-empty list whose first element is a dict with a ``bbox`` key,
      which is returned unchanged.

    Any problem is reported to the user through Streamlit messages.

    Args:
        ocr_data: Raw OCR payload (str, dict or list).

    Returns:
        The list of text boxes, or an empty list when the payload cannot
        be interpreted.
    """
    payload = ocr_data

    # A string is assumed to be serialized JSON.
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except json.JSONDecodeError:
            st.error("❌ JSON 格式错误,无法解析")
            return []

    # PPStructure V3 results are recognised by their two marker keys.
    is_ppstructure = (
        isinstance(payload, dict)
        and 'parsing_res_list' in payload
        and 'overall_ocr_res' in payload
    )
    if is_ppstructure:
        st.info("🔍 检测到 PPStructure V3 格式")

        try:
            table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(payload)
            st.success(f"✅ 表格区域: {table_bbox}")
            st.success(f"✅ 表格内文本框: {len(text_boxes)} 个")
            return text_boxes
        except Exception as e:
            st.error(f"❌ 解析 PPStructure 结果失败: {e}")
            return []

    # Anything else has to be a list of dicts.
    if not isinstance(payload, list):
        st.error(f"❌ OCR 数据应该是列表,实际类型: {type(payload)}")
        return []

    if len(payload) == 0:
        st.warning("⚠️ OCR 数据为空")
        return []

    # Only the first record is inspected to validate the schema.
    sample = payload[0]
    if not isinstance(sample, dict):
        st.error(f"❌ OCR 数据项应该是字典,实际类型: {type(sample)}")
        return []

    if 'bbox' not in sample:
        st.error("❌ OCR 数据缺少 'bbox' 字段")
        st.info("💡 支持的格式示例:\n```json\n[\n {\n \"text\": \"文本\",\n \"bbox\": [x1, y1, x2, y2]\n }\n]\n```")
        return []

    return payload
|
|
|
-
|
|
|
-
|
|
|
def draw_table_lines_with_numbers(image, structure, line_width=2, show_numbers=True):
    """Draw the table grid on a copy of *image*, optionally with line labels.

    Lines whose index appears in ``modified_h_lines`` / ``modified_v_lines``
    are drawn in red, all others in blue. Horizontal lines are labelled
    ``R1, R2, ...`` to the left of the grid; vertical lines are labelled
    ``C1, C2, ...`` both above and below the grid.

    Args:
        image: PIL Image object; it is not modified.
        structure: Table structure dict. Reads ``horizontal_lines``,
            ``vertical_lines``, ``modified_h_lines`` and ``modified_v_lines``;
            every key is optional.
        line_width: Width of the drawn lines in pixels.
        show_numbers: Whether to draw the ``R``/``C`` labels.

    Returns:
        A new image with the lines (and labels) drawn on it.
    """
    img_with_lines = image.copy()
    draw = ImageDraw.Draw(img_with_lines)

    # Prefer a scalable font; fall back to Pillow's built-in bitmap font when
    # the file is missing or FreeType support is unavailable. Only those
    # failures are caught so that unrelated errors are not hidden.
    try:
        font = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 20)
    except (OSError, ImportError):
        font = ImageFont.load_default()

    horizontal_lines = structure.get('horizontal_lines', [])
    vertical_lines = structure.get('vertical_lines', [])
    modified_h_lines = structure.get('modified_h_lines', set())
    modified_v_lines = structure.get('modified_v_lines', set())

    # Lines span the outermost grid lines; without a perpendicular line list
    # they span the whole image instead.
    x_start = vertical_lines[0] if vertical_lines else 0
    x_end = vertical_lines[-1] if vertical_lines else img_with_lines.width
    y_start = horizontal_lines[0] if horizontal_lines else 0
    y_end = horizontal_lines[-1] if horizontal_lines else img_with_lines.height

    def draw_label(origin, text, color):
        """Draw *text* on a white, black-outlined box anchored at *origin*."""
        # Keep the anchor on the canvas so labels stay visible when the grid
        # touches the left or top edge of the image.
        anchor = (max(0, origin[0]), max(0, origin[1]))
        box = draw.textbbox(anchor, text, font=font)
        draw.rectangle(box, fill='white', outline='black')
        draw.text(anchor, text, fill=color, font=font)

    # Horizontal lines and row labels.
    for idx, y in enumerate(horizontal_lines):
        color = (255, 0, 0) if idx in modified_h_lines else (0, 0, 255)
        draw.line([(x_start, y), (x_end, y)], fill=color, width=line_width)

        if show_numbers:
            draw_label((x_start - 35, y - 10), f"R{idx+1}", color)

    # Vertical lines and column labels (above and below the grid).
    for idx, x in enumerate(vertical_lines):
        color = (255, 0, 0) if idx in modified_v_lines else (0, 0, 255)
        draw.line([(x, y_start), (x, y_end)], fill=color, width=line_width)

        if show_numbers:
            text = f"C{idx+1}"
            draw_label((x - 10, y_start - 25), text, color)
            draw_label((x - 10, y_end + 25), text, color)

    return img_with_lines
|
|
|
-
|
|
|
-
|
|
|
-# 🆕 新增:用于保存的纯净表格线绘制函数
|
|
|
def draw_clean_table_lines(image, structure, line_width=2, line_color=(0, 0, 0)):
    """Draw an unlabelled, single-colour table grid (used when saving).

    Args:
        image: PIL Image object; it is not modified.
        structure: Table structure dict; reads ``horizontal_lines`` and
            ``vertical_lines``.
        line_width: Width of the drawn lines in pixels.
        line_color: RGB colour for every line, black by default.

    Returns:
        A copy of *image* with the grid drawn on it. The copy is returned
        untouched when either line list is empty.
    """
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)

    ys = structure.get('horizontal_lines', [])
    xs = structure.get('vertical_lines', [])

    # A grid needs at least one line in each direction.
    if not (ys and xs):
        return canvas

    # Each line spans from the first to the last perpendicular line.
    left, right = xs[0], xs[-1]
    top, bottom = ys[0], ys[-1]

    for y in ys:
        pen.line([(left, y), (right, y)], fill=line_color, width=line_width)

    for x in xs:
        pen.line([(x, top), (x, bottom)], fill=line_color, width=line_width)

    return canvas
|
|
|
-
|
|
|
-
|
|
|
def init_undo_stack():
    """Create empty undo and redo stacks in the session state if missing."""
    for stack_name in ('undo_stack', 'redo_stack'):
        if stack_name not in st.session_state:
            st.session_state[stack_name] = []
|
|
|
-
|
|
|
-
|
|
|
def save_state_for_undo(structure):
    """Push a deep copy of *structure* onto the undo stack.

    Recording a new state invalidates the redo stack, and the undo history is
    capped at 20 entries (the oldest entry is dropped first).
    """
    history = st.session_state.undo_stack
    history.append(copy.deepcopy(structure))

    # A fresh edit makes any previously undone states unreachable.
    st.session_state.redo_stack = []

    # Keep at most 20 snapshots.
    if len(history) > 20:
        del history[0]
|
|
|
-
|
|
|
-
|
|
|
def undo_last_action():
    """Restore the most recent undo snapshot.

    The current structure is deep-copied onto the redo stack first.

    Returns:
        True when a snapshot was restored, False when the undo stack is empty.
    """
    if not st.session_state.undo_stack:
        return False

    # Remember where we are so the step can be redone.
    snapshot = copy.deepcopy(st.session_state.structure)
    st.session_state.redo_stack.append(snapshot)

    st.session_state.structure = st.session_state.undo_stack.pop()
    return True
|
|
|
-
|
|
|
-
|
|
|
def redo_last_action():
    """Re-apply the most recently undone state.

    The current structure is deep-copied onto the undo stack first.

    Returns:
        True when a state was re-applied, False when the redo stack is empty.
    """
    if not st.session_state.redo_stack:
        return False

    # Remember where we are so the step can be undone again.
    snapshot = copy.deepcopy(st.session_state.structure)
    st.session_state.undo_stack.append(snapshot)

    st.session_state.structure = st.session_state.redo_stack.pop()
    return True
|
|
|
-
|
|
|
-
|
|
|
def get_structure_hash(structure, line_width, show_numbers):
    """Return an MD5 hex digest identifying what a rendered grid depends on.

    The digest covers the line coordinate lists, the (sorted) modified-line
    indices, the line width and the label flag. It is used to decide whether
    a cached rendering can be reused.
    """
    import hashlib

    def ordered_indices(key):
        # Sets have no stable order; sort so equal sets hash identically.
        return sorted(structure.get(key, set()))

    fingerprint = dict(
        horizontal_lines=structure.get('horizontal_lines', []),
        vertical_lines=structure.get('vertical_lines', []),
        modified_h_lines=ordered_indices('modified_h_lines'),
        modified_v_lines=ordered_indices('modified_v_lines'),
        line_width=line_width,
        show_numbers=show_numbers,
    )

    serialized = json.dumps(fingerprint, sort_keys=True)
    digest = hashlib.md5(serialized.encode())
    return digest.hexdigest()
|
|
|
-
|
|
|
-
|
|
|
def get_cached_table_lines_image(image, structure, line_width, show_numbers):
    """Return the labelled grid rendering, reusing the session cache if valid.

    The cache key is ``get_structure_hash(structure, line_width,
    show_numbers)``; the image itself is not part of the key.

    Args:
        image: PIL Image object to draw on (a copy is drawn, not the input).
        structure: Table structure dict.
        line_width: Width of the drawn lines in pixels.
        show_numbers: Whether to draw line labels.

    Returns:
        The image with table lines (and labels) drawn on it.
    """
    # Make sure both cache slots exist.
    for slot in ('cached_table_image', 'cached_table_hash'):
        if slot not in st.session_state:
            st.session_state[slot] = None

    fingerprint = get_structure_hash(structure, line_width, show_numbers)

    # Cache hit: same fingerprint and an image is actually stored.
    if (st.session_state.cached_table_hash == fingerprint and
            st.session_state.cached_table_image is not None):
        return st.session_state.cached_table_image

    # Cache miss: render again and remember the result.
    rendered = draw_table_lines_with_numbers(
        image,
        structure,
        line_width=line_width,
        show_numbers=show_numbers,
    )
    st.session_state.cached_table_image = rendered
    st.session_state.cached_table_hash = fingerprint

    return rendered
|
|
|
-
|
|
|
-
|
|
|
def clear_table_image_cache():
    """Reset the cached rendering and its hash, if those slots exist."""
    for slot in ('cached_table_image', 'cached_table_hash'):
        if slot in st.session_state:
            st.session_state[slot] = None
|
|
|
-
|
|
|
-
|
|
|
def load_structure_from_config(config_path: Path) -> dict:
    """Load a table structure from a JSON config file and normalise it.

    Older configs are upgraded in memory:

    * missing ``horizontal_lines`` / ``vertical_lines`` are derived from the
      ``rows`` / ``columns`` intervals;
    * ``modified_h_lines`` / ``modified_v_lines`` are converted from lists to
      sets (empty sets when absent);
    * legacy ``modified_rows`` / ``modified_cols`` are used when the
      corresponding new set is empty.

    Args:
        config_path: Path of the JSON config file (read as UTF-8).

    Returns:
        The table structure dict.
    """
    with open(config_path, 'r', encoding='utf-8') as handle:
        structure = json.load(handle)

    # Derive line coordinates from row intervals: every row start plus the
    # end of the last row.
    if 'horizontal_lines' not in structure:
        ys = [row['y_start'] for row in structure.get('rows', [])]
        if structure.get('rows'):
            ys.append(structure['rows'][-1]['y_end'])
        structure['horizontal_lines'] = ys

    # Same derivation for columns.
    if 'vertical_lines' not in structure:
        xs = [col['x_start'] for col in structure.get('columns', [])]
        if structure.get('columns'):
            xs.append(structure['columns'][-1]['x_end'])
        structure['vertical_lines'] = xs

    # JSON stores the modified markers as lists; the editor works with sets.
    for flag_key in ('modified_h_lines', 'modified_v_lines'):
        structure[flag_key] = set(structure[flag_key]) if flag_key in structure else set()

    # Fall back to the legacy marker names when the new ones are empty.
    legacy_names = (
        ('modified_rows', 'modified_h_lines'),
        ('modified_cols', 'modified_v_lines'),
    )
    for old_key, new_key in legacy_names:
        if old_key in structure and not structure[new_key]:
            structure[new_key] = set(structure.get(old_key, []))

    return structure
|
|
|
-
|
|
|
-
|
|
|
def _reset_history_and_cache(drop_analysis=False):
    """Clear undo/redo history and the rendered-image cache.

    When *drop_analysis* is true the stored structure and generator are also
    discarded so they get recomputed for newly uploaded inputs.
    """
    if drop_analysis:
        if 'structure' in st.session_state:
            del st.session_state.structure
        if 'generator' in st.session_state:
            del st.session_state.generator
    st.session_state.undo_stack = []
    st.session_state.redo_stack = []
    clear_table_image_cache()


def _rebuild_rows(structure):
    """Recreate ``rows`` as the intervals between consecutive horizontal lines."""
    lines = structure.get('horizontal_lines', [])
    structure['rows'] = [
        {'y_start': top, 'y_end': bottom}
        for top, bottom in zip(lines, lines[1:])
    ]


def _rebuild_columns(structure):
    """Recreate ``columns`` and ``col_widths`` from the vertical line list."""
    lines = structure.get('vertical_lines', [])
    columns = [
        {'x_start': left, 'x_end': right}
        for left, right in zip(lines, lines[1:])
    ]
    structure['columns'] = columns
    structure['col_widths'] = [col['x_end'] - col['x_start'] for col in columns]


def _insert_line(structure, lines_key, modified_key, position):
    """Insert *position* into a line list and keep the modified markers aligned.

    Markers at or after the insertion index are shifted by one, and the new
    line itself is marked as modified.
    """
    import bisect

    lines = structure.setdefault(lines_key, [])
    at = bisect.bisect_left(lines, position)
    lines.insert(at, position)

    shifted = {i + 1 if i >= at else i for i in structure.get(modified_key, set())}
    shifted.add(at)
    structure[modified_key] = shifted


def _default_save_stem():
    """Pick the base file name for saved outputs.

    Uses the image name when an image was uploaded, otherwise the config name
    (without a trailing ``_structure``), otherwise ``table``.
    """
    image_name = st.session_state.loaded_image_name
    if image_name:
        return Path(image_name).stem

    config_name = st.session_state.loaded_config_name
    if config_name:
        stem = Path(config_name).stem
        suffix = '_structure'
        if stem.endswith(suffix) and len(stem) > len(suffix):
            stem = stem[:-len(suffix)]
        return stem

    return 'table'


def create_table_line_editor():
    """Build the Streamlit page of the table-line editor.

    The page has two working modes chosen in the sidebar:

    * "new annotation": upload an OCR result JSON plus its image, analyse
      the table structure with ``TableLineGenerator`` and adjust the lines;
    * "load existing annotation": upload a previously saved
      ``*_structure.json`` (image optional) and continue editing.

    In both modes lines can be moved, added and deleted (with undo/redo), and
    the structure and/or a clean line image can be saved under
    ``output/table_structures`` and downloaded.

    All state lives in ``st.session_state``; the function returns ``None``.
    """
    # Wide layout gives the image as much room as possible.
    st.set_page_config(
        page_title="表格线编辑器",
        page_icon="📏",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    st.title("📏 表格线编辑器")

    # Session defaults.
    for state_key in ('loaded_json_name', 'loaded_image_name',
                      'loaded_config_name', 'ocr_data', 'image'):
        if state_key not in st.session_state:
            st.session_state[state_key] = None

    init_undo_stack()

    # Working-mode selection.
    st.sidebar.header("📂 工作模式")
    work_mode = st.sidebar.radio(
        "选择模式",
        ["🆕 新建标注", "📂 加载已有标注"],
        index=0
    )
    is_new_mode = work_mode == "🆕 新建标注"

    if is_new_mode:
        st.sidebar.subheader("上传文件")
        uploaded_json = st.sidebar.file_uploader("上传OCR结果JSON", type=['json'], key="new_json")
        uploaded_image = st.sidebar.file_uploader("上传对应图片", type=['jpg', 'png'], key="new_image")

        # (Re)load the OCR JSON only when a different file name shows up.
        if uploaded_json is not None and st.session_state.loaded_json_name != uploaded_json.name:
            try:
                raw_data = json.load(uploaded_json)

                with st.expander("🔍 原始数据结构"):
                    if isinstance(raw_data, dict):
                        st.json({k: f"<{type(v).__name__}>" if not isinstance(v, (str, int, float, bool, type(None))) else v
                                 for k, v in list(raw_data.items())[:5]})
                    else:
                        st.json(raw_data[:3] if len(raw_data) > 3 else raw_data)

                ocr_data = parse_ocr_data(raw_data)

                if not ocr_data:
                    st.error("❌ 无法解析 OCR 数据,请检查 JSON 格式")
                    st.stop()

                st.session_state.ocr_data = ocr_data
                st.session_state.loaded_json_name = uploaded_json.name
                st.session_state.loaded_config_name = None  # no config in this mode

                # New data invalidates analysis results, history and cache.
                _reset_history_and_cache(drop_analysis=True)

                st.success(f"✅ 成功加载 {len(ocr_data)} 条 OCR 记录")

            except Exception as e:
                st.error(f"❌ 加载数据失败: {e}")
                st.stop()

        # (Re)load the image only when a different file name shows up.
        if uploaded_image is not None and st.session_state.loaded_image_name != uploaded_image.name:
            try:
                image = Image.open(uploaded_image)

                st.session_state.image = image
                st.session_state.loaded_image_name = uploaded_image.name

                _reset_history_and_cache(drop_analysis=True)

                st.success(f"✅ 成功加载图片: {uploaded_image.name}")

            except Exception as e:
                st.error(f"❌ 加载图片失败: {e}")
                st.stop()

    else:
        st.sidebar.subheader("加载已保存的标注")

        uploaded_config = st.sidebar.file_uploader(
            "上传配置文件 (*_structure.json)",
            type=['json'],
            key="load_config"
        )

        # The image is optional here; without it a blank canvas is used.
        uploaded_image_for_config = st.sidebar.file_uploader(
            "上传对应图片(可选)",
            type=['jpg', 'png'],
            key="load_image"
        )

        if uploaded_config is not None and st.session_state.loaded_config_name != uploaded_config.name:
            try:
                import tempfile

                # load_structure_from_config() expects a path, so the upload
                # is spooled to a temporary file first.
                with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8') as tmp:
                    tmp.write(uploaded_config.getvalue().decode('utf-8'))
                    tmp_path = Path(tmp.name)

                # Always remove the temporary file, even when loading fails.
                try:
                    structure = load_structure_from_config(tmp_path)
                finally:
                    tmp_path.unlink()

                st.session_state.structure = structure
                st.session_state.loaded_config_name = uploaded_config.name

                _reset_history_and_cache()

                row_count = len(structure.get('rows', []))
                column_count = len(structure.get('columns', []))

                st.success(f"✅ 成功加载配置: {uploaded_config.name}")
                st.info(
                    f"📊 表格结构: {row_count}行 x {column_count}列\n\n"
                    f"📏 横线数: {len(structure.get('horizontal_lines', []))}\n\n"
                    f"📏 竖线数: {len(structure.get('vertical_lines', []))}"
                )

                with st.expander("📋 配置详情"):
                    st.json({
                        "行数": row_count,
                        "列数": column_count,
                        "横线数": len(structure.get('horizontal_lines', [])),
                        "竖线数": len(structure.get('vertical_lines', [])),
                        "行高": structure.get('row_height'),
                        "列宽": structure.get('col_widths'),
                        "已修改的横线": list(structure.get('modified_h_lines', set())),
                        "已修改的竖线": list(structure.get('modified_v_lines', set()))
                    })

            except Exception as e:
                st.error(f"❌ 加载配置失败: {e}")
                import traceback
                st.code(traceback.format_exc())
                st.stop()

        if uploaded_image_for_config is not None and st.session_state.loaded_image_name != uploaded_image_for_config.name:
            try:
                image = Image.open(uploaded_image_for_config)
                st.session_state.image = image
                st.session_state.loaded_image_name = uploaded_image_for_config.name

                clear_table_image_cache()

                st.success(f"✅ 成功加载图片: {uploaded_image_for_config.name}")

            except Exception as e:
                st.error(f"❌ 加载图片失败: {e}")
                st.stop()

        # Config present but no image: tell the user what is possible.
        if 'structure' in st.session_state and st.session_state.image is None:
            st.warning("⚠️ 已加载配置,但未加载对应图片。请上传图片以查看效果。")
            st.info("💡 提示:配置文件已加载,您可以:\n1. 上传对应图片查看效果\n2. 直接编辑配置并保存")

    # Preconditions for each mode.
    if is_new_mode:
        if st.session_state.ocr_data is None or st.session_state.image is None:
            st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")

            with st.expander("📖 使用说明"):
                st.markdown("""
                ### 🆕 新建标注模式

                **支持的OCR格式**

                **1. PPStructure V3 格式 (推荐)**
                ```json
                {
                  "parsing_res_list": [...],
                  "overall_ocr_res": {
                    "rec_boxes": [[x1, y1, x2, y2], ...],
                    "rec_texts": ["文本1", "文本2", ...]
                  }
                }
                ```

                **2. 标准格式**
                ```json
                [
                  {
                    "text": "文本内容",
                    "bbox": [x1, y1, x2, y2]
                  }
                ]
                ```

                ### 📂 加载已有标注模式

                1. 上传之前保存的 `*_structure.json` 配置文件
                2. 上传对应的图片(可选)
                3. 继续调整表格线位置
                4. 保存更新后的配置
                """)
            return

        ocr_data = st.session_state.ocr_data
        image = st.session_state.image

        st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")

        if 'generator' not in st.session_state or st.session_state.generator is None:
            try:
                generator = TableLineGenerator(image, ocr_data)
                st.session_state.generator = generator
            except Exception as e:
                st.error(f"❌ 初始化失败: {e}")
                st.stop()

        # Analysis parameters. These were referenced by the analysis call
        # below but never defined, which made every analysis fail.
        # NOTE(review): ranges/defaults are a starting point — confirm against
        # TableLineGenerator.analyze_table_structure.
        st.sidebar.divider()
        st.sidebar.subheader("🔧 参数调整")
        y_tolerance = st.sidebar.slider("Y轴聚类容差(行检测)", 1, 20, 5)
        x_tolerance = st.sidebar.slider("X轴聚类容差(列检测)", 5, 50, 10)
        min_row_height = st.sidebar.slider("最小行高", 10, 100, 20)

    else:
        if 'structure' not in st.session_state:
            st.info("👆 请在左侧上传配置文件 (*_structure.json)")

            with st.expander("📖 使用说明"):
                st.markdown("""
                ### 📂 加载已有标注

                **步骤:**

                1. **上传配置文件**:选择之前保存的 `*_structure.json`
                2. **上传图片**(可选):上传对应的图片以查看效果
                3. **调整表格线**:使用下方的工具调整横线/竖线位置
                4. **保存更新**:保存修改后的配置

                **提示:**
                - 即使没有图片,也可以直接编辑配置文件中的坐标
                - 配置文件包含完整的表格结构信息
                - 可以应用到同类型的其他页面
                """)
            return

        if st.session_state.image is None:
            st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")

        structure = st.session_state.structure
        image = st.session_state.image

        if image is None:
            # No image: use a blank canvas big enough to show the grid.
            bbox = structure.get('table_bbox')
            if bbox:
                dummy_width = int(bbox[2]) + 100
                dummy_height = int(bbox[3]) + 100
            else:
                dummy_width = 2000
                dummy_height = 2000

            image = Image.new('RGB', (dummy_width, dummy_height), color='white')
            st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height}) 显示表格结构")

    # Display settings.
    st.sidebar.divider()
    st.sidebar.subheader("🖼️ 显示设置")

    line_width = st.sidebar.slider("线条宽度", 1, 5, 2)
    display_mode = st.sidebar.radio("显示模式", ["对比显示", "仅显示划线图", "仅显示原图"], index=1)
    zoom_level = st.sidebar.slider("图片缩放", 0.25, 2.0, 1.0, 0.25)
    show_line_numbers = st.sidebar.checkbox("显示线条编号", value=True)

    # Undo / redo.
    st.sidebar.divider()
    st.sidebar.subheader("↩️ 撤销/重做")

    col1, col2 = st.sidebar.columns(2)
    with col1:
        if st.button("↩️ 撤销", disabled=len(st.session_state.undo_stack) == 0):
            if undo_last_action():
                clear_table_image_cache()
                st.success("✅ 已撤销")
                st.rerun()

    with col2:
        if st.button("↪️ 重做", disabled=len(st.session_state.redo_stack) == 0):
            if redo_last_action():
                clear_table_image_cache()
                st.success("✅ 已重做")
                st.rerun()

    st.sidebar.info(f"📚 历史记录: {len(st.session_state.undo_stack)} 条")

    # Structure analysis (new-annotation mode only).
    if is_new_mode and st.button("🔍 分析表格结构"):
        with st.spinner("分析中..."):
            try:
                generator = st.session_state.generator
                structure = generator.analyze_table_structure(
                    y_tolerance=y_tolerance,
                    x_tolerance=x_tolerance,
                    min_row_height=min_row_height
                )

                if not structure:
                    st.warning("⚠️ 未检测到表格结构")
                    st.stop()

                structure['modified_h_lines'] = set()
                structure['modified_v_lines'] = set()

                st.session_state.structure = structure

                _reset_history_and_cache()

                st.success(
                    f"✅ 检测到 {len(structure['rows'])} 行({len(structure['horizontal_lines'])} 条横线),"
                    f"{len(structure['columns'])} 列({len(structure['vertical_lines'])} 条竖线)"
                )

                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    st.metric("行数", len(structure['rows']))
                with col2:
                    st.metric("横线数", len(structure['horizontal_lines']))
                with col3:
                    st.metric("列数", len(structure['columns']))
                with col4:
                    st.metric("竖线数", len(structure['vertical_lines']))

            except Exception as e:
                st.error(f"❌ 分析失败: {e}")
                import traceback
                st.code(traceback.format_exc())
                st.stop()

    # Everything below needs a structure (shared by both modes).
    if 'structure' in st.session_state and st.session_state.structure:
        structure = st.session_state.structure

        img_with_lines = get_cached_table_lines_image(
            image,
            structure,
            line_width=line_width,
            show_numbers=show_line_numbers
        )

        if display_mode == "对比显示":
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("原图")
                st.image(image, use_container_width=True)

            with col2:
                st.subheader("添加表格线")
                st.image(img_with_lines, use_container_width=True)

        elif display_mode == "仅显示划线图":
            display_width = int(img_with_lines.width * zoom_level)

            st.subheader(f"表格线图 (缩放: {zoom_level:.0%})")
            st.image(img_with_lines, width=display_width)

        else:
            display_width = int(image.width * zoom_level)

            st.subheader(f"原图 (缩放: {zoom_level:.0%})")
            st.image(image, width=display_width)

        with st.expander("📊 表格结构详情"):
            st.json({
                "行数": len(structure.get('rows', [])),
                "列数": len(structure.get('columns', [])),
                "横线数": len(structure.get('horizontal_lines', [])),
                "竖线数": len(structure.get('vertical_lines', [])),
                "横线坐标": structure.get('horizontal_lines', []),
                "竖线坐标": structure.get('vertical_lines', []),
                "标准行高": structure.get('row_height'),
                "列宽度": structure.get('col_widths'),
                "修改的横线": list(structure.get('modified_h_lines', set())),
                "修改的竖线": list(structure.get('modified_v_lines', set()))
            })

        # Manual adjustment, working on the line coordinate lists.
        st.subheader("🛠️ 手动调整")

        adjust_type = st.radio(
            "调整类型",
            ["调整横线", "调整竖线", "添加横线", "删除横线", "添加竖线", "删除竖线"],
            horizontal=True
        )

        if adjust_type == "调整横线":
            horizontal_lines = structure.get('horizontal_lines', [])
            if len(horizontal_lines) > 0:
                line_index = st.selectbox(
                    "选择横线",
                    range(len(horizontal_lines)),
                    format_func=lambda x: f"第 {x+1} 条横线 (Y: {horizontal_lines[x]}) {'🔴已修改' if x in structure.get('modified_h_lines', set()) else ''}"
                )

                new_y = st.number_input(
                    "新的Y坐标",
                    value=int(horizontal_lines[line_index]),
                    step=1
                )

                if st.button("应用调整"):
                    save_state_for_undo(structure)

                    structure['horizontal_lines'][line_index] = new_y
                    structure.setdefault('modified_h_lines', set()).add(line_index)

                    # Keep the row intervals in sync: line i starts row i and
                    # ends row i-1 (when those rows exist).
                    rows = structure.get('rows', [])
                    if line_index < len(rows):
                        rows[line_index]['y_start'] = new_y
                    if 0 < line_index <= len(rows):
                        rows[line_index - 1]['y_end'] = new_y

                    clear_table_image_cache()
                    st.success("✅ 已调整")
                    st.rerun()
            else:
                st.warning("⚠️ 没有检测到横线")

        elif adjust_type == "调整竖线":
            vertical_lines = structure.get('vertical_lines', [])
            if len(vertical_lines) > 0:
                line_index = st.selectbox(
                    "选择竖线",
                    range(len(vertical_lines)),
                    format_func=lambda x: f"第 {x+1} 条竖线 (X: {vertical_lines[x]}) {'🔴已修改' if x in structure.get('modified_v_lines', set()) else ''}"
                )

                new_x = st.number_input(
                    "新的X坐标",
                    value=int(vertical_lines[line_index]),
                    step=1
                )

                if st.button("应用调整"):
                    save_state_for_undo(structure)

                    structure['vertical_lines'][line_index] = new_x
                    structure.setdefault('modified_v_lines', set()).add(line_index)

                    # Keep the column intervals in sync with the moved line.
                    columns = structure.get('columns', [])
                    if line_index < len(columns):
                        columns[line_index]['x_start'] = new_x
                    if 0 < line_index <= len(columns):
                        columns[line_index - 1]['x_end'] = new_x

                    clear_table_image_cache()
                    st.success("✅ 已调整")
                    st.rerun()
            else:
                st.warning("⚠️ 没有检测到竖线")

        elif adjust_type == "添加横线":
            horizontal_lines = structure.get('horizontal_lines', [])
            added_y = st.number_input(
                "新横线的Y坐标",
                min_value=0,
                value=int(image.height // 2),
                step=1
            )

            if st.button("➕ 添加横线"):
                if added_y in horizontal_lines:
                    st.warning("⚠️ 该位置已存在横线")
                else:
                    save_state_for_undo(structure)

                    _insert_line(structure, 'horizontal_lines', 'modified_h_lines', added_y)
                    _rebuild_rows(structure)

                    clear_table_image_cache()
                    st.success("✅ 已添加横线")
                    st.rerun()

        elif adjust_type == "添加竖线":
            vertical_lines = structure.get('vertical_lines', [])
            added_x = st.number_input(
                "新竖线的X坐标",
                min_value=0,
                value=int(image.width // 2),
                step=1
            )

            if st.button("➕ 添加竖线"):
                if added_x in vertical_lines:
                    st.warning("⚠️ 该位置已存在竖线")
                else:
                    save_state_for_undo(structure)

                    _insert_line(structure, 'vertical_lines', 'modified_v_lines', added_x)
                    _rebuild_columns(structure)

                    clear_table_image_cache()
                    st.success("✅ 已添加竖线")
                    st.rerun()

        elif adjust_type == "删除横线":
            horizontal_lines = structure.get('horizontal_lines', [])
            if len(horizontal_lines) > 0:
                lines_to_delete = st.multiselect(
                    "选择要删除的横线(可多选)",
                    range(len(horizontal_lines)),
                    format_func=lambda x: f"第 {x+1} 条横线 (Y: {horizontal_lines[x]}) {'🔴已修改' if x in structure.get('modified_h_lines', set()) else ''}"
                )

                if lines_to_delete and st.button("🗑️ 批量删除", type="primary"):
                    save_state_for_undo(structure)

                    # Delete from the back so earlier indices stay valid.
                    for idx in sorted(lines_to_delete, reverse=True):
                        del structure['horizontal_lines'][idx]

                    _rebuild_rows(structure)

                    # Indices shifted, so the old markers no longer apply.
                    structure['modified_h_lines'] = set()

                    clear_table_image_cache()
                    st.success(f"✅ 已删除 {len(lines_to_delete)} 条横线")
                    st.rerun()

                st.info(f"💡 当前有 {len(horizontal_lines)} 条横线,已选择 {len(lines_to_delete)} 条")
            else:
                st.warning("⚠️ 没有可删除的横线")

        elif adjust_type == "删除竖线":
            vertical_lines = structure.get('vertical_lines', [])
            if len(vertical_lines) > 0:
                lines_to_delete = st.multiselect(
                    "选择要删除的竖线(可多选)",
                    range(len(vertical_lines)),
                    format_func=lambda x: f"第 {x+1} 条竖线 (X: {vertical_lines[x]}) {'🔴已修改' if x in structure.get('modified_v_lines', set()) else ''}"
                )

                if lines_to_delete and st.button("🗑️ 批量删除", type="primary"):
                    save_state_for_undo(structure)

                    # Delete from the back so earlier indices stay valid.
                    for idx in sorted(lines_to_delete, reverse=True):
                        del structure['vertical_lines'][idx]

                    # Also recomputes the column widths.
                    _rebuild_columns(structure)

                    # Indices shifted, so the old markers no longer apply.
                    structure['modified_v_lines'] = set()

                    clear_table_image_cache()
                    st.success(f"✅ 已删除 {len(lines_to_delete)} 条竖线")
                    st.rerun()

                st.info(f"💡 当前有 {len(vertical_lines)} 条竖线,已选择 {len(lines_to_delete)} 条")
            else:
                st.warning("⚠️ 没有可删除的列")

        # Saving.
        st.divider()

        save_col1, save_col2, save_col3 = st.columns(3)

        with save_col1:
            save_structure = st.checkbox("保存表格结构配置", value=True)

        with save_col2:
            save_image = st.checkbox("保存表格线图片", value=True)

        with save_col3:
            line_color_option = st.selectbox(
                "保存时线条颜色",
                ["黑色", "蓝色", "红色"],
                index=0
            )

        if st.button("💾 保存", type="primary"):
            output_dir = Path("output/table_structures")
            output_dir.mkdir(parents=True, exist_ok=True)

            # Works even when only a config (no image) was loaded.
            base_name = _default_save_stem()
            saved_files = []

            if save_structure:
                structure_path = output_dir / f"{base_name}_structure.json"

                # Optional keys use .get() so legacy or edited structures
                # that lack them can still be saved; sets become sorted lists
                # because JSON has no set type.
                save_structure_data = {
                    'rows': structure.get('rows', []),
                    'columns': structure.get('columns', []),
                    'horizontal_lines': structure.get('horizontal_lines', []),
                    'vertical_lines': structure.get('vertical_lines', []),
                    'row_height': structure.get('row_height'),
                    'col_widths': structure.get('col_widths'),
                    'table_bbox': structure.get('table_bbox'),
                    'modified_h_lines': sorted(structure.get('modified_h_lines', set())),
                    'modified_v_lines': sorted(structure.get('modified_v_lines', set()))
                }

                # Serialize once; the same text is written and offered for
                # download, so no re-read with a platform-default encoding.
                config_text = json.dumps(save_structure_data, indent=2, ensure_ascii=False)
                structure_path.write_text(config_text, encoding='utf-8')

                saved_files.append(("配置文件", structure_path))

                st.download_button(
                    "📥 下载配置文件",
                    config_text,
                    file_name=f"{base_name}_structure.json",
                    mime="application/json"
                )

            if save_image:
                color_map = {
                    "黑色": (0, 0, 0),
                    "蓝色": (0, 0, 255),
                    "红色": (255, 0, 0)
                }
                selected_color = color_map[line_color_option]

                # The saved image has no labels and a single line colour.
                clean_img = draw_clean_table_lines(
                    image,
                    structure,
                    line_width=line_width,
                    line_color=selected_color
                )

                output_image_path = output_dir / f"{base_name}_with_lines.png"
                clean_img.save(output_image_path)
                saved_files.append(("表格线图片", output_image_path))

                import io
                buf = io.BytesIO()
                clean_img.save(buf, format='PNG')
                buf.seek(0)

                st.download_button(
                    "📥 下载表格线图片",
                    buf,
                    file_name=f"{base_name}_with_lines.png",
                    mime="image/png"
                )

            if saved_files:
                st.success(f"✅ 已保存 {len(saved_files)} 个文件:")
                for file_type, file_path in saved_files:
                    st.info(f" • {file_type}: {file_path}")
|
|
|
-
|
|
|
-
|
|
|
# Script entry point: build the editor page when this module is run directly.
if __name__ == "__main__":
    create_table_line_editor()
|