| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001 |
- """
- 表格线可视化编辑器
- 支持人工调整表格线位置
- """
- import streamlit as st
- from pathlib import Path
- import json
- from PIL import Image, ImageDraw, ImageFont
- import numpy as np
- import copy
- try:
- from .table_line_generator import TableLineGenerator
- except ImportError:
- from table_line_generator import TableLineGenerator
def parse_ocr_data(ocr_data):
    """Normalize uploaded OCR data into a list of text-box dicts.

    Accepted inputs:
      * a JSON string (decoded first),
      * a dict carrying both ``parsing_res_list`` and ``overall_ocr_res``
        (treated as a PPStructure V3 result and delegated to
        ``TableLineGenerator.parse_ppstructure_result``),
      * a list of dicts whose first item has a ``bbox`` key.

    Problems are reported through Streamlit messages rather than raised.

    Args:
        ocr_data: Raw OCR payload (str, dict or list).

    Returns:
        The list of text boxes, or an empty list when the payload cannot
        be used. Only the first list item is inspected for validity.
    """
    payload = ocr_data

    # A string is assumed to be serialized JSON.
    if isinstance(payload, str):
        try:
            payload = json.loads(payload)
        except json.JSONDecodeError:
            st.error("❌ JSON 格式错误,无法解析")
            return []

    # PPStructure V3 results are recognized by the presence of both keys.
    ppstructure_keys = ('parsing_res_list', 'overall_ocr_res')
    if isinstance(payload, dict) and all(key in payload for key in ppstructure_keys):
        st.info("🔍 检测到 PPStructure V3 格式")
        try:
            table_bbox, text_boxes = TableLineGenerator.parse_ppstructure_result(payload)
            st.success(f"✅ 表格区域: {table_bbox}")
            st.success(f"✅ 表格内文本框: {len(text_boxes)} 个")
        except Exception as exc:
            st.error(f"❌ 解析 PPStructure 结果失败: {exc}")
            return []
        return text_boxes

    # Anything else has to be a plain list of records.
    if not isinstance(payload, list):
        st.error(f"❌ OCR 数据应该是列表,实际类型: {type(payload)}")
        return []

    if not payload:
        st.warning("⚠️ OCR 数据为空")
        return []

    sample = payload[0]
    if not isinstance(sample, dict):
        st.error(f"❌ OCR 数据项应该是字典,实际类型: {type(sample)}")
        return []

    if 'bbox' not in sample:
        st.error("❌ OCR 数据缺少 'bbox' 字段")
        st.info("💡 支持的格式示例:\n```json\n[\n {\n \"text\": \"文本\",\n \"bbox\": [x1, y1, x2, y2]\n }\n]\n```")
        return []

    return payload
# Fonts tried, in order, for the R<n>/C<n> labels. The first entry is the
# path the editor has always used; the bare file names let Pillow search
# the platform font directories.
_LABEL_FONT_CANDIDATES = (
    "/System/Library/Fonts/Helvetica.ttc",
    "DejaVuSans.ttf",
    "arial.ttf",
)


def _load_label_font(size=20):
    """Return the first loadable candidate font, else Pillow's default font."""
    for candidate in _LABEL_FONT_CANDIDATES:
        try:
            return ImageFont.truetype(candidate, size)
        except OSError:
            # Font file missing or unreadable: try the next candidate.
            continue
    return ImageFont.load_default()


def _draw_label(draw, position, text, color, font):
    """Draw ``text`` at ``position`` on a white box with a black outline."""
    box = draw.textbbox(position, text, font=font)
    draw.rectangle(box, fill='white', outline='black')
    draw.text(position, text, fill=color, font=font)


def draw_table_lines_with_numbers(image, structure, line_width=2, show_numbers=True):
    """
    Draw numbered table lines on a copy of ``image``.

    Lines listed in ``modified_h_lines`` / ``modified_v_lines`` are drawn in
    red, all others in blue. Horizontal lines span from the first to the
    last vertical line (or the full image width when there are no vertical
    lines), and vice versa for vertical lines.

    Args:
        image: PIL Image object; it is not modified.
        structure: Table structure dict; reads ``horizontal_lines``,
            ``vertical_lines``, ``modified_h_lines`` and ``modified_v_lines``.
        line_width: Line width in pixels.
        show_numbers: Whether to draw ``R<n>`` labels left of each
            horizontal line and ``C<n>`` labels above and below each
            vertical line.

    Returns:
        A new image with the table lines (and optional labels) drawn.
    """
    modified_color = (255, 0, 0)
    default_color = (0, 0, 255)

    canvas = image.copy()
    draw = ImageDraw.Draw(canvas)

    # The font is only needed for labels, so skip the lookup otherwise.
    font = _load_label_font() if show_numbers else None

    horizontal_lines = structure.get('horizontal_lines', [])
    vertical_lines = structure.get('vertical_lines', [])
    modified_h_lines = structure.get('modified_h_lines', set())
    modified_v_lines = structure.get('modified_v_lines', set())

    # Drawing extent: the outermost lines, or the image borders as fallback.
    x_start = vertical_lines[0] if vertical_lines else 0
    x_end = vertical_lines[-1] if vertical_lines else canvas.width
    y_start = horizontal_lines[0] if horizontal_lines else 0
    y_end = horizontal_lines[-1] if horizontal_lines else canvas.height

    for idx, y in enumerate(horizontal_lines):
        color = modified_color if idx in modified_h_lines else default_color
        draw.line([(x_start, y), (x_end, y)], fill=color, width=line_width)
        if show_numbers:
            _draw_label(draw, (x_start - 35, y - 10), f"R{idx+1}", color, font)

    for idx, x in enumerate(vertical_lines):
        color = modified_color if idx in modified_v_lines else default_color
        draw.line([(x, y_start), (x, y_end)], fill=color, width=line_width)
        if show_numbers:
            label = f"C{idx+1}"
            # Column labels are repeated above and below the table.
            _draw_label(draw, (x - 10, y_start - 25), label, color, font)
            _draw_label(draw, (x - 10, y_end + 25), label, color, font)

    return canvas
# Plain table-line renderer used when saving the result image.
def draw_clean_table_lines(image, structure, line_width=2, line_color=(0, 0, 0)):
    """
    Draw plain table lines for export: one color, no labels.

    Args:
        image: PIL Image object; it is not modified.
        structure: Table structure dict; reads ``horizontal_lines`` and
            ``vertical_lines``.
        line_width: Line width in pixels.
        line_color: RGB color used for every line, black by default.

    Returns:
        A copy of ``image`` with the lines drawn. The copy is returned
        unchanged when either line list is empty.
    """
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)

    ys = structure.get('horizontal_lines', [])
    xs = structure.get('vertical_lines', [])
    if not (ys and xs):
        return canvas

    # The outermost lines bound every segment.
    left, right = xs[0], xs[-1]
    top, bottom = ys[0], ys[-1]

    # Horizontal segments first, then vertical ones.
    segments = [((left, y), (right, y)) for y in ys]
    segments.extend(((x, top), (x, bottom)) for x in xs)

    for start, end in segments:
        pen.line([start, end], fill=line_color, width=line_width)

    return canvas
def init_undo_stack():
    """Create empty undo and redo stacks in the session state if absent."""
    for stack_name in ('undo_stack', 'redo_stack'):
        if stack_name not in st.session_state:
            st.session_state[stack_name] = []
def save_state_for_undo(structure):
    """Push a deep copy of ``structure`` onto the undo stack.

    Recording a new state clears the redo stack. The undo stack keeps at
    most 20 snapshots; the oldest one is dropped once that is exceeded.
    """
    max_depth = 20

    history = st.session_state.undo_stack
    history.append(copy.deepcopy(structure))

    # A fresh edit makes any previously undone states unreachable.
    st.session_state.redo_stack = []

    if len(history) > max_depth:
        del history[0]
def undo_last_action():
    """Restore the most recent undo snapshot.

    The current structure is deep-copied onto the redo stack first.

    Returns:
        True when a snapshot was restored, False when the undo stack is empty.
    """
    state = st.session_state
    if not state.undo_stack:
        return False

    state.redo_stack.append(copy.deepcopy(state.structure))
    state.structure = state.undo_stack.pop()
    return True
def redo_last_action():
    """Re-apply the most recently undone snapshot.

    The current structure is deep-copied onto the undo stack first.

    Returns:
        True when a snapshot was restored, False when the redo stack is empty.
    """
    state = st.session_state
    if not state.redo_stack:
        return False

    state.undo_stack.append(copy.deepcopy(state.structure))
    state.structure = state.redo_stack.pop()
    return True
def get_structure_hash(structure, line_width, show_numbers):
    """Return an MD5 hex digest of everything that affects the rendering.

    The digest covers the line coordinates, the (sorted) modified-line
    indices, the line width and the label toggle. It is used to decide
    whether a cached rendering can be reused.
    """
    import hashlib

    def sorted_marks(key):
        # Sets have no stable order, so sort before serializing.
        return sorted(structure.get(key, set()))

    fingerprint = dict(
        horizontal_lines=structure.get('horizontal_lines', []),
        vertical_lines=structure.get('vertical_lines', []),
        modified_h_lines=sorted_marks('modified_h_lines'),
        modified_v_lines=sorted_marks('modified_v_lines'),
        line_width=line_width,
        show_numbers=show_numbers,
    )

    serialized = json.dumps(fingerprint, sort_keys=True)
    return hashlib.md5(serialized.encode()).hexdigest()
def get_cached_table_lines_image(image, structure, line_width, show_numbers):
    """
    Return the numbered table-line rendering, reusing the session cache.

    The cache key is the hash from ``get_structure_hash``; it does not
    cover ``image`` itself.

    Args:
        image: PIL Image object to draw on.
        structure: Table structure dict.
        line_width: Line width in pixels.
        show_numbers: Whether line labels are drawn.

    Returns:
        The image with table lines and labels, from cache when the hash
        matches, otherwise freshly drawn and stored in the cache.
    """
    state = st.session_state

    # Make sure both cache slots exist before reading them.
    for slot in ('cached_table_image', 'cached_table_hash'):
        if slot not in state:
            state[slot] = None

    fingerprint = get_structure_hash(structure, line_width, show_numbers)

    cached = state.cached_table_image
    if cached is not None and state.cached_table_hash == fingerprint:
        return cached

    # Cache miss: redraw and remember the result.
    rendered = draw_table_lines_with_numbers(
        image,
        structure,
        line_width=line_width,
        show_numbers=show_numbers,
    )
    state.cached_table_image = rendered
    state.cached_table_hash = fingerprint
    return rendered
def clear_table_image_cache():
    """Reset the cached rendering and its hash, if those slots exist."""
    for slot in ('cached_table_image', 'cached_table_hash'):
        if slot in st.session_state:
            st.session_state[slot] = None
def load_structure_from_config(config_path: Path) -> dict:
    """
    Load a table structure from a JSON config file.

    Older configs are upgraded in memory:
      * missing ``horizontal_lines`` / ``vertical_lines`` are derived from
        ``rows`` / ``columns`` (each start coordinate plus the last end),
      * ``modified_h_lines`` / ``modified_v_lines`` become sets (empty when
        absent),
      * legacy ``modified_rows`` / ``modified_cols`` are used when the
        corresponding new set is empty.

    Args:
        config_path: Path of the config file (read as UTF-8).

    Returns:
        The table structure dict.
    """
    with open(config_path, 'r', encoding='utf-8') as fh:
        structure = json.load(fh)

    def boundaries(spans, start_key, end_key):
        # Start of every span, then the end of the last one.
        coords = [span[start_key] for span in spans]
        if spans:
            coords.append(spans[-1][end_key])
        return coords

    if 'horizontal_lines' not in structure:
        structure['horizontal_lines'] = boundaries(
            structure.get('rows', []), 'y_start', 'y_end'
        )

    if 'vertical_lines' not in structure:
        structure['vertical_lines'] = boundaries(
            structure.get('columns', []), 'x_start', 'x_end'
        )

    # JSON stores the modification marks as lists; the editor uses sets.
    for marks_key in ('modified_h_lines', 'modified_v_lines'):
        structure[marks_key] = set(structure.get(marks_key, ()))

    # Fall back to the legacy mark names when the new ones are empty.
    legacy_pairs = (
        ('modified_rows', 'modified_h_lines'),
        ('modified_cols', 'modified_v_lines'),
    )
    for legacy_key, marks_key in legacy_pairs:
        if legacy_key in structure and not structure[marks_key]:
            structure[marks_key] = set(structure[legacy_key])

    return structure
# Work-mode labels shown in the sidebar radio.
_MODE_NEW = "🆕 新建标注"
_MODE_LOAD = "📂 加载已有标注"

# Help text shown while the "new annotation" mode is waiting for uploads.
_NEW_MODE_HELP = """
### 🆕 新建标注模式

**支持的OCR格式**

**1. PPStructure V3 格式 (推荐)**
```json
{
  "parsing_res_list": [...],
  "overall_ocr_res": {
    "rec_boxes": [[x1, y1, x2, y2], ...],
    "rec_texts": ["文本1", "文本2", ...]
  }
}
```

**2. 标准格式**
```json
[
  {
    "text": "文本内容",
    "bbox": [x1, y1, x2, y2]
  }
]
```

### 📂 加载已有标注模式

1. 上传之前保存的 `*_structure.json` 配置文件
2. 上传对应的图片(可选)
3. 继续调整表格线位置
4. 保存更新后的配置
"""

# Help text shown while the "load annotation" mode is waiting for a config.
_LOAD_MODE_HELP = """
### 📂 加载已有标注

**步骤:**

1. **上传配置文件**:选择之前保存的 `*_structure.json`
2. **上传图片**(可选):上传对应的图片以查看效果
3. **调整表格线**:使用下方的工具调整横线/竖线位置
4. **保存更新**:保存修改后的配置

**提示:**
- 即使没有图片,也可以直接编辑配置文件中的坐标
- 配置文件包含完整的表格结构信息
- 可以应用到同类型的其他页面
"""

# Per-axis keys and wording, so horizontal and vertical edits share one code path.
_AXIS_CONFIG = {
    'horizontal': {
        'lines_key': 'horizontal_lines',
        'marks_key': 'modified_h_lines',
        'spans_key': 'rows',
        'start_key': 'y_start',
        'end_key': 'y_end',
        'noun': '横线',
        'coord': 'Y',
        'nothing_to_delete': "⚠️ 没有可删除的横线",
    },
    'vertical': {
        'lines_key': 'vertical_lines',
        'marks_key': 'modified_v_lines',
        'spans_key': 'columns',
        'start_key': 'x_start',
        'end_key': 'x_end',
        'noun': '竖线',
        'coord': 'X',
        'nothing_to_delete': "⚠️ 没有可删除的列",
    },
}


def _reset_history():
    """Clear the undo/redo stacks and the cached rendering."""
    st.session_state.undo_stack = []
    st.session_state.redo_stack = []
    clear_table_image_cache()


def _discard_analysis_state():
    """Drop the analyzed structure and generator, then reset the history."""
    for key in ('structure', 'generator'):
        if key in st.session_state:
            del st.session_state[key]
    _reset_history()


def _load_uploaded_ocr_json(uploaded_json):
    """Parse a newly uploaded OCR JSON file into the session state."""
    if uploaded_json is None or st.session_state.loaded_json_name == uploaded_json.name:
        return

    try:
        raw_data = json.load(uploaded_json)

        # Short preview of the raw payload to help diagnose format issues.
        with st.expander("🔍 原始数据结构"):
            if isinstance(raw_data, dict):
                scalar_types = (str, int, float, bool, type(None))
                st.json({
                    k: v if isinstance(v, scalar_types) else f"<{type(v).__name__}>"
                    for k, v in list(raw_data.items())[:5]
                })
            else:
                st.json(raw_data[:3] if len(raw_data) > 3 else raw_data)

        ocr_data = parse_ocr_data(raw_data)
        if not ocr_data:
            st.error("❌ 无法解析 OCR 数据,请检查 JSON 格式")
            st.stop()

        st.session_state.ocr_data = ocr_data
        st.session_state.loaded_json_name = uploaded_json.name
        st.session_state.loaded_config_name = None  # no config is active now

        # New OCR data invalidates any previous analysis.
        _discard_analysis_state()

        st.success(f"✅ 成功加载 {len(ocr_data)} 条 OCR 记录")
    except Exception as exc:
        st.error(f"❌ 加载数据失败: {exc}")
        st.stop()


def _load_uploaded_image(uploaded_image, discard_analysis):
    """Open a newly uploaded image into the session state.

    ``discard_analysis`` is True in new-annotation mode, where a new image
    invalidates the analysis; in load mode only the render cache is cleared.
    """
    if uploaded_image is None or st.session_state.loaded_image_name == uploaded_image.name:
        return

    try:
        st.session_state.image = Image.open(uploaded_image)
        st.session_state.loaded_image_name = uploaded_image.name

        if discard_analysis:
            _discard_analysis_state()
        else:
            clear_table_image_cache()

        st.success(f"✅ 成功加载图片: {uploaded_image.name}")
    except Exception as exc:
        st.error(f"❌ 加载图片失败: {exc}")
        st.stop()


def _load_uploaded_config(uploaded_config):
    """Load a newly uploaded ``*_structure.json`` into the session state."""
    if uploaded_config is None or st.session_state.loaded_config_name == uploaded_config.name:
        return

    import tempfile
    import traceback

    try:
        # load_structure_from_config() reads from a path, so spill the
        # upload to a temp file and always remove it afterwards.
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(
                mode='w', suffix='.json', delete=False, encoding='utf-8'
            ) as tmp:
                tmp_path = Path(tmp.name)
                tmp.write(uploaded_config.getvalue().decode('utf-8'))
            structure = load_structure_from_config(tmp_path)
        finally:
            if tmp_path is not None:
                tmp_path.unlink(missing_ok=True)

        st.session_state.structure = structure
        st.session_state.loaded_config_name = uploaded_config.name
        _reset_history()

        # Legacy or partial configs may lack some keys, hence .get().
        row_count = len(structure.get('rows', []))
        col_count = len(structure.get('columns', []))
        h_count = len(structure.get('horizontal_lines', []))
        v_count = len(structure.get('vertical_lines', []))

        st.success(f"✅ 成功加载配置: {uploaded_config.name}")
        st.info(
            f"📊 表格结构: {row_count}行 x {col_count}列\n\n"
            f"📏 横线数: {h_count}\n\n"
            f"📏 竖线数: {v_count}"
        )

        with st.expander("📋 配置详情"):
            st.json({
                "行数": row_count,
                "列数": col_count,
                "横线数": h_count,
                "竖线数": v_count,
                "行高": structure.get('row_height'),
                "列宽": structure.get('col_widths'),
                "已修改的横线": sorted(structure.get('modified_h_lines', set())),
                "已修改的竖线": sorted(structure.get('modified_v_lines', set())),
            })
    except Exception as exc:
        st.error(f"❌ 加载配置失败: {exc}")
        st.code(traceback.format_exc())
        st.stop()


def _prepare_new_mode():
    """Check new-mode prerequisites; return the image, or None to stop rendering."""
    if st.session_state.ocr_data is None or st.session_state.image is None:
        st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
        with st.expander("📖 使用说明"):
            st.markdown(_NEW_MODE_HELP)
        return None

    image = st.session_state.image
    st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")

    if 'generator' not in st.session_state or st.session_state.generator is None:
        try:
            st.session_state.generator = TableLineGenerator(image, st.session_state.ocr_data)
        except Exception as exc:
            st.error(f"❌ 初始化失败: {exc}")
            st.stop()

    return image


def _prepare_load_mode():
    """Check load-mode prerequisites; return the image (or a blank canvas), or None."""
    if 'structure' not in st.session_state:
        st.info("👆 请在左侧上传配置文件 (*_structure.json)")
        with st.expander("📖 使用说明"):
            st.markdown(_LOAD_MODE_HELP)
        return None

    image = st.session_state.image
    if image is not None:
        return image

    st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")

    # Without an image, draw on a blank canvas large enough for the table.
    structure = st.session_state.structure
    bbox = structure.get('table_bbox')
    if bbox:
        # Image.new needs integer sizes; config values may be floats.
        dummy_width = int(bbox[2]) + 100
        dummy_height = int(bbox[3]) + 100
    else:
        dummy_width = 2000
        dummy_height = 2000

    st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height}) 显示表格结构")
    return Image.new('RGB', (dummy_width, dummy_height), color='white')


def _run_structure_analysis(y_tolerance, x_tolerance, min_row_height):
    """Run the generator's structure analysis and store the result in the session."""
    import traceback

    with st.spinner("分析中..."):
        try:
            structure = st.session_state.generator.analyze_table_structure(
                y_tolerance=y_tolerance,
                x_tolerance=x_tolerance,
                min_row_height=min_row_height,
            )

            if not structure:
                st.warning("⚠️ 未检测到表格结构")
                st.stop()

            structure['modified_h_lines'] = set()
            structure['modified_v_lines'] = set()
            st.session_state.structure = structure
            _reset_history()

            row_count = len(structure['rows'])
            col_count = len(structure['columns'])
            h_count = len(structure['horizontal_lines'])
            v_count = len(structure['vertical_lines'])

            st.success(
                f"✅ 检测到 {row_count} 行({h_count} 条横线),"
                f"{col_count} 列({v_count} 条竖线)"
            )

            metrics = (
                ("行数", row_count),
                ("横线数", h_count),
                ("列数", col_count),
                ("竖线数", v_count),
            )
            for column, (label, value) in zip(st.columns(4), metrics):
                with column:
                    st.metric(label, value)
        except Exception as exc:
            st.error(f"❌ 分析失败: {exc}")
            st.code(traceback.format_exc())
            st.stop()


def _line_option_label(index, lines, marks, noun, coord):
    """Format one entry of a line selector, flagging modified lines."""
    flag = '🔴已修改' if index in marks else ''
    return f"第 {index+1} 条{noun} ({coord}: {lines[index]}) {flag}"


def _rebuild_spans(structure, axis):
    """Rebuild rows/columns (and column widths) from the line coordinates.

    Each span only carries its start and end coordinate, so any other
    per-span fields of the previous rows/columns are dropped.
    """
    cfg = _AXIS_CONFIG[axis]
    lines = structure.get(cfg['lines_key'], [])
    spans = [
        {cfg['start_key']: start, cfg['end_key']: end}
        for start, end in zip(lines, lines[1:])
    ]
    structure[cfg['spans_key']] = spans
    if axis == 'vertical':
        structure['col_widths'] = [span['x_end'] - span['x_start'] for span in spans]


def _render_move_line(structure, axis):
    """UI for moving one existing line to a new coordinate."""
    cfg = _AXIS_CONFIG[axis]
    noun, coord = cfg['noun'], cfg['coord']
    lines = structure.get(cfg['lines_key'], [])
    if not lines:
        st.warning(f"⚠️ 没有检测到{noun}")
        return

    marks = structure.get(cfg['marks_key'], set())
    line_index = st.selectbox(
        f"选择{noun}",
        range(len(lines)),
        format_func=lambda i: _line_option_label(i, lines, marks, noun, coord),
    )
    new_value = st.number_input(f"新的{coord}坐标", value=int(lines[line_index]), step=1)

    if st.button("应用调整"):
        save_state_for_undo(structure)

        lines[line_index] = new_value
        structure.setdefault(cfg['marks_key'], set()).add(line_index)

        # Keep the neighbouring spans in sync; spans may be absent in
        # configs that only carry line coordinates.
        spans = structure.get(cfg['spans_key'], [])
        if line_index < len(spans):
            spans[line_index][cfg['start_key']] = new_value
        if 0 < line_index <= len(spans):
            spans[line_index - 1][cfg['end_key']] = new_value

        clear_table_image_cache()
        st.success("✅ 已调整")
        st.rerun()


def _render_add_line(structure, axis):
    """UI for inserting a new line at a given coordinate."""
    cfg = _AXIS_CONFIG[axis]
    noun, coord = cfg['noun'], cfg['coord']
    lines = structure.setdefault(cfg['lines_key'], [])

    new_value = st.number_input(
        f"新{noun}的{coord}坐标",
        value=int(lines[-1]) if lines else 0,
        step=1,
    )

    if st.button(f"➕ 添加{noun}"):
        if new_value in lines:
            st.warning(f"⚠️ 该位置已存在{noun}")
            return

        save_state_for_undo(structure)

        # Insert after all smaller coordinates to keep ascending order.
        position = sum(1 for value in lines if value < new_value)
        lines.insert(position, new_value)

        # Later lines moved one slot; shift their marks and flag the new line.
        marks = structure.get(cfg['marks_key'], set())
        shifted = {i + 1 if i >= position else i for i in marks}
        shifted.add(position)
        structure[cfg['marks_key']] = shifted

        _rebuild_spans(structure, axis)

        clear_table_image_cache()
        st.success(f"✅ 已添加{noun}")
        st.rerun()


def _render_delete_lines(structure, axis):
    """UI for deleting one or more lines."""
    cfg = _AXIS_CONFIG[axis]
    noun, coord = cfg['noun'], cfg['coord']
    lines = structure.get(cfg['lines_key'], [])
    if not lines:
        st.warning(cfg['nothing_to_delete'])
        return

    marks = structure.get(cfg['marks_key'], set())
    selected = st.multiselect(
        f"选择要删除的{noun}(可多选)",
        range(len(lines)),
        format_func=lambda i: _line_option_label(i, lines, marks, noun, coord),
    )

    if selected and st.button("🗑️ 批量删除", type="primary"):
        save_state_for_undo(structure)

        # Delete from the back so the remaining indices stay valid.
        for idx in sorted(selected, reverse=True):
            del lines[idx]

        _rebuild_spans(structure, axis)

        # Indices no longer match after a deletion, so the marks are reset.
        structure[cfg['marks_key']] = set()

        clear_table_image_cache()
        st.success(f"✅ 已删除 {len(selected)} 条{noun}")
        st.rerun()

    st.info(f"💡 当前有 {len(lines)} 条{noun},已选择 {len(selected)} 条")


def _output_base_name():
    """Choose the output file stem: image name, else config name, else 'table'."""
    image_name = st.session_state.loaded_image_name
    if image_name:
        return Path(image_name).stem

    config_name = st.session_state.loaded_config_name
    if config_name:
        stem = Path(config_name).stem
        suffix = '_structure'
        # Avoid producing "<name>_structure_structure.json".
        if stem.endswith(suffix) and len(stem) > len(suffix):
            stem = stem[:-len(suffix)]
        return stem

    return 'table'


def _render_save_section(structure, image, line_width):
    """UI for saving the structure config and the clean line image."""
    st.divider()

    save_col1, save_col2, save_col3 = st.columns(3)
    with save_col1:
        save_structure = st.checkbox("保存表格结构配置", value=True)
    with save_col2:
        save_image = st.checkbox("保存表格线图片", value=True)
    with save_col3:
        line_color_option = st.selectbox(
            "保存时线条颜色",
            ["黑色", "蓝色", "红色"],
            index=0,
        )

    if not st.button("💾 保存", type="primary"):
        return

    output_dir = Path("output/table_structures")
    output_dir.mkdir(parents=True, exist_ok=True)

    base_name = _output_base_name()
    saved_files = []

    if save_structure:
        structure_path = output_dir / f"{base_name}_structure.json"

        # Sets are not JSON-serializable; optional keys may be missing
        # from configs that were loaded rather than analyzed.
        save_structure_data = {
            'rows': structure.get('rows', []),
            'columns': structure.get('columns', []),
            'horizontal_lines': structure.get('horizontal_lines', []),
            'vertical_lines': structure.get('vertical_lines', []),
            'row_height': structure.get('row_height'),
            'col_widths': structure.get('col_widths'),
            'table_bbox': structure.get('table_bbox'),
            'modified_h_lines': sorted(structure.get('modified_h_lines', set())),
            'modified_v_lines': sorted(structure.get('modified_v_lines', set())),
        }

        # Serialize once; the same text is written to disk and offered
        # for download, so no encoding-dependent re-read is needed.
        payload = json.dumps(save_structure_data, indent=2, ensure_ascii=False)
        structure_path.write_text(payload, encoding='utf-8')
        saved_files.append(("配置文件", structure_path))

        st.download_button(
            "📥 下载配置文件",
            payload,
            file_name=f"{base_name}_structure.json",
            mime="application/json",
        )

    if save_image:
        import io

        color_map = {
            "黑色": (0, 0, 0),
            "蓝色": (0, 0, 255),
            "红色": (255, 0, 0),
        }
        clean_img = draw_clean_table_lines(
            image,
            structure,
            line_width=line_width,
            line_color=color_map[line_color_option],
        )

        output_image_path = output_dir / f"{base_name}_with_lines.png"
        clean_img.save(output_image_path)
        saved_files.append(("表格线图片", output_image_path))

        buf = io.BytesIO()
        clean_img.save(buf, format='PNG')
        buf.seek(0)

        st.download_button(
            "📥 下载表格线图片",
            buf,
            file_name=f"{base_name}_with_lines.png",
            mime="image/png",
        )

    if saved_files:
        st.success(f"✅ 已保存 {len(saved_files)} 个文件:")
        for file_type, file_path in saved_files:
            st.info(f" • {file_type}: {file_path}")


def create_table_line_editor():
    """Build the Streamlit page of the table-line editor.

    The page has two work modes selected in the sidebar:

      * new annotation: upload an OCR JSON and its image, analyze the
        table structure, then adjust the lines;
      * load annotation: upload a saved ``*_structure.json`` (and
        optionally its image) and keep editing it.

    Both modes share the display settings, undo/redo, manual line
    editing (move / add / delete) and saving of the structure config and
    a clean line image under ``output/table_structures``.
    """
    st.set_page_config(
        page_title="表格线编辑器",
        page_icon="📏",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    st.title("📏 表格线编辑器")

    # Session slots that must exist before any widget logic runs.
    for key in ('loaded_json_name', 'loaded_image_name', 'loaded_config_name',
                'ocr_data', 'image'):
        if key not in st.session_state:
            st.session_state[key] = None

    init_undo_stack()

    st.sidebar.header("📂 工作模式")
    work_mode = st.sidebar.radio(
        "选择模式",
        [_MODE_NEW, _MODE_LOAD],
        index=0,
    )
    is_new_mode = work_mode == _MODE_NEW

    # --- Uploads -----------------------------------------------------
    if is_new_mode:
        st.sidebar.subheader("上传文件")
        uploaded_json = st.sidebar.file_uploader("上传OCR结果JSON", type=['json'], key="new_json")
        uploaded_image = st.sidebar.file_uploader("上传对应图片", type=['jpg', 'png'], key="new_image")

        _load_uploaded_ocr_json(uploaded_json)
        _load_uploaded_image(uploaded_image, discard_analysis=True)
    else:
        st.sidebar.subheader("加载已保存的标注")
        uploaded_config = st.sidebar.file_uploader(
            "上传配置文件 (*_structure.json)",
            type=['json'],
            key="load_config",
        )
        uploaded_image_for_config = st.sidebar.file_uploader(
            "上传对应图片(可选)",
            type=['jpg', 'png'],
            key="load_image",
        )

        _load_uploaded_config(uploaded_config)
        _load_uploaded_image(uploaded_image_for_config, discard_analysis=False)

        if 'structure' in st.session_state and st.session_state.image is None:
            st.warning("⚠️ 已加载配置,但未加载对应图片。请上传图片以查看效果。")
            st.info("💡 提示:配置文件已加载,您可以:\n1. 上传对应图片查看效果\n2. 直接编辑配置并保存")

    # --- Prerequisites -----------------------------------------------
    image = _prepare_new_mode() if is_new_mode else _prepare_load_mode()
    if image is None:
        return

    # --- Display settings --------------------------------------------
    st.sidebar.divider()
    st.sidebar.subheader("🖼️ 显示设置")

    line_width = st.sidebar.slider("线条宽度", 1, 5, 2)
    display_mode = st.sidebar.radio("显示模式", ["对比显示", "仅显示划线图", "仅显示原图"], index=1)
    zoom_level = st.sidebar.slider("图片缩放", 0.25, 2.0, 1.0, 0.25)
    show_line_numbers = st.sidebar.checkbox("显示线条编号", value=True)

    # --- Analysis parameters (new mode only) -------------------------
    # The sliders live outside the button branch so their values persist
    # across reruns.
    # NOTE(review): ranges and defaults are a starting point; confirm them
    # against TableLineGenerator.analyze_table_structure.
    if is_new_mode:
        st.sidebar.divider()
        st.sidebar.subheader("🔧 分析参数")
        y_tolerance = st.sidebar.slider("Y轴聚类容差(像素)", 1, 50, 5)
        x_tolerance = st.sidebar.slider("X轴聚类容差(像素)", 1, 50, 10)
        min_row_height = st.sidebar.slider("最小行高(像素)", 5, 100, 20)

    # --- Undo / redo --------------------------------------------------
    st.sidebar.divider()
    st.sidebar.subheader("↩️ 撤销/重做")

    undo_col, redo_col = st.sidebar.columns(2)
    with undo_col:
        if st.button("↩️ 撤销", disabled=len(st.session_state.undo_stack) == 0):
            if undo_last_action():
                clear_table_image_cache()
                st.success("✅ 已撤销")
                st.rerun()

    with redo_col:
        if st.button("↪️ 重做", disabled=len(st.session_state.redo_stack) == 0):
            if redo_last_action():
                clear_table_image_cache()
                st.success("✅ 已重做")
                st.rerun()

    st.sidebar.info(f"📚 历史记录: {len(st.session_state.undo_stack)} 条")

    # --- Structure analysis (new mode only) --------------------------
    if is_new_mode and st.button("🔍 分析表格结构"):
        _run_structure_analysis(y_tolerance, x_tolerance, min_row_height)

    # --- Results (both modes) ----------------------------------------
    if 'structure' not in st.session_state or not st.session_state.structure:
        return
    structure = st.session_state.structure

    img_with_lines = get_cached_table_lines_image(
        image,
        structure,
        line_width=line_width,
        show_numbers=show_line_numbers,
    )

    if display_mode == "对比显示":
        original_col, lined_col = st.columns(2)
        with original_col:
            st.subheader("原图")
            st.image(image, use_container_width=True)
        with lined_col:
            st.subheader("添加表格线")
            st.image(img_with_lines, use_container_width=True)
    elif display_mode == "仅显示划线图":
        st.subheader(f"表格线图 (缩放: {zoom_level:.0%})")
        st.image(img_with_lines, width=int(img_with_lines.width * zoom_level))
    else:
        st.subheader(f"原图 (缩放: {zoom_level:.0%})")
        st.image(image, width=int(image.width * zoom_level))

    with st.expander("📊 表格结构详情"):
        st.json({
            "行数": len(structure.get('rows', [])),
            "列数": len(structure.get('columns', [])),
            "横线数": len(structure.get('horizontal_lines', [])),
            "竖线数": len(structure.get('vertical_lines', [])),
            "横线坐标": structure.get('horizontal_lines', []),
            "竖线坐标": structure.get('vertical_lines', []),
            "标准行高": structure.get('row_height'),
            "列宽度": structure.get('col_widths'),
            "修改的横线": sorted(structure.get('modified_h_lines', set())),
            "修改的竖线": sorted(structure.get('modified_v_lines', set())),
        })

    # --- Manual adjustment -------------------------------------------
    st.subheader("🛠️ 手动调整")

    adjust_type = st.radio(
        "调整类型",
        ["调整横线", "调整竖线", "添加横线", "删除横线", "添加竖线", "删除竖线"],
        horizontal=True,
    )

    # Each radio option maps to one (renderer, axis) pair.
    actions = {
        "调整横线": (_render_move_line, 'horizontal'),
        "调整竖线": (_render_move_line, 'vertical'),
        "添加横线": (_render_add_line, 'horizontal'),
        "添加竖线": (_render_add_line, 'vertical'),
        "删除横线": (_render_delete_lines, 'horizontal'),
        "删除竖线": (_render_delete_lines, 'vertical'),
    }
    render_action, axis = actions[adjust_type]
    render_action(structure, axis)

    # --- Save ---------------------------------------------------------
    _render_save_section(structure, image, line_width)
# Build the editor page when this file is executed as a script
# (presumably launched through `streamlit run`; confirm with the project docs).
if __name__ == "__main__":
    create_table_line_editor()
|