streamlit_table_line_editor.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. """
  2. 表格线可视化编辑器
  3. 支持人工调整表格线位置
  4. """
  5. import warnings
  6. # 过滤 PaddleX 的语法警告
  7. warnings.filterwarnings('ignore', category=SyntaxWarning, module='paddlex')
  8. import streamlit as st
  9. from pathlib import Path
  10. from PIL import Image
  11. import yaml
  12. from typing import Dict, List, Optional, Tuple
  13. import argparse
  14. import sys
  15. try:
  16. from table_line_generator import TableLineGenerator
  17. except ImportError:
  18. from .table_line_generator import TableLineGenerator
  19. # 导入编辑器模块
  20. from editor import (
  21. # UI 组件
  22. create_file_uploader_section,
  23. create_display_settings_section,
  24. create_undo_redo_section,
  25. create_analysis_section,
  26. create_save_section,
  27. create_directory_selector,
  28. # 新增的模块功能
  29. setup_new_annotation_mode,
  30. setup_edit_annotation_mode,
  31. render_table_structure_view,
  32. # 绘图
  33. get_cached_table_lines_image,
  34. # 状态管理
  35. init_undo_stack,
  36. # 调整
  37. create_adjustment_section,
  38. show_image_with_scroll,
  39. # 配置
  40. load_table_editor_config,
  41. build_data_source_catalog,
  42. parse_table_editor_cli_args,
  43. # 🆕 批量应用模板
  44. create_batch_template_section,
  45. )
  46. DEFAULT_CONFIG_PATH = Path(__file__).with_name("table_line_generator.yaml")
  47. @st.cache_resource
  48. def get_cli_args():
  49. return parse_table_editor_cli_args()
  50. @st.cache_resource
  51. def get_table_editor_config():
  52. """缓存配置加载(整个 session 共享)"""
  53. cli_args = get_cli_args()
  54. config_path = (
  55. Path(cli_args.config).expanduser()
  56. if cli_args.config
  57. else DEFAULT_CONFIG_PATH
  58. )
  59. return load_table_editor_config(config_path)
  60. def create_table_line_editor():
  61. """创建表格线编辑器界面"""
  62. # 配置页面
  63. st.set_page_config(
  64. page_title="表格线编辑器",
  65. page_icon="📏",
  66. layout="wide",
  67. initial_sidebar_state="expanded"
  68. )
  69. st.title("📏 表格线编辑器")
  70. # 🎯 从缓存获取配置
  71. TABLE_EDITOR_CONFIG = get_table_editor_config()
  72. VIEWPORT_WIDTH = TABLE_EDITOR_CONFIG["viewport"]["width"]
  73. VIEWPORT_HEIGHT = TABLE_EDITOR_CONFIG["viewport"]["height"]
  74. DATA_SOURCES = TABLE_EDITOR_CONFIG.get("data_sources", [])
  75. # 初始化 session_state(集中管理)
  76. if 'loaded_json_name' not in st.session_state:
  77. st.session_state.loaded_json_name = None
  78. if 'loaded_image_name' not in st.session_state:
  79. st.session_state.loaded_image_name = None
  80. if 'loaded_config_name' not in st.session_state:
  81. st.session_state.loaded_config_name = None
  82. if 'ocr_data' not in st.session_state:
  83. st.session_state.ocr_data = None
  84. if 'image' not in st.session_state:
  85. st.session_state.image = None
  86. # 🆕 目录模式专用状态
  87. if 'dir_selected_index' not in st.session_state:
  88. st.session_state.dir_selected_index = 0
  89. if 'last_loaded_entry' not in st.session_state:
  90. st.session_state.last_loaded_entry = None
  91. if 'dir_auto_mode' not in st.session_state:
  92. st.session_state.dir_auto_mode = None
  93. if 'current_data_source' not in st.session_state: # 🔑 新增
  94. st.session_state.current_data_source = None
  95. if 'current_output_config' not in st.session_state: # 🔑 新增
  96. st.session_state.current_output_config = None
  97. # 初始化撤销/重做栈
  98. init_undo_stack()
  99. # 🆕 工作模式选择
  100. st.sidebar.header("📂 工作模式")
  101. work_mode = st.sidebar.radio(
  102. "选择模式",
  103. ["🆕 新建标注", "📂 加载已有标注", "📁 目录模式"],
  104. index=0
  105. )
  106. # 📁 目录模式
  107. if work_mode == "📁 目录模式":
  108. if not DATA_SOURCES:
  109. st.sidebar.warning("未配置 data_sources")
  110. return
  111. auto_mode = create_directory_selector(DATA_SOURCES, TABLE_EDITOR_CONFIG["output"])
  112. # 显示当前数据源
  113. if st.session_state.current_data_source:
  114. ds_name = st.session_state.current_data_source.get("name", "未知")
  115. st.sidebar.success(f"✅ 数据源: {ds_name}")
  116. if auto_mode == "new":
  117. if not (st.session_state.ocr_data and st.session_state.image):
  118. st.warning("⚠️ 缺少必要数据")
  119. return
  120. _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
  121. st.session_state.ocr_data,
  122. st.session_state.image,
  123. TABLE_EDITOR_CONFIG["display"]
  124. )
  125. else: # edit
  126. if 'structure' not in st.session_state:
  127. st.warning("⚠️ 结构加载失败")
  128. return
  129. image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
  130. st.session_state.structure,
  131. st.session_state.image,
  132. TABLE_EDITOR_CONFIG["display"]
  133. )
  134. # 统一渲染
  135. if 'structure' in st.session_state and st.session_state.structure:
  136. render_table_structure_view(
  137. st.session_state.structure,
  138. st.session_state.image or Image.new('RGB', (2000, 2000), 'white'),
  139. line_width, # 🔑 统一使用这个 line_width
  140. display_mode,
  141. zoom_level,
  142. show_line_numbers,
  143. VIEWPORT_WIDTH,
  144. VIEWPORT_HEIGHT
  145. )
  146. # 保存区域
  147. create_save_section(
  148. auto_mode,
  149. st.session_state.structure,
  150. st.session_state.image,
  151. line_width, # 🔑 传递给保存区域
  152. st.session_state.current_output_config or TABLE_EDITOR_CONFIG["output"]
  153. )
  154. # 🆕 批量应用模板区域(仅在 edit 模式显示)
  155. if auto_mode == "edit":
  156. # 🔑 获取当前的线条颜色名称(从保存区域的选择)
  157. output_cfg = TABLE_EDITOR_CONFIG["output"]
  158. line_colors = output_cfg.get("line_colors")
  159. defaults = output_cfg.get("defaults", {})
  160. default_color = defaults.get("line_color", line_colors[0]["name"])
  161. # 🔑 传递当前页的设置
  162. create_batch_template_section(
  163. current_line_width=line_width,
  164. current_line_color=st.session_state.get('save_line_color', default_color)
  165. )
  166. return
  167. # 🆕 新建标注模式
  168. if work_mode == "🆕 新建标注":
  169. create_file_uploader_section(work_mode)
  170. if not (st.session_state.ocr_data and st.session_state.image):
  171. st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
  172. with st.expander("📖 使用说明"):
  173. st.markdown("""
  174. ### 🆕 新建标注模式
  175. **支持的OCR格式**
  176. **1. PPStructure V3 格式 (推荐)**
  177. ```json
  178. {
  179. "parsing_res_list": [...],
  180. "overall_ocr_res": {
  181. "rec_boxes": [[x1, y1, x2, y2], ...],
  182. "rec_texts": ["文本1", "文本2", ...]
  183. }
  184. }
  185. ```
  186. **2. 标准格式**
  187. ```json
  188. [
  189. {
  190. "text": "文本内容",
  191. "bbox": [x1, y1, x2, y2]
  192. }
  193. ]
  194. ```
  195. ### 📂 加载已有标注模式
  196. 1. 上传之前保存的 `*_structure.json` 配置文件
  197. 2. 上传对应的图片(可选)
  198. 3. 继续调整表格线位置
  199. 4. 保存更新后的配置
  200. """)
  201. return
  202. st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")
  203. _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
  204. st.session_state.ocr_data,
  205. st.session_state.image,
  206. TABLE_EDITOR_CONFIG["display"]
  207. )
  208. if 'structure' in st.session_state and st.session_state.structure:
  209. render_table_structure_view(
  210. st.session_state.structure,
  211. st.session_state.image,
  212. line_width,
  213. display_mode,
  214. zoom_level,
  215. show_line_numbers,
  216. VIEWPORT_WIDTH,
  217. VIEWPORT_HEIGHT
  218. )
  219. create_save_section(
  220. work_mode,
  221. st.session_state.structure,
  222. st.session_state.image,
  223. line_width,
  224. TABLE_EDITOR_CONFIG["output"]
  225. )
  226. return
  227. # 📂 加载已有标注模式
  228. if work_mode == "📂 加载已有标注":
  229. create_file_uploader_section(work_mode)
  230. if 'structure' not in st.session_state:
  231. st.info("👆 请在左侧上传配置文件 (*_structure.json)")
  232. with st.expander("📖 使用说明"):
  233. st.markdown("""
  234. ### 📂 加载已有标注
  235. **步骤:**
  236. 1. **上传配置文件**:选择之前保存的 `*_structure.json`
  237. 2. **上传图片**(可选):上传对应的图片以查看效果
  238. 3. **调整表格线**:使用下方的工具调整横线/竖线位置
  239. 4. **保存更新**:保存修改后的配置
  240. **提示:**
  241. - 即使没有图片,也可以直接编辑配置文件中的坐标
  242. - 配置文件包含完整的表格结构信息
  243. - 可以应用到同类型的其他页面
  244. """)
  245. return
  246. if st.session_state.image is None:
  247. st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")
  248. image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
  249. st.session_state.structure,
  250. st.session_state.image,
  251. TABLE_EDITOR_CONFIG["display"]
  252. )
  253. render_table_structure_view(
  254. st.session_state.structure,
  255. image,
  256. line_width,
  257. display_mode,
  258. zoom_level,
  259. show_line_numbers,
  260. VIEWPORT_WIDTH,
  261. VIEWPORT_HEIGHT
  262. )
  263. create_save_section(
  264. work_mode,
  265. st.session_state.structure,
  266. image,
  267. line_width,
  268. TABLE_EDITOR_CONFIG["output"]
  269. )
  270. if __name__ == "__main__":
  271. try:
  272. create_table_line_editor()
  273. except GeneratorExit:
  274. pass # Streamlit 内部清理,忽略
  275. except KeyboardInterrupt:
  276. st.info("👋 程序已停止")
  277. except Exception as e:
  278. st.error(f"❌ 程序崩溃: {e}")
  279. import traceback
  280. with st.expander("🔍 详细错误信息"):
  281. st.code(traceback.format_exc())