# streamlit_table_line_editor.py (11 KB)
  1. """
  2. 表格线可视化编辑器
  3. 支持人工调整表格线位置
  4. """
  5. import streamlit as st
  6. from pathlib import Path
  7. from PIL import Image
  8. import yaml
  9. from typing import Dict, List, Optional, Tuple
  10. import argparse
  11. import sys
  12. try:
  13. from table_line_generator import TableLineGenerator
  14. except ImportError:
  15. from .table_line_generator import TableLineGenerator
  16. # 导入编辑器模块
  17. from editor import (
  18. # UI 组件
  19. create_file_uploader_section,
  20. create_display_settings_section,
  21. create_undo_redo_section,
  22. create_analysis_section,
  23. create_save_section,
  24. create_directory_selector,
  25. # 新增的模块功能
  26. setup_new_annotation_mode,
  27. setup_edit_annotation_mode,
  28. render_table_structure_view,
  29. # 绘图
  30. get_cached_table_lines_image,
  31. # 状态管理
  32. init_undo_stack,
  33. # 调整
  34. create_adjustment_section,
  35. show_image_with_scroll,
  36. # 配置
  37. load_table_editor_config,
  38. build_data_source_catalog,
  39. parse_table_editor_cli_args,
  40. # 🆕 批量应用模板
  41. create_batch_template_section,
  42. )
# Fallback config file: "table_line_generator.yaml" in the same directory as
# this module. Used by get_table_editor_config() when no CLI config is given.
DEFAULT_CONFIG_PATH = Path(__file__).with_name("table_line_generator.yaml")
  44. @st.cache_resource
  45. def get_cli_args():
  46. return parse_table_editor_cli_args()
  47. @st.cache_resource
  48. def get_table_editor_config():
  49. """缓存配置加载(整个 session 共享)"""
  50. cli_args = get_cli_args()
  51. config_path = (
  52. Path(cli_args.config).expanduser()
  53. if cli_args.config
  54. else DEFAULT_CONFIG_PATH
  55. )
  56. return load_table_editor_config(config_path)
  57. def create_table_line_editor():
  58. """创建表格线编辑器界面"""
  59. # 配置页面
  60. st.set_page_config(
  61. page_title="表格线编辑器",
  62. page_icon="📏",
  63. layout="wide",
  64. initial_sidebar_state="expanded"
  65. )
  66. st.title("📏 表格线编辑器")
  67. # 🎯 从缓存获取配置
  68. TABLE_EDITOR_CONFIG = get_table_editor_config()
  69. VIEWPORT_WIDTH = TABLE_EDITOR_CONFIG["viewport"]["width"]
  70. VIEWPORT_HEIGHT = TABLE_EDITOR_CONFIG["viewport"]["height"]
  71. DATA_SOURCES = TABLE_EDITOR_CONFIG.get("data_sources", [])
  72. # 初始化 session_state(集中管理)
  73. if 'loaded_json_name' not in st.session_state:
  74. st.session_state.loaded_json_name = None
  75. if 'loaded_image_name' not in st.session_state:
  76. st.session_state.loaded_image_name = None
  77. if 'loaded_config_name' not in st.session_state:
  78. st.session_state.loaded_config_name = None
  79. if 'ocr_data' not in st.session_state:
  80. st.session_state.ocr_data = None
  81. if 'image' not in st.session_state:
  82. st.session_state.image = None
  83. # 🆕 目录模式专用状态
  84. if 'dir_selected_index' not in st.session_state:
  85. st.session_state.dir_selected_index = 0
  86. if 'last_loaded_entry' not in st.session_state:
  87. st.session_state.last_loaded_entry = None
  88. if 'dir_auto_mode' not in st.session_state:
  89. st.session_state.dir_auto_mode = None
  90. if 'current_data_source' not in st.session_state: # 🔑 新增
  91. st.session_state.current_data_source = None
  92. if 'current_output_config' not in st.session_state: # 🔑 新增
  93. st.session_state.current_output_config = None
  94. # 初始化撤销/重做栈
  95. init_undo_stack()
  96. # 🆕 工作模式选择
  97. st.sidebar.header("📂 工作模式")
  98. work_mode = st.sidebar.radio(
  99. "选择模式",
  100. ["🆕 新建标注", "📂 加载已有标注", "📁 目录模式"],
  101. index=0
  102. )
  103. # 📁 目录模式
  104. if work_mode == "📁 目录模式":
  105. if not DATA_SOURCES:
  106. st.sidebar.warning("未配置 data_sources")
  107. return
  108. auto_mode = create_directory_selector(DATA_SOURCES, TABLE_EDITOR_CONFIG["output"])
  109. # 显示当前数据源
  110. if st.session_state.current_data_source:
  111. ds_name = st.session_state.current_data_source.get("name", "未知")
  112. st.sidebar.success(f"✅ 数据源: {ds_name}")
  113. if auto_mode == "new":
  114. if not (st.session_state.ocr_data and st.session_state.image):
  115. st.warning("⚠️ 缺少必要数据")
  116. return
  117. _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
  118. st.session_state.ocr_data,
  119. st.session_state.image,
  120. TABLE_EDITOR_CONFIG["display"]
  121. )
  122. else: # edit
  123. if 'structure' not in st.session_state:
  124. st.warning("⚠️ 结构加载失败")
  125. return
  126. image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
  127. st.session_state.structure,
  128. st.session_state.image,
  129. TABLE_EDITOR_CONFIG["display"]
  130. )
  131. # 统一渲染
  132. if 'structure' in st.session_state and st.session_state.structure:
  133. render_table_structure_view(
  134. st.session_state.structure,
  135. st.session_state.image or Image.new('RGB', (2000, 2000), 'white'),
  136. line_width, # 🔑 统一使用这个 line_width
  137. display_mode,
  138. zoom_level,
  139. show_line_numbers,
  140. VIEWPORT_WIDTH,
  141. VIEWPORT_HEIGHT
  142. )
  143. # 保存区域
  144. create_save_section(
  145. auto_mode,
  146. st.session_state.structure,
  147. st.session_state.image,
  148. line_width, # 🔑 传递给保存区域
  149. st.session_state.current_output_config or TABLE_EDITOR_CONFIG["output"]
  150. )
  151. # 🆕 批量应用模板区域(仅在 edit 模式显示)
  152. if auto_mode == "edit":
  153. # 🔑 获取当前的线条颜色名称(从保存区域的选择)
  154. output_cfg = TABLE_EDITOR_CONFIG["output"]
  155. line_colors = output_cfg.get("line_colors")
  156. defaults = output_cfg.get("defaults", {})
  157. default_color = defaults.get("line_color", line_colors[0]["name"])
  158. # 🔑 传递当前页的设置
  159. create_batch_template_section(
  160. current_line_width=line_width,
  161. current_line_color=st.session_state.get('save_line_color', default_color)
  162. )
  163. return
  164. # 🆕 新建标注模式
  165. if work_mode == "🆕 新建标注":
  166. create_file_uploader_section(work_mode)
  167. if not (st.session_state.ocr_data and st.session_state.image):
  168. st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
  169. with st.expander("📖 使用说明"):
  170. st.markdown("""
  171. ### 🆕 新建标注模式
  172. **支持的OCR格式**
  173. **1. PPStructure V3 格式 (推荐)**
  174. ```json
  175. {
  176. "parsing_res_list": [...],
  177. "overall_ocr_res": {
  178. "rec_boxes": [[x1, y1, x2, y2], ...],
  179. "rec_texts": ["文本1", "文本2", ...]
  180. }
  181. }
  182. ```
  183. **2. 标准格式**
  184. ```json
  185. [
  186. {
  187. "text": "文本内容",
  188. "bbox": [x1, y1, x2, y2]
  189. }
  190. ]
  191. ```
  192. ### 📂 加载已有标注模式
  193. 1. 上传之前保存的 `*_structure.json` 配置文件
  194. 2. 上传对应的图片(可选)
  195. 3. 继续调整表格线位置
  196. 4. 保存更新后的配置
  197. """)
  198. return
  199. st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")
  200. _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
  201. st.session_state.ocr_data,
  202. st.session_state.image,
  203. TABLE_EDITOR_CONFIG["display"]
  204. )
  205. if 'structure' in st.session_state and st.session_state.structure:
  206. render_table_structure_view(
  207. st.session_state.structure,
  208. st.session_state.image,
  209. line_width,
  210. display_mode,
  211. zoom_level,
  212. show_line_numbers,
  213. VIEWPORT_WIDTH,
  214. VIEWPORT_HEIGHT
  215. )
  216. create_save_section(
  217. work_mode,
  218. st.session_state.structure,
  219. st.session_state.image,
  220. line_width,
  221. TABLE_EDITOR_CONFIG["output"]
  222. )
  223. return
  224. # 📂 加载已有标注模式
  225. if work_mode == "📂 加载已有标注":
  226. create_file_uploader_section(work_mode)
  227. if 'structure' not in st.session_state:
  228. st.info("👆 请在左侧上传配置文件 (*_structure.json)")
  229. with st.expander("📖 使用说明"):
  230. st.markdown("""
  231. ### 📂 加载已有标注
  232. **步骤:**
  233. 1. **上传配置文件**:选择之前保存的 `*_structure.json`
  234. 2. **上传图片**(可选):上传对应的图片以查看效果
  235. 3. **调整表格线**:使用下方的工具调整横线/竖线位置
  236. 4. **保存更新**:保存修改后的配置
  237. **提示:**
  238. - 即使没有图片,也可以直接编辑配置文件中的坐标
  239. - 配置文件包含完整的表格结构信息
  240. - 可以应用到同类型的其他页面
  241. """)
  242. return
  243. if st.session_state.image is None:
  244. st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")
  245. image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
  246. st.session_state.structure,
  247. st.session_state.image,
  248. TABLE_EDITOR_CONFIG["display"]
  249. )
  250. render_table_structure_view(
  251. st.session_state.structure,
  252. image,
  253. line_width,
  254. display_mode,
  255. zoom_level,
  256. show_line_numbers,
  257. VIEWPORT_WIDTH,
  258. VIEWPORT_HEIGHT
  259. )
  260. create_save_section(
  261. work_mode,
  262. st.session_state.structure,
  263. image,
  264. line_width,
  265. TABLE_EDITOR_CONFIG["output"]
  266. )
  267. if __name__ == "__main__":
  268. try:
  269. create_table_line_editor()
  270. except GeneratorExit:
  271. pass # Streamlit 内部清理,忽略
  272. except KeyboardInterrupt:
  273. st.info("👋 程序已停止")
  274. except Exception as e:
  275. st.error(f"❌ 程序崩溃: {e}")
  276. import traceback
  277. with st.expander("🔍 详细错误信息"):
  278. st.code(traceback.format_exc())