streamlit_table_line_editor.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. """
  2. 表格线可视化编辑器
  3. 支持人工调整表格线位置
  4. """
  5. import streamlit as st
  6. from pathlib import Path
  7. from PIL import Image
  8. import yaml
  9. from typing import Dict, List, Optional, Tuple
  10. import argparse
  11. import sys
  12. try:
  13. from table_line_generator import TableLineGenerator
  14. except ImportError:
  15. from .table_line_generator import TableLineGenerator
  16. # 导入编辑器模块
  17. from editor import (
  18. # UI 组件
  19. create_file_uploader_section,
  20. create_display_settings_section,
  21. create_undo_redo_section,
  22. create_analysis_section,
  23. create_save_section,
  24. create_directory_selector,
  25. # 新增的模块功能
  26. setup_new_annotation_mode,
  27. setup_edit_annotation_mode,
  28. render_table_structure_view,
  29. # 绘图
  30. get_cached_table_lines_image,
  31. # 状态管理
  32. init_undo_stack,
  33. # 调整
  34. create_adjustment_section,
  35. show_image_with_scroll,
  36. # 配置
  37. load_table_editor_config,
  38. build_data_source_catalog,
  39. parse_table_editor_cli_args,
  40. )
# Fallback configuration file: "table_line_generator.yaml" located in the same
# directory as this module. Used when no config path is given on the CLI.
DEFAULT_CONFIG_PATH = Path(__file__).with_name("table_line_generator.yaml")
  42. @st.cache_resource
  43. def get_cli_args():
  44. return parse_table_editor_cli_args()
  45. @st.cache_resource
  46. def get_table_editor_config():
  47. """缓存配置加载(整个 session 共享)"""
  48. cli_args = get_cli_args()
  49. config_path = (
  50. Path(cli_args.config).expanduser()
  51. if cli_args.config
  52. else DEFAULT_CONFIG_PATH
  53. )
  54. return load_table_editor_config(config_path)
  55. def create_table_line_editor():
  56. """创建表格线编辑器界面"""
  57. # 配置页面
  58. st.set_page_config(
  59. page_title="表格线编辑器",
  60. page_icon="📏",
  61. layout="wide",
  62. initial_sidebar_state="expanded"
  63. )
  64. st.title("📏 表格线编辑器")
  65. # 🎯 从缓存获取配置
  66. TABLE_EDITOR_CONFIG = get_table_editor_config()
  67. VIEWPORT_WIDTH = TABLE_EDITOR_CONFIG["viewport"]["width"]
  68. VIEWPORT_HEIGHT = TABLE_EDITOR_CONFIG["viewport"]["height"]
  69. DATA_SOURCES = TABLE_EDITOR_CONFIG.get("data_sources", [])
  70. # 初始化 session_state(集中管理)
  71. if 'loaded_json_name' not in st.session_state:
  72. st.session_state.loaded_json_name = None
  73. if 'loaded_image_name' not in st.session_state:
  74. st.session_state.loaded_image_name = None
  75. if 'loaded_config_name' not in st.session_state:
  76. st.session_state.loaded_config_name = None
  77. if 'ocr_data' not in st.session_state:
  78. st.session_state.ocr_data = None
  79. if 'image' not in st.session_state:
  80. st.session_state.image = None
  81. # 🆕 目录模式专用状态
  82. if 'dir_selected_index' not in st.session_state:
  83. st.session_state.dir_selected_index = 0
  84. if 'last_loaded_entry' not in st.session_state:
  85. st.session_state.last_loaded_entry = None
  86. if 'dir_auto_mode' not in st.session_state:
  87. st.session_state.dir_auto_mode = None
  88. # 初始化撤销/重做栈
  89. init_undo_stack()
  90. # 🆕 工作模式选择
  91. st.sidebar.header("📂 工作模式")
  92. work_mode = st.sidebar.radio(
  93. "选择模式",
  94. ["🆕 新建标注", "📂 加载已有标注", "📁 目录模式"],
  95. index=0
  96. )
  97. # 📁 目录模式
  98. if work_mode == "📁 目录模式":
  99. if not DATA_SOURCES:
  100. st.sidebar.warning("未配置 data_sources")
  101. return
  102. auto_mode = create_directory_selector(DATA_SOURCES, TABLE_EDITOR_CONFIG["output"])
  103. if auto_mode == "new":
  104. if not (st.session_state.ocr_data and st.session_state.image):
  105. st.warning("⚠️ 缺少必要数据")
  106. return
  107. setup_new_annotation_mode(
  108. st.session_state.ocr_data,
  109. st.session_state.image,
  110. TABLE_EDITOR_CONFIG["display"]
  111. )
  112. else: # edit
  113. if 'structure' not in st.session_state:
  114. st.warning("⚠️ 结构加载失败")
  115. return
  116. image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
  117. st.session_state.structure,
  118. st.session_state.image,
  119. TABLE_EDITOR_CONFIG["display"]
  120. )
  121. # 统一渲染
  122. if 'structure' in st.session_state and st.session_state.structure:
  123. render_table_structure_view(
  124. st.session_state.structure,
  125. st.session_state.image or Image.new('RGB', (2000, 2000), 'white'),
  126. line_width if auto_mode == "edit" else st.session_state.get('line_width', 2),
  127. display_mode if auto_mode == "edit" else st.session_state.get('display_mode', "仅显示划线图"),
  128. zoom_level if auto_mode == "edit" else st.session_state.get('zoom_level', 1.0),
  129. show_line_numbers if auto_mode == "edit" else True,
  130. VIEWPORT_WIDTH,
  131. VIEWPORT_HEIGHT
  132. )
  133. create_save_section(
  134. auto_mode,
  135. st.session_state.structure,
  136. st.session_state.image,
  137. line_width if auto_mode == "edit" else 2,
  138. TABLE_EDITOR_CONFIG["output"]
  139. )
  140. return
  141. # 🆕 新建标注模式
  142. if work_mode == "🆕 新建标注":
  143. create_file_uploader_section(work_mode)
  144. if not (st.session_state.ocr_data and st.session_state.image):
  145. st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
  146. with st.expander("📖 使用说明"):
  147. st.markdown("""
  148. ### 🆕 新建标注模式
  149. **支持的OCR格式**
  150. **1. PPStructure V3 格式 (推荐)**
  151. ```json
  152. {
  153. "parsing_res_list": [...],
  154. "overall_ocr_res": {
  155. "rec_boxes": [[x1, y1, x2, y2], ...],
  156. "rec_texts": ["文本1", "文本2", ...]
  157. }
  158. }
  159. ```
  160. **2. 标准格式**
  161. ```json
  162. [
  163. {
  164. "text": "文本内容",
  165. "bbox": [x1, y1, x2, y2]
  166. }
  167. ]
  168. ```
  169. ### 📂 加载已有标注模式
  170. 1. 上传之前保存的 `*_structure.json` 配置文件
  171. 2. 上传对应的图片(可选)
  172. 3. 继续调整表格线位置
  173. 4. 保存更新后的配置
  174. """)
  175. return
  176. st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")
  177. _, _, _, line_width, display_mode, zoom_level, show_line_numbers = setup_new_annotation_mode(
  178. st.session_state.ocr_data,
  179. st.session_state.image,
  180. TABLE_EDITOR_CONFIG["display"]
  181. )
  182. if 'structure' in st.session_state and st.session_state.structure:
  183. render_table_structure_view(
  184. st.session_state.structure,
  185. st.session_state.image,
  186. line_width,
  187. display_mode,
  188. zoom_level,
  189. show_line_numbers,
  190. VIEWPORT_WIDTH,
  191. VIEWPORT_HEIGHT
  192. )
  193. create_save_section(
  194. work_mode,
  195. st.session_state.structure,
  196. st.session_state.image,
  197. line_width,
  198. TABLE_EDITOR_CONFIG["output"]
  199. )
  200. return
  201. # 📂 加载已有标注模式
  202. if work_mode == "📂 加载已有标注":
  203. create_file_uploader_section(work_mode)
  204. if 'structure' not in st.session_state:
  205. st.info("👆 请在左侧上传配置文件 (*_structure.json)")
  206. with st.expander("📖 使用说明"):
  207. st.markdown("""
  208. ### 📂 加载已有标注
  209. **步骤:**
  210. 1. **上传配置文件**:选择之前保存的 `*_structure.json`
  211. 2. **上传图片**(可选):上传对应的图片以查看效果
  212. 3. **调整表格线**:使用下方的工具调整横线/竖线位置
  213. 4. **保存更新**:保存修改后的配置
  214. **提示:**
  215. - 即使没有图片,也可以直接编辑配置文件中的坐标
  216. - 配置文件包含完整的表格结构信息
  217. - 可以应用到同类型的其他页面
  218. """)
  219. return
  220. if st.session_state.image is None:
  221. st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")
  222. image, line_width, display_mode, zoom_level, show_line_numbers = setup_edit_annotation_mode(
  223. st.session_state.structure,
  224. st.session_state.image,
  225. TABLE_EDITOR_CONFIG["display"]
  226. )
  227. render_table_structure_view(
  228. st.session_state.structure,
  229. image,
  230. line_width,
  231. display_mode,
  232. zoom_level,
  233. show_line_numbers,
  234. VIEWPORT_WIDTH,
  235. VIEWPORT_HEIGHT
  236. )
  237. create_save_section(
  238. work_mode,
  239. st.session_state.structure,
  240. image,
  241. line_width,
  242. TABLE_EDITOR_CONFIG["output"]
  243. )
# Build the editor page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    create_table_line_editor()