streamlit_table_line_editor.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. """
  2. 表格线可视化编辑器
  3. 支持人工调整表格线位置
  4. """
  5. import streamlit as st
  6. from pathlib import Path
  7. from PIL import Image
  8. try:
  9. from table_line_generator import TableLineGenerator
  10. except ImportError:
  11. from .table_line_generator import TableLineGenerator
  12. # 导入编辑器模块
  13. from editor import (
  14. # UI 组件
  15. create_file_uploader_section,
  16. create_display_settings_section,
  17. create_undo_redo_section,
  18. create_analysis_section,
  19. create_save_section,
  20. # 绘图
  21. get_cached_table_lines_image,
  22. # 状态管理
  23. init_undo_stack,
  24. # 调整
  25. create_adjustment_section,
  26. )
  27. def create_table_line_editor():
  28. """创建表格线编辑器界面"""
  29. # 配置页面
  30. st.set_page_config(
  31. page_title="表格线编辑器",
  32. page_icon="📏",
  33. layout="wide",
  34. initial_sidebar_state="expanded"
  35. )
  36. st.title("📏 表格线编辑器")
  37. # 初始化 session_state
  38. if 'loaded_json_name' not in st.session_state:
  39. st.session_state.loaded_json_name = None
  40. if 'loaded_image_name' not in st.session_state:
  41. st.session_state.loaded_image_name = None
  42. if 'loaded_config_name' not in st.session_state:
  43. st.session_state.loaded_config_name = None
  44. if 'ocr_data' not in st.session_state:
  45. st.session_state.ocr_data = None
  46. if 'image' not in st.session_state:
  47. st.session_state.image = None
  48. # 初始化撤销/重做栈
  49. init_undo_stack()
  50. # 🆕 工作模式选择
  51. st.sidebar.header("📂 工作模式")
  52. work_mode = st.sidebar.radio(
  53. "选择模式",
  54. ["🆕 新建标注", "📂 加载已有标注"],
  55. index=0
  56. )
  57. # 文件上传区域
  58. create_file_uploader_section(work_mode)
  59. # 检查必要条件
  60. if work_mode == "🆕 新建标注":
  61. if st.session_state.ocr_data is None or st.session_state.image is None:
  62. st.info("👆 请在左侧上传 OCR 结果 JSON 文件和对应的图片")
  63. with st.expander("📖 使用说明"):
  64. st.markdown("""
  65. ### 🆕 新建标注模式
  66. **支持的OCR格式**
  67. **1. PPStructure V3 格式 (推荐)**
  68. ```json
  69. {
  70. "parsing_res_list": [...],
  71. "overall_ocr_res": {
  72. "rec_boxes": [[x1, y1, x2, y2], ...],
  73. "rec_texts": ["文本1", "文本2", ...]
  74. }
  75. }
  76. ```
  77. **2. 标准格式**
  78. ```json
  79. [
  80. {
  81. "text": "文本内容",
  82. "bbox": [x1, y1, x2, y2]
  83. }
  84. ]
  85. ```
  86. ### 📂 加载已有标注模式
  87. 1. 上传之前保存的 `*_structure.json` 配置文件
  88. 2. 上传对应的图片(可选)
  89. 3. 继续调整表格线位置
  90. 4. 保存更新后的配置
  91. """)
  92. return
  93. ocr_data = st.session_state.ocr_data
  94. image = st.session_state.image
  95. st.info(f"📂 已加载: {st.session_state.loaded_json_name} + {st.session_state.loaded_image_name}")
  96. # 初始化生成器
  97. if 'generator' not in st.session_state or st.session_state.generator is None:
  98. try:
  99. generator = TableLineGenerator(image, ocr_data)
  100. st.session_state.generator = generator
  101. except Exception as e:
  102. st.error(f"❌ 初始化失败: {e}")
  103. st.stop()
  104. else: # 加载已有标注模式
  105. if 'structure' not in st.session_state:
  106. st.info("👆 请在左侧上传配置文件 (*_structure.json)")
  107. with st.expander("📖 使用说明"):
  108. st.markdown("""
  109. ### 📂 加载已有标注
  110. **步骤:**
  111. 1. **上传配置文件**:选择之前保存的 `*_structure.json`
  112. 2. **上传图片**(可选):上传对应的图片以查看效果
  113. 3. **调整表格线**:使用下方的工具调整横线/竖线位置
  114. 4. **保存更新**:保存修改后的配置
  115. **提示:**
  116. - 即使没有图片,也可以直接编辑配置文件中的坐标
  117. - 配置文件包含完整的表格结构信息
  118. - 可以应用到同类型的其他页面
  119. """)
  120. return
  121. if st.session_state.image is None:
  122. st.warning("⚠️ 仅加载了配置,未加载图片。部分功能受限。")
  123. structure = st.session_state.structure
  124. image = st.session_state.image
  125. # 如果没有图片,创建虚拟画布
  126. if image is None:
  127. if 'table_bbox' in structure:
  128. bbox = structure['table_bbox']
  129. dummy_width = bbox[2] + 100
  130. dummy_height = bbox[3] + 100
  131. else:
  132. dummy_width = 2000
  133. dummy_height = 2000
  134. image = Image.new('RGB', (dummy_width, dummy_height), color='white')
  135. st.info(f"💡 使用虚拟画布 ({dummy_width}x{dummy_height}) 显示表格结构")
  136. # 参数调整(仅在新建模式显示)
  137. if work_mode == "🆕 新建标注":
  138. st.sidebar.header("🔧 参数调整")
  139. y_tolerance = st.sidebar.slider("Y轴聚类容差(像素)", 1, 20, 5)
  140. x_tolerance = st.sidebar.slider("X轴聚类容差(像素)", 5, 50, 10)
  141. min_row_height = st.sidebar.slider("最小行高(像素)", 10, 100, 20)
  142. # 显示设置
  143. line_width, display_mode, zoom_level, show_line_numbers = create_display_settings_section()
  144. # 撤销/重做
  145. create_undo_redo_section()
  146. # 分析表格结构(仅在新建模式显示)
  147. if work_mode == "🆕 新建标注":
  148. create_analysis_section(y_tolerance, x_tolerance, min_row_height)
  149. # 显示结果
  150. if 'structure' in st.session_state and st.session_state.structure:
  151. structure = st.session_state.structure
  152. # 使用缓存机制绘制表格线
  153. img_with_lines = get_cached_table_lines_image(
  154. image,
  155. structure,
  156. line_width=line_width,
  157. show_numbers=show_line_numbers
  158. )
  159. # 根据显示模式显示图片
  160. if display_mode == "对比显示":
  161. col1, col2 = st.columns(2)
  162. with col1:
  163. st.subheader("原图")
  164. st.image(image, use_container_width=True)
  165. with col2:
  166. st.subheader("添加表格线")
  167. st.image(img_with_lines, use_container_width=True)
  168. elif display_mode == "仅显示划线图":
  169. display_width = int(img_with_lines.width * zoom_level)
  170. st.subheader(f"表格线图 (缩放: {zoom_level:.0%})")
  171. st.image(img_with_lines, width=display_width)
  172. else:
  173. display_width = int(image.width * zoom_level)
  174. st.subheader(f"原图 (缩放: {zoom_level:.0%})")
  175. st.image(image, width=display_width)
  176. # 手动调整区域
  177. create_adjustment_section(structure)
  178. # 显示详细信息
  179. with st.expander("📊 表格结构详情"):
  180. st.json({
  181. "行数": len(structure['rows']),
  182. "列数": len(structure['columns']),
  183. "横线数": len(structure.get('horizontal_lines', [])),
  184. "竖线数": len(structure.get('vertical_lines', [])),
  185. "横线坐标": structure.get('horizontal_lines', []),
  186. "竖线坐标": structure.get('vertical_lines', []),
  187. "标准行高": structure.get('row_height'),
  188. "列宽度": structure.get('col_widths'),
  189. "修改的横线": list(structure.get('modified_h_lines', set())),
  190. "修改的竖线": list(structure.get('modified_v_lines', set()))
  191. })
  192. # 保存区域
  193. create_save_section(work_mode, structure, image, line_width)
# Script entry point: build the editor page only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    create_table_line_editor()