ocr_validator_layout.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. #!/usr/bin/env python3
  2. """
  3. OCR验证工具的布局管理模块
  4. 包含标准布局、滚动布局、紧凑布局的实现
  5. """
  6. import streamlit as st
  7. from pathlib import Path
  8. from PIL import Image
  9. from typing import Dict, List, Optional
  10. import plotly.graph_objects as go
  11. from ocr_validator_utils import (
  12. convert_html_table_to_markdown,
  13. parse_html_tables,
  14. draw_bbox_on_image,
  15. rotate_image_and_coordinates
  16. )
  17. class OCRLayoutManager:
  18. """OCR布局管理器"""
  19. def __init__(self, validator):
  20. self.validator = validator
  21. self.config = validator.config
  22. self._rotated_image_cache = {} # 缓存旋转后的图像
  23. def get_rotation_angle(self) -> float:
  24. """从OCR数据中获取旋转角度"""
  25. if self.validator.ocr_data:
  26. for item in self.validator.ocr_data:
  27. if isinstance(item, dict) and 'rotation_angle' in item:
  28. return item['rotation_angle']
  29. return 0.0
  30. def load_and_rotate_image(self, image_path: str) -> Optional[Image.Image]:
  31. """加载并根据需要旋转图像"""
  32. if not image_path or not Path(image_path).exists():
  33. return None
  34. # 检查缓存
  35. rotation_angle = self.get_rotation_angle()
  36. cache_key = f"{image_path}_{rotation_angle}"
  37. if cache_key in self._rotated_image_cache:
  38. return self._rotated_image_cache[cache_key]
  39. try:
  40. image = Image.open(image_path)
  41. # 如果需要旋转
  42. if rotation_angle != 0:
  43. st.info(f"🔄 检测到文档旋转角度: {rotation_angle}°,正在自动旋转图像...")
  44. # 收集所有bbox坐标
  45. all_bboxes = []
  46. text_to_bbox_map = {} # 记录文本到bbox索引的映射
  47. bbox_index = 0
  48. for text, info_list in self.validator.text_bbox_mapping.items():
  49. text_to_bbox_map[text] = []
  50. for info in info_list:
  51. all_bboxes.append(info['bbox'])
  52. text_to_bbox_map[text].append(bbox_index)
  53. bbox_index += 1
  54. # 旋转图像和坐标
  55. rotated_image, rotated_bboxes = rotate_image_and_coordinates(
  56. image, rotation_angle, all_bboxes
  57. )
  58. # 更新bbox映射 - 使用映射关系确保正确对应
  59. for text, bbox_indices in text_to_bbox_map.items():
  60. for i, bbox_idx in enumerate(bbox_indices):
  61. if bbox_idx < len(rotated_bboxes) and i < len(self.validator.text_bbox_mapping[text]):
  62. self.validator.text_bbox_mapping[text][i]['bbox'] = rotated_bboxes[bbox_idx]
  63. # 缓存结果
  64. self._rotated_image_cache[cache_key] = rotated_image
  65. return rotated_image
  66. else:
  67. # 无需旋转,直接缓存原图
  68. self._rotated_image_cache[cache_key] = image
  69. return image
  70. except Exception as e:
  71. st.error(f"❌ 图像加载失败: {e}")
  72. return None
  73. def render_content_section(self, layout_type: str = "standard"):
  74. """渲染内容区域 - 统一方法"""
  75. st.header("📄 OCR识别内容")
  76. # 显示旋转信息
  77. # rotation_angle = self.get_rotation_angle()
  78. # if rotation_angle != 0:
  79. # st.info(f"📐 文档旋转角度: {rotation_angle}°")
  80. # 文本选择器
  81. if self.validator.text_bbox_mapping:
  82. text_options = ["请选择文本..."] + list(self.validator.text_bbox_mapping.keys())
  83. selected_index = st.selectbox(
  84. "选择要校验的文本",
  85. range(len(text_options)),
  86. format_func=lambda x: text_options[x][:50] + "..." if len(text_options[x]) > 50 else text_options[x],
  87. key=f"{layout_type}_text_selector"
  88. )
  89. if selected_index > 0:
  90. st.session_state.selected_text = text_options[selected_index]
  91. else:
  92. st.warning("没有找到可点击的文本")
  93. def render_md_content(self, layout_type: str):
  94. """渲染Markdown内容 - 统一方法"""
  95. if not self.validator.md_content:
  96. return None, None
  97. # 搜索功能
  98. search_term = st.text_input(
  99. "🔍 搜索文本内容",
  100. placeholder="输入关键词搜索...",
  101. key=f"{layout_type}_search"
  102. )
  103. display_content = self.validator.md_content
  104. if search_term:
  105. lines = display_content.split('\n')
  106. filtered_lines = [line for line in lines if search_term.lower() in line.lower()]
  107. display_content = '\n'.join(filtered_lines)
  108. if filtered_lines:
  109. st.success(f"找到 {len(filtered_lines)} 行包含 '{search_term}'")
  110. else:
  111. st.warning(f"未找到包含 '{search_term}' 的内容")
  112. # 渲染方式选择
  113. render_mode = st.radio(
  114. "选择渲染方式",
  115. ["HTML渲染", "Markdown渲染", "DataFrame表格", "原始文本"],
  116. horizontal=True,
  117. key=f"{layout_type}_render_mode"
  118. )
  119. return display_content, render_mode
  120. def render_content_by_mode(self, content: str, render_mode: str, font_size: int, layout_type: str):
  121. """根据渲染模式显示内容 - 统一方法"""
  122. if content is None or render_mode is None:
  123. return
  124. if render_mode == "HTML渲染":
  125. content_style = f"""
  126. <style>
  127. .{layout_type}-content-display {{
  128. font-size: {font_size}px !important;
  129. line-height: 1.4;
  130. color: #333333 !important;
  131. background-color: #fafafa !important;
  132. padding: 10px;
  133. border-radius: 5px;
  134. border: 1px solid #ddd;
  135. }}
  136. </style>
  137. """
  138. st.markdown(content_style, unsafe_allow_html=True)
  139. st.markdown(f'<div class="{layout_type}-content-display">{content}</div>', unsafe_allow_html=True)
  140. elif render_mode == "Markdown渲染":
  141. converted_content = convert_html_table_to_markdown(content)
  142. content_style = f"""
  143. <style>
  144. .{layout_type}-content-display {{
  145. font-size: {font_size}px !important;
  146. line-height: 1.4;
  147. color: #333333 !important;
  148. background-color: #fafafa !important;
  149. padding: 10px;
  150. border-radius: 5px;
  151. border: 1px solid #ddd;
  152. }}
  153. </style>
  154. """
  155. st.markdown(content_style, unsafe_allow_html=True)
  156. st.markdown(f'<div class="{layout_type}-content-display">{converted_content}</div>', unsafe_allow_html=True)
  157. elif render_mode == "DataFrame表格":
  158. if '<table' in content.lower():
  159. self.validator.display_html_table_as_dataframe(content)
  160. else:
  161. st.info("当前内容中没有检测到HTML表格")
  162. st.markdown(content, unsafe_allow_html=True)
  163. else: # 原始文本
  164. st.text_area(
  165. "MD内容预览",
  166. content,
  167. height=300,
  168. key=f"{layout_type}_text_area"
  169. )
  170. # 布局实现
  171. def create_standard_layout(self, font_size: int = 10, zoom_level: float = 1.0):
  172. """创建标准布局"""
  173. if zoom_level is None:
  174. zoom_level = self.config['styles']['layout']['default_zoom']
  175. # 主要内容区域
  176. layout = self.config['styles']['layout']
  177. left_col, right_col = st.columns([layout['content_width'], layout['sidebar_width']])
  178. with left_col:
  179. self.render_content_section("standard")
  180. # 显示内容
  181. if self.validator.md_content:
  182. display_content, render_mode = self.render_md_content("standard")
  183. self.render_content_by_mode(display_content, render_mode, font_size, "standard")
  184. with right_col:
  185. self.create_aligned_image_display(zoom_level, "compact")
  186. def create_compact_layout(self, font_size: int = 10, zoom_level: float = 1.0):
  187. """创建紧凑的对比布局"""
  188. # 主要内容区域
  189. layout = self.config['styles']['layout']
  190. left_col, right_col = st.columns([layout['content_width'], layout['sidebar_width']])
  191. with left_col:
  192. self.render_content_section("compact")
  193. # 只保留一个内容区域高度选择
  194. container_height = st.selectbox(
  195. "选择内容区域高度",
  196. [400, 600, 800, 1000, 1200],
  197. index=2,
  198. key="compact_content_height"
  199. )
  200. # 快速定位文本选择器(使用不同的key)
  201. if self.validator.text_bbox_mapping:
  202. text_options = ["请选择文本..."] + list(self.validator.text_bbox_mapping.keys())
  203. selected_index = st.selectbox(
  204. "快速定位文本",
  205. range(len(text_options)),
  206. format_func=lambda x: text_options[x][:30] + "..." if len(text_options[x]) > 30 else text_options[x],
  207. key="compact_quick_text_selector" # 使用不同的key
  208. )
  209. if selected_index > 0:
  210. st.session_state.selected_text = text_options[selected_index]
  211. # 自定义CSS样式
  212. st.markdown(f"""
  213. <style>
  214. .compact-content {{
  215. height: {container_height}px;
  216. overflow-y: auto;
  217. font-size: {font_size}px !important;
  218. line-height: 1.4;
  219. border: 1px solid #ddd;
  220. padding: 10px;
  221. background-color: #fafafa !important;
  222. font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
  223. color: #333333 !important;
  224. }}
  225. .highlight-text {{
  226. background-color: #ffeb3b !important;
  227. padding: 2px 4px;
  228. border-radius: 3px;
  229. cursor: pointer;
  230. color: #333333 !important;
  231. }}
  232. .selected-highlight {{
  233. background-color: #4caf50 !important;
  234. color: white !important;
  235. }}
  236. </style>
  237. """, unsafe_allow_html=True)
  238. # 处理并显示OCR内容
  239. if self.validator.md_content:
  240. # 高亮可点击文本
  241. highlighted_content = self.validator.md_content
  242. for text in self.validator.text_bbox_mapping.keys():
  243. if len(text) > 2: # 避免高亮过短的文本
  244. css_class = "highlight-text selected-highlight" if text == st.session_state.selected_text else "highlight-text"
  245. highlighted_content = highlighted_content.replace(
  246. text,
  247. f'<span class="{css_class}" title="{text[:50]}...">{text}</span>'
  248. )
  249. st.markdown(
  250. f'<div class="compact-content">{highlighted_content}</div>',
  251. unsafe_allow_html=True
  252. )
  253. with right_col:
  254. # 修复的对齐图片显示
  255. self.create_aligned_image_display(zoom_level, "compact")
  256. def create_aligned_image_display(self, zoom_level: float = 1.0, layout_type: str = "aligned"):
  257. """创建与左侧对齐的图片显示"""
  258. # 精确对齐CSS
  259. st.markdown(f"""
  260. <style>
  261. .aligned-image-container-{layout_type} {{
  262. margin-top: -70px;
  263. padding-top: 0px;
  264. }}
  265. .aligned-image-container-{layout_type} h1 {{
  266. margin-top: 0px !important;
  267. padding-top: 0px !important;
  268. }}
  269. </style>
  270. """, unsafe_allow_html=True)
  271. st.markdown(f'<div class="aligned-image-container-{layout_type}">', unsafe_allow_html=True)
  272. st.header("🖼️ 原图标注")
  273. # 图片缩放控制
  274. col1, col2 = st.columns(2)
  275. with col1:
  276. current_zoom = st.slider("图片缩放", 0.3, 2.0, zoom_level, 0.1, key=f"{layout_type}_zoom_level")
  277. with col2:
  278. show_all_boxes = st.checkbox("显示所有框", value=False, key=f"{layout_type}_show_all_boxes")
  279. # 使用新的图像加载方法
  280. image = self.load_and_rotate_image(self.validator.image_path)
  281. if image:
  282. try:
  283. # 根据缩放级别调整图片大小
  284. new_width = int(image.width * current_zoom)
  285. new_height = int(image.height * current_zoom)
  286. resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
  287. # 计算选中的bbox - 注意bbox已经是旋转后的坐标
  288. selected_bbox = None
  289. if st.session_state.selected_text and st.session_state.selected_text in self.validator.text_bbox_mapping:
  290. info = self.validator.text_bbox_mapping[st.session_state.selected_text][0]
  291. # bbox已经是旋转后的坐标,只需要应用缩放
  292. bbox = info['bbox']
  293. selected_bbox = [int(coord * current_zoom) for coord in bbox]
  294. # 创建交互式图片
  295. fig = self.create_resized_interactive_plot(resized_image, selected_bbox, current_zoom, show_all_boxes)
  296. st.plotly_chart(fig, use_container_width=True, key=f"{layout_type}_plot")
  297. # 显示选中文本的详细信息
  298. if st.session_state.selected_text and st.session_state.selected_text in self.validator.text_bbox_mapping:
  299. st.subheader("📍 选中文本详情")
  300. info = self.validator.text_bbox_mapping[st.session_state.selected_text][0]
  301. bbox = info['bbox']
  302. info_col1, info_col2 = st.columns(2)
  303. with info_col1:
  304. st.write(f"**文本内容:** {st.session_state.selected_text[:30]}...")
  305. st.write(f"**类别:** {info['category']}")
  306. # 显示旋转信息
  307. rotation_angle = self.get_rotation_angle()
  308. if rotation_angle != 0:
  309. st.write(f"**旋转角度:** {rotation_angle}°")
  310. with info_col2:
  311. st.write(f"**位置:** [{', '.join(map(str, bbox))}]")
  312. if len(bbox) >= 4:
  313. st.write(f"**大小:** {bbox[2] - bbox[0]} x {bbox[3] - bbox[1]} px")
  314. # 错误标记功能
  315. col1, col2 = st.columns(2)
  316. with col1:
  317. if st.button("❌ 标记为错误", key=f"{layout_type}_mark_error"):
  318. st.session_state.marked_errors.add(st.session_state.selected_text)
  319. st.rerun()
  320. with col2:
  321. if st.button("✅ 取消错误标记", key=f"{layout_type}_unmark_error"):
  322. st.session_state.marked_errors.discard(st.session_state.selected_text)
  323. st.rerun()
  324. except Exception as e:
  325. st.error(f"❌ 图片处理失败: {e}")
  326. st.error(f"详细错误: {str(e)}")
  327. else:
  328. st.error("未找到对应的图片文件")
  329. if self.validator.image_path:
  330. st.write(f"期望路径: {self.validator.image_path}")
  331. st.markdown('</div>', unsafe_allow_html=True)
  332. def create_resized_interactive_plot(self, image: Image.Image, selected_bbox: Optional[List[int]], zoom_level: float, show_all_boxes: bool) -> go.Figure:
  333. """创建可调整大小的交互式图片 - 修复图像显示和bbox对齐问题"""
  334. fig = go.Figure()
  335. # 添加图片 - 修正图像定位,确保与工具栏距离一致
  336. fig.add_layout_image(
  337. dict(
  338. source=image,
  339. xref="x", yref="y",
  340. x=0, y=image.height * zoom_level, # 修正:图片左上角位置
  341. sizex=image.width * zoom_level,
  342. sizey=image.height * zoom_level,
  343. sizing="stretch",
  344. opacity=1.0,
  345. layer="below"
  346. )
  347. )
  348. # 显示所有bbox
  349. if show_all_boxes:
  350. for text, info_list in self.validator.text_bbox_mapping.items():
  351. for info in info_list:
  352. bbox = info['bbox']
  353. if len(bbox) >= 4:
  354. # bbox已经是旋转后的坐标,需要应用缩放并转换坐标系
  355. x1, y1, x2, y2 = bbox[:4]
  356. # 应用缩放
  357. scaled_x1 = x1 * zoom_level
  358. scaled_y1 = y1 * zoom_level
  359. scaled_x2 = x2 * zoom_level
  360. scaled_y2 = y2 * zoom_level
  361. # 转换为plotly坐标系(原点在左下角)
  362. plot_x1 = scaled_x1
  363. plot_y1 = (image.height * zoom_level) - scaled_y2 # 翻转Y坐标
  364. plot_x2 = scaled_x2
  365. plot_y2 = (image.height * zoom_level) - scaled_y1 # 翻转Y坐标
  366. color = "rgba(0, 100, 200, 0.2)"
  367. if text in self.validator.marked_errors:
  368. color = "rgba(255, 0, 0, 0.3)"
  369. fig.add_shape(
  370. type="rect",
  371. x0=plot_x1, y0=plot_y1,
  372. x1=plot_x2, y1=plot_y2,
  373. line=dict(color=color.replace('0.2', '0.8').replace('0.3', '1.0'), width=1),
  374. fillcolor=color,
  375. )
  376. # 高亮显示选中的bbox
  377. if selected_bbox and len(selected_bbox) >= 4:
  378. x1, y1, x2, y2 = selected_bbox[:4]
  379. # 转换为plotly坐标系(selected_bbox已经是缩放后的坐标)
  380. plot_x1 = x1
  381. plot_y1 = (image.height * zoom_level) - y2 # 翻转Y坐标
  382. plot_x2 = x2
  383. plot_y2 = (image.height * zoom_level) - y1 # 翻转Y坐标
  384. fig.add_shape(
  385. type="rect",
  386. x0=plot_x1, y0=plot_y1,
  387. x1=plot_x2, y1=plot_y2,
  388. line=dict(color="red", width=3),
  389. fillcolor="rgba(255, 0, 0, 0.3)",
  390. )
  391. # 计算合适的显示尺寸
  392. aspect_ratio = image.width / image.height
  393. display_height = min(800, max(400, image.height // 2))
  394. display_width = int(display_height * aspect_ratio)
  395. # 设置布局 - 确保图像完全可见,使用缩放后的尺寸
  396. fig.update_layout(
  397. width=display_width,
  398. height=display_height,
  399. margin=dict(l=0, r=0, t=0, b=0),
  400. showlegend=False,
  401. plot_bgcolor='white',
  402. dragmode="pan",
  403. # X轴设置 - 使用缩放后的图像尺寸
  404. xaxis=dict(
  405. visible=False,
  406. range=[0, image.width * zoom_level],
  407. constrain="domain",
  408. fixedrange=False,
  409. autorange=False
  410. ),
  411. # Y轴设置 - plotly坐标系(原点在左下角)
  412. yaxis=dict(
  413. visible=False,
  414. range=[0, image.height * zoom_level],
  415. constrain="domain",
  416. scaleanchor="x",
  417. scaleratio=1,
  418. fixedrange=False,
  419. autorange=False
  420. )
  421. )
  422. return fig