ocr_validator_layout.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983
  1. #!/usr/bin/env python3
  2. """
  3. OCR验证工具的布局管理模块
  4. 包含标准布局、滚动布局、紧凑布局的实现
  5. """
  6. import streamlit as st
  7. from pathlib import Path
  8. from PIL import Image
  9. from typing import Dict, List, Optional
  10. import plotly.graph_objects as go
  11. from typing import Tuple
  12. import re
  13. import html
  14. from ocr_validator_utils import (
  15. rotate_image_and_coordinates,
  16. get_ocr_tool_rotation_config,
  17. )
  18. # 添加 ocr_platform 根目录到 Python 路径(用于导入 ocr_utils)
  19. # 使用 resolve() 确保路径是绝对路径,避免相对路径导致的 IndexError
  20. import sys
  21. _file_path = Path(__file__).resolve()
  22. ocr_platform_root = _file_path.parents[1] # ocr_validator_layout.py -> ocr_validator -> ocr_platform
  23. if str(ocr_platform_root) not in sys.path:
  24. sys.path.insert(0, str(ocr_platform_root))
  25. # 从 ocr_utils 导入通用工具
  26. from ocr_utils.html_utils import convert_html_table_to_markdown, parse_html_tables
  27. from ocr_utils.visualization_utils import VisualizationUtils
  28. # BeautifulSoup用于精确HTML表格处理
  29. from bs4 import BeautifulSoup
  30. # 从本地文件导入 Streamlit 特定函数
  31. from ocr_validator_file_utils import load_css_styles
  32. # 为了向后兼容,提供函数别名
  33. draw_bbox_on_image = VisualizationUtils.draw_bbox_on_image
  34. # detect_image_orientation_by_opencv 保留在 ocr_validator_file_utils
  35. from ocr_validator_file_utils import detect_image_orientation_by_opencv
  36. class OCRLayoutManager:
  37. """OCR布局管理器"""
  38. def __init__(self, validator):
  39. self.validator = validator
  40. self.config = validator.config
  41. self._rotated_image_cache = {}
  42. self._cache_max_size = 10
  43. self._orientation_cache = {} # 缓存方向检测结果
  44. self.rotated_angle = 0.0 # 自动检测的旋转角度缓存
  45. self.show_all_boxes = False
  46. self.fit_to_container = False
  47. self.zoom_level = 1.0
  def _clear_selection_callback(self, layout_type: str):
      """Reset the selection and search state for one layout.

      Intended to be used as a button ``on_click`` callback: widget-backed
      session keys are changed here rather than after the widgets have been
      instantiated in the script run.

      Args:
          layout_type: Prefix of the widget keys to reset (for example
              ``"compact"``).
      """
      state = st.session_state
      resets = {
          # Application-level state.
          "selected_text": "",
          # Search text remembered by the compact layout.
          "compact_search_query": "",
          # State stored under the widgets' own keys.
          f"{layout_type}_search_input": "",
          f"{layout_type}_quick_text_selector": 0,
      }
      for state_key, blank_value in resets.items():
          state[state_key] = blank_value
  59. def _highlight_text_safely(self, content: str, text_to_highlight: str,
  60. highlight_class: str, title: Optional[str] = None) -> str:
  61. """
  62. 安全地高亮文本,保护Markdown语法(特别是图片)
  63. 策略:
  64. 1. 保护特殊内容(HTML注释、Markdown图片)
  65. 2. 只对HTML表格使用BeautifulSoup精确处理
  66. 3. 其他部分使用简单字符串替换,保持Markdown格式
  67. Args:
  68. content: 要处理的Markdown/HTML混合内容
  69. text_to_highlight: 要高亮的文本
  70. highlight_class: 高亮样式类名
  71. title: 鼠标悬停提示文本
  72. Returns:
  73. 处理后的内容
  74. """
  75. if not text_to_highlight or text_to_highlight not in content:
  76. return content
  77. if title is None:
  78. title = text_to_highlight
  79. try:
  80. import re
  81. # 1. 提取并保护特殊内容
  82. protected_parts = []
  83. # 保护 HTML 注释
  84. def protect_comment(match):
  85. protected_parts.append(match.group(0))
  86. return f"__PROTECTED_{len(protected_parts) - 1}__"
  87. content = re.sub(r'<!--.*?-->', protect_comment, content, flags=re.DOTALL)
  88. # 保护 Markdown 图片(完整语法)
  89. def protect_image(match):
  90. protected_parts.append(match.group(0))
  91. return f"__PROTECTED_{len(protected_parts) - 1}__"
  92. content = re.sub(r'!\[.*?\]\([^)]+\)', protect_image, content)
  93. # 2. 提取表格并单独处理
  94. tables = []
  95. def extract_table(match):
  96. tables.append(match.group(0))
  97. return f"__TABLE_{len(tables) - 1}__"
  98. content = re.sub(r'<table[^>]*>.*?</table>', extract_table, content, flags=re.DOTALL)
  99. # 3. 对表格使用 BeautifulSoup 精确处理(只高亮文本,不高亮整个单元格)
  100. highlighted_tables = []
  101. for table_html in tables:
  102. soup = BeautifulSoup(table_html, 'html.parser')
  103. # 在表格单元格中查找完全匹配
  104. for td in soup.find_all(['td', 'th']):
  105. cell_text = td.get_text(strip=True)
  106. if cell_text == text_to_highlight:
  107. # 🎯 只高亮文本,不高亮整个单元格
  108. # 清空单元格内容
  109. td.clear()
  110. # 创建高亮 span 包裹文本
  111. span = soup.new_tag('span')
  112. span['class'] = highlight_class.split()
  113. if title:
  114. span['title'] = title
  115. span.string = text_to_highlight
  116. # 将 span 添加到单元格
  117. td.append(span)
  118. highlighted_tables.append(str(soup))
  119. # 4. 对普通文本进行简单替换(保持Markdown格式,跳过占位符)
  120. if text_to_highlight in content:
  121. highlight_span = f'<span class="{highlight_class}"'
  122. if title:
  123. highlight_span += f' title="{title}"'
  124. highlight_span += f'>{text_to_highlight}</span>'
  125. # 🎯 安全替换:使用正则表达式,排除占位符内的匹配
  126. # 负向前瞻:确保前面不是占位符的一部分
  127. pattern = f'(?<!__PROTECTED_)(?<!__TABLE_){re.escape(text_to_highlight)}(?!__)'
  128. content = re.sub(pattern, highlight_span, content)
  129. # 5. 恢复表格
  130. for i, table in enumerate(highlighted_tables):
  131. content = content.replace(f"__TABLE_{i}__", table)
  132. # 6. 恢复受保护的内容(图片和注释)
  133. for i, protected in enumerate(protected_parts):
  134. content = content.replace(f"__PROTECTED_{i}__", protected)
  135. return content
  136. except Exception as e:
  137. st.warning(f"文本高亮时出错: {str(e)}")
  138. return content
  139. def clear_image_cache(self):
  140. """清理所有图像缓存"""
  141. self._rotated_image_cache.clear()
  142. def clear_cache_for_image(self, image_path: str):
  143. """清理指定图像的所有缓存"""
  144. keys_to_remove = [key for key in self._rotated_image_cache.keys() if key.startswith(image_path)]
  145. for key in keys_to_remove:
  146. del self._rotated_image_cache[key]
  147. def get_cache_info(self) -> dict:
  148. """获取缓存信息"""
  149. return {
  150. 'cache_size': len(self._rotated_image_cache),
  151. 'cached_images': list(self._rotated_image_cache.keys()),
  152. 'max_size': self._cache_max_size
  153. }
  154. def _manage_cache_size(self):
  155. """管理缓存大小,超出限制时清理最旧的缓存"""
  156. if len(self._rotated_image_cache) > self._cache_max_size:
  157. # 删除最旧的缓存项(FIFO策略)
  158. oldest_key = next(iter(self._rotated_image_cache))
  159. del self._rotated_image_cache[oldest_key]
  160. def detect_and_suggest_rotation(self, image_path: str) -> Dict:
  161. """检测并建议图片旋转角度"""
  162. if image_path in self._orientation_cache:
  163. return self._orientation_cache[image_path]
  164. # 使用自动检测功能
  165. detection_result = detect_image_orientation_by_opencv(image_path)
  166. # 缓存结果
  167. self._orientation_cache[image_path] = detection_result
  168. return detection_result
  169. def get_rotation_angle(self) -> float:
  170. """获取旋转角度 - 增强版本支持自动检测"""
  171. # 如果没有预设角度,优先人工设置
  172. if hasattr(self, 'rotated_angle') and self.rotated_angle != 0:
  173. return self.rotated_angle
  174. # 尝试从OCR数据中获取(PPStructV3等)
  175. if self.validator.ocr_data:
  176. for item in self.validator.ocr_data:
  177. if isinstance(item, dict) and 'rotation_angle' in item:
  178. return item['rotation_angle']
  179. return 0.0
  def load_and_rotate_image(self, image_path: str) -> Optional[Image.Image]:
      """Load an image and rotate it (and, if needed, its boxes) for display.

      Results are cached under ``"<image_path>_<rotation_angle>"`` together
      with the validator's ``text_bbox_mapping``; a cache hit re-installs that
      mapping on the validator.

      Args:
          image_path: Path of the image file.

      Returns:
          The (possibly rotated) image, or ``None`` when the path is empty,
          the file does not exist, or loading/rotating raised (an error
          message is shown in that case).
      """
      if not image_path or not Path(image_path).exists():
          return None

      rotation_angle = self.get_rotation_angle()
      cache_key = f"{image_path}_{rotation_angle}"

      cached_entry = self._rotated_image_cache.get(cache_key)
      if cached_entry is not None:
          self.validator.text_bbox_mapping = cached_entry['text_bbox_mapping']
          return cached_entry['image']

      def remember(result_image):
          """Store ``result_image`` with the current bbox mapping and return it."""
          self._rotated_image_cache[cache_key] = {
              'image': result_image,
              'text_bbox_mapping': self.validator.text_bbox_mapping,
          }
          self._manage_cache_size()
          return result_image

      try:
          image = Image.open(image_path)

          # Nothing to rotate: cache the image as loaded.
          if rotation_angle == 0:
              return remember(image)

          rotation_config = get_ocr_tool_rotation_config(self.validator.ocr_data, self.config)
          pre_rotated = rotation_config['coordinates_are_pre_rotated']

          if pre_rotated:
              # The OCR tool reports coordinates that are already rotated
              # (e.g. PPStructV3), so only the image is turned here.
              img_rotation_angle = (rotation_angle + self.rotated_angle) % 360
              # 270/90/180 use fixed PIL angles; anything else is negated.
              pil_degrees = {270: -90, 90: 90, 180: 180}.get(
                  img_rotation_angle, -img_rotation_angle
              )
              rotated_image = image.rotate(pil_degrees, expand=True)

              if self.rotated_angle == 0:
                  # Coordinates from the OCR JSON are used unchanged.
                  return remember(rotated_image)

              # Continue with the rotated image for the step below.
              image = rotated_image

          # Flatten every bbox into one list, remembering where each text's
          # boxes start so they can be written back afterwards.
          flat_bboxes = []
          first_index = {}
          for text, info_list in self.validator.text_bbox_mapping.items():
              first_index[text] = len(flat_bboxes)
              for info in info_list:
                  flat_bboxes.append(info['bbox'])

          rotated_image, rotated_bboxes = rotate_image_and_coordinates(
              image, rotation_angle, flat_bboxes,
              rotate_coordinates=not pre_rotated
          )

          # Write the returned boxes back into the mapping, in place.
          for text, start in first_index.items():
              for offset, info in enumerate(self.validator.text_bbox_mapping[text]):
                  position = start + offset
                  if position < len(rotated_bboxes):
                      info['bbox'] = rotated_bboxes[position]

          return remember(rotated_image)

      except Exception as e:
          st.error(f"❌ 图像加载失败: {e}")
          return None
  251. def render_content_by_mode(self, content: str, render_mode: str, font_size: int,
  252. container_height: int, layout_type: str,
  253. highlight_config: Optional[Dict] = None):
  254. """
  255. 根据渲染模式显示内容 - 增强版本
  256. Args:
  257. content: 要渲染的内容
  258. render_mode: 渲染模式
  259. font_size: 字体大小
  260. container_height: 容器高度
  261. layout_type: 布局类型
  262. highlight_config: 高亮配置 {'has_bbox': bool, 'match_type': str}
  263. """
  264. if content is None or render_mode is None:
  265. return
  266. if render_mode == "HTML渲染":
  267. # 🎯 构建样式 - 包含基础样式和高亮样式
  268. content_style = f"""
  269. <style>
  270. /* ========== 基础容器样式 ========== */
  271. .{layout_type}-content-display {{
  272. height: {container_height}px;
  273. overflow-x: auto;
  274. overflow-y: auto;
  275. font-size: {font_size}px !important;
  276. line-height: 1.4;
  277. color: #333333 !important;
  278. background-color: #fafafa !important;
  279. padding: 10px;
  280. border-radius: 5px;
  281. border: 1px solid #ddd;
  282. max-width: 100%;
  283. }}
  284. /* ========== 表格样式 ========== */
  285. .{layout_type}-content-display table {{
  286. width: 100%;
  287. border-collapse: collapse;
  288. margin: 10px 0;
  289. white-space: nowrap;
  290. }}
  291. .{layout_type}-content-display th,
  292. .{layout_type}-content-display td {{
  293. border: 1px solid #ddd;
  294. padding: 8px;
  295. text-align: left;
  296. max-width: 300px;
  297. word-wrap: break-word;
  298. word-break: break-all;
  299. vertical-align: top;
  300. }}
  301. .{layout_type}-content-display th {{
  302. background-color: #f5f5f5;
  303. position: sticky;
  304. top: 0;
  305. z-index: 1;
  306. font-weight: bold;
  307. }}
  308. /* 数字列右对齐 */
  309. .{layout_type}-content-display td.number {{
  310. text-align: right;
  311. white-space: nowrap;
  312. font-family: 'Monaco', 'Menlo', monospace;
  313. }}
  314. /* 短文本列不换行 */
  315. .{layout_type}-content-display td.short-text {{
  316. white-space: nowrap;
  317. min-width: 80px;
  318. }}
  319. /* ========== 图片样式 ========== */
  320. .{layout_type}-content-display img {{
  321. max-width: 100%;
  322. height: auto;
  323. border-radius: 4px;
  324. margin: 10px 0;
  325. }}
  326. /* ========== 响应式设计 ========== */
  327. @media (max-width: 768px) {{
  328. .{layout_type}-content-display table {{
  329. font-size: {max(font_size-2, 8)}px;
  330. }}
  331. .{layout_type}-content-display th,
  332. .{layout_type}-content-display td {{
  333. padding: 4px;
  334. max-width: 150px;
  335. }}
  336. }}
  337. /* ========== 高亮文本样式 ========== */
  338. .{layout_type}-content-display .highlight-text {{
  339. padding: 2px 4px;
  340. border-radius: 3px;
  341. cursor: pointer;
  342. font-weight: 500;
  343. transition: all 0.2s ease;
  344. }}
  345. .{layout_type}-content-display .highlight-text:hover {{
  346. opacity: 0.8;
  347. transform: scale(1.02);
  348. }}
  349. /* 🎯 精确匹配且有框 - 绿色 */
  350. .{layout_type}-content-display .highlight-text.selected-highlight {{
  351. background-color: #4caf50 !important;
  352. color: white !important;
  353. border: 1px solid #2e7d32 !important;
  354. }}
  355. /* 🎯 OCR匹配 - 蓝色 */
  356. .{layout_type}-content-display .highlight-text.ocr-match {{
  357. background-color: #2196f3 !important;
  358. color: white !important;
  359. border: 1px solid #1565c0 !important;
  360. }}
  361. /* 🎯 无边界框 - 橙色虚线 */
  362. .{layout_type}-content-display .highlight-text.no-bbox {{
  363. background-color: #ff9800 !important;
  364. color: white !important;
  365. border: 1px dashed #f57c00 !important;
  366. }}
  367. /* 🎯 默认高亮 - 黄色 */
  368. .{layout_type}-content-display .highlight-text.default {{
  369. background-color: #ffeb3b !important;
  370. color: #333333 !important;
  371. border: 1px solid #fbc02d !important;
  372. }}
  373. </style>
  374. """
  375. st.markdown(content_style, unsafe_allow_html=True)
  376. st.markdown(f'<div class="{layout_type}-content-display">{content}</div>',
  377. unsafe_allow_html=True)
  378. elif render_mode == "Markdown渲染":
  379. converted_content = convert_html_table_to_markdown(content)
  380. st.markdown(converted_content, unsafe_allow_html=True)
  381. elif render_mode == "DataFrame表格":
  382. if '<table' in content.lower():
  383. self.validator.display_html_table_as_dataframe(content)
  384. else:
  385. st.info("当前内容中没有检测到HTML表格")
  386. st.markdown(content, unsafe_allow_html=True)
  387. else: # 原始文本
  388. st.text_area(
  389. "MD内容预览",
  390. content,
  391. height=300,
  392. key=f"{layout_type}_text_area"
  393. )
  def _build_text_options(self, search_query: str):
      """Build the quick-select option lists, filtered by ``search_query``.

      A text is kept when the query (case-insensitive) occurs in the text
      itself or in the ``matched_text`` of its first info entry.

      Returns:
          ``(text_options, text_display, match_info)`` - three parallel
          lists whose first element is the "please select" placeholder
          (``None`` in ``match_info``). Each remaining ``match_info`` item is
          a dict with ``type`` (``"exact"``, ``"ocr"`` or ``None``),
          ``source`` and ``has_bbox``.
      """
      text_options = ["请选择文本..."]
      text_display = ["请选择文本..."]
      match_info = [None]

      searching = bool(search_query and search_query.strip())
      query_lower = search_query.lower() if searching else ""

      for text, info_list in self.validator.text_bbox_mapping.items():
          first_info = None
          if info_list and isinstance(info_list[0], dict):
              first_info = info_list[0]

          match_type = None
          match_source = text
          if searching:
              matched_text = first_info.get('matched_text', '') if first_info is not None else None
              text_match = query_lower in text.lower()
              matched_text_match = bool(matched_text) and query_lower in matched_text.lower()
              if not text_match and not matched_text_match:
                  continue
              if text_match:
                  match_type = "exact"
              else:
                  match_type = "ocr"
                  match_source = matched_text

          # Reset for every entry so a value from a previous iteration can
          # never leak into an entry that has no usable info dict.
          has_bbox = False

          if first_info is not None:
              has_bbox = bool(first_info.get('bbox'))

              # Table cells are prefixed with their row/column position.
              if 'row' in first_info and 'col' in first_info:
                  display_text = f"[R{first_info['row']},C{first_info['col']}] {text}"
              else:
                  display_text = text

              if match_type == "ocr":
                  display_text = f"🔍 {display_text} (OCR: {match_source[:20]}...)"
              elif not has_bbox:
                  display_text = f"⚠️ {display_text} (无框)"

              # Truncate over-long labels.
              if len(display_text) > 60:
                  display_text = display_text[:57] + "..."
          else:
              display_text = text[:57] + "..." if len(text) > 60 else text

          text_options.append(text)
          text_display.append(display_text)
          match_info.append({
              'type': match_type,
              'source': match_source,
              'has_bbox': has_bbox,
          })

      return text_options, text_display, match_info

  def _render_compact_text_selector(self, layout_type: str):
      """Render the search box and quick-select dropdown of the compact layout.

      Stores the chosen text in ``st.session_state.selected_text`` and shows
      a short note when the choice is an OCR-text match or has no bbox.
      """
      search_col, select_col = st.columns([1, 2])
      if "compact_search_query" not in st.session_state:
          st.session_state.compact_search_query = ""

      with search_col:
          search_query = st.text_input(
              "搜索文本",
              placeholder="输入关键词...",
              value=st.session_state.compact_search_query,
              key=f"{layout_type}_search_input",
              label_visibility="collapsed"
          )
          st.session_state.compact_search_query = search_query

      text_options, text_display, match_info = self._build_text_options(search_query)

      # Search statistics.
      if search_query and search_query.strip():
          ocr_matches = sum(1 for m in match_info[1:] if m and m['type'] == 'ocr')
          no_bbox_count = sum(1 for m in match_info[1:] if m and not m['has_bbox'])
          stat_parts = [f"找到 {len(text_options)-1} 个匹配项"]
          if ocr_matches > 0:
              stat_parts.append(f"🔍 {ocr_matches} 个OCR匹配")
          if no_bbox_count > 0:
              stat_parts.append(f"⚠️ {no_bbox_count} 个无框")
          st.caption(" | ".join(stat_parts))

      # Pre-select the currently selected text when it is still listed.
      default_index = 0
      current_selection = st.session_state.selected_text
      if current_selection and current_selection in text_options:
          default_index = text_options.index(current_selection)

      with select_col:
          selected_index = st.selectbox(
              "快速定位文本",
              range(len(text_options)),
              index=default_index,
              format_func=lambda x: text_display[x] if x < len(text_display) else "",
              label_visibility="collapsed",
              key=f"{layout_type}_quick_text_selector"
          )

      if selected_index > 0:
          st.session_state.selected_text = text_options[selected_index]
          selected_match_info = match_info[selected_index]
          if selected_match_info:
              if selected_match_info['type'] == 'ocr':
                  st.info(f"🔍 **OCR识别文本匹配**: `{selected_match_info['source']}`")
              elif not selected_match_info['has_bbox']:
                  st.warning("⚠️ **未找到边界框**: 文本在MD中存在,但没有对应的坐标信息")

  def _highlight_selected_text(self, md_content: str) -> str:
      """Return ``md_content`` with the selected text (and its OCR variant) highlighted.

      The highlight class depends on the first info entry of the selected
      text: ``selected-highlight`` when it has a bbox, ``no-bbox`` when it
      has none, and ``default`` otherwise (including when the entry's
      ``matched_text`` differs from the selected text). A differing
      ``matched_text`` is additionally highlighted as ``ocr-match``.
      """
      selected_text = st.session_state.selected_text
      if not selected_text:
          return md_content

      mapping = self.validator.text_bbox_mapping or {}
      info_list = mapping.get(selected_text, [])

      has_bbox = False
      matched_text = None
      match_type = None
      if info_list and isinstance(info_list[0], dict):
          has_bbox = 'bbox' in info_list[0] and info_list[0]['bbox']
          matched_text = info_list[0].get('matched_text', '')
          if matched_text and matched_text != selected_text:
              match_type = "ocr"
          elif has_bbox:
              match_type = "exact"
          else:
              match_type = "no_bbox"

      # Very short selections are not highlighted at all.
      if len(selected_text) < self.config.get('ocr', {}).get('min_text_length', 2):
          return md_content

      highlighted_content = md_content

      # 1. Highlight the selected text itself.
      if selected_text in highlighted_content:
          if match_type == "exact":
              highlight_class = "highlight-text selected-highlight"
          elif match_type == "no_bbox":
              highlight_class = "highlight-text no-bbox"
          else:
              highlight_class = "highlight-text default"
          highlighted_content = self._highlight_text_safely(
              highlighted_content,
              selected_text,
              highlight_class
          )

      # 2. Highlight the OCR-recognised variant when it differs.
      if matched_text and matched_text != selected_text and matched_text in highlighted_content:
          highlighted_content = self._highlight_text_safely(
              highlighted_content,
              matched_text,
              "highlight-text ocr-match",
              f"OCR: {matched_text}"
          )

      return highlighted_content

  def create_compact_layout(self, config: Dict):
      """Render the compact side-by-side layout: text on the left, image on the right.

      Args:
          config: Configuration dict. Reads ``config['styles']['layout']``
              (``content_width``, ``sidebar_width`` and the optional
              ``default_height`` / ``default_zoom``) and the optional
              ``config['styles']['font_size']``.
      """
      layout = config['styles']['layout']
      font_size = config['styles'].get('font_size', 10)
      container_height = layout.get('default_height', 600)
      zoom_level = layout.get('default_zoom', 1.0)
      layout_type = "compact"

      left_col, right_col = st.columns([layout['content_width'], layout['sidebar_width']],
                                       vertical_alignment='top', border=True)

      with left_col:
          if self.validator.text_bbox_mapping:
              self._render_compact_text_selector(layout_type)

          if self.validator.md_content:
              highlighted_content = self._highlight_selected_text(self.validator.md_content)
              # The stylesheet is emitted by render_content_by_mode itself.
              self.render_content_by_mode(
                  highlighted_content,
                  "HTML渲染",
                  font_size,
                  container_height,
                  layout_type
              )

      with right_col:
          self.create_aligned_image_display(zoom_level, "compact")
  def create_aligned_image_display(self, zoom_level: float = 1.0, layout_type: str = "aligned"):
      """Render the image panel: control row plus the annotated, interactive image.

      Args:
          zoom_level: Accepted for callers; the body uses ``self.zoom_level``
              for scaling (NOTE(review): confirm whether this argument was
              meant to be applied).
          layout_type: Prefix for the widget keys created here.
      """
      def reload_after_rotation_change():
          # Cached images and the boxes in text_bbox_mapping belong to the
          # previous angle: drop the cache, rebuild the data, rerun.
          self.clear_image_cache()
          self.validator.process_data()
          st.rerun()

      boxes_col, rotate_col, angle_col, reset_col, clear_col = st.columns(
          5, vertical_alignment="center", border=False
      )

      with boxes_col:
          self.show_all_boxes = st.checkbox(
              "显示所有框",
              value=self.show_all_boxes,
              key=f"{layout_type}_show_all_boxes"
          )

      with rotate_col:
          if st.button("🔄 旋转90度", type="secondary", key=f"{layout_type}_manual_angle"):
              self.rotated_angle = (self.rotated_angle + 90) % 360
              reload_after_rotation_change()

      with angle_col:
          # Current effective angle.
          st.metric("当前角度", f"{self.get_rotation_angle()}°", label_visibility="collapsed")

      with reset_col:
          if st.button("↺ 重置角度", key=f"{layout_type}_reset_angle"):
              self.rotated_angle = 0.0
              st.success("已重置旋转角度")
              reload_after_rotation_change()

      with clear_col:
          st.button(
              "🧹 清除选择",
              key=f"{layout_type}_clear_selection",
              on_click=self._clear_selection_callback,
              kwargs={"layout_type": layout_type},
          )

      image = self.load_and_rotate_image(self.validator.image_path)
      if not image:
          st.error("未找到对应的图片文件")
          if self.validator.image_path:
              st.write(f"期望路径: {self.validator.image_path}")
          return

      try:
          resized_image, all_boxes, selected_boxes = self.zoom_image(image, self.zoom_level)
          fig = self.create_resized_interactive_plot(
              resized_image, selected_boxes, self.zoom_level, all_boxes
          )

          export_options = {
              'format': 'png',
              'filename': 'ocr_image',
              'height': None,  # use the current height
              'width': None,   # use the current width
              'scale': 1
          }
          plot_config = {
              'displayModeBar': True,
              'modeBarButtonsToRemove': ['zoom2d', 'select2d', 'lasso2d', 'autoScale2d'],
              'scrollZoom': True,
              'doubleClick': 'reset',
              'responsive': False,  # fixed figure size instead of responsive resizing
              'toImageButtonOptions': export_options
          }

          st.plotly_chart(
              fig,
              width='stretch',
              config=plot_config,
              key=f"{layout_type}_plot"
          )
      except Exception as e:
          st.error(f"❌ 图片处理失败: {e}")
          st.exception(e)
  def zoom_image(self, image: Image.Image, current_zoom: float) -> Tuple[Image.Image, List[List[int]], List[List[int]]]:
      """Scale the image and the boxes to draw by ``current_zoom``.

      Args:
          image: Image to resize.
          current_zoom: Scale factor applied to both image size and boxes.

      Returns:
          ``(resized_image, all_boxes, selected_boxes)``. ``selected_boxes``
          holds the integer-scaled boxes of the currently selected text;
          ``all_boxes`` holds the scaled boxes of every text, and is empty
          unless ``self.show_all_boxes`` is set. Entries without a usable
          bbox are skipped in both lists.
      """
      # Never request a zero-pixel dimension for very small zoom factors.
      new_width = max(1, int(image.width * current_zoom))
      new_height = max(1, int(image.height * current_zoom))
      resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

      def usable_bbox(info):
          """Return the entry's bbox, or None when it is missing or empty."""
          bbox = info.get('bbox') if isinstance(info, dict) else None
          if bbox is None or len(bbox) == 0:
              return None
          return bbox

      mapping = self.validator.text_bbox_mapping or {}

      # Boxes of the selected text.
      selected_boxes = []
      selected_text = st.session_state.selected_text
      if selected_text and selected_text in mapping:
          for info in mapping[selected_text]:
              bbox = usable_bbox(info)
              if bbox is not None:
                  selected_boxes.append([int(coord * current_zoom) for coord in bbox])

      # Boxes of every text, only when requested. Texts without a bbox are
      # a normal case and must not abort the rendering.
      all_boxes = []
      if self.show_all_boxes:
          for info_list in mapping.values():
              for info in info_list:
                  bbox = usable_bbox(info)
                  if bbox is not None and len(bbox) >= 4:
                      all_boxes.append([coord * current_zoom for coord in bbox])

      return resized_image, all_boxes, selected_boxes
  def _add_bboxes_to_plot_batch(self, fig: go.Figure, bboxes: List[List[int]],
                                image_height: int,
                                line_color: str = "blue",
                                line_width: int = 2,
                                fill_color: str = "rgba(0, 100, 200, 0.2)"):
      """Add all boxes to the figure as rectangle shapes in one layout update.

      Args:
          fig: Figure whose layout shapes are extended.
          bboxes: Boxes as ``[x1, y1, x2, y2, ...]``; shorter entries are skipped.
          image_height: Height used to flip the y coordinates
              (``plot_y = image_height - y``).
          line_color: Outline colour.
          line_width: Outline width.
          fill_color: Fill colour.
      """
      if not bboxes:
          return

      def to_rect(box):
          left, top, right, bottom = box[:4]
          return dict(
              type="rect",
              x0=left, y0=image_height - bottom,
              x1=right, y1=image_height - top,
              line=dict(color=line_color, width=line_width),
              fillcolor=fill_color,
          )

      new_shapes = tuple(to_rect(box) for box in bboxes if len(box) >= 4)

      # One update for all shapes instead of one call per box.
      fig.update_layout(shapes=fig.layout.shapes + new_shapes)
  def _add_bboxes_as_scatter(self, fig: go.Figure, bboxes: List[List[int]],
                             image_height: int,
                             line_color: str = "blue",
                             line_width: int = 2,
                             name: str = "boxes"):
      """Draw all box outlines with a single line trace.

      Each box contributes a closed five-point outline followed by ``None``,
      which breaks the line between boxes.

      Args:
          fig: Figure the trace is added to.
          bboxes: Boxes as ``[x1, y1, x2, y2, ...]``; shorter entries are skipped.
          image_height: Height used to flip the y coordinates.
          line_color: Outline colour.
          line_width: Outline width.
          name: Trace name.
      """
      if not bboxes:
          return

      xs, ys = [], []
      for left, top, right, bottom in (box[:4] for box in bboxes if len(box) >= 4):
          low = image_height - bottom
          high = image_height - top
          xs += [left, right, right, left, left, None]
          ys += [low, low, high, high, low, None]

      outline_trace = go.Scatter(
          x=xs,
          y=ys,
          mode='lines',
          line=dict(color=line_color, width=line_width),
          name=name,
          showlegend=False,
          hoverinfo='skip'
      )
      fig.add_trace(outline_trace)
  def create_resized_interactive_plot(self, image: Image.Image, selected_boxes: List[List[int]],
                                      zoom_level: float, all_boxes: List[List[int]]) -> go.Figure:
      """Build the pannable figure showing the image with its boxes.

      Args:
          image: Image to display (already scaled by the caller).
          selected_boxes: Boxes drawn as filled red rectangles.
          zoom_level: Accepted for callers; not read by this method.
          all_boxes: Boxes drawn as blue outlines.

      Returns:
          The configured figure.
      """
      img_width, img_height = image.width, image.height

      fig = go.Figure()

      # Background image, anchored by its top edge at y = image height so it
      # spans the axis range [0, height].
      background = dict(
          source=image,
          xref="x", yref="y",
          x=0, y=img_height,
          sizex=img_width,
          sizey=img_height,
          sizing="stretch",
          opacity=1.0,
          layer="below",
          yanchor="top"
      )
      fig.add_layout_image(background)

      if all_boxes:
          self._add_bboxes_as_scatter(
              fig=fig,
              bboxes=all_boxes,
              image_height=img_height,
              line_color="rgba(0, 100, 200, 0.8)",
              line_width=2,
              name="all_boxes"
          )

      if selected_boxes:
          self._add_bboxes_to_plot_batch(
              fig=fig,
              bboxes=selected_boxes,
              image_height=img_height,
              line_color="red",
              line_width=2,
              fill_color="rgba(255, 0, 0, 0.3)"
          )

      # Display size limits.
      max_display_width = 1500
      max_display_height = 1000
      aspect_ratio = img_width / img_height

      if not self.fit_to_container:
          # Fixed-size mode: clamp each dimension to its limit.
          display_width = min(img_width, max_display_width)
          display_height = min(img_height, max_display_height)
      else:
          if aspect_ratio > 1:
              # Wide image: width is the limiting dimension.
              display_width = min(max_display_width, img_width)
              display_height = int(display_width / aspect_ratio)
          else:
              # Tall image: height is the limiting dimension.
              display_height = min(max_display_height, img_height)
              display_width = int(display_height * aspect_ratio)
          # Enforce a minimum size.
          display_width = max(display_width, 800)
          display_height = max(display_height, 600)

      # Settings shared by both (hidden) axes.
      hidden_axis = dict(
          visible=False,
          constrain="domain",
          fixedrange=False,
          autorange=False,
          showgrid=False,
          zeroline=False,
      )

      fig.update_layout(
          width=display_width,
          height=display_height,
          margin=dict(l=0, r=0, t=0, b=0),
          showlegend=False,
          plot_bgcolor='white',
          dragmode="pan",
          xaxis=dict(hidden_axis, range=[0, img_width]),
          # The y axis is locked 1:1 to x so the image keeps its proportions.
          yaxis=dict(
              hidden_axis,
              range=[0, img_height],
              scaleanchor="x",
              scaleratio=1,
          )
      )
      return fig