ocr_validator_layout.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845
  1. #!/usr/bin/env python3
  2. """
  3. OCR验证工具的布局管理模块
  4. 包含标准布局、滚动布局、紧凑布局的实现
  5. """
  6. import streamlit as st
  7. from pathlib import Path
  8. from PIL import Image
  9. from typing import Dict, List, Optional
  10. import plotly.graph_objects as go
  11. from typing import Tuple
  12. from ocr_validator_utils import (
  13. rotate_image_and_coordinates,
  14. get_ocr_tool_rotation_config,
  15. )
  16. from ocr_validator_file_utils import (
  17. convert_html_table_to_markdown,
  18. parse_html_tables,
  19. draw_bbox_on_image,
  20. detect_image_orientation_by_opencv # 新增导入
  21. )
  22. class OCRLayoutManager:
  23. """OCR布局管理器"""
  24. def __init__(self, validator):
  25. self.validator = validator
  26. self.config = validator.config
  27. self._rotated_image_cache = {}
  28. self._cache_max_size = 10
  29. self._orientation_cache = {} # 缓存方向检测结果
  30. self.rotated_angle = 0.0 # 自动检测的旋转角度缓存
  31. self.show_all_boxes = False
  32. self.fit_to_container = False
  33. self.zoom_level = 1.0
  34. def clear_image_cache(self):
  35. """清理所有图像缓存"""
  36. self._rotated_image_cache.clear()
  37. def clear_cache_for_image(self, image_path: str):
  38. """清理指定图像的所有缓存"""
  39. keys_to_remove = [key for key in self._rotated_image_cache.keys() if key.startswith(image_path)]
  40. for key in keys_to_remove:
  41. del self._rotated_image_cache[key]
  42. def get_cache_info(self) -> dict:
  43. """获取缓存信息"""
  44. return {
  45. 'cache_size': len(self._rotated_image_cache),
  46. 'cached_images': list(self._rotated_image_cache.keys()),
  47. 'max_size': self._cache_max_size
  48. }
  49. def _manage_cache_size(self):
  50. """管理缓存大小,超出限制时清理最旧的缓存"""
  51. if len(self._rotated_image_cache) > self._cache_max_size:
  52. # 删除最旧的缓存项(FIFO策略)
  53. oldest_key = next(iter(self._rotated_image_cache))
  54. del self._rotated_image_cache[oldest_key]
  55. def detect_and_suggest_rotation(self, image_path: str) -> Dict:
  56. """检测并建议图片旋转角度"""
  57. if image_path in self._orientation_cache:
  58. return self._orientation_cache[image_path]
  59. # 使用自动检测功能
  60. detection_result = detect_image_orientation_by_opencv(image_path)
  61. # 缓存结果
  62. self._orientation_cache[image_path] = detection_result
  63. return detection_result
  64. def get_rotation_angle(self) -> float:
  65. """获取旋转角度 - 增强版本支持自动检测"""
  66. # 如果没有预设角度,优先人工设置
  67. if hasattr(self, 'rotated_angle') and self.rotated_angle != 0:
  68. return self.rotated_angle
  69. # 尝试从OCR数据中获取(PPStructV3等)
  70. if self.validator.ocr_data:
  71. for item in self.validator.ocr_data:
  72. if isinstance(item, dict) and 'rotation_angle' in item:
  73. return item['rotation_angle']
  74. # 如果没有预设角度,尝试自动检测
  75. if hasattr(self, 'rotated_angle'):
  76. return self.rotated_angle
  77. return 0.0
    def load_and_rotate_image(self, image_path: str) -> Optional[Image.Image]:
        """Load the image at *image_path*, rotate it if an angle applies, and cache it.

        The angle comes from ``get_rotation_angle()``. Results are cached under
        ``f"{image_path}_{rotation_angle}"``; a cache hit returns immediately and
        skips all bbox processing below.

        Side effect: on the path that calls ``rotate_image_and_coordinates``,
        the ``'bbox'`` entries in ``self.validator.text_bbox_mapping`` are
        overwritten with the returned boxes.

        Args:
            image_path: Filesystem path of the image.

        Returns:
            The (possibly rotated) image, or ``None`` when the path is empty,
            does not exist, or any exception is raised while loading/rotating
            (the error is reported through ``st.error``).
        """
        if not image_path or not Path(image_path).exists():
            return None
        # Check the cache first.
        rotation_angle = self.get_rotation_angle()
        cache_key = f"{image_path}_{rotation_angle}"
        if cache_key in self._rotated_image_cache:
            return self._rotated_image_cache[cache_key]
        try:
            image = Image.open(image_path)
            # Rotation requested.
            if rotation_angle != 0:
                # Fetch the rotation settings of the OCR tool that produced the data.
                rotation_config = get_ocr_tool_rotation_config(self.validator.ocr_data, self.config)
                # (Two st.info notices about the detected angle and the tool's
                # coordinate mode used to be emitted here; they are disabled.)
                # Decide whether the coordinates need rotating as well.
                if rotation_config['coordinates_are_pre_rotated']:
                    # Image angle and coordinate angle differ, e.g. PPStructV3:
                    # image at 0 degrees, coordinates already rotated by 270.
                    # In that case only the image is rotated; coordinates stay as-is.
                    # NOTE(review): when self.rotated_angle is non-zero,
                    # get_rotation_angle() returns that same value, so this sum is
                    # twice the stored angle - confirm this is intended.
                    img_rotation_angle = (rotation_angle + self.rotated_angle) % 360
                    if img_rotation_angle == 270:
                        rotated_image = image.rotate(-90, expand=True)  # 90 degrees clockwise
                    elif img_rotation_angle == 90:
                        rotated_image = image.rotate(90, expand=True)  # 90 degrees counter-clockwise
                    elif img_rotation_angle == 180:
                        rotated_image = image.rotate(180, expand=True)  # 180 degrees
                    else:
                        rotated_image = image.rotate(-img_rotation_angle, expand=True)
                    if self.rotated_angle == 0:
                        # No coordinate transform needed: the JSON already holds
                        # the correct coordinates. Cache and return right away.
                        self._rotated_image_cache[cache_key] = rotated_image
                        self._manage_cache_size()
                        return rotated_image
                    image = rotated_image  # keep working on the rotated image below
                # Dots OCR: both the image and the coordinates need rotating.
                # NOTE(review): this section is also reached on the pre-rotated
                # path when self.rotated_angle != 0 - verify against
                # rotate_image_and_coordinates.
                # Gather every bbox into one flat list.
                all_bboxes = []
                text_to_bbox_map = {}  # text -> indices of its boxes in all_bboxes
                bbox_index = 0
                for text, info_list in self.validator.text_bbox_mapping.items():
                    text_to_bbox_map[text] = []
                    for info in info_list:
                        all_bboxes.append(info['bbox'])
                        text_to_bbox_map[text].append(bbox_index)
                        bbox_index += 1
                # Rotate the image and (unless pre-rotated) the coordinates.
                rotated_image, rotated_bboxes = rotate_image_and_coordinates(
                    image, rotation_angle, all_bboxes,
                    rotate_coordinates=not rotation_config['coordinates_are_pre_rotated']
                )
                # Write the returned boxes back, using the recorded indices so
                # each box lands on the entry it came from.
                for text, bbox_indices in text_to_bbox_map.items():
                    for i, bbox_idx in enumerate(bbox_indices):
                        if bbox_idx < len(rotated_bboxes) and i < len(self.validator.text_bbox_mapping[text]):
                            self.validator.text_bbox_mapping[text][i]['bbox'] = rotated_bboxes[bbox_idx]
                # Cache the result.
                self._rotated_image_cache[cache_key] = rotated_image
                self._manage_cache_size()
                return rotated_image
            else:
                # No rotation needed: cache the image as opened.
                self._rotated_image_cache[cache_key] = image
                self._manage_cache_size()  # evict if the cache grew past its limit
                return image
        except Exception as e:
            st.error(f"❌ 图像加载失败: {e}")
            return None
  148. def render_content_by_mode(self, content: str, render_mode: str, font_size: int,
  149. container_height: int, layout_type: str,
  150. highlight_config: Optional[Dict] = None):
  151. """
  152. 根据渲染模式显示内容 - 增强版本
  153. Args:
  154. content: 要渲染的内容
  155. render_mode: 渲染模式
  156. font_size: 字体大小
  157. container_height: 容器高度
  158. layout_type: 布局类型
  159. highlight_config: 高亮配置 {'has_bbox': bool, 'match_type': str}
  160. """
  161. if content is None or render_mode is None:
  162. return
  163. if render_mode == "HTML渲染":
  164. # 🎯 构建样式 - 包含基础样式和高亮样式
  165. content_style = f"""
  166. <style>
  167. /* ========== 基础容器样式 ========== */
  168. .{layout_type}-content-display {{
  169. height: {container_height}px;
  170. overflow-x: auto;
  171. overflow-y: auto;
  172. font-size: {font_size}px !important;
  173. line-height: 1.4;
  174. color: #333333 !important;
  175. background-color: #fafafa !important;
  176. padding: 10px;
  177. border-radius: 5px;
  178. border: 1px solid #ddd;
  179. max-width: 100%;
  180. }}
  181. /* ========== 表格样式 ========== */
  182. .{layout_type}-content-display table {{
  183. width: 100%;
  184. border-collapse: collapse;
  185. margin: 10px 0;
  186. white-space: nowrap;
  187. }}
  188. .{layout_type}-content-display th,
  189. .{layout_type}-content-display td {{
  190. border: 1px solid #ddd;
  191. padding: 8px;
  192. text-align: left;
  193. max-width: 300px;
  194. word-wrap: break-word;
  195. word-break: break-all;
  196. vertical-align: top;
  197. }}
  198. .{layout_type}-content-display th {{
  199. background-color: #f5f5f5;
  200. position: sticky;
  201. top: 0;
  202. z-index: 1;
  203. font-weight: bold;
  204. }}
  205. /* 数字列右对齐 */
  206. .{layout_type}-content-display td.number {{
  207. text-align: right;
  208. white-space: nowrap;
  209. font-family: 'Monaco', 'Menlo', monospace;
  210. }}
  211. /* 短文本列不换行 */
  212. .{layout_type}-content-display td.short-text {{
  213. white-space: nowrap;
  214. min-width: 80px;
  215. }}
  216. /* ========== 图片样式 ========== */
  217. .{layout_type}-content-display img {{
  218. max-width: 100%;
  219. height: auto;
  220. border-radius: 4px;
  221. margin: 10px 0;
  222. }}
  223. /* ========== 响应式设计 ========== */
  224. @media (max-width: 768px) {{
  225. .{layout_type}-content-display table {{
  226. font-size: {max(font_size-2, 8)}px;
  227. }}
  228. .{layout_type}-content-display th,
  229. .{layout_type}-content-display td {{
  230. padding: 4px;
  231. max-width: 150px;
  232. }}
  233. }}
  234. /* ========== 高亮文本样式 ========== */
  235. .{layout_type}-content-display .highlight-text {{
  236. padding: 2px 4px;
  237. border-radius: 3px;
  238. cursor: pointer;
  239. font-weight: 500;
  240. transition: all 0.2s ease;
  241. }}
  242. .{layout_type}-content-display .highlight-text:hover {{
  243. opacity: 0.8;
  244. transform: scale(1.02);
  245. }}
  246. /* 🎯 精确匹配且有框 - 绿色 */
  247. .{layout_type}-content-display .highlight-text.selected-highlight {{
  248. background-color: #4caf50 !important;
  249. color: white !important;
  250. border: 1px solid #2e7d32 !important;
  251. }}
  252. /* 🎯 OCR匹配 - 蓝色 */
  253. .{layout_type}-content-display .highlight-text.ocr-match {{
  254. background-color: #2196f3 !important;
  255. color: white !important;
  256. border: 1px solid #1565c0 !important;
  257. }}
  258. /* 🎯 无边界框 - 橙色虚线 */
  259. .{layout_type}-content-display .highlight-text.no-bbox {{
  260. background-color: #ff9800 !important;
  261. color: white !important;
  262. border: 1px dashed #f57c00 !important;
  263. }}
  264. /* 🎯 默认高亮 - 黄色 */
  265. .{layout_type}-content-display .highlight-text.default {{
  266. background-color: #ffeb3b !important;
  267. color: #333333 !important;
  268. border: 1px solid #fbc02d !important;
  269. }}
  270. </style>
  271. """
  272. st.markdown(content_style, unsafe_allow_html=True)
  273. st.markdown(f'<div class="{layout_type}-content-display">{content}</div>',
  274. unsafe_allow_html=True)
  275. elif render_mode == "Markdown渲染":
  276. converted_content = convert_html_table_to_markdown(content)
  277. st.markdown(converted_content, unsafe_allow_html=True)
  278. elif render_mode == "DataFrame表格":
  279. if '<table' in content.lower():
  280. self.validator.display_html_table_as_dataframe(content)
  281. else:
  282. st.info("当前内容中没有检测到HTML表格")
  283. st.markdown(content, unsafe_allow_html=True)
  284. else: # 原始文本
  285. st.text_area(
  286. "MD内容预览",
  287. content,
  288. height=300,
  289. key=f"{layout_type}_text_area"
  290. )
  291. def create_compact_layout(self, config: Dict):
  292. """创建紧凑的对比布局 - 增强搜索功能"""
  293. layout = config['styles']['layout']
  294. font_size = config['styles'].get('font_size', 10)
  295. container_height = layout.get('default_height', 600)
  296. zoom_level = layout.get('default_zoom', 1.0)
  297. layout_type = "compact"
  298. left_col, right_col = st.columns([layout['content_width'], layout['sidebar_width']],
  299. vertical_alignment='top', border=True)
  300. with left_col:
  301. if self.validator.text_bbox_mapping:
  302. # 搜索输入框
  303. search_col, select_col = st.columns([1, 2])
  304. if "compact_search_query" not in st.session_state:
  305. st.session_state.compact_search_query = ""
  306. with search_col:
  307. search_query = st.text_input(
  308. "搜索文本",
  309. placeholder="输入关键词...",
  310. value=st.session_state.compact_search_query,
  311. key=f"{layout_type}_search_input",
  312. label_visibility="collapsed"
  313. )
  314. st.session_state.compact_search_query = search_query
  315. # 🎯 增强搜索逻辑:构建选项列表
  316. text_options = ["请选择文本..."]
  317. text_display = ["请选择文本..."]
  318. match_info = [None] # 记录匹配信息
  319. for text, info_list in self.validator.text_bbox_mapping.items():
  320. # 🔑 关键改进:同时搜索 text 和 matched_text
  321. if search_query and search_query.strip():
  322. query_lower = search_query.lower()
  323. # 1. 检查原始文本
  324. text_match = query_lower in text.lower()
  325. # 2. 检查 matched_text(OCR识别文本)
  326. matched_text_match = False
  327. matched_text = None
  328. if info_list and isinstance(info_list[0], dict):
  329. matched_text = info_list[0].get('matched_text', '')
  330. matched_text_match = query_lower in matched_text.lower() if matched_text else False
  331. # 如果都不匹配,跳过
  332. if not text_match and not matched_text_match:
  333. continue
  334. # 记录匹配类型
  335. if text_match:
  336. match_type = "exact"
  337. match_source = text
  338. else:
  339. match_type = "ocr"
  340. match_source = matched_text
  341. else:
  342. match_type = None
  343. match_source = text
  344. text_options.append(text)
  345. # 🎯 构建显示文本(带匹配提示)
  346. if info_list and isinstance(info_list[0], dict):
  347. first_info = info_list[0]
  348. # 检查是否有 bbox
  349. has_bbox = 'bbox' in first_info and first_info['bbox']
  350. # 表格单元格显示
  351. if 'row' in first_info and 'col' in first_info:
  352. display_text = f"[R{first_info['row']},C{first_info['col']}] {text}"
  353. else:
  354. display_text = text
  355. # 🎯 添加匹配提示
  356. if match_type == "ocr":
  357. display_text = f"🔍 {display_text} (OCR: {match_source[:20]}...)"
  358. elif not has_bbox:
  359. display_text = f"⚠️ {display_text} (无框)"
  360. # 截断过长文本
  361. if len(display_text) > 60:
  362. display_text = display_text[:57] + "..."
  363. else:
  364. display_text = text[:57] + "..." if len(text) > 60 else text
  365. text_display.append(display_text)
  366. match_info.append({
  367. 'type': match_type,
  368. 'source': match_source,
  369. 'has_bbox': has_bbox if info_list else False
  370. })
  371. # 🎯 显示搜索统计
  372. if search_query and search_query.strip():
  373. ocr_matches = sum(1 for m in match_info[1:] if m and m['type'] == 'ocr')
  374. no_bbox_count = sum(1 for m in match_info[1:] if m and not m['has_bbox'])
  375. stat_parts = [f"找到 {len(text_options)-1} 个匹配项"]
  376. if ocr_matches > 0:
  377. stat_parts.append(f"🔍 {ocr_matches} 个OCR匹配")
  378. if no_bbox_count > 0:
  379. stat_parts.append(f"⚠️ {no_bbox_count} 个无框")
  380. st.caption(" | ".join(stat_parts))
  381. # 确定默认选中的索引
  382. default_index = 0
  383. if st.session_state.selected_text and st.session_state.selected_text in text_options:
  384. default_index = text_options.index(st.session_state.selected_text)
  385. with select_col:
  386. selected_index = st.selectbox(
  387. "快速定位文本",
  388. range(len(text_options)),
  389. index=default_index,
  390. format_func=lambda x: text_display[x] if x < len(text_display) else "",
  391. label_visibility="collapsed",
  392. key=f"{layout_type}_quick_text_selector"
  393. )
  394. # 🎯 显示匹配详情
  395. if selected_index > 0:
  396. st.session_state.selected_text = text_options[selected_index]
  397. # 获取匹配信息
  398. selected_match_info = match_info[selected_index]
  399. if selected_match_info:
  400. if selected_match_info['type'] == 'ocr':
  401. st.info(f"🔍 **OCR识别文本匹配**: `{selected_match_info['source']}`")
  402. elif not selected_match_info['has_bbox']:
  403. st.warning(f"⚠️ **未找到边界框**: 文本在MD中存在,但没有对应的坐标信息")
  404. # 🎯 增强高亮显示逻辑
  405. if self.validator.md_content:
  406. highlighted_content = self.validator.md_content
  407. if st.session_state.selected_text:
  408. selected_text = st.session_state.selected_text
  409. # 获取匹配信息
  410. info_list = self.validator.text_bbox_mapping.get(selected_text, [])
  411. has_bbox = False
  412. matched_text = None
  413. match_type = None
  414. if info_list and isinstance(info_list[0], dict):
  415. has_bbox = 'bbox' in info_list[0] and info_list[0]['bbox']
  416. matched_text = info_list[0].get('matched_text', '')
  417. # 🔑 判断匹配类型
  418. if matched_text and matched_text != selected_text:
  419. match_type = "ocr"
  420. elif has_bbox:
  421. match_type = "exact"
  422. else:
  423. match_type = "no_bbox"
  424. # 🎯 应用高亮
  425. if len(selected_text) > 2:
  426. # 1. 高亮原始文本
  427. if selected_text in highlighted_content:
  428. if match_type == "exact":
  429. highlight_class = "highlight-text selected-highlight"
  430. elif match_type == "no_bbox":
  431. highlight_class = "highlight-text no-bbox"
  432. else:
  433. highlight_class = "highlight-text default"
  434. highlighted_content = highlighted_content.replace(
  435. selected_text,
  436. f'<span class="{highlight_class}" title="{selected_text}">{selected_text}</span>'
  437. )
  438. # 2. 如果有 matched_text 且不同,也高亮
  439. if matched_text and matched_text != selected_text and matched_text in highlighted_content:
  440. highlighted_content = highlighted_content.replace(
  441. matched_text,
  442. f'<span class="highlight-text ocr-match" title="OCR: {matched_text}">{matched_text}</span>'
  443. )
  444. # 🎯 调用渲染方法(样式已内置)
  445. self.render_content_by_mode(
  446. highlighted_content,
  447. "HTML渲染",
  448. font_size,
  449. container_height,
  450. layout_type
  451. )
  452. with right_col:
  453. self.create_aligned_image_display(zoom_level, "compact")
  454. def create_aligned_image_display(self, zoom_level: float = 1.0, layout_type: str = "aligned"):
  455. """创建响应式图片显示"""
  456. # st.header("🖼️ 原图标注")
  457. # 图片控制选项
  458. col1, col2, col3, col4, col5 = st.columns(5, vertical_alignment="center", border= False)
  459. with col1:
  460. # 判断{layout_type}_show_all_boxes是否有值,如果有值直接使用,否则默认False
  461. # if f"{layout_type}_show_all_boxes" not in st.session_state:
  462. # st.session_state[f"{layout_type}_show_all_boxes"] = False
  463. show_all_boxes = st.checkbox(
  464. "显示所有框",
  465. # value=st.session_state[f"{layout_type}_show_all_boxes"],
  466. value = self.show_all_boxes,
  467. key=f"{layout_type}_show_all_boxes"
  468. )
  469. if show_all_boxes != self.show_all_boxes:
  470. self.show_all_boxes = show_all_boxes
  471. with col2:
  472. if st.button("🔄 旋转90度", type="secondary", key=f"{layout_type}_manual_angle"):
  473. self.rotated_angle = (self.rotated_angle + 90) % 360
  474. # 需要清除图片缓存,以及text_bbox_mapping中的bbox
  475. self.clear_image_cache()
  476. self.validator.process_data()
  477. st.rerun()
  478. with col3:
  479. # 显示当前角度状态
  480. current_angle = self.get_rotation_angle()
  481. st.metric("当前角度", f"{current_angle}°", label_visibility="collapsed")
  482. with col4:
  483. if st.button("↺ 重置角度", key=f"{layout_type}_reset_angle"):
  484. self.rotated_angle = 0.0
  485. st.success("已重置旋转角度")
  486. # 需要清除图片缓存,以及text_bbox_mapping中的bbox
  487. self.clear_image_cache()
  488. self.validator.process_data()
  489. st.rerun()
  490. with col5:
  491. if st.button("🧹 清除选择", key=f"{layout_type}_clear_selection"):
  492. # 清除选中的文本
  493. st.session_state.selected_text = None
  494. # 清除搜索框内容
  495. st.session_state.compact_search_query = None
  496. st.rerun()
  497. # 使用增强的图像加载方法
  498. image = self.load_and_rotate_image(self.validator.image_path)
  499. if image:
  500. try:
  501. resized_image, all_boxes, selected_boxes = self.zoom_image(image, self.zoom_level)
  502. # 创建交互式图片
  503. fig = self.create_resized_interactive_plot(resized_image, selected_boxes, self.zoom_level, all_boxes)
  504. plot_config = {
  505. 'displayModeBar': True,
  506. 'modeBarButtonsToRemove': ['zoom2d', 'select2d', 'lasso2d', 'autoScale2d'],
  507. 'scrollZoom': True,
  508. 'doubleClick': 'reset',
  509. 'responsive': False, # 关键:禁用响应式,使用固定尺寸
  510. 'toImageButtonOptions': {
  511. 'format': 'png',
  512. 'filename': 'ocr_image',
  513. 'height': None, # 使用当前高度
  514. 'width': None, # 使用当前宽度
  515. 'scale': 1
  516. }
  517. }
  518. # 🔧 修复:使用 use_container_width 替代废弃的参数
  519. st.plotly_chart(
  520. fig,
  521. use_container_width=True, # 🎯 使用容器宽度
  522. config=plot_config,
  523. key=f"{layout_type}_plot"
  524. )
  525. except Exception as e:
  526. st.error(f"❌ 图片处理失败: {e}")
  527. st.exception(e)
  528. else:
  529. st.error("未找到对应的图片文件")
  530. if self.validator.image_path:
  531. st.write(f"期望路径: {self.validator.image_path}")
  532. # st.markdown('</div>', unsafe_allow_html=True)
  533. def zoom_image(self, image: Image.Image, current_zoom: float) -> Tuple[Image.Image, List[List[int]], List[List[int]]]:
  534. """缩放图像"""
  535. # 根据缩放级别调整图片大小
  536. new_width = int(image.width * current_zoom)
  537. new_height = int(image.height * current_zoom)
  538. resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
  539. # 计算选中的bbox
  540. selected_boxes = []
  541. if st.session_state.selected_text and st.session_state.selected_text in self.validator.text_bbox_mapping:
  542. info_list = self.validator.text_bbox_mapping[st.session_state.selected_text]
  543. for info in info_list:
  544. if 'bbox' in info:
  545. bbox = info['bbox']
  546. selected_box = [int(coord * current_zoom) for coord in bbox]
  547. selected_boxes.append(selected_box)
  548. # 收集所有框
  549. all_boxes = []
  550. if self.show_all_boxes:
  551. for text, info_list in self.validator.text_bbox_mapping.items():
  552. for info in info_list:
  553. bbox = info['bbox']
  554. if len(bbox) >= 4:
  555. scaled_bbox = [coord * current_zoom for coord in bbox]
  556. all_boxes.append(scaled_bbox)
  557. return resized_image, all_boxes, selected_boxes
  558. def _add_bboxes_to_plot_batch(self, fig: go.Figure, bboxes: List[List[int]],
  559. image_height: int,
  560. line_color: str = "blue",
  561. line_width: int = 1,
  562. fill_color: str = "rgba(0, 100, 200, 0.2)"):
  563. """
  564. 批量添加边界框(性能优化版)
  565. """
  566. if not bboxes or len(bboxes) == 0:
  567. return
  568. # 🎯 关键优化:构建 shapes 列表,一次性添加
  569. shapes = []
  570. for bbox in bboxes:
  571. if len(bbox) < 4:
  572. continue
  573. x1, y1, x2, y2 = bbox[:4]
  574. # 转换坐标
  575. plot_x1 = x1
  576. plot_x2 = x2
  577. plot_y1 = image_height - y2
  578. plot_y2 = image_height - y1
  579. shapes.append(dict(
  580. type="rect",
  581. x0=plot_x1, y0=plot_y1,
  582. x1=plot_x2, y1=plot_y2,
  583. line=dict(color=line_color, width=line_width),
  584. fillcolor=fill_color,
  585. ))
  586. # 🎯 一次性更新所有形状
  587. fig.update_layout(shapes=fig.layout.shapes + tuple(shapes))
  588. def _add_bboxes_as_scatter(self, fig: go.Figure, bboxes: List[List[int]],
  589. image_height: int,
  590. line_color: str = "blue",
  591. line_width: int = 1,
  592. name: str = "boxes"):
  593. """
  594. 使用 Scatter 绘制边界框(极致性能优化)
  595. """
  596. if not bboxes or len(bboxes) == 0:
  597. return
  598. # 🎯 收集所有矩形的边框线坐标
  599. x_coords = []
  600. y_coords = []
  601. for bbox in bboxes:
  602. if len(bbox) < 4:
  603. continue
  604. x1, y1, x2, y2 = bbox[:4]
  605. # 转换坐标
  606. plot_y1 = image_height - y2
  607. plot_y2 = image_height - y1
  608. # 绘制矩形:5个点(闭合)
  609. x_coords.extend([x1, x2, x2, x1, x1, None]) # None用于断开线段
  610. y_coords.extend([plot_y1, plot_y1, plot_y2, plot_y2, plot_y1, None])
  611. # 🎯 一次性添加所有边框
  612. fig.add_trace(go.Scatter(
  613. x=x_coords,
  614. y=y_coords,
  615. mode='lines',
  616. line=dict(color=line_color, width=line_width),
  617. name=name,
  618. showlegend=False,
  619. hoverinfo='skip'
  620. ))
  621. def create_resized_interactive_plot(self, image: Image.Image, selected_boxes: List[List[int]],
  622. zoom_level: float, all_boxes: List[List[int]]) -> go.Figure:
  623. """创建可调整大小的交互式图片 - 修复容器溢出问题"""
  624. fig = go.Figure()
  625. # 添加图片 - Plotly坐标系,原点在左下角
  626. fig.add_layout_image(
  627. dict(
  628. source=image,
  629. xref="x", yref="y",
  630. x=0, y=image.height, # 图片左下角在Plotly坐标系中的位置
  631. sizex=image.width,
  632. sizey=image.height,
  633. sizing="stretch",
  634. opacity=1.0,
  635. layer="below",
  636. yanchor="top" # 确保图片顶部对齐
  637. )
  638. )
  639. # 显示所有bbox(淡蓝色)
  640. if all_boxes:
  641. self._add_bboxes_as_scatter(
  642. fig=fig,
  643. bboxes=all_boxes,
  644. image_height=image.height,
  645. line_color="rgba(0, 100, 200, 0.8)",
  646. line_width=1,
  647. name="all_boxes"
  648. )
  649. # 高亮显示选中的bbox(红色)
  650. if selected_boxes:
  651. self._add_bboxes_to_plot_batch(
  652. fig=fig,
  653. bboxes=selected_boxes,
  654. image_height=image.height,
  655. line_color="red",
  656. line_width=1,
  657. fill_color="rgba(255, 0, 0, 0.3)"
  658. )
  659. # 修复:优化显示尺寸计算
  660. max_display_width = 1500
  661. max_display_height = 1000
  662. # 计算合适的显示尺寸,保持宽高比
  663. aspect_ratio = image.width / image.height
  664. if self.fit_to_container:
  665. # 自适应容器模式
  666. if aspect_ratio > 1: # 宽图
  667. display_width = min(max_display_width, image.width)
  668. display_height = int(display_width / aspect_ratio)
  669. else: # 高图
  670. display_height = min(max_display_height, image.height)
  671. display_width = int(display_height * aspect_ratio)
  672. # 确保不会太小
  673. display_width = max(display_width, 800)
  674. display_height = max(display_height, 600)
  675. else:
  676. # 固定尺寸模式,但仍要考虑容器限制
  677. display_width = min(image.width, max_display_width)
  678. display_height = min(image.height, max_display_height)
  679. # 设置布局 - 关键修改
  680. fig.update_layout(
  681. width=display_width,
  682. height=display_height,
  683. margin=dict(l=0, r=0, t=0, b=0),
  684. showlegend=False,
  685. plot_bgcolor='white',
  686. dragmode="pan",
  687. # 关键:让图表自适应容器
  688. # autosize=True, # 启用自动调整大小
  689. xaxis=dict(
  690. visible=False,
  691. range=[0, image.width],
  692. constrain="domain",
  693. fixedrange=False,
  694. autorange=False,
  695. showgrid=False,
  696. zeroline=False,
  697. ),
  698. # 修复:Y轴设置,确保范围正确
  699. yaxis=dict(
  700. visible=False,
  701. range=[0, image.height], # 确保Y轴范围从0到图片高度
  702. constrain="domain",
  703. scaleanchor="x",
  704. scaleratio=1,
  705. fixedrange=False,
  706. autorange=False,
  707. showgrid=False,
  708. zeroline=False
  709. )
  710. )
  711. return fig