#!/usr/bin/env python3
"""
Layout management module for the OCR validation tool.

Contains the implementations of the standard, scrolling and compact layouts.
"""

import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import plotly.graph_objects as go
import streamlit as st
from PIL import Image

from ocr_validator_utils import (
    rotate_image_and_coordinates,
    get_ocr_tool_rotation_config,
)
from ocr_validator_file_utils import (
    convert_html_table_to_markdown,
    parse_html_tables,
    draw_bbox_on_image,
    detect_image_orientation_by_opencv,
)


class OCRLayoutManager:
    """Layout manager for the OCR validation UI.

    Holds the per-session view state (manual rotation, zoom, "show all boxes")
    and a small FIFO cache of rotated images keyed by
    ``f"{image_path}_{rotation_angle}"``.
    """

    def __init__(self, validator):
        """Bind the manager to *validator* and initialise view state.

        Args:
            validator: Object exposing ``config``, ``ocr_data``,
                ``text_bbox_mapping``, ``md_content``, ``image_path`` and
                ``process_data()`` (all of which are read by this class).
        """
        self.validator = validator
        self.config = validator.config
        self._rotated_image_cache = {}
        self._cache_max_size = 10
        self._orientation_cache = {}  # orientation-detection results per image path
        self.rotated_angle = 0.0  # manual rotation (degrees), driven by the rotate/reset buttons
        self.show_all_boxes = False
        self.fit_to_container = False
        self.zoom_level = 1.0

    # ------------------------------------------------------------------
    # Cache handling
    # ------------------------------------------------------------------
    def clear_image_cache(self):
        """Drop every cached rotated image (the orientation cache is kept)."""
        self._rotated_image_cache.clear()

    def clear_cache_for_image(self, image_path: str):
        """Drop the cached rotated images that belong to *image_path*.

        Cache keys have the form ``f"{image_path}_{angle}"``; the path part is
        compared exactly so that images whose path merely starts with
        *image_path* are not evicted by accident.
        """
        stale_keys = [
            key for key in self._rotated_image_cache
            if key.rpartition('_')[0] == image_path
        ]
        for key in stale_keys:
            del self._rotated_image_cache[key]

    def get_cache_info(self) -> dict:
        """Return the size, keys and capacity of the rotated-image cache."""
        return {
            'cache_size': len(self._rotated_image_cache),
            'cached_images': list(self._rotated_image_cache.keys()),
            'max_size': self._cache_max_size,
        }

    def _manage_cache_size(self):
        """Evict the oldest entries (FIFO) until the cache fits its limit."""
        # dicts preserve insertion order, so the first key is the oldest one.
        while len(self._rotated_image_cache) > self._cache_max_size:
            oldest_key = next(iter(self._rotated_image_cache))
            del self._rotated_image_cache[oldest_key]

    def _cache_rotated_image(self, cache_key: str, image: Image.Image) -> Image.Image:
        """Store *image* under *cache_key*, enforce the size limit, return it."""
        self._rotated_image_cache[cache_key] = image
        self._manage_cache_size()
        return image

    # ------------------------------------------------------------------
    # Rotation
    # ------------------------------------------------------------------
    def detect_and_suggest_rotation(self, image_path: str) -> Dict:
        """Return the orientation-detection result for *image_path*.

        The result of ``detect_image_orientation_by_opencv`` is memoised per
        path in ``self._orientation_cache``.
        """
        if image_path in self._orientation_cache:
            return self._orientation_cache[image_path]

        detection_result = detect_image_orientation_by_opencv(image_path)
        self._orientation_cache[image_path] = detection_result
        return detection_result

    def get_rotation_angle(self) -> float:
        """Return the rotation angle to apply to the current document.

        Priority:
            1. a non-zero manual angle (``self.rotated_angle``);
            2. the first ``'rotation_angle'`` found in the validator's OCR data;
            3. ``0.0``.
        """
        manual_angle = getattr(self, 'rotated_angle', 0)
        if manual_angle != 0:
            return manual_angle

        for item in self.validator.ocr_data or []:
            if isinstance(item, dict) and 'rotation_angle' in item:
                return item['rotation_angle']

        return 0.0

    def load_and_rotate_image(self, image_path: str) -> Optional[Image.Image]:
        """Load *image_path* and rotate it (and, if needed, the bboxes).

        Args:
            image_path: Path of the image on disk.

        Returns:
            The (possibly rotated) image, or ``None`` when the path is empty,
            the file does not exist, or loading/rotating fails (the failure is
            reported through ``st.error``).

        Side effects:
            * Results are cached per ``(image_path, rotation_angle)``.
            * When rotation is applied, the ``'bbox'`` entries of
              ``self.validator.text_bbox_mapping`` are overwritten in place
              with the values returned by ``rotate_image_and_coordinates``.
        """
        if not image_path or not Path(image_path).exists():
            return None

        rotation_angle = self.get_rotation_angle()
        cache_key = f"{image_path}_{rotation_angle}"
        if cache_key in self._rotated_image_cache:
            return self._rotated_image_cache[cache_key]

        try:
            image = Image.open(image_path)

            if rotation_angle == 0:
                # Nothing to rotate: cache the original image as-is.
                return self._cache_rotated_image(cache_key, image)

            rotation_config = get_ocr_tool_rotation_config(self.validator.ocr_data, self.config)
            pre_rotated = rotation_config['coordinates_are_pre_rotated']

            if pre_rotated:
                # The OCR coordinates already refer to the rotated page (e.g.
                # PPStructV3: image at 0 degrees, coordinates rotated by 270),
                # so only the image has to be rotated here.
                # NOTE(review): when a manual angle is set, get_rotation_angle()
                # returns that same angle, so it is counted twice in this sum —
                # confirm this is intended.
                img_rotation_angle = (rotation_angle + self.rotated_angle) % 360
                pil_angle = {270: -90, 90: 90, 180: 180}.get(
                    img_rotation_angle, -img_rotation_angle
                )
                rotated_image = image.rotate(pil_angle, expand=True)

                if self.rotated_angle == 0:
                    # Coordinates in the JSON are already correct; done.
                    return self._cache_rotated_image(cache_key, rotated_image)

                # A manual rotation is active: keep processing the rotated image.
                image = rotated_image

            # Flatten every bbox record so the rotated values can be written
            # back to exactly the record they came from.
            bbox_records = [
                info
                for info_list in self.validator.text_bbox_mapping.values()
                for info in info_list
            ]
            all_bboxes = [info['bbox'] for info in bbox_records]

            rotated_image, rotated_bboxes = rotate_image_and_coordinates(
                image,
                rotation_angle,
                all_bboxes,
                rotate_coordinates=not pre_rotated,
            )

            # NOTE(review): the bboxes are mutated in place; if this code runs
            # again for the same data without validator.process_data() being
            # called in between (e.g. after a cache eviction) they would be
            # transformed a second time — confirm callers always reset first.
            for info, rotated_bbox in zip(bbox_records, rotated_bboxes):
                info['bbox'] = rotated_bbox

            return self._cache_rotated_image(cache_key, rotated_image)

        except Exception as e:  # UI boundary: report instead of crashing the page
            st.error(f"❌ 图像加载失败: {e}")
            return None

    def _apply_manual_rotation(self, angle: float):
        """Set the manual angle, invalidate derived state and rerun the app."""
        self.rotated_angle = angle
        # Both the cached images and the bboxes in text_bbox_mapping depend on
        # the angle, so both have to be rebuilt.
        self.clear_image_cache()
        self.validator.process_data()
        st.rerun()

    # ------------------------------------------------------------------
    # Content rendering
    # ------------------------------------------------------------------
    def render_content_section(self, layout_type: str = "compact"):
        """Render the OCR content header and the text selector.

        Selecting an entry stores it in ``st.session_state.selected_text``.

        Args:
            layout_type: Prefix used to build a unique widget key.
        """
        st.header("📄 OCR识别内容")

        if not self.validator.text_bbox_mapping:
            st.warning("没有找到可点击的文本")
            return

        text_options = ["请选择文本..."] + list(self.validator.text_bbox_mapping.keys())

        def shorten(index: int) -> str:
            label = text_options[index]
            return label[:50] + "..." if len(label) > 50 else label

        selected_index = st.selectbox(
            "选择要校验的文本",
            range(len(text_options)),
            format_func=shorten,
            key=f"{layout_type}_text_selector",
        )
        # Index 0 is the "please choose" placeholder.
        if selected_index > 0:
            st.session_state.selected_text = text_options[selected_index]

    def render_md_content(self, layout_type: str):
        """Render the search box and return the Markdown content to display.

        Args:
            layout_type: Prefix used to build a unique widget key.

        Returns:
            ``None`` when the validator has no Markdown content; otherwise the
            content, reduced to the lines containing the search term
            (case-insensitive) when one was entered.
        """
        if not self.validator.md_content:
            # A single None (not a tuple) so that render_content_by_mode's
            # ``content is None`` guard works and the return shape is uniform.
            return None

        search_term = st.text_input(
            "🔍 搜索文本内容",
            placeholder="输入关键词搜索...",
            key=f"{layout_type}_search",
        )

        display_content = self.validator.md_content
        if search_term:
            needle = search_term.lower()
            filtered_lines = [
                line for line in display_content.split('\n') if needle in line.lower()
            ]
            display_content = '\n'.join(filtered_lines)
            if filtered_lines:
                st.success(f"找到 {len(filtered_lines)} 行包含 '{search_term}'")
            else:
                st.warning(f"未找到包含 '{search_term}' 的内容")

        return display_content

    def render_content_by_mode(self, content: str, render_mode: str, font_size: int,
                               container_height: int, layout_type: str):
        """Display *content* according to *render_mode*.

        Args:
            content: Markdown/HTML text to show; nothing is rendered for None.
            render_mode: ``"HTML渲染"``, ``"Markdown渲染"`` or ``"DataFrame表格"``.
            font_size: Font size in pixels for the HTML container.
            container_height: Height in pixels of the HTML container.
            layout_type: Prefix used for the CSS class of the HTML container.
        """
        if content is None or render_mode is None:
            return

        if render_mode == "HTML渲染":
            # NOTE(review): the original <style> body and the wrapping <div>
            # were lost from the reviewed text; this is a minimal
            # reconstruction (scrollable in both directions, honouring
            # font_size and container_height). Styles for the
            # "highlight-text" / "selected-highlight" classes are not defined
            # here — confirm where they live.
            css_class = f"{layout_type}-content-display"
            # Built without leading indentation so Markdown does not treat it
            # as a code block.
            content_style = (
                "<style>"
                f".{css_class} {{"
                f"height: {container_height}px; "
                "overflow-x: auto; overflow-y: auto; "
                f"font-size: {font_size}px;"
                "}"
                "</style>"
            )
            st.markdown(content_style, unsafe_allow_html=True)
            st.markdown(f'<div class="{css_class}">{content}</div>', unsafe_allow_html=True)

        elif render_mode == "Markdown渲染":
            converted_content = convert_html_table_to_markdown(content)
            st.markdown(converted_content, unsafe_allow_html=True)

        elif render_mode == "DataFrame表格":
            # NOTE(review): the body of this branch was lost from the reviewed
            # text (only the start of the '<table' check survived). The HTML
            # is rendered directly as a safe fallback.
            # TODO(review): restore the DataFrame rendering (presumably based
            # on parse_html_tables — confirm).
            st.markdown(content, unsafe_allow_html=True)

        else:
            # NOTE(review): reconstructed fallback so unknown modes still show
            # the content — confirm against the original raw-text branch.
            st.text(content)

    def _highlight_clickable_text(self, content: str) -> str:
        """Wrap every clickable text (longer than 2 chars) in a highlight span.

        A single regex pass is used, longest candidates first, so that markup
        inserted for one text is never re-matched (and nested or broken) by
        another text that happens to be a substring of it.
        """
        candidates = sorted(
            (text for text in self.validator.text_bbox_mapping if len(text) > 2),
            key=len,
            reverse=True,
        )
        if not candidates:
            return content

        selected_text = st.session_state.get('selected_text')
        pattern = re.compile("|".join(re.escape(text) for text in candidates))

        def wrap(match) -> str:
            text = match.group(0)
            css_class = (
                "highlight-text selected-highlight"
                if text == selected_text
                else "highlight-text"
            )
            # NOTE(review): the original markup was lost from the reviewed
            # text; a span carrying css_class is the minimal reconstruction.
            return f'<span class="{css_class}">{text}</span>'

        return pattern.sub(wrap, content)

    # ------------------------------------------------------------------
    # Layouts
    # ------------------------------------------------------------------
    def create_compact_layout(self, config: Dict):
        """Render the compact two-column layout (content left, image right).

        NOTE(review): the header and the first statements of this method were
        lost from the reviewed text. The method name, the parameter and the
        configuration keys read below are a reconstruction — verify them
        against the callers and the real configuration schema.
        """
        styles = config.get('styles', {})
        layout = styles.get('layout', {})
        font_size = styles.get('font_size', 10)
        container_height = layout.get('default_height', 600)
        zoom_level = layout.get('default_zoom', 1.0)
        layout_type = "compact"

        left_col, right_col = st.columns(layout.get('columns', 2))

        with left_col:
            if self.validator.text_bbox_mapping:
                text_options = ["请选择文本..."] + list(self.validator.text_bbox_mapping.keys())

                def shorten(index: int) -> str:
                    label = text_options[index]
                    return label[:30] + "..." if len(label) > 30 else label

                selected_index = st.selectbox(
                    "快速定位文本",  # NOTE(review): label reconstructed; it is hidden anyway
                    range(len(text_options)),
                    format_func=shorten,
                    label_visibility="collapsed",
                    key="compact_quick_text_selector",  # distinct from the section selector key
                )
                if selected_index > 0:
                    st.session_state.selected_text = text_options[selected_index]

            if self.validator.md_content:
                highlighted_content = self._highlight_clickable_text(self.validator.md_content)
                self.render_content_by_mode(
                    highlighted_content, "HTML渲染", font_size, container_height, layout_type
                )

        with right_col:
            self.create_aligned_image_display(zoom_level, "compact")

    def create_aligned_image_display(self, zoom_level: float = 1.0, layout_type: str = "aligned"):
        """Render the image controls and the annotated, interactive image.

        Args:
            zoom_level: Accepted for interface compatibility.
                NOTE(review): the image is scaled with ``self.zoom_level``,
                not with this argument — confirm which one is intended.
            layout_type: Prefix used to build unique widget keys.
        """
        col1, col2, col3, col4, col5 = st.columns(5, vertical_alignment="center", border=False)

        with col1:
            self.show_all_boxes = st.checkbox(
                "显示所有框",
                value=self.show_all_boxes,
                key=f"{layout_type}_show_all_boxes",
            )

        with col2:
            if st.button("🔄 旋转90度", type="secondary", key=f"{layout_type}_manual_angle"):
                self._apply_manual_rotation((self.rotated_angle + 90) % 360)

        with col3:
            current_angle = self.get_rotation_angle()
            st.metric("当前角度", f"{current_angle}°", label_visibility="collapsed")

        with col4:
            if st.button("↺ 重置角度", key=f"{layout_type}_reset_angle"):
                st.success("已重置旋转角度")
                self._apply_manual_rotation(0.0)

        with col5:
            # Keyed like the other widgets so the panel can be rendered for
            # more than one layout_type without a duplicate-widget error.
            if st.button("🧹 清除选择", key=f"{layout_type}_clear_selection"):
                st.session_state.selected_text = None
                st.rerun()

        image = self.load_and_rotate_image(self.validator.image_path)
        if not image:
            st.error("未找到对应的图片文件")
            if self.validator.image_path:
                st.write(f"期望路径: {self.validator.image_path}")
            return

        try:
            resized_image, all_boxes, selected_bbox = self.zoom_image(image, self.zoom_level)
            fig = self.create_resized_interactive_plot(
                resized_image, selected_bbox, self.zoom_level, all_boxes
            )

            plot_config = {
                'displayModeBar': True,
                'modeBarButtonsToRemove': ['zoom2d', 'select2d', 'lasso2d', 'autoScale2d'],
                'scrollZoom': True,
                'doubleClick': 'reset',
                'responsive': False,  # fixed size instead of responsive resizing
                'toImageButtonOptions': {
                    'format': 'png',
                    'filename': 'ocr_image',
                    'height': None,  # use the current height
                    'width': None,  # use the current width
                    'scale': 1,
                },
            }

            st.plotly_chart(
                fig,
                use_container_width=False,
                config=plot_config,
                key=f"{layout_type}_plot",
            )
        except Exception as e:  # UI boundary: show the failure on the page
            st.error(f"❌ 图片处理失败: {e}")
            st.exception(e)

    # ------------------------------------------------------------------
    # Image / plot helpers
    # ------------------------------------------------------------------
    def zoom_image(self, image: Image.Image, current_zoom: float
                   ) -> Tuple[Image.Image, List[List[float]], Optional[List[int]]]:
        """Scale *image* and the bboxes to draw by *current_zoom*.

        Args:
            image: Image to resize.
            current_zoom: Scale factor applied to the image and coordinates.

        Returns:
            ``(resized_image, all_boxes, selected_bbox)`` where ``all_boxes``
            holds the scaled bboxes of every text (empty unless
            ``self.show_all_boxes``) and ``selected_bbox`` is the scaled,
            integer bbox of the first occurrence of the selected text, or
            ``None`` when nothing usable is selected.
        """
        # Clamp to 1 px: Pillow rejects a zero-sized target.
        new_width = max(1, int(image.width * current_zoom))
        new_height = max(1, int(image.height * current_zoom))
        resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

        mapping = self.validator.text_bbox_mapping

        selected_bbox = None
        selected_text = st.session_state.get('selected_text')
        if selected_text and selected_text in mapping:
            info_list = mapping[selected_text]
            if info_list:  # guard against a text with no recorded occurrence
                selected_bbox = [int(coord * current_zoom) for coord in info_list[0]['bbox']]

        all_boxes = []
        if self.show_all_boxes:
            all_boxes = [
                [coord * current_zoom for coord in info['bbox']]
                for info_list in mapping.values()
                for info in info_list
                if len(info['bbox']) >= 4
            ]

        return resized_image, all_boxes, selected_bbox

    @staticmethod
    def _add_bbox_rect(fig: go.Figure, bbox, image_height: int, line: dict, fillcolor: str):
        """Draw *bbox* (image coordinates, y down) as a rectangle on *fig*.

        Plotly's y axis points up, so the y values are flipped around
        *image_height*. Boxes with fewer than four values are ignored.
        """
        if len(bbox) < 4:
            return
        x1, y1, x2, y2 = bbox[:4]
        fig.add_shape(
            type="rect",
            x0=x1,
            y0=image_height - y2,  # bbox bottom edge
            x1=x2,
            y1=image_height - y1,  # bbox top edge
            line=line,
            fillcolor=fillcolor,
        )

    def create_resized_interactive_plot(self, image: Image.Image,
                                        selected_bbox: Optional[List[int]],
                                        zoom_level: float,
                                        all_boxes: List[List[int]]) -> go.Figure:
        """Build the Plotly figure showing *image* with its bbox overlays.

        Args:
            image: Already-resized image used as the figure background.
            selected_bbox: Bbox to highlight in red, or ``None``.
            zoom_level: Unused here; the inputs are already scaled.
            all_boxes: Bboxes drawn in translucent blue.

        Returns:
            A pannable figure whose axes span the image size with a 1:1 ratio.
        """
        fig = go.Figure()

        # Background image: with yanchor="top", (x, y) is its top-left corner
        # in Plotly coordinates (origin at the bottom-left, y pointing up).
        fig.add_layout_image(
            dict(
                source=image,
                xref="x",
                yref="y",
                x=0,
                y=image.height,
                sizex=image.width,
                sizey=image.height,
                sizing="stretch",
                opacity=1.0,
                layer="below",
                yanchor="top",
            )
        )

        for bbox in all_boxes:
            self._add_bbox_rect(
                fig, bbox, image.height,
                line=dict(color="blue", width=1),
                fillcolor="rgba(0, 100, 200, 0.2)",
            )

        if selected_bbox:
            self._add_bbox_rect(
                fig, selected_bbox, image.height,
                line=dict(color="red", width=3),
                fillcolor="rgba(255, 0, 0, 0.3)",
            )

        max_display_width = 1500
        max_display_height = 1000
        aspect_ratio = image.width / image.height

        if self.fit_to_container:
            # Fit inside the limits while keeping the aspect ratio ...
            if aspect_ratio > 1:  # landscape
                display_width = min(max_display_width, image.width)
                display_height = int(display_width / aspect_ratio)
            else:  # portrait or square
                display_height = min(max_display_height, image.height)
                display_width = int(display_height * aspect_ratio)
            # ... but never smaller than a usable minimum.
            display_width = max(display_width, 800)
            display_height = max(display_height, 600)
        else:
            # Natural size, capped by the container limits.
            display_width = min(image.width, max_display_width)
            display_height = min(image.height, max_display_height)

        fig.update_layout(
            width=display_width,  # explicit size; the chart is not responsive
            height=display_height,
            margin=dict(l=0, r=0, t=0, b=0),
            showlegend=False,
            plot_bgcolor='white',
            dragmode="pan",
            xaxis=dict(
                visible=False,
                range=[0, image.width],
                constrain="domain",
                fixedrange=False,
                autorange=False,
                showgrid=False,
                zeroline=False,
            ),
            yaxis=dict(
                visible=False,
                range=[0, image.height],  # y axis spans exactly the image height
                constrain="domain",
                scaleanchor="x",  # lock the 1:1 pixel ratio to the x axis
                scaleratio=1,
                fixedrange=False,
                autorange=False,
                showgrid=False,
                zeroline=False,
            ),
        )

        return fig