Преглед изворни кода

增强图像方向检测功能,新增自动检测和手动设置角度选项,优化缓存管理和调试信息展示

zhch158_admin пре 2 месеци
родитељ
комит
1f2da377f9
1 измењених фајлова са 125 додато и 31 уклоњено
  1. 125 31
      ocr_validator_layout.py

+ 125 - 31
ocr_validator_layout.py

@@ -16,7 +16,8 @@ from ocr_validator_utils import (
     parse_html_tables,
     draw_bbox_on_image,
     rotate_image_and_coordinates,
-    get_ocr_tool_rotation_config  # 新增导入
+    get_ocr_tool_rotation_config,
+    detect_image_orientation_by_opencv  # 新增导入
 )
 
 
@@ -26,8 +27,10 @@ class OCRLayoutManager:
     def __init__(self, validator):
         self.validator = validator
         self.config = validator.config
-        self._rotated_image_cache = {}  # 缓存旋转后的图像
-        self._cache_max_size = 10  # 最大缓存数量
+        self._rotated_image_cache = {}
+        self._cache_max_size = 10
+        self._orientation_cache = {}  # 缓存方向检测结果
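+        # _auto_detected_angle is deliberately left unset here: get_rotation_angle() tests it with hasattr() and the reset action removes it with delattr()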
     
     def clear_image_cache(self):
         """清理所有图像缓存"""
@@ -54,12 +57,30 @@ class OCRLayoutManager:
             oldest_key = next(iter(self._rotated_image_cache))
             del self._rotated_image_cache[oldest_key]
     
+    def detect_and_suggest_rotation(self, image_path: str) -> Dict:
+        """检测并建议图片旋转角度"""
+        if image_path in self._orientation_cache:
+            return self._orientation_cache[image_path]
+        
+        # 使用自动检测功能
+        detection_result = detect_image_orientation_by_opencv(image_path)
+        
+        # 缓存结果
+        self._orientation_cache[image_path] = detection_result
+        return detection_result
+    
     def get_rotation_angle(self) -> float:
-        """从OCR数据中获取旋转角度"""
+        """获取旋转角度 - 增强版本支持自动检测"""
+        # 首先尝试从OCR数据中获取(PPStructV3等)
         if self.validator.ocr_data:
             for item in self.validator.ocr_data:
                 if isinstance(item, dict) and 'rotation_angle' in item:
                     return item['rotation_angle']
+        
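+        # No angle in the OCR data: fall back to an angle previously applied via the UI (manual or suggested); no detection is run here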
+        if hasattr(self, '_auto_detected_angle'):
+            return self._auto_detected_angle
+        
         return 0.0
     
     def load_and_rotate_image(self, image_path: str) -> Optional[Image.Image]:
@@ -119,7 +140,7 @@ class OCRLayoutManager:
                     # 旋转图像和坐标
                     rotated_image, rotated_bboxes = rotate_image_and_coordinates(
                         image, rotation_angle, all_bboxes, 
-                        rotate_coordinates=rotation_config['coordinates_need_rotation']
+                        rotate_coordinates=not rotation_config['coordinates_are_pre_rotated']
                     )
                     
                     # 更新bbox映射 - 使用映射关系确保正确对应
@@ -396,16 +417,101 @@ class OCRLayoutManager:
         st.markdown(f'<div class="aligned-image-container-{layout_type}">', unsafe_allow_html=True)
         st.header("🖼️ 原图标注")
         
+        # 方向检测控制面板
+        with st.expander("🔄 图片方向检测", expanded=False):
+            col1, col2, col3 = st.columns(3)
+            
+            with col1:
+                if st.button("🔍 自动检测方向", key=f"{layout_type}_detect_orientation"):
+                    if self.validator.image_path:
+                        with st.spinner("正在检测图片方向..."):
+                            detection_result = self.detect_and_suggest_rotation(self.validator.image_path)
+                            st.session_state[f'{layout_type}_detection_result'] = detection_result
+                        st.rerun()
+            
+            with col2:
+                manual_angle = st.selectbox(
+                    "手动设置角度",
+                    [0, 90, 180, 270],
+                    key=f"{layout_type}_manual_angle"
+                )
+                if st.button("应用手动角度", key=f"{layout_type}_apply_manual"):
+                    self._auto_detected_angle = float(manual_angle)
+                    st.success(f"已设置旋转角度为 {manual_angle}°")
+                    # 需要清除图片缓存,以及text_bbox_mapping中的bbox
+                    self.clear_image_cache()
+                    self.validator.process_data()
+                    st.rerun()
+            
+            with col3:
+                if st.button("🔄 重置角度", key=f"{layout_type}_reset_angle"):
+                    if hasattr(self, '_auto_detected_angle'):
+                        delattr(self, '_auto_detected_angle')
+                    st.success("已重置旋转角度")
+                    # 需要清除图片缓存,以及text_bbox_mapping中的bbox
+                    self.clear_image_cache()
+                    self.validator.process_data()
+                    st.rerun()
+            
+            # 显示检测结果
+            if f'{layout_type}_detection_result' in st.session_state:
+                result = st.session_state[f'{layout_type}_detection_result']
+                
+                st.markdown("### 🎯 检测结果")
+                
+                # 结果概览
+                result_col1, result_col2, result_col3 = st.columns(3)
+                with result_col1:
+                    st.metric("建议角度", f"{result['detected_angle']}°")
+                with result_col2:
+                    st.metric("置信度", f"{result['confidence']:.2%}")
+                with result_col3:
+                    confidence_color = "🟢" if result['confidence'] > 0.7 else "🟡" if result['confidence'] > 0.4 else "🔴"
+                    st.metric("可信度", f"{confidence_color}")
+                
+                # 详细信息
+                st.write(f"**检测信息:** {result['message']}")
+                
+                if 'method_details' in result:
+                    st.write("**方法详情:**")
+                    for detail in result['method_details']:
+                        st.write(f"• {detail}")
+                
+                # 应用建议角度
+                if result['confidence'] > 0.3 and result['detected_angle'] != 0:
+                    if st.button(f"✅ 应用建议角度 {result['detected_angle']}°", key=f"{layout_type}_apply_suggested"):
+                        self._auto_detected_angle = result['detected_angle']
+                        st.success(f"已应用建议角度 {result['detected_angle']}°")
+                        # 需要清除图片缓存,以及text_bbox_mapping中的bbox
+                        self.clear_image_cache()
+                        self.validator.process_data()
+                        st.rerun()
+                
+                # 显示个别方法的结果
+                if 'individual_results' in result and len(result['individual_results']) > 1:
+                    with st.expander("📊 各方法检测详情", expanded=False):
+                        for i, individual in enumerate(result['individual_results']):
+                            st.write(f"**方法 {i+1}: {individual['method']}**")
+                            st.write(f"角度: {individual['detected_angle']}°, 置信度: {individual['confidence']:.2f}")
+                            st.write(f"信息: {individual['message']}")
+                            if 'error' in individual:
+                                st.error(f"错误: {individual['error']}")
+                            st.write("---")
+        
         # 图片控制选项
-        col1, col2, col3 = st.columns(3)
+        col1, col2, col3, col4 = st.columns(4)
         with col1:
             current_zoom = st.slider("图片缩放", 0.3, 2.0, zoom_level, 0.1, key=f"{layout_type}_zoom_level")
         with col2:
             show_all_boxes = st.checkbox("显示所有框", value=False, key=f"{layout_type}_show_all_boxes")
         with col3:
             fit_to_container = st.checkbox("适应容器", value=True, key=f"{layout_type}_fit_container")
+        with col4:
+            # 显示当前角度状态
+            current_angle = self.get_rotation_angle()
+            st.metric("当前角度", f"{current_angle}°")
         
-        # 使用新的图像加载方法
+        # 使用增强的图像加载方法
         image = self.load_and_rotate_image(self.validator.image_path)
         
         if image:
@@ -415,11 +521,10 @@ class OCRLayoutManager:
                 new_height = int(image.height * current_zoom)
                 resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
                 
-                # 计算选中的bbox - 注意bbox已经是旋转后的坐标
+                # 计算选中的bbox
                 selected_bbox = None
                 if st.session_state.selected_text and st.session_state.selected_text in self.validator.text_bbox_mapping:
                     info = self.validator.text_bbox_mapping[st.session_state.selected_text][0]
-                    # bbox已经是旋转后的坐标,只需要应用缩放
                     bbox = info['bbox']
                     selected_bbox = [int(coord * current_zoom) for coord in bbox]
 
@@ -433,46 +538,35 @@ class OCRLayoutManager:
                                 scaled_bbox = [coord * current_zoom for coord in bbox]
                                 all_boxes.append(scaled_bbox)
                 
-                # 添加调试信息
+                # 增强的调试信息
                 with st.expander("🔍 图像和坐标调试信息", expanded=False):
                     rotation_angle = self.get_rotation_angle()
                     rotation_config = get_ocr_tool_rotation_config(self.validator.ocr_data, self.config)
                     
-                    col_debug1, col_debug2 = st.columns(2)
+                    col_debug1, col_debug2, col_debug3 = st.columns(3)
                     with col_debug1:
                         st.write("**图像信息:**")
                         st.write(f"原始尺寸: {image.width} x {image.height}")
                         st.write(f"缩放后尺寸: {resized_image.width} x {resized_image.height}")
-                        st.write(f"旋转角度: {rotation_angle}°")
+                        st.write(f"当前角度: {rotation_angle}°")
                         
                     with col_debug2:
                         st.write("**坐标信息:**")
                         if selected_bbox:
                             st.write(f"选中框: {selected_bbox}")
                         st.write(f"总框数: {len(all_boxes)}")
-                        st.write(f"工具配置: {'预旋转' if rotation_config.get('coordinates_are_pre_rotated') else '需旋转'}")
-                    
-                    if st.session_state.selected_text:
-                        info = self.validator.text_bbox_mapping[st.session_state.selected_text][0]
-                        original_bbox = info['bbox']
-                        
-                        # 验证坐标是否在图像范围内
-                        x1, y1, x2, y2 = original_bbox[:4]
-                        in_bounds = (0 <= x1 < image.width and 
-                                   0 <= x2 <= image.width and 
-                                   0 <= y1 < image.height and 
-                                   0 <= y2 <= image.height)
-                        
-                        color = "🟢" if in_bounds else "🔴"
-                        st.write(f"{color} 坐标范围检查: {in_bounds}")
+                        st.write(f"文本框数: {len(self.validator.text_bbox_mapping)}")
                         
-                        if not in_bounds:
-                            st.warning("⚠️ 坐标超出图像范围,可能存在坐标系问题")
+                    with col_debug3:
+                        st.write("**配置信息:**")
+                        st.write(f"工具类型: {rotation_config.get('coordinates_are_pre_rotated', 'unknown')}")
+                        st.write(f"缓存状态: {len(self._rotated_image_cache)} 项")
+                        if hasattr(self, '_auto_detected_angle'):
+                            st.write(f"自动检测角度: {self._auto_detected_angle}°")
                 
                 # 创建交互式图片
                 fig = self.create_resized_interactive_plot(resized_image, selected_bbox, current_zoom, all_boxes)
                 
-                # 修复:使用合适的配置显示图表
                 plot_config = {
                     'displayModeBar': True,
                     'modeBarButtonsToRemove': ['zoom2d', 'select2d', 'lasso2d', 'autoScale2d'],
@@ -522,7 +616,7 @@ class OCRLayoutManager:
                         
             except Exception as e:
                 st.error(f"❌ 图片处理失败: {e}")
-                st.exception(e)  # 显示完整的错误堆栈
+                st.exception(e)
         else:
             st.error("未找到对应的图片文件")
             if self.validator.image_path: