2 месяцев назад · 2fd2181eb9
--- a/ocr_validator_utils.py
+++ b/ocr_validator_utils.py
@@ -13,6 +13,9 @@ from io import StringIO, BytesIO
 
				 import re
			
 
				 from html import unescape
			
 
				 import yaml
			
 
				+import base64
			
 
				+from urllib.parse import urlparse
			
 
				+import os
			
 
				 
			
 
				 
			
 
				 def load_config(config_path: str = "config.yaml") -> Dict:
			
@@ -81,18 +84,24 @@ def load_css_styles(css_path: str = "styles.css") -> str:
 
				         """
			
 
				 
			
 
				 
			
 
				-def rotate_image_and_coordinates(image: Image.Image, angle: float, coordinates_list: List[List[int]]) -> Tuple[Image.Image, List[List[int]]]:
			
 
				+def rotate_image_and_coordinates(
			
 
				+    image: Image.Image, 
			
 
				+    angle: float, 
			
 
				+    coordinates_list: List[List[int]], 
			
 
				+    rotate_coordinates: bool = True
			
 
				+) -> Tuple[Image.Image, List[List[int]]]:
			
 
				     """
			
 
				-    根据角度旋转图像和坐标 - 修复坐标变换和图片显示
			
 
				+    根据角度旋转图像和坐标
			
 
				     
			
 
				     Args:
			
 
				         image: 原始图像
			
 
				         angle: 旋转角度（度数）
			
 
				         coordinates_list: 坐标列表，每个坐标为[x1, y1, x2, y2]格式
			
 
				+        rotate_coordinates: 是否需要旋转坐标（针对不同OCR工具的处理方式）
			
 
				     
			
 
				     Returns:
			
 
				         rotated_image: 旋转后的图像
			
 
				-        rotated_coordinates: 旋转后的坐标列表
			
 
				+        rotated_coordinates: 处理后的坐标列表
			
 
				     """
			
 
				     if angle == 0:
			
 
				         return image, coordinates_list
			
@@ -110,6 +119,10 @@ def rotate_image_and_coordinates(image: Image.Image, angle: float, coordinates_l
 
				     # 旋转图像
			
 
				     rotated_image = image.rotate(rotation_angle, expand=True)
			
 
				     
			
 
				+    # 如果不需要旋转坐标，直接返回原坐标
			
 
				+    if not rotate_coordinates:
			
 
				+        return rotated_image, coordinates_list
			
 
				+    
			
 
				     # 获取原始和旋转后的图像尺寸
			
 
				     orig_width, orig_height = image.size
			
 
				     new_width, new_height = rotated_image.size
			
@@ -124,7 +137,13 @@ def rotate_image_and_coordinates(image: Image.Image, angle: float, coordinates_l
 
				             
			
 
				         x1, y1, x2, y2 = coord[:4]
			
 
				         
			
 
				-        # 根据旋转角度变换坐标 - 修复变换逻辑
			
 
				+        # 验证原始坐标是否有效
			
 
				+        if x1 < 0 or y1 < 0 or x2 <= x1 or y2 <= y1:
			
 
				+            print(f"警告: 无效坐标 {coord}")
			
 
				+            rotated_coordinates.append([0, 0, 50, 50])  # 使用默认坐标
			
 
				+            continue
			
 
				+        
			
 
				+        # 根据旋转角度变换坐标
			
 
				         if rotation_angle == -90:  # 顺时针90度 (270度逆时针)
			
 
				             # 变换公式: (x, y) -> (y, orig_width - x)
			
 
				             new_x1 = y1
			
@@ -322,6 +341,65 @@ def get_rotation_angle_from_ppstructv3(data: Dict) -> float:
 
				     return 0.0
			
 
				 
			
 
				 
			
 
				+def process_markdown_images(md_content: str, json_path: str) -> str:
			
 
				+    """
			
 
				+    处理Markdown中的图片引用，将本地图片转换为base64
			
 
				+    """
			
 
				+    import re
			
 
				+    
			
 
				+    # 匹配Markdown图片语法: ![alt](path)
			
 
				+    img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
			
 
				+    
			
 
				+    def replace_image(match):
			
 
				+        alt_text = match.group(1)
			
 
				+        img_path = match.group(2)
			
 
				+        
			
 
				+        # 如果已经是base64或者网络链接，直接返回
			
 
				+        if img_path.startswith('data:image') or img_path.startswith('http'):
			
 
				+            return match.group(0)
			
 
				+        
			
 
				+        # 处理相对路径
			
 
				+        if not os.path.isabs(img_path):
			
 
				+            # 相对于JSON文件的路径
			
 
				+            json_dir = os.path.dirname(json_path)
			
 
				+            full_img_path = os.path.join(json_dir, img_path)
			
 
				+        else:
			
 
				+            full_img_path = img_path
			
 
				+        
			
 
				+        # 尝试转换为base64
			
 
				+        try:
			
 
				+            if os.path.exists(full_img_path):
			
 
				+                with open(full_img_path, 'rb') as img_file:
			
 
				+                    img_data = img_file.read()
			
 
				+                    
			
 
				+                # 获取文件扩展名确定MIME类型
			
 
				+                ext = os.path.splitext(full_img_path)[1].lower()
			
 
				+                mime_type = {
			
 
				+                    '.png': 'image/png',
			
 
				+                    '.jpg': 'image/jpeg',
			
 
				+                    '.jpeg': 'image/jpeg',
			
 
				+                    '.gif': 'image/gif',
			
 
				+                    '.bmp': 'image/bmp',
			
 
				+                    '.webp': 'image/webp'
			
 
				+                }.get(ext, 'image/jpeg')
			
 
				+                
			
 
				+                # 转换为base64
			
 
				+                img_base64 = base64.b64encode(img_data).decode('utf-8')
			
 
				+                data_url = f"data:{mime_type};base64,{img_base64}"
			
 
				+                
			
 
				+                return f'![{alt_text}]({data_url})'
			
 
				+            else:
			
 
				+                # 文件不存在，返回原始链接但添加警告
			
 
				+                return f'![{alt_text} (文件不存在)]({img_path})'
			
 
				+        except Exception as e:
			
 
				+            # 转换失败，返回原始链接
			
 
				+            return f'![{alt_text} (加载失败)]({img_path})'
			
 
				+    
			
 
				+    # 替换所有图片引用
			
 
				+    processed_content = re.sub(img_pattern, replace_image, md_content)
			
 
				+    return processed_content
			
 
				+
			
 
				+
			
 
				 def load_ocr_data_file(json_path: str, config: Dict) -> Tuple[List, str, str]:
			
 
				     """加载OCR相关数据文件"""
			
 
				     json_file = Path(json_path)
			
@@ -354,7 +432,10 @@ def load_ocr_data_file(json_path: str, config: Dict) -> Tuple[List, str, str]:
 
				     md_file = json_file.with_suffix('.md')
			
 
				     if md_file.exists():
			
 
				         with open(md_file, 'r', encoding='utf-8') as f:
			
 
				-            md_content = f.read()
			
 
				+            raw_md_content = f.read()
			
 
				+            
			
 
				+        # 处理Markdown中的图片引用
			
 
				+        md_content = process_markdown_images(raw_md_content, str(json_file))
			
 
				     
			
 
				     # 推断图片路径
			
 
				     image_name = json_file.stem
			
@@ -425,7 +506,9 @@ def find_available_ocr_files(ocr_out_dir: str) -> List[str]:
 
				             # 递归搜索JSON文件
			
 
				             for json_file in search_dir.rglob("*.json"):
			
 
				                 available_files.append(str(json_file))
			
 
				-    
			
 
				+    # 去重并排序
			
 
				+    available_files = sorted(list(set(available_files)))
			
 
				+
			
 
				     return available_files
			
 
				 
			
 
				 
			
@@ -610,4 +693,32 @@ def group_texts_by_category(text_bbox_mapping: Dict[str, List]) -> Dict[str, Lis
 
				         if category not in categories:
			
 
				             categories[category] = []
			
 
				         categories[category].append(text)
			
 
				-    return categories
			
 
				+    return categories
			
 
				+
			
 
				+
			
 
				+def get_ocr_tool_rotation_config(ocr_data: List, config: Dict) -> Dict:
			
 
				+    """获取OCR工具的旋转配置"""
			
 
				+    if not ocr_data or not isinstance(ocr_data, list):
			
 
				+        # 默认配置
			
 
				+        return {
			
 
				+            'coordinates_need_rotation': True,
			
 
				+            'coordinates_are_pre_rotated': False
			
 
				+        }
			
 
				+    
			
 
				+    # 从第一个OCR数据项获取工具类型
			
 
				+    first_item = ocr_data[0] if ocr_data else {}
			
 
				+    source_tool = first_item.get('source_tool', 'dots_ocr')
			
 
				+    
			
 
				+    # 获取工具配置
			
 
				+    tools_config = config.get('ocr', {}).get('tools', {})
			
 
				+    
			
 
				+    if source_tool in tools_config:
			
 
				+        tool_config = tools_config[source_tool]
			
 
				+        return tool_config.get('rotation', {
			
 
				+            'coordinates_are_pre_rotated': False
			
 
				+        })
			
 
				+    else:
			
 
				+        # 默认配置
			
 
				+        return {
			
 
				+            'coordinates_are_pre_rotated': False
			
 
				+        }