ocr_validator_file_utils.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
"""
OCR Validator file-handling utilities (Streamlit-specific).

Only the Streamlit-specific file helpers remain here; the general-purpose
functions have been moved to ocr_utils.
"""
  5. import os
  6. import cv2
  7. import numpy as np
  8. from typing import Optional, Dict
  9. def load_css_styles(css_path: str = "styles.css") -> str:
  10. """
  11. 加载CSS样式文件(Streamlit 特定)
  12. Args:
  13. css_path: CSS 文件路径
  14. Returns:
  15. CSS 样式内容
  16. """
  17. try:
  18. with open(css_path, 'r', encoding='utf-8') as f:
  19. return f.read()
  20. except Exception:
  21. # 返回基本样式
  22. return """
  23. .main > div { background-color: white !important; color: #333333 !important; }
  24. .stApp { background-color: white !important; }
  25. .block-container { background-color: white !important; color: #333333 !important; }
  26. """
  27. def detect_image_orientation_by_opencv(image_path: str) -> Dict:
  28. """
  29. 使用OpenCV的文本检测来判断图片方向
  30. Args:
  31. image_path: 图片路径
  32. Returns:
  33. 包含检测结果的字典
  34. """
  35. try:
  36. # 读取图像
  37. image = cv2.imread(image_path)
  38. if image is None:
  39. raise ValueError("无法读取图像文件")
  40. height, width = image.shape[:2]
  41. gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  42. # 使用EAST文本检测器或其他方法
  43. # 这里使用简单的边缘检测和轮廓分析
  44. edges = cv2.Canny(gray, 50, 150, apertureSize=3)
  45. # 检测直线
  46. lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=100)
  47. if lines is None:
  48. return {
  49. 'detected_angle': 0.0,
  50. 'confidence': 0.0,
  51. 'method': 'opencv_analysis',
  52. 'message': '未检测到足够的直线特征'
  53. }
  54. # 分析直线角度
  55. angles = []
  56. for rho, theta in lines[:, 0]:
  57. angle = theta * 180 / np.pi
  58. # 将角度标准化到0-180度
  59. if angle > 90:
  60. angle = angle - 180
  61. angles.append(angle)
  62. # 统计主要角度
  63. angle_hist = np.histogram(angles, bins=36, range=(-90, 90))[0]
  64. dominant_angle_idx = np.argmax(angle_hist)
  65. dominant_angle = -90 + dominant_angle_idx * 5 # 每个bin 5度
  66. # 将角度映射到标准旋转角度
  67. if -22.5 <= dominant_angle <= 22.5:
  68. detected_angle = 0.0
  69. elif 22.5 < dominant_angle <= 67.5:
  70. detected_angle = 270.0
  71. elif 67.5 < dominant_angle <= 90 or -90 <= dominant_angle < -67.5:
  72. detected_angle = 90.0
  73. else:
  74. detected_angle = 180.0
  75. confidence = angle_hist[dominant_angle_idx] / len(lines) if len(lines) > 0 else 0.0
  76. return {
  77. 'detected_angle': detected_angle,
  78. 'confidence': min(1.0, confidence),
  79. 'method': 'opencv_analysis',
  80. 'line_count': len(lines),
  81. 'dominant_angle': dominant_angle,
  82. 'message': f'基于{len(lines)}条直线检测到旋转角度: {detected_angle}°'
  83. }
  84. except Exception as e:
  85. return {
  86. 'detected_angle': 0.0,
  87. 'confidence': 0.0,
  88. 'method': 'opencv_analysis',
  89. 'error': str(e),
  90. 'message': f'OpenCV检测过程中发生错误: {str(e)}'
  91. }