contrast.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. """水印 对比度增强(由 ocr_utils.watermark_utils 迁入)。"""
  2. from __future__ import annotations
  3. import json
  4. import re
  5. from pathlib import Path
  6. from typing import Any, Dict, Optional, Tuple, Union
  7. import cv2
  8. import numpy as np
  9. from loguru import logger
  10. from PIL import Image
  11. def _enhance_text_restore(
  12. gray: np.ndarray,
  13. *,
  14. background_threshold: int = 248,
  15. text_lo_percentile: float = 1.0,
  16. text_hi_percentile: float = 99.0,
  17. text_black_target: int = 85,
  18. ) -> np.ndarray:
  19. """
  20. 仅对非背景像素做动态范围压缩,将最深笔画拉向 text_black_target(默认 ~85,接近扫描件原图)。
  21. 背景(>= background_threshold)保持白色,避免整图 gamma 导致背景发灰。
  22. """
  23. result = gray.copy()
  24. bg_th = int(np.clip(background_threshold, 200, 255))
  25. text_mask = gray < bg_th
  26. if not np.any(text_mask):
  27. return result
  28. vals = gray[text_mask].astype(np.float32)
  29. lo = float(np.percentile(vals, text_lo_percentile))
  30. hi = float(np.percentile(vals, text_hi_percentile))
  31. target = int(np.clip(text_black_target, 10, 200))
  32. if hi <= lo + 1.0:
  33. return result
  34. stretched = (vals - lo) * target / (hi - lo)
  35. result[text_mask] = np.clip(stretched, 0, 255).astype(np.uint8)
  36. return result
  37. def enhance_document_contrast(
  38. gray: np.ndarray,
  39. method: str = "text_restore",
  40. *,
  41. clip_limit: float = 2.0,
  42. tile_grid_size: int = 8,
  43. gamma: float = 0.85,
  44. black_percentile: float = 2.0,
  45. white_percentile: float = 98.0,
  46. background_threshold: int = 248,
  47. text_lo_percentile: float = 1.0,
  48. text_hi_percentile: float = 99.0,
  49. text_black_target: int = 85,
  50. ) -> np.ndarray:
  51. """
  52. 文档灰度图对比度增强(常用于去水印后恢复笔画深度)。
  53. Args:
  54. gray: 单通道 uint8 灰度图
  55. method: text_restore | clahe | gamma | linear
  56. clip_limit: CLAHE 对比度限制
  57. tile_grid_size: CLAHE 分块大小
  58. gamma: gamma 校正指数,<1 加深文字(去水印后发浅时适用)
  59. black_percentile: linear 拉伸下分位(映射到 0)
  60. white_percentile: linear 拉伸上分位(映射到 255)
  61. background_threshold: text_restore 背景阈值(>= 视为白底不处理)
  62. text_lo_percentile: text_restore 笔画下分位
  63. text_hi_percentile: text_restore 笔画上分位(映射到 text_black_target)
  64. text_black_target: text_restore 最深笔画目标灰度(越小越深,建议 75~95)
  65. Returns:
  66. 增强后的灰度图
  67. """
  68. if gray is None or gray.size == 0:
  69. return gray
  70. if gray.ndim != 2:
  71. raise ValueError("enhance_document_contrast expects single-channel grayscale image")
  72. method = (method or "text_restore").lower().strip()
  73. if method == "text_restore":
  74. return _enhance_text_restore(
  75. gray,
  76. background_threshold=background_threshold,
  77. text_lo_percentile=text_lo_percentile,
  78. text_hi_percentile=text_hi_percentile,
  79. text_black_target=text_black_target,
  80. )
  81. if method == "gamma":
  82. gamma = max(0.1, min(float(gamma), 3.0))
  83. inv_gamma = 1.0 / gamma
  84. table = np.array(
  85. [((i / 255.0) ** inv_gamma) * 255 for i in range(256)],
  86. dtype=np.uint8,
  87. )
  88. return cv2.LUT(gray, table)
  89. if method == "linear":
  90. p_low = float(np.percentile(gray, black_percentile))
  91. p_high = float(np.percentile(gray, white_percentile))
  92. if p_high <= p_low + 1.0:
  93. return gray
  94. stretched = (gray.astype(np.float32) - p_low) * 255.0 / (p_high - p_low)
  95. return np.clip(stretched, 0, 255).astype(np.uint8)
  96. # 默认 CLAHE:局部对比度,适合扫描件
  97. tile = max(2, int(tile_grid_size))
  98. clahe = cv2.createCLAHE(
  99. clipLimit=max(0.1, float(clip_limit)),
  100. tileGridSize=(tile, tile),
  101. )
  102. return clahe.apply(gray)
  103. def apply_contrast_enhancement_config(
  104. gray: np.ndarray,
  105. contrast_cfg: Optional[Dict[str, Any]],
  106. ) -> np.ndarray:
  107. """按配置字典应用对比度增强;未启用时原样返回。"""
  108. if not contrast_cfg or not contrast_cfg.get("enabled", False):
  109. return gray
  110. return enhance_document_contrast(
  111. gray,
  112. method=contrast_cfg.get("method", "text_restore"),
  113. clip_limit=contrast_cfg.get("clip_limit", 2.0),
  114. tile_grid_size=contrast_cfg.get("tile_grid_size", 8),
  115. gamma=contrast_cfg.get("gamma", 0.85),
  116. black_percentile=contrast_cfg.get("black_percentile", 2.0),
  117. white_percentile=contrast_cfg.get("white_percentile", 98.0),
  118. background_threshold=contrast_cfg.get("background_threshold", 248),
  119. text_lo_percentile=contrast_cfg.get("text_lo_percentile", 1.0),
  120. text_hi_percentile=contrast_cfg.get("text_hi_percentile", 99.0),
  121. text_black_target=contrast_cfg.get("text_black_target", 75),
  122. )