processors.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import math
  15. import numpy as np
  16. from PIL import Image
  17. from .....utils.deps import class_requires_deps, is_dep_available
  18. from ....utils.benchmark import benchmark
  19. from . import funcs as F
  20. if is_dep_available("opencv-contrib-python"):
  21. import cv2
  22. @class_requires_deps("opencv-contrib-python")
  23. class _BaseResize:
  24. def __init__(self, size_divisor, interp, backend="cv2"):
  25. _CV2_INTERP_DICT = {
  26. "NEAREST": cv2.INTER_NEAREST,
  27. "LINEAR": cv2.INTER_LINEAR,
  28. "BICUBIC": cv2.INTER_CUBIC,
  29. "AREA": cv2.INTER_AREA,
  30. "LANCZOS4": cv2.INTER_LANCZOS4,
  31. }
  32. _PIL_INTERP_DICT = {
  33. "NEAREST": Image.NEAREST,
  34. "BILINEAR": Image.BILINEAR,
  35. "BICUBIC": Image.BICUBIC,
  36. "BOX": Image.BOX,
  37. "LANCZOS4": Image.LANCZOS,
  38. }
  39. super().__init__()
  40. if size_divisor is not None:
  41. assert isinstance(
  42. size_divisor, int
  43. ), "`size_divisor` should be None or int."
  44. self.size_divisor = size_divisor
  45. try:
  46. interp = interp.upper()
  47. if backend == "cv2":
  48. interp = _CV2_INTERP_DICT[interp]
  49. elif backend == "pil":
  50. interp = _PIL_INTERP_DICT[interp]
  51. else:
  52. raise ValueError("backend must be `cv2` or `pil`")
  53. except KeyError:
  54. raise ValueError(
  55. "For backend '{}', `interp` should be one of {}. Please ensure the interpolation method matches the selected backend.".format(
  56. backend,
  57. (
  58. _CV2_INTERP_DICT.keys()
  59. if backend == "cv2"
  60. else _PIL_INTERP_DICT.keys()
  61. ),
  62. )
  63. )
  64. self.interp = interp
  65. self.backend = backend
  66. @staticmethod
  67. def _rescale_size(img_size, target_size):
  68. """rescale size"""
  69. scale = min(max(target_size) / max(img_size), min(target_size) / min(img_size))
  70. rescaled_size = [round(i * scale) for i in img_size]
  71. return rescaled_size, scale
  72. @benchmark.timeit
  73. class Resize(_BaseResize):
  74. """Resize the image."""
  75. def __init__(
  76. self,
  77. target_size,
  78. keep_ratio=False,
  79. size_divisor=None,
  80. interp="LINEAR",
  81. backend="cv2",
  82. ):
  83. """
  84. Initialize the instance.
  85. Args:
  86. target_size (list|tuple|int): Target width and height.
  87. keep_ratio (bool, optional): Whether to keep the aspect ratio of resized
  88. image. Default: False.
  89. size_divisor (int|None, optional): Divisor of resized image size.
  90. Default: None.
  91. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  92. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  93. """
  94. super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
  95. if isinstance(target_size, int):
  96. target_size = [target_size, target_size]
  97. F.check_image_size(target_size)
  98. self.target_size = target_size
  99. self.keep_ratio = keep_ratio
  100. def __call__(self, imgs):
  101. """apply"""
  102. return [self.resize(img) for img in imgs]
  103. def resize(self, img):
  104. target_size = self.target_size
  105. original_size = img.shape[:2][::-1]
  106. if self.keep_ratio:
  107. h, w = img.shape[0:2]
  108. target_size, _ = self._rescale_size((w, h), self.target_size)
  109. if self.size_divisor:
  110. target_size = [
  111. math.ceil(i / self.size_divisor) * self.size_divisor
  112. for i in target_size
  113. ]
  114. img = F.resize(img, target_size, interp=self.interp, backend=self.backend)
  115. return img
  116. @benchmark.timeit
  117. class ResizeByLong(_BaseResize):
  118. """
  119. Proportionally resize the image by specifying the target length of the
  120. longest side.
  121. """
  122. def __init__(
  123. self, target_long_edge, size_divisor=None, interp="LINEAR", backend="cv2"
  124. ):
  125. """
  126. Initialize the instance.
  127. Args:
  128. target_long_edge (int): Target length of the longest side of image.
  129. size_divisor (int|None, optional): Divisor of resized image size.
  130. Default: None.
  131. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  132. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  133. """
  134. super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
  135. self.target_long_edge = target_long_edge
  136. def __call__(self, imgs):
  137. """apply"""
  138. return [self.resize(img) for img in imgs]
  139. def resize(self, img):
  140. h, w = img.shape[:2]
  141. scale = self.target_long_edge / max(h, w)
  142. h_resize = round(h * scale)
  143. w_resize = round(w * scale)
  144. if self.size_divisor is not None:
  145. h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
  146. w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor
  147. img = F.resize(
  148. img, (w_resize, h_resize), interp=self.interp, backend=self.backend
  149. )
  150. return img
  151. @benchmark.timeit
  152. class ResizeByShort(_BaseResize):
  153. """
  154. Proportionally resize the image by specifying the target length of the
  155. shortest side.
  156. """
  157. def __init__(
  158. self, target_short_edge, size_divisor=None, interp="LINEAR", backend="cv2"
  159. ):
  160. """
  161. Initialize the instance.
  162. Args:
  163. target_short_edge (int): Target length of the shortest side of image.
  164. size_divisor (int|None, optional): Divisor of resized image size.
  165. Default: None.
  166. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  167. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  168. """
  169. super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
  170. self.target_short_edge = target_short_edge
  171. def __call__(self, imgs):
  172. """apply"""
  173. return [self.resize(img) for img in imgs]
  174. def resize(self, img):
  175. h, w = img.shape[:2]
  176. scale = self.target_short_edge / min(h, w)
  177. h_resize = round(h * scale)
  178. w_resize = round(w * scale)
  179. if self.size_divisor is not None:
  180. h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
  181. w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor
  182. img = F.resize(
  183. img, (w_resize, h_resize), interp=self.interp, backend=self.backend
  184. )
  185. return img
  186. @benchmark.timeit
  187. @class_requires_deps("opencv-contrib-python")
  188. class Normalize:
  189. """Normalize the three-channel image."""
  190. def __init__(self, scale=1.0 / 255, mean=0.5, std=0.5):
  191. """
  192. Initialize the instance.
  193. Args:
  194. scale (float, optional): Scaling factor to apply to the image before
  195. applying normalization. Default: 1/255.
  196. mean (float|tuple|list, optional): Means for each channel of the image.
  197. Default: 0.5.
  198. std (float|tuple|list|np.ndarray, optional): Standard deviations for each channel
  199. of the image. Default: 0.5.
  200. """
  201. super().__init__()
  202. if isinstance(mean, float):
  203. mean = [mean] * 3
  204. elif len(mean) != 3:
  205. raise ValueError(
  206. f"Expected `mean` to be a tuple or list of length 3, but got {len(mean)} elements."
  207. )
  208. if isinstance(std, float):
  209. std = [std] * 3
  210. elif len(std) != 3:
  211. raise ValueError(
  212. f"Expected `std` to be a tuple or list of length 3, but got {len(std)} elements."
  213. )
  214. self.alpha = [scale / std[i] for i in range(len(std))]
  215. self.beta = [-mean[i] / std[i] for i in range(len(std))]
  216. def norm(self, img):
  217. split_im = list(cv2.split(img))
  218. for c in range(img.shape[2]):
  219. split_im[c] = split_im[c].astype(np.float32)
  220. split_im[c] *= self.alpha[c]
  221. split_im[c] += self.beta[c]
  222. res = cv2.merge(split_im)
  223. return res
  224. def __call__(self, imgs):
  225. """apply"""
  226. return [self.norm(img) for img in imgs]
  227. @benchmark.timeit
  228. class ToCHWImage:
  229. """Reorder the dimensions of the image from HWC to CHW."""
  230. def __call__(self, imgs):
  231. """apply"""
  232. return [img.transpose((2, 0, 1)) for img in imgs]
  233. @benchmark.timeit
  234. class ToBatch:
  235. def __call__(self, imgs):
  236. return [np.stack(imgs, axis=0).astype(dtype=np.float32, copy=False)]