processors.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import ast
  16. import math
  17. from pathlib import Path
  18. from copy import deepcopy
  19. import numpy as np
  20. import cv2
  21. from PIL import Image
  22. from . import funcs as F
  23. from ....utils.benchmark import benchmark
  24. class _BaseResize:
  25. _CV2_INTERP_DICT = {
  26. "NEAREST": cv2.INTER_NEAREST,
  27. "LINEAR": cv2.INTER_LINEAR,
  28. "BICUBIC": cv2.INTER_CUBIC,
  29. "AREA": cv2.INTER_AREA,
  30. "LANCZOS4": cv2.INTER_LANCZOS4,
  31. }
  32. _PIL_INTERP_DICT = {
  33. "NEAREST": Image.NEAREST,
  34. "BILINEAR": Image.BILINEAR,
  35. "BICUBIC": Image.BICUBIC,
  36. "BOX": Image.BOX,
  37. "LANCZOS4": Image.LANCZOS,
  38. }
  39. def __init__(self, size_divisor, interp, backend="cv2"):
  40. super().__init__()
  41. if size_divisor is not None:
  42. assert isinstance(
  43. size_divisor, int
  44. ), "`size_divisor` should be None or int."
  45. self.size_divisor = size_divisor
  46. try:
  47. interp = interp.upper()
  48. if backend == "cv2":
  49. interp = self._CV2_INTERP_DICT[interp]
  50. elif backend == "pil":
  51. interp = self._PIL_INTERP_DICT[interp]
  52. else:
  53. raise ValueError("backend must be `cv2` or `pil`")
  54. except KeyError:
  55. raise ValueError(
  56. "For backend '{}', `interp` should be one of {}. Please ensure the interpolation method matches the selected backend.".format(
  57. backend,
  58. (
  59. self._CV2_INTERP_DICT.keys()
  60. if backend == "cv2"
  61. else self._PIL_INTERP_DICT.keys()
  62. ),
  63. )
  64. )
  65. self.interp = interp
  66. self.backend = backend
  67. @staticmethod
  68. def _rescale_size(img_size, target_size):
  69. """rescale size"""
  70. scale = min(max(target_size) / max(img_size), min(target_size) / min(img_size))
  71. rescaled_size = [round(i * scale) for i in img_size]
  72. return rescaled_size, scale
  73. @benchmark.timeit
  74. class Resize(_BaseResize):
  75. """Resize the image."""
  76. def __init__(
  77. self,
  78. target_size,
  79. keep_ratio=False,
  80. size_divisor=None,
  81. interp="LINEAR",
  82. backend="cv2",
  83. ):
  84. """
  85. Initialize the instance.
  86. Args:
  87. target_size (list|tuple|int): Target width and height.
  88. keep_ratio (bool, optional): Whether to keep the aspect ratio of resized
  89. image. Default: False.
  90. size_divisor (int|None, optional): Divisor of resized image size.
  91. Default: None.
  92. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  93. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  94. """
  95. super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
  96. if isinstance(target_size, int):
  97. target_size = [target_size, target_size]
  98. F.check_image_size(target_size)
  99. self.target_size = target_size
  100. self.keep_ratio = keep_ratio
  101. def __call__(self, imgs):
  102. """apply"""
  103. return [self.resize(img) for img in imgs]
  104. def resize(self, img):
  105. target_size = self.target_size
  106. original_size = img.shape[:2][::-1]
  107. if self.keep_ratio:
  108. h, w = img.shape[0:2]
  109. target_size, _ = self._rescale_size((w, h), self.target_size)
  110. if self.size_divisor:
  111. target_size = [
  112. math.ceil(i / self.size_divisor) * self.size_divisor
  113. for i in target_size
  114. ]
  115. img = F.resize(img, target_size, interp=self.interp, backend=self.backend)
  116. return img
  117. @benchmark.timeit
  118. class ResizeByLong(_BaseResize):
  119. """
  120. Proportionally resize the image by specifying the target length of the
  121. longest side.
  122. """
  123. def __init__(
  124. self, target_long_edge, size_divisor=None, interp="LINEAR", backend="cv2"
  125. ):
  126. """
  127. Initialize the instance.
  128. Args:
  129. target_long_edge (int): Target length of the longest side of image.
  130. size_divisor (int|None, optional): Divisor of resized image size.
  131. Default: None.
  132. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  133. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  134. """
  135. super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
  136. self.target_long_edge = target_long_edge
  137. def __call__(self, imgs):
  138. """apply"""
  139. return [self.resize(img) for img in imgs]
  140. def resize(self, img):
  141. h, w = img.shape[:2]
  142. scale = self.target_long_edge / max(h, w)
  143. h_resize = round(h * scale)
  144. w_resize = round(w * scale)
  145. if self.size_divisor is not None:
  146. h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
  147. w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor
  148. img = F.resize(
  149. img, (w_resize, h_resize), interp=self.interp, backend=self.backend
  150. )
  151. return img
  152. @benchmark.timeit
  153. class ResizeByShort(_BaseResize):
  154. """
  155. Proportionally resize the image by specifying the target length of the
  156. shortest side.
  157. """
  158. def __init__(
  159. self, target_short_edge, size_divisor=None, interp="LINEAR", backend="cv2"
  160. ):
  161. """
  162. Initialize the instance.
  163. Args:
  164. target_short_edge (int): Target length of the shortest side of image.
  165. size_divisor (int|None, optional): Divisor of resized image size.
  166. Default: None.
  167. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  168. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  169. """
  170. super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
  171. self.target_short_edge = target_short_edge
  172. def __call__(self, imgs):
  173. """apply"""
  174. return [self.resize(img) for img in imgs]
  175. def resize(self, img):
  176. h, w = img.shape[:2]
  177. scale = self.target_short_edge / min(h, w)
  178. h_resize = round(h * scale)
  179. w_resize = round(w * scale)
  180. if self.size_divisor is not None:
  181. h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
  182. w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor
  183. img = F.resize(
  184. img, (w_resize, h_resize), interp=self.interp, backend=self.backend
  185. )
  186. return img
  187. @benchmark.timeit
  188. class Normalize:
  189. """Normalize the image."""
  190. def __init__(self, scale=1.0 / 255, mean=0.5, std=0.5, preserve_dtype=False):
  191. """
  192. Initialize the instance.
  193. Args:
  194. scale (float, optional): Scaling factor to apply to the image before
  195. applying normalization. Default: 1/255.
  196. mean (float|tuple|list, optional): Means for each channel of the image.
  197. Default: 0.5.
  198. std (float|tuple|list, optional): Standard deviations for each channel
  199. of the image. Default: 0.5.
  200. preserve_dtype (bool, optional): Whether to preserve the original dtype
  201. of the image.
  202. """
  203. super().__init__()
  204. self.scale = np.float32(scale)
  205. if isinstance(mean, float):
  206. mean = [mean]
  207. self.mean = np.asarray(mean).astype("float32")
  208. if isinstance(std, float):
  209. std = [std]
  210. self.std = np.asarray(std).astype("float32")
  211. self.preserve_dtype = preserve_dtype
  212. def __call__(self, imgs):
  213. """apply"""
  214. old_type = imgs[0].dtype
  215. # XXX: If `old_type` has higher precision than float32,
  216. # we will lose some precision.
  217. imgs = np.array(imgs).astype("float32", copy=False)
  218. imgs *= self.scale
  219. imgs -= self.mean
  220. imgs /= self.std
  221. if self.preserve_dtype:
  222. imgs = imgs.astype(old_type, copy=False)
  223. return list(imgs)
  224. @benchmark.timeit
  225. class ToCHWImage:
  226. """Reorder the dimensions of the image from HWC to CHW."""
  227. def __call__(self, imgs):
  228. """apply"""
  229. return [img.transpose((2, 0, 1)) for img in imgs]
  230. @benchmark.timeit
  231. class ToBatch:
  232. def __call__(self, imgs):
  233. return [np.stack(imgs, axis=0).astype(dtype=np.float32, copy=False)]