processors.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import ast
  16. import math
  17. from pathlib import Path
  18. from copy import deepcopy
  19. import numpy as np
  20. import cv2
  21. from PIL import Image
  22. from . import funcs as F
  23. class _BaseResize:
  24. _CV2_INTERP_DICT = {
  25. "NEAREST": cv2.INTER_NEAREST,
  26. "LINEAR": cv2.INTER_LINEAR,
  27. "BICUBIC": cv2.INTER_CUBIC,
  28. "AREA": cv2.INTER_AREA,
  29. "LANCZOS4": cv2.INTER_LANCZOS4,
  30. }
  31. _PIL_INTERP_DICT = {
  32. "NEAREST": Image.NEAREST,
  33. "BILINEAR": Image.BILINEAR,
  34. "BICUBIC": Image.BICUBIC,
  35. "BOX": Image.BOX,
  36. "LANCZOS4": Image.LANCZOS,
  37. }
  38. def __init__(self, size_divisor, interp, backend="cv2"):
  39. super().__init__()
  40. if size_divisor is not None:
  41. assert isinstance(
  42. size_divisor, int
  43. ), "`size_divisor` should be None or int."
  44. self.size_divisor = size_divisor
  45. try:
  46. interp = interp.upper()
  47. if backend == "cv2":
  48. interp = self._CV2_INTERP_DICT[interp]
  49. elif backend == "pil":
  50. interp = self._PIL_INTERP_DICT[interp]
  51. else:
  52. raise ValueError("backend must be `cv2` or `pil`")
  53. except KeyError:
  54. raise ValueError(
  55. "For backend '{}', `interp` should be one of {}. Please ensure the interpolation method matches the selected backend.".format(
  56. backend,
  57. (
  58. self._CV2_INTERP_DICT.keys()
  59. if backend == "cv2"
  60. else self._PIL_INTERP_DICT.keys()
  61. ),
  62. )
  63. )
  64. self.interp = interp
  65. self.backend = backend
  66. @staticmethod
  67. def _rescale_size(img_size, target_size):
  68. """rescale size"""
  69. scale = min(max(target_size) / max(img_size), min(target_size) / min(img_size))
  70. rescaled_size = [round(i * scale) for i in img_size]
  71. return rescaled_size, scale
  72. class Resize(_BaseResize):
  73. """Resize the image."""
  74. def __init__(
  75. self,
  76. target_size,
  77. keep_ratio=False,
  78. size_divisor=None,
  79. interp="LINEAR",
  80. backend="cv2",
  81. ):
  82. """
  83. Initialize the instance.
  84. Args:
  85. target_size (list|tuple|int): Target width and height.
  86. keep_ratio (bool, optional): Whether to keep the aspect ratio of resized
  87. image. Default: False.
  88. size_divisor (int|None, optional): Divisor of resized image size.
  89. Default: None.
  90. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  91. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  92. """
  93. super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
  94. if isinstance(target_size, int):
  95. target_size = [target_size, target_size]
  96. F.check_image_size(target_size)
  97. self.target_size = target_size
  98. self.keep_ratio = keep_ratio
  99. def __call__(self, imgs):
  100. """apply"""
  101. return [self.resize(img) for img in imgs]
  102. def resize(self, img):
  103. target_size = self.target_size
  104. original_size = img.shape[:2][::-1]
  105. if self.keep_ratio:
  106. h, w = img.shape[0:2]
  107. target_size, _ = self._rescale_size((w, h), self.target_size)
  108. if self.size_divisor:
  109. target_size = [
  110. math.ceil(i / self.size_divisor) * self.size_divisor
  111. for i in target_size
  112. ]
  113. img = F.resize(img, target_size, interp=self.interp, backend=self.backend)
  114. return img
  115. class ResizeByLong(_BaseResize):
  116. """
  117. Proportionally resize the image by specifying the target length of the
  118. longest side.
  119. """
  120. def __init__(
  121. self, target_long_edge, size_divisor=None, interp="LINEAR", backend="cv2"
  122. ):
  123. """
  124. Initialize the instance.
  125. Args:
  126. target_long_edge (int): Target length of the longest side of image.
  127. size_divisor (int|None, optional): Divisor of resized image size.
  128. Default: None.
  129. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  130. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  131. """
  132. super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
  133. self.target_long_edge = target_long_edge
  134. def __call__(self, imgs):
  135. """apply"""
  136. return [self.resize(img) for img in imgs]
  137. def resize(self, img):
  138. h, w = img.shape[:2]
  139. scale = self.target_long_edge / max(h, w)
  140. h_resize = round(h * scale)
  141. w_resize = round(w * scale)
  142. if self.size_divisor is not None:
  143. h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
  144. w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor
  145. img = F.resize(
  146. img, (w_resize, h_resize), interp=self.interp, backend=self.backend
  147. )
  148. return img
  149. class ResizeByShort(_BaseResize):
  150. """
  151. Proportionally resize the image by specifying the target length of the
  152. shortest side.
  153. """
  154. def __init__(
  155. self, target_short_edge, size_divisor=None, interp="LINEAR", backend="cv2"
  156. ):
  157. """
  158. Initialize the instance.
  159. Args:
  160. target_short_edge (int): Target length of the shortest side of image.
  161. size_divisor (int|None, optional): Divisor of resized image size.
  162. Default: None.
  163. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  164. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  165. """
  166. super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
  167. self.target_short_edge = target_short_edge
  168. def __call__(self, imgs):
  169. """apply"""
  170. return [self.resize(img) for img in imgs]
  171. def resize(self, img):
  172. h, w = img.shape[:2]
  173. scale = self.target_short_edge / min(h, w)
  174. h_resize = round(h * scale)
  175. w_resize = round(w * scale)
  176. if self.size_divisor is not None:
  177. h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
  178. w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor
  179. img = F.resize(
  180. img, (w_resize, h_resize), interp=self.interp, backend=self.backend
  181. )
  182. return img
  183. class Normalize:
  184. """Normalize the image."""
  185. def __init__(self, scale=1.0 / 255, mean=0.5, std=0.5, preserve_dtype=False):
  186. """
  187. Initialize the instance.
  188. Args:
  189. scale (float, optional): Scaling factor to apply to the image before
  190. applying normalization. Default: 1/255.
  191. mean (float|tuple|list, optional): Means for each channel of the image.
  192. Default: 0.5.
  193. std (float|tuple|list, optional): Standard deviations for each channel
  194. of the image. Default: 0.5.
  195. preserve_dtype (bool, optional): Whether to preserve the original dtype
  196. of the image.
  197. """
  198. super().__init__()
  199. self.scale = np.float32(scale)
  200. if isinstance(mean, float):
  201. mean = [mean]
  202. self.mean = np.asarray(mean).astype("float32")
  203. if isinstance(std, float):
  204. std = [std]
  205. self.std = np.asarray(std).astype("float32")
  206. self.preserve_dtype = preserve_dtype
  207. def __call__(self, imgs):
  208. """apply"""
  209. old_type = imgs[0].dtype
  210. # XXX: If `old_type` has higher precision than float32,
  211. # we will lose some precision.
  212. imgs = np.array(imgs).astype("float32", copy=False)
  213. imgs *= self.scale
  214. imgs -= self.mean
  215. imgs /= self.std
  216. if self.preserve_dtype:
  217. imgs = imgs.astype(old_type, copy=False)
  218. return list(imgs)
  219. class ToCHWImage:
  220. """Reorder the dimensions of the image from HWC to CHW."""
  221. def __call__(self, imgs):
  222. """apply"""
  223. return [img.transpose((2, 0, 1)) for img in imgs]
  224. class ToBatch:
  225. def __call__(self, imgs):
  226. return [np.stack(imgs, axis=0).astype(dtype=np.float32, copy=False)]