processors.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import List, Optional, Sequence, Tuple, Union
  15. import numpy as np
  16. from ....utils.deps import class_requires_deps, is_dep_available
  17. from ...utils.benchmark import benchmark
  18. if is_dep_available("opencv-contrib-python"):
  19. import cv2
  20. @benchmark.timeit
  21. @class_requires_deps("opencv-contrib-python")
  22. class Scale:
  23. """Scale images."""
  24. def __init__(
  25. self,
  26. short_size: int,
  27. fixed_ratio: bool = True,
  28. keep_ratio: Union[bool, None] = None,
  29. do_round: bool = False,
  30. ) -> None:
  31. """
  32. Initializes the Scale class.
  33. Args:
  34. short_size (int): The target size for the shorter side of the image.
  35. fixed_ratio (bool): Whether to maintain a fixed aspect ratio of 4:3.
  36. keep_ratio (Union[bool, None]): Whether to keep the aspect ratio. Cannot be True if fixed_ratio is True.
  37. do_round (bool): Whether to round the scaling factor.
  38. """
  39. super().__init__()
  40. self.short_size = short_size
  41. assert (fixed_ratio and not keep_ratio) or (
  42. not fixed_ratio
  43. ), f"fixed_ratio and keep_ratio cannot be true at the same time"
  44. self.fixed_ratio = fixed_ratio
  45. self.keep_ratio = keep_ratio
  46. self.do_round = do_round
  47. def scale(self, video: List[np.ndarray]) -> List[np.ndarray]:
  48. """
  49. Performs resize operations on a sequence of images.
  50. Args:
  51. video (List[np.ndarray]): List where each item is an image, as a numpy array.
  52. For example, [np.ndarray0, np.ndarray1, np.ndarray2, ...]
  53. Returns:
  54. List[np.ndarray]: List where each item is a np.ndarray after scaling.
  55. """
  56. imgs = video
  57. resized_imgs = []
  58. for i in range(len(imgs)):
  59. img = imgs[i]
  60. if isinstance(img, np.ndarray):
  61. h, w, _ = img.shape
  62. else:
  63. raise NotImplementedError
  64. if (w <= h and w == self.short_size) or (h <= w and h == self.short_size):
  65. resized_imgs.append(img)
  66. continue
  67. if w <= h:
  68. ow = self.short_size
  69. if self.fixed_ratio:
  70. oh = int(self.short_size * 4.0 / 3.0)
  71. elif self.keep_ratio is False:
  72. oh = self.short_size
  73. else:
  74. scale_factor = self.short_size / w
  75. oh = (
  76. int(h * float(scale_factor) + 0.5)
  77. if self.do_round
  78. else int(h * self.short_size / w)
  79. )
  80. ow = (
  81. int(w * float(scale_factor) + 0.5)
  82. if self.do_round
  83. else self.short_size
  84. )
  85. else:
  86. oh = self.short_size
  87. if self.fixed_ratio:
  88. ow = int(self.short_size * 4.0 / 3.0)
  89. elif self.keep_ratio is False:
  90. ow = self.short_size
  91. else:
  92. scale_factor = self.short_size / h
  93. oh = (
  94. int(h * float(scale_factor) + 0.5)
  95. if self.do_round
  96. else self.short_size
  97. )
  98. ow = (
  99. int(w * float(scale_factor) + 0.5)
  100. if self.do_round
  101. else int(w * self.short_size / h)
  102. )
  103. resized_imgs.append(
  104. cv2.resize(img, (ow, oh), interpolation=cv2.INTER_LINEAR)
  105. )
  106. imgs = resized_imgs
  107. return imgs
  108. def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
  109. """
  110. Apply the scaling operation to a list of videos.
  111. Args:
  112. videos (List[np.ndarray]): A list of videos, where each video is a sequence
  113. of images.
  114. Returns:
  115. List[np.ndarray]: A list of videos after scaling, where each video is a list of images.
  116. """
  117. return [self.scale(video) for video in videos]
  118. @benchmark.timeit
  119. class CenterCrop:
  120. """Center crop images."""
  121. def __init__(self, target_size: int, do_round: bool = True) -> None:
  122. """
  123. Initializes the CenterCrop class.
  124. Args:
  125. target_size (int): The size of the cropped area.
  126. do_round (bool): Whether to round the crop coordinates.
  127. """
  128. super().__init__()
  129. self.target_size = target_size
  130. self.do_round = do_round
  131. def center_crop(self, imgs: List[np.ndarray]) -> List[np.ndarray]:
  132. """
  133. Performs center crop operations on images.
  134. Args:
  135. imgs (List[np.ndarray]): A sequence of images (a numpy array).
  136. Returns:
  137. List[np.ndarray]: A list of images after center cropping or a cropped numpy array.
  138. """
  139. crop_imgs = []
  140. th, tw = self.target_size, self.target_size
  141. for img in imgs:
  142. h, w, _ = img.shape
  143. assert (w >= self.target_size) and (
  144. h >= self.target_size
  145. ), "image width({}) and height({}) should be larger than crop size".format(
  146. w, h, self.target_size
  147. )
  148. x1 = int(round((w - tw) / 2.0)) if self.do_round else (w - tw) // 2
  149. y1 = int(round((h - th) / 2.0)) if self.do_round else (h - th) // 2
  150. crop_imgs.append(img[y1 : y1 + th, x1 : x1 + tw])
  151. return crop_imgs
  152. def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
  153. """
  154. Apply the center crop operation to a list of videos.
  155. Args:
  156. videos (List[np.ndarray]): A list of videos, where each video is a sequence of images.
  157. Returns:
  158. List[np.ndarray]: A list of videos after center cropping.
  159. """
  160. return [self.center_crop(video) for video in videos]
  161. @benchmark.timeit
  162. class Image2Array:
  163. """Convert a sequence of images to a numpy array with optional transposition."""
  164. def __init__(self, transpose: bool = True, data_format: str = "tchw") -> None:
  165. """
  166. Initializes the Image2Array class.
  167. Args:
  168. transpose (bool): Whether to transpose the resulting numpy array.
  169. data_format (str): The format to transpose to, either 'tchw' or 'cthw'.
  170. Raises:
  171. AssertionError: If data_format is not one of the allowed values.
  172. """
  173. super().__init__()
  174. assert data_format in [
  175. "tchw",
  176. "cthw",
  177. ], f"Target format must in ['tchw', 'cthw'], but got {data_format}"
  178. self.transpose = transpose
  179. self.data_format = data_format
  180. def img2array(self, imgs: List[np.ndarray]) -> np.ndarray:
  181. """
  182. Converts a sequence of images to a numpy array and optionally transposes it.
  183. Args:
  184. imgs (List[np.ndarray]): A list of images to be converted to a numpy array.
  185. Returns:
  186. np.ndarray: A numpy array representation of the images.
  187. """
  188. t_imgs = np.stack(imgs).astype("float32")
  189. if self.transpose:
  190. if self.data_format == "tchw":
  191. t_imgs = t_imgs.transpose([0, 3, 1, 2]) # tchw
  192. else:
  193. t_imgs = t_imgs.transpose([3, 0, 1, 2]) # cthw
  194. return t_imgs
  195. def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
  196. """
  197. Apply the image to array conversion to a list of videos.
  198. Args:
  199. videos (List[Sequence[np.ndarray]]): A list of videos, where each video is a sequence of images.
  200. Returns:
  201. List[np.ndarray]: A list of numpy arrays, one for each video.
  202. """
  203. return [self.img2array(video) for video in videos]
  204. @benchmark.timeit
  205. @class_requires_deps("opencv-contrib-python")
  206. class NormalizeVideo:
  207. """
  208. Normalize video frames by subtracting the mean and dividing by the standard deviation.
  209. """
  210. def __init__(
  211. self,
  212. mean: Sequence[float],
  213. std: Sequence[float],
  214. tensor_shape: Sequence[int] = [3, 1, 1],
  215. inplace: bool = False,
  216. ) -> None:
  217. """
  218. Initializes the NormalizeVideo class.
  219. Args:
  220. mean (Sequence[float]): The mean values for each channel.
  221. std (Sequence[float]): The standard deviation values for each channel.
  222. tensor_shape (Sequence[int]): The shape of the mean and std tensors.
  223. inplace (bool): Whether to perform normalization in place.
  224. """
  225. super().__init__()
  226. self.inplace = inplace
  227. if not inplace:
  228. self.mean = np.array(mean).reshape(tensor_shape).astype(np.float32)
  229. self.std = np.array(std).reshape(tensor_shape).astype(np.float32)
  230. else:
  231. self.mean = np.array(mean, dtype=np.float32)
  232. self.std = np.array(std, dtype=np.float32)
  233. def normalize_video(self, imgs: np.ndarray) -> np.ndarray:
  234. """
  235. Normalizes a sequence of images.
  236. Args:
  237. imgs (np.ndarray): A numpy array of images to be normalized.
  238. Returns:
  239. np.ndarray: The normalized images as a numpy array.
  240. """
  241. if self.inplace:
  242. n = len(imgs)
  243. h, w, c = imgs[0].shape
  244. norm_imgs = np.empty((n, h, w, c), dtype=np.float32)
  245. for i, img in enumerate(imgs):
  246. norm_imgs[i] = img
  247. for img in norm_imgs: # [n,h,w,c]
  248. mean = np.float64(self.mean.reshape(1, -1)) # [1, 3]
  249. stdinv = 1 / np.float64(self.std.reshape(1, -1)) # [1, 3]
  250. cv2.subtract(img, mean, img)
  251. cv2.multiply(img, stdinv, img)
  252. else:
  253. imgs = imgs
  254. norm_imgs = imgs / 255.0
  255. norm_imgs -= self.mean
  256. norm_imgs /= self.std
  257. imgs = norm_imgs
  258. imgs = np.expand_dims(imgs, axis=0).copy()
  259. return imgs
  260. def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
  261. """
  262. Apply normalization to a list of videos.
  263. Args:
  264. videos (List[np.ndarray]): A list of videos, where each video is a numpy array of images.
  265. Returns:
  266. List[np.ndarray]: A list of normalized videos as numpy arrays.
  267. """
  268. return [self.normalize_video(video) for video in videos]
  269. @benchmark.timeit
  270. class VideoClasTopk:
  271. """Applies a top-k transformation on video classification predictions."""
  272. def __init__(self, class_ids: Optional[Sequence[Union[str, int]]] = None) -> None:
  273. """
  274. Initializes the VideoClasTopk class.
  275. Args:
  276. class_ids (Optional[Sequence[Union[str, int]]]): A list of class labels corresponding to class indices.
  277. """
  278. super().__init__()
  279. self.class_id_map = self._parse_class_id_map(class_ids)
  280. def softmax(self, data: np.ndarray) -> np.ndarray:
  281. """
  282. Applies the softmax function to an array of data.
  283. Args:
  284. data (np.ndarray): An array of data for which to compute softmax.
  285. Returns:
  286. np.ndarray: The softmax-transformed data.
  287. """
  288. x_max = np.max(data, axis=-1, keepdims=True)
  289. e_x = np.exp(data - x_max)
  290. return e_x / np.sum(e_x, axis=-1, keepdims=True)
  291. def _parse_class_id_map(
  292. self, class_ids: Optional[Sequence[Union[str, int]]]
  293. ) -> Optional[dict]:
  294. """
  295. Parses a list of class IDs into a mapping from class index to class label.
  296. Args:
  297. class_ids (Optional[Sequence[Union[str, int]]]): A list of class labels.
  298. Returns:
  299. Optional[dict]: A dictionary mapping class indices to labels, or None if no class_ids are provided.
  300. """
  301. if class_ids is None:
  302. return None
  303. class_id_map = {id: str(lb) for id, lb in enumerate(class_ids)}
  304. return class_id_map
  305. def __call__(
  306. self, preds: np.ndarray, topk: int = 5
  307. ) -> Tuple[np.ndarray, List[np.ndarray], List[List[str]]]:
  308. """
  309. Selects the top-k predictions from the classification output.
  310. Args:
  311. preds (np.ndarray): A 2D array of prediction scores.
  312. topk (int): The number of top predictions to return.
  313. Returns:
  314. Tuple[np.ndarray, List[np.ndarray], List[List[str]]]: A tuple containing:
  315. - An array of indices of the top-k predictions.
  316. - A list of arrays of scores for the top-k predictions.
  317. - A list of lists of label names for the top-k predictions.
  318. """
  319. preds[0] = self.softmax(preds[0])
  320. indexes = preds[0].argsort(axis=1)[:, -topk:][:, ::-1].astype("int32")
  321. scores = [
  322. list(np.around(pred[index], decimals=5))
  323. for pred, index in zip(preds[0], indexes)
  324. ]
  325. label_names = [[self.class_id_map[i] for i in index] for index in indexes]
  326. return indexes, scores, label_names
  327. @benchmark.timeit
  328. class ToBatch:
  329. """A class for batching videos."""
  330. def __call__(self, videos: List[np.ndarray]) -> List[np.ndarray]:
  331. """Call method to stack videos into a batch.
  332. Args:
  333. videos (list of np.ndarrays): List of videos to process.
  334. Returns:
  335. list of np.ndarrays: List containing a stacked tensor of the videos.
  336. """
  337. return [np.concatenate(videos, axis=0).astype(dtype=np.float32, copy=False)]