common.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import math
  15. from pathlib import Path
  16. from copy import deepcopy
  17. import numpy as np
  18. import cv2
  19. from .....utils.cache import CACHE_DIR
  20. from ....utils.io import ImageReader, ImageWriter
  21. from ...utils.mixin import BatchSizeMixin
  22. from ...base import BaseComponent
  23. from ..read_data import _BaseRead
  24. from . import funcs as F
  25. __all__ = [
  26. "ReadImage",
  27. "Flip",
  28. "Crop",
  29. "Resize",
  30. "ResizeByLong",
  31. "ResizeByShort",
  32. "Pad",
  33. "Normalize",
  34. "ToCHWImage",
  35. "PadStride",
  36. ]
  37. def _check_image_size(input_):
  38. """check image size"""
  39. if not (
  40. isinstance(input_, (list, tuple))
  41. and len(input_) == 2
  42. and isinstance(input_[0], int)
  43. and isinstance(input_[1], int)
  44. ):
  45. raise TypeError(f"{input_} cannot represent a valid image size.")
  46. class ReadImage(_BaseRead):
  47. """Load image from the file."""
  48. INPUT_KEYS = ["img"]
  49. OUTPUT_KEYS = ["img", "img_size", "ori_img", "ori_img_size"]
  50. DEAULT_INPUTS = {"img": "img"}
  51. DEAULT_OUTPUTS = {
  52. "img": "img",
  53. "img_path": "img_path",
  54. "img_size": "img_size",
  55. "ori_img": "ori_img",
  56. "ori_img_size": "ori_img_size",
  57. }
  58. _FLAGS_DICT = {
  59. "BGR": cv2.IMREAD_COLOR,
  60. "RGB": cv2.IMREAD_COLOR,
  61. "GRAY": cv2.IMREAD_GRAYSCALE,
  62. }
  63. SUFFIX = ["jpg", "png", "jpeg", "JPEG", "JPG", "bmp"]
  64. def __init__(self, batch_size=1, format="BGR"):
  65. """
  66. Initialize the instance.
  67. Args:
  68. format (str, optional): Target color format to convert the image to.
  69. Choices are 'BGR', 'RGB', and 'GRAY'. Default: 'BGR'.
  70. """
  71. super().__init__(batch_size)
  72. self.format = format
  73. flags = self._FLAGS_DICT[self.format]
  74. self._reader = ImageReader(backend="opencv", flags=flags)
  75. self._writer = ImageWriter(backend="opencv")
  76. def apply(self, img):
  77. """apply"""
  78. if not isinstance(img, str):
  79. img_path = (Path(CACHE_DIR) / "predict_input" / "tmp_img.jpg").as_posix()
  80. self._writer.write(img_path, img)
  81. yield [
  82. {
  83. "img_path": img_path,
  84. "img": img,
  85. "img_size": [img.shape[1], img.shape[0]],
  86. "ori_img": deepcopy(img),
  87. "ori_img_size": deepcopy([img.shape[1], img.shape[0]]),
  88. }
  89. ]
  90. else:
  91. img_path = img
  92. img_path = self._download_from_url(img_path)
  93. file_list = self._get_files_list(img_path)
  94. batch = []
  95. for img_path in file_list:
  96. img = self._read_img(img_path)
  97. batch.append(img)
  98. if len(batch) >= self.batch_size:
  99. yield batch
  100. batch = []
  101. if len(batch) > 0:
  102. yield batch
  103. def _read_img(self, img_path):
  104. blob = self._reader.read(img_path)
  105. if blob is None:
  106. raise Exception("Image read Error")
  107. if self.format == "RGB":
  108. if blob.ndim != 3:
  109. raise RuntimeError("Array is not 3-dimensional.")
  110. # BGR to RGB
  111. blob = blob[..., ::-1]
  112. return {
  113. "img_path": img_path,
  114. "img": blob,
  115. "img_size": [blob.shape[1], blob.shape[0]],
  116. "ori_img": deepcopy(blob),
  117. "ori_img_size": deepcopy([blob.shape[1], blob.shape[0]]),
  118. }
  119. class GetImageInfo(BaseComponent):
  120. """Get Image Info"""
  121. INPUT_KEYS = "img"
  122. OUTPUT_KEYS = "img_size"
  123. DEAULT_INPUTS = {"img": "img"}
  124. DEAULT_OUTPUTS = {"img_size": "img_size"}
  125. def __init__(self):
  126. super().__init__()
  127. def apply(self, img):
  128. """apply"""
  129. return {"img_size": [img.shape[1], img.shape[0]]}
  130. class Flip(BaseComponent):
  131. """Flip the image vertically or horizontally."""
  132. INPUT_KEYS = "img"
  133. OUTPUT_KEYS = "img"
  134. DEAULT_INPUTS = {"img": "img"}
  135. DEAULT_OUTPUTS = {"img": "img"}
  136. def __init__(self, mode="H"):
  137. """
  138. Initialize the instance.
  139. Args:
  140. mode (str, optional): 'H' for horizontal flipping and 'V' for vertical
  141. flipping. Default: 'H'.
  142. """
  143. super().__init__()
  144. if mode not in ("H", "V"):
  145. raise ValueError("`mode` should be 'H' or 'V'.")
  146. self.mode = mode
  147. def apply(self, img):
  148. """apply"""
  149. if self.mode == "H":
  150. img = F.flip_h(img)
  151. elif self.mode == "V":
  152. img = F.flip_v(img)
  153. return {"img": img}
  154. class Crop(BaseComponent):
  155. """Crop region from the image."""
  156. INPUT_KEYS = "img"
  157. OUTPUT_KEYS = ["img", "img_size"]
  158. DEAULT_INPUTS = {"img": "img"}
  159. DEAULT_OUTPUTS = {"img": "img", "img_size": "img_size"}
  160. def __init__(self, crop_size, mode="C"):
  161. """
  162. Initialize the instance.
  163. Args:
  164. crop_size (list|tuple|int): Width and height of the region to crop.
  165. mode (str, optional): 'C' for cropping the center part and 'TL' for
  166. cropping the top left part. Default: 'C'.
  167. """
  168. super().__init__()
  169. if isinstance(crop_size, int):
  170. crop_size = [crop_size, crop_size]
  171. _check_image_size(crop_size)
  172. self.crop_size = crop_size
  173. if mode not in ("C", "TL"):
  174. raise ValueError("Unsupported interpolation method")
  175. self.mode = mode
  176. def apply(self, img):
  177. """apply"""
  178. h, w = img.shape[:2]
  179. cw, ch = self.crop_size
  180. if self.mode == "C":
  181. x1 = max(0, (w - cw) // 2)
  182. y1 = max(0, (h - ch) // 2)
  183. elif self.mode == "TL":
  184. x1, y1 = 0, 0
  185. x2 = min(w, x1 + cw)
  186. y2 = min(h, y1 + ch)
  187. coords = (x1, y1, x2, y2)
  188. if coords == (0, 0, w, h):
  189. raise ValueError(
  190. f"Input image ({w}, {h}) smaller than the target size ({cw}, {ch})."
  191. )
  192. img = F.slice(img, coords=coords)
  193. return {"img": img, "img_size": [img.shape[1], img.shape[0]]}
  194. class _BaseResize(BaseComponent):
  195. _INTERP_DICT = {
  196. "NEAREST": cv2.INTER_NEAREST,
  197. "LINEAR": cv2.INTER_LINEAR,
  198. "CUBIC": cv2.INTER_CUBIC,
  199. "AREA": cv2.INTER_AREA,
  200. "LANCZOS4": cv2.INTER_LANCZOS4,
  201. }
  202. def __init__(self, size_divisor, interp):
  203. super().__init__()
  204. if size_divisor is not None:
  205. assert isinstance(
  206. size_divisor, int
  207. ), "`size_divisor` should be None or int."
  208. self.size_divisor = size_divisor
  209. try:
  210. interp = self._INTERP_DICT[interp]
  211. except KeyError:
  212. raise ValueError(
  213. "`interp` should be one of {}.".format(self._INTERP_DICT.keys())
  214. )
  215. self.interp = interp
  216. @staticmethod
  217. def _rescale_size(img_size, target_size):
  218. """rescale size"""
  219. scale = min(max(target_size) / max(img_size), min(target_size) / min(img_size))
  220. rescaled_size = [round(i * scale) for i in img_size]
  221. return rescaled_size, scale
  222. class Resize(_BaseResize):
  223. """Resize the image."""
  224. INPUT_KEYS = "img"
  225. OUTPUT_KEYS = ["img", "img_size", "scale_factors"]
  226. DEAULT_INPUTS = {"img": "img"}
  227. DEAULT_OUTPUTS = {
  228. "img": "img",
  229. "img_size": "img_size",
  230. "scale_factors": "scale_factors",
  231. }
  232. def __init__(
  233. self, target_size, keep_ratio=False, size_divisor=None, interp="LINEAR"
  234. ):
  235. """
  236. Initialize the instance.
  237. Args:
  238. target_size (list|tuple|int): Target width and height.
  239. keep_ratio (bool, optional): Whether to keep the aspect ratio of resized
  240. image. Default: False.
  241. size_divisor (int|None, optional): Divisor of resized image size.
  242. Default: None.
  243. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  244. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  245. """
  246. super().__init__(size_divisor=size_divisor, interp=interp)
  247. if isinstance(target_size, int):
  248. target_size = [target_size, target_size]
  249. _check_image_size(target_size)
  250. self.target_size = target_size
  251. self.keep_ratio = keep_ratio
  252. def apply(self, img):
  253. """apply"""
  254. target_size = self.target_size
  255. original_size = img.shape[:2]
  256. if self.keep_ratio:
  257. h, w = img.shape[0:2]
  258. target_size, _ = self._rescale_size((w, h), self.target_size)
  259. if self.size_divisor:
  260. target_size = [
  261. math.ceil(i / self.size_divisor) * self.size_divisor
  262. for i in target_size
  263. ]
  264. img_scale_w, img_scale_h = [
  265. target_size[1] / original_size[1],
  266. target_size[0] / original_size[0],
  267. ]
  268. img = F.resize(img, target_size, interp=self.interp)
  269. return {
  270. "img": img,
  271. "img_size": [img.shape[1], img.shape[0]],
  272. "scale_factors": [img_scale_w, img_scale_h],
  273. }
  274. class ResizeByLong(_BaseResize):
  275. """
  276. Proportionally resize the image by specifying the target length of the
  277. longest side.
  278. """
  279. INPUT_KEYS = "img"
  280. OUTPUT_KEYS = ["img", "img_size"]
  281. DEAULT_INPUTS = {"img": "img"}
  282. DEAULT_OUTPUTS = {"img": "img", "img_size": "img_size"}
  283. def __init__(self, target_long_edge, size_divisor=None, interp="LINEAR"):
  284. """
  285. Initialize the instance.
  286. Args:
  287. target_long_edge (int): Target length of the longest side of image.
  288. size_divisor (int|None, optional): Divisor of resized image size.
  289. Default: None.
  290. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  291. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  292. """
  293. super().__init__(size_divisor=size_divisor, interp=interp)
  294. self.target_long_edge = target_long_edge
  295. def apply(self, img):
  296. """apply"""
  297. h, w = img.shape[:2]
  298. scale = self.target_long_edge / max(h, w)
  299. h_resize = round(h * scale)
  300. w_resize = round(w * scale)
  301. if self.size_divisor is not None:
  302. h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
  303. w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor
  304. img = F.resize(img, (w_resize, h_resize), interp=self.interp)
  305. return {"img": img, "img_size": [img.shape[1], img.shape[0]]}
  306. class ResizeByShort(_BaseResize):
  307. """
  308. Proportionally resize the image by specifying the target length of the
  309. shortest side.
  310. """
  311. INPUT_KEYS = "img"
  312. OUTPUT_KEYS = ["img", "img_size"]
  313. DEAULT_INPUTS = {"img": "img"}
  314. DEAULT_OUTPUTS = {"img": "img", "img_size": "img_size"}
  315. def __init__(self, target_short_edge, size_divisor=None, interp="LINEAR"):
  316. """
  317. Initialize the instance.
  318. Args:
  319. target_short_edge (int): Target length of the shortest side of image.
  320. size_divisor (int|None, optional): Divisor of resized image size.
  321. Default: None.
  322. interp (str, optional): Interpolation method. Choices are 'NEAREST',
  323. 'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
  324. """
  325. super().__init__(size_divisor=size_divisor, interp=interp)
  326. self.target_short_edge = target_short_edge
  327. def apply(self, img):
  328. """apply"""
  329. h, w = img.shape[:2]
  330. scale = self.target_short_edge / min(h, w)
  331. h_resize = round(h * scale)
  332. w_resize = round(w * scale)
  333. if self.size_divisor is not None:
  334. h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
  335. w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor
  336. img = F.resize(img, (w_resize, h_resize), interp=self.interp)
  337. return {"img": img, "img_size": [img.shape[1], img.shape[0]]}
  338. class Pad(BaseComponent):
  339. """Pad the image."""
  340. INPUT_KEYS = "img"
  341. OUTPUT_KEYS = ["img", "img_size"]
  342. DEAULT_INPUTS = {"img": "img"}
  343. DEAULT_OUTPUTS = {"img": "img", "img_size": "img_size"}
  344. def __init__(self, target_size, val=127.5):
  345. """
  346. Initialize the instance.
  347. Args:
  348. target_size (list|tuple|int): Target width and height of the image after
  349. padding.
  350. val (float, optional): Value to fill the padded area. Default: 127.5.
  351. """
  352. super().__init__()
  353. if isinstance(target_size, int):
  354. target_size = [target_size, target_size]
  355. _check_image_size(target_size)
  356. self.target_size = target_size
  357. self.val = val
  358. def apply(self, img):
  359. """apply"""
  360. h, w = img.shape[:2]
  361. tw, th = self.target_size
  362. ph = th - h
  363. pw = tw - w
  364. if ph < 0 or pw < 0:
  365. raise ValueError(
  366. f"Input image ({w}, {h}) smaller than the target size ({tw}, {th})."
  367. )
  368. else:
  369. img = F.pad(img, pad=(0, ph, 0, pw), val=self.val)
  370. return {"img": img, "img_size": [img.shape[1], img.shape[0]]}
  371. class PadStride(BaseComponent):
  372. """padding image for model with FPN , instead PadBatch(pad_to_stride, pad_gt) in original config
  373. Args:
  374. stride (bool): model with FPN need image shape % stride == 0
  375. """
  376. INPUT_KEYS = "img"
  377. OUTPUT_KEYS = "img"
  378. DEAULT_INPUTS = {"img": "img"}
  379. DEAULT_OUTPUTS = {"img": "img"}
  380. def __init__(self, stride=0):
  381. super().__init__()
  382. self.coarsest_stride = stride
  383. def apply(self, img):
  384. """
  385. Args:
  386. im (np.ndarray): image (np.ndarray)
  387. Returns:
  388. im (np.ndarray): processed image (np.ndarray)
  389. """
  390. im = img
  391. coarsest_stride = self.coarsest_stride
  392. if coarsest_stride <= 0:
  393. return {"img": im}
  394. im_c, im_h, im_w = im.shape
  395. pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
  396. pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
  397. padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
  398. padding_im[:, :im_h, :im_w] = im
  399. return {"img": padding_im}
  400. class Normalize(BaseComponent):
  401. """Normalize the image."""
  402. INPUT_KEYS = "img"
  403. OUTPUT_KEYS = "img"
  404. DEAULT_INPUTS = {"img": "img"}
  405. DEAULT_OUTPUTS = {"img": "img"}
  406. def __init__(self, scale=1.0 / 255, mean=0.5, std=0.5, preserve_dtype=False):
  407. """
  408. Initialize the instance.
  409. Args:
  410. scale (float, optional): Scaling factor to apply to the image before
  411. applying normalization. Default: 1/255.
  412. mean (float|tuple|list, optional): Means for each channel of the image.
  413. Default: 0.5.
  414. std (float|tuple|list, optional): Standard deviations for each channel
  415. of the image. Default: 0.5.
  416. preserve_dtype (bool, optional): Whether to preserve the original dtype
  417. of the image.
  418. """
  419. super().__init__()
  420. self.scale = np.float32(scale)
  421. if isinstance(mean, float):
  422. mean = [mean]
  423. self.mean = np.asarray(mean).astype("float32")
  424. if isinstance(std, float):
  425. std = [std]
  426. self.std = np.asarray(std).astype("float32")
  427. self.preserve_dtype = preserve_dtype
  428. def apply(self, img):
  429. """apply"""
  430. old_type = img.dtype
  431. # XXX: If `old_type` has higher precision than float32,
  432. # we will lose some precision.
  433. img = img.astype("float32", copy=False)
  434. img *= self.scale
  435. img -= self.mean
  436. img /= self.std
  437. if self.preserve_dtype:
  438. img = img.astype(old_type, copy=False)
  439. return {"img": img}
  440. class ToCHWImage(BaseComponent):
  441. """Reorder the dimensions of the image from HWC to CHW."""
  442. INPUT_KEYS = "img"
  443. OUTPUT_KEYS = "img"
  444. DEAULT_INPUTS = {"img": "img"}
  445. DEAULT_OUTPUTS = {"img": "img"}
  446. def apply(self, img):
  447. """apply"""
  448. img = img.transpose((2, 0, 1))
  449. return {"img": img}