| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285 |
- # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import math
- import numpy as np
- from PIL import Image
- from .....utils.deps import class_requires_deps, is_dep_available
- from ....utils.benchmark import benchmark
- from . import funcs as F
- if is_dep_available("opencv-contrib-python"):
- import cv2
@class_requires_deps("opencv-contrib-python")
class _BaseResize:
    """Common constructor logic and helpers shared by the resize operators.

    The constructor validates `size_divisor` and converts the textual
    interpolation name into the constant used by the selected backend.
    """

    def __init__(self, size_divisor, interp, backend="cv2"):
        """
        Initialize the instance.

        Args:
            size_divisor (int|None): Divisor of resized image size. Stored
                unchanged on the instance for subclasses to apply.
            interp (str): Interpolation method name (case-insensitive). With
                the 'cv2' backend the choices are 'NEAREST', 'LINEAR',
                'BICUBIC', 'AREA' and 'LANCZOS4'; with the 'pil' backend they
                are 'NEAREST', 'BILINEAR', 'BICUBIC', 'BOX' and 'LANCZOS4'.
            backend (str, optional): Resize backend, 'cv2' or 'pil'.
                Default: 'cv2'.

        Raises:
            AssertionError: If `size_divisor` is neither None nor an int.
            ValueError: If `backend` is not 'cv2' or 'pil', or if `interp` is
                not a valid choice for the selected backend.
        """
        _CV2_INTERP_DICT = {
            "NEAREST": cv2.INTER_NEAREST,
            "LINEAR": cv2.INTER_LINEAR,
            "BICUBIC": cv2.INTER_CUBIC,
            "AREA": cv2.INTER_AREA,
            "LANCZOS4": cv2.INTER_LANCZOS4,
        }
        _PIL_INTERP_DICT = {
            "NEAREST": Image.NEAREST,
            "BILINEAR": Image.BILINEAR,
            "BICUBIC": Image.BICUBIC,
            "BOX": Image.BOX,
            "LANCZOS4": Image.LANCZOS,
        }
        super().__init__()

        if size_divisor is not None:
            # Kept as an assert so the exception type seen by existing
            # callers does not change.
            assert isinstance(
                size_divisor, int
            ), "`size_divisor` should be None or int."
        self.size_divisor = size_divisor

        interp = interp.upper()
        if backend == "cv2":
            interp_table = _CV2_INTERP_DICT
        elif backend == "pil":
            interp_table = _PIL_INTERP_DICT
        else:
            raise ValueError("backend must be `cv2` or `pil`")

        # Only the table lookup can raise KeyError, so the try body is
        # limited to it.
        try:
            interp = interp_table[interp]
        except KeyError:
            # Pass a plain list so the message shows ['NEAREST', ...] rather
            # than the `dict_keys([...])` repr, and drop the internal
            # KeyError from the traceback.
            raise ValueError(
                "For backend '{}', `interp` should be one of {}. Please ensure the interpolation method matches the selected backend.".format(
                    backend,
                    list(interp_table),
                )
            ) from None

        self.interp = interp
        self.backend = backend

    @staticmethod
    def _rescale_size(img_size, target_size):
        """Scale `img_size` to fit within `target_size`, keeping its aspect ratio.

        The scale factor is the smaller of (longest target edge / longest
        image edge) and (shortest target edge / shortest image edge).

        Args:
            img_size (tuple|list): Two-element size of the image.
            target_size (tuple|list): Two-element size to fit into.

        Returns:
            tuple: `(rescaled_size, scale)` where `rescaled_size` is a list of
            the rounded, scaled entries of `img_size` (same order) and `scale`
            is the factor that was applied.
        """
        scale = min(max(target_size) / max(img_size), min(target_size) / min(img_size))
        rescaled_size = [round(i * scale) for i in img_size]
        return rescaled_size, scale
@benchmark.timeit
class Resize(_BaseResize):
    """Resize the image."""

    def __init__(
        self,
        target_size,
        keep_ratio=False,
        size_divisor=None,
        interp="LINEAR",
        backend="cv2",
    ):
        """
        Initialize the instance.

        Args:
            target_size (list|tuple|int): Target width and height. A single
                int is expanded to `[target_size, target_size]`.
            keep_ratio (bool, optional): Whether to keep the aspect ratio of resized
                image. Default: False.
            size_divisor (int|None, optional): Divisor of resized image size.
                Default: None.
            interp (str, optional): Interpolation method. For the 'cv2' backend
                the choices are 'NEAREST', 'LINEAR', 'BICUBIC', 'AREA', and
                'LANCZOS4'; for the 'pil' backend they are 'NEAREST',
                'BILINEAR', 'BICUBIC', 'BOX', and 'LANCZOS4'. Default: 'LINEAR'.
            backend (str, optional): Resize backend, 'cv2' or 'pil'.
                Default: 'cv2'.
        """
        super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)

        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        F.check_image_size(target_size)

        self.target_size = target_size
        self.keep_ratio = keep_ratio

    def __call__(self, imgs):
        """Apply `resize` to every image in `imgs` and return the results as a list."""
        return [self.resize(img) for img in imgs]

    def resize(self, img):
        """Resize a single image.

        Args:
            img: Image whose `shape[:2]` is `(height, width)`
                (presumably a numpy array; confirm against `F.resize`).

        Returns:
            The result of `F.resize` for the computed target size.
        """
        target_size = self.target_size

        if self.keep_ratio:
            h, w = img.shape[0:2]
            # Fit (w, h) inside the configured target while keeping the ratio.
            target_size, _ = self._rescale_size((w, h), self.target_size)

        if self.size_divisor:
            # Round each side up to the next multiple of `size_divisor`.
            target_size = [
                math.ceil(i / self.size_divisor) * self.size_divisor
                for i in target_size
            ]

        return F.resize(img, target_size, interp=self.interp, backend=self.backend)
@benchmark.timeit
class ResizeByLong(_BaseResize):
    """
    Proportionally resize the image by specifying the target length of the
    longest side.
    """

    def __init__(
        self, target_long_edge, size_divisor=None, interp="LINEAR", backend="cv2"
    ):
        """
        Initialize the instance.

        Args:
            target_long_edge (int): Target length of the longest side of image.
            size_divisor (int|None, optional): Divisor of resized image size.
                None or 0 disables rounding. Default: None.
            interp (str, optional): Interpolation method. For the 'cv2' backend
                the choices are 'NEAREST', 'LINEAR', 'BICUBIC', 'AREA', and
                'LANCZOS4'; for the 'pil' backend they are 'NEAREST',
                'BILINEAR', 'BICUBIC', 'BOX', and 'LANCZOS4'. Default: 'LINEAR'.
            backend (str, optional): Resize backend, 'cv2' or 'pil'.
                Default: 'cv2'.
        """
        super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
        self.target_long_edge = target_long_edge

    def __call__(self, imgs):
        """Apply `resize` to every image in `imgs` and return the results as a list."""
        return [self.resize(img) for img in imgs]

    def resize(self, img):
        """Resize a single image so its longest side equals `target_long_edge`.

        Args:
            img: Image whose `shape[:2]` is `(height, width)`.

        Returns:
            The result of `F.resize` for the computed `(width, height)`.
        """
        h, w = img.shape[:2]
        scale = self.target_long_edge / max(h, w)
        h_resize = round(h * scale)
        w_resize = round(w * scale)

        # Truthiness test, as in `Resize.resize`: both None and 0 mean "no
        # rounding". A plain `is not None` test would let 0 through and
        # divide by zero below.
        if self.size_divisor:
            h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
            w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor

        return F.resize(
            img, (w_resize, h_resize), interp=self.interp, backend=self.backend
        )
@benchmark.timeit
class ResizeByShort(_BaseResize):
    """
    Proportionally resize the image by specifying the target length of the
    shortest side.
    """

    def __init__(
        self, target_short_edge, size_divisor=None, interp="LINEAR", backend="cv2"
    ):
        """
        Initialize the instance.

        Args:
            target_short_edge (int): Target length of the shortest side of image.
            size_divisor (int|None, optional): Divisor of resized image size.
                None or 0 disables rounding. Default: None.
            interp (str, optional): Interpolation method. For the 'cv2' backend
                the choices are 'NEAREST', 'LINEAR', 'BICUBIC', 'AREA', and
                'LANCZOS4'; for the 'pil' backend they are 'NEAREST',
                'BILINEAR', 'BICUBIC', 'BOX', and 'LANCZOS4'. Default: 'LINEAR'.
            backend (str, optional): Resize backend, 'cv2' or 'pil'.
                Default: 'cv2'.
        """
        super().__init__(size_divisor=size_divisor, interp=interp, backend=backend)
        self.target_short_edge = target_short_edge

    def __call__(self, imgs):
        """Apply `resize` to every image in `imgs` and return the results as a list."""
        return [self.resize(img) for img in imgs]

    def resize(self, img):
        """Resize a single image so its shortest side equals `target_short_edge`.

        Args:
            img: Image whose `shape[:2]` is `(height, width)`.

        Returns:
            The result of `F.resize` for the computed `(width, height)`.
        """
        h, w = img.shape[:2]
        scale = self.target_short_edge / min(h, w)
        h_resize = round(h * scale)
        w_resize = round(w * scale)

        # Truthiness test, as in `Resize.resize`: both None and 0 mean "no
        # rounding". A plain `is not None` test would let 0 through and
        # divide by zero below.
        if self.size_divisor:
            h_resize = math.ceil(h_resize / self.size_divisor) * self.size_divisor
            w_resize = math.ceil(w_resize / self.size_divisor) * self.size_divisor

        return F.resize(
            img, (w_resize, h_resize), interp=self.interp, backend=self.backend
        )
@benchmark.timeit
@class_requires_deps("opencv-contrib-python")
class Normalize:
    """Normalize the three-channel image."""

    def __init__(self, scale=1.0 / 255, mean=0.5, std=0.5):
        """
        Initialize the instance.

        Args:
            scale (float, optional): Scaling factor to apply to the image before
                applying normalization. Default: 1/255.
            mean (int|float|tuple|list|np.ndarray, optional): Means for each
                channel of the image. A scalar is used for all three channels.
                Default: 0.5.
            std (int|float|tuple|list|np.ndarray, optional): Standard deviations
                for each channel of the image. A scalar is used for all three
                channels. Default: 0.5.

        Raises:
            ValueError: If `mean` or `std` is a sequence whose length is not 3.
        """
        super().__init__()

        # Accept any real scalar, not only Python floats: an int such as
        # `mean=0` or a NumPy scalar would otherwise reach `len()` and fail
        # with a TypeError.
        scalar_types = (int, float, np.integer, np.floating)

        if isinstance(mean, scalar_types):
            mean = [mean] * 3
        elif len(mean) != 3:
            raise ValueError(
                f"Expected `mean` to be a tuple or list of length 3, but got {len(mean)} elements."
            )

        if isinstance(std, scalar_types):
            std = [std] * 3
        elif len(std) != 3:
            raise ValueError(
                f"Expected `std` to be a tuple or list of length 3, but got {len(std)} elements."
            )

        # (x * scale - mean) / std is precomputed as x * alpha + beta.
        self.alpha = [scale / s for s in std]
        self.beta = [-m / s for m, s in zip(mean, std)]

    def norm(self, img):
        """Normalize one image channel by channel.

        Each channel `c` is converted to float32 and mapped to
        `channel * alpha[c] + beta[c]`.

        Args:
            img: Image with the channel axis last (`img.shape[2]` channels).

        Returns:
            The image re-assembled with `cv2.merge` from the float32 channels.
        """
        split_im = list(cv2.split(img))
        for c in range(img.shape[2]):
            # `astype` yields a fresh float32 array, so the in-place updates
            # below do not touch the input image.
            split_im[c] = split_im[c].astype(np.float32)
            split_im[c] *= self.alpha[c]
            split_im[c] += self.beta[c]
        return cv2.merge(split_im)

    def __call__(self, imgs):
        """Apply `norm` to every image in `imgs` and return the results as a list."""
        return [self.norm(img) for img in imgs]
@benchmark.timeit
class ToCHWImage:
    """Move the channel axis of each image to the front (HWC -> CHW)."""

    def __call__(self, imgs):
        """Transpose every image in `imgs` with axis order (2, 0, 1).

        Returns:
            list: One transposed image per input image, in the same order.
        """
        chw_axes = (2, 0, 1)
        transposed = []
        for hwc_img in imgs:
            transposed.append(hwc_img.transpose(chw_axes))
        return transposed
@benchmark.timeit
class ToBatch:
    """Stack a sequence of images into a single float32 batch."""

    def __call__(self, imgs):
        """Stack `imgs` along a new leading axis and cast the result to float32.

        Returns:
            list: A one-element list holding the stacked float32 array.
        """
        stacked = np.stack(imgs, axis=0)
        # copy=False lets NumPy skip the copy when no cast is required.
        batch = stacked.astype(dtype=np.float32, copy=False)
        return [batch]
|