ops.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import cv2
import numpy as np
from PIL import Image, ImageEnhance
  18. def normalize(im, mean, std):
  19. im = im / 255.0
  20. im -= mean
  21. im /= std
  22. return im
  23. def permute(im, to_bgr=False):
  24. im = np.swapaxes(im, 1, 2)
  25. im = np.swapaxes(im, 1, 0)
  26. if to_bgr:
  27. im = im[[2, 1, 0], :, :]
  28. return im
  29. def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
  30. value = max(im.shape[0], im.shape[1])
  31. scale = float(long_size) / float(value)
  32. resized_width = int(round(im.shape[1] * scale))
  33. resized_height = int(round(im.shape[0] * scale))
  34. im = cv2.resize(
  35. im, (resized_width, resized_height), interpolation=interpolation)
  36. return im
  37. def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
  38. if isinstance(target_size, list) or isinstance(target_size, tuple):
  39. w = target_size[0]
  40. h = target_size[1]
  41. else:
  42. w = target_size
  43. h = target_size
  44. im = cv2.resize(im, (w, h), interpolation=interp)
  45. return im
  46. def random_crop(im,
  47. crop_size=224,
  48. lower_scale=0.08,
  49. lower_ratio=3. / 4,
  50. upper_ratio=4. / 3):
  51. scale = [lower_scale, 1.0]
  52. ratio = [lower_ratio, upper_ratio]
  53. aspect_ratio = math.sqrt(np.random.uniform(*ratio))
  54. w = 1. * aspect_ratio
  55. h = 1. / aspect_ratio
  56. bound = min((float(im.shape[0]) / im.shape[1]) / (h**2),
  57. (float(im.shape[1]) / im.shape[0]) / (w**2))
  58. scale_max = min(scale[1], bound)
  59. scale_min = min(scale[0], bound)
  60. target_area = im.shape[0] * im.shape[1] * np.random.uniform(
  61. scale_min, scale_max)
  62. target_size = math.sqrt(target_area)
  63. w = int(target_size * w)
  64. h = int(target_size * h)
  65. i = np.random.randint(0, im.shape[0] - h + 1)
  66. j = np.random.randint(0, im.shape[1] - w + 1)
  67. im = im[i:i + h, j:j + w, :]
  68. im = cv2.resize(im, (crop_size, crop_size))
  69. return im
  70. def center_crop(im, crop_size=224):
  71. height, width = im.shape[:2]
  72. w_start = (width - crop_size) // 2
  73. h_start = (height - crop_size) // 2
  74. w_end = w_start + crop_size
  75. h_end = h_start + crop_size
  76. im = im[h_start:h_end, w_start:w_end, :]
  77. return im
  78. def horizontal_flip(im):
  79. if len(im.shape) == 3:
  80. im = im[:, ::-1, :]
  81. elif len(im.shape) == 2:
  82. im = im[:, ::-1]
  83. return im
  84. def vertical_flip(im):
  85. if len(im.shape) == 3:
  86. im = im[::-1, :, :]
  87. elif len(im.shape) == 2:
  88. im = im[::-1, :]
  89. return im
  90. def bgr2rgb(im):
  91. return im[:, :, ::-1]
  92. def hue(im, hue_lower, hue_upper):
  93. delta = np.random.uniform(hue_lower, hue_upper)
  94. u = np.cos(delta * np.pi)
  95. w = np.sin(delta * np.pi)
  96. bt = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]])
  97. tyiq = np.array([[0.299, 0.587, 0.114], [0.596, -0.274, -0.321],
  98. [0.211, -0.523, 0.311]])
  99. ityiq = np.array([[1.0, 0.956, 0.621], [1.0, -0.272, -0.647],
  100. [1.0, -1.107, 1.705]])
  101. t = np.dot(np.dot(ityiq, bt), tyiq).T
  102. im = np.dot(im, t)
  103. return im
  104. def saturation(im, saturation_lower, saturation_upper):
  105. delta = np.random.uniform(saturation_lower, saturation_upper)
  106. gray = im * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
  107. gray = gray.sum(axis=2, keepdims=True)
  108. gray *= (1.0 - delta)
  109. im *= delta
  110. im += gray
  111. return im
  112. def contrast(im, contrast_lower, contrast_upper):
  113. delta = np.random.uniform(contrast_lower, contrast_upper)
  114. im *= delta
  115. return im
  116. def brightness(im, brightness_lower, brightness_upper):
  117. delta = np.random.uniform(brightness_lower, brightness_upper)
  118. im += delta
  119. return im
  120. def rotate(im, rotate_lower, rotate_upper):
  121. rotate_delta = np.random.uniform(rotate_lower, rotate_upper)
  122. im = im.rotate(int(rotate_delta))
  123. return im
  124. def resize_padding(im, max_side_len=2400):
  125. '''
  126. resize image to a size multiple of 32 which is required by the network
  127. :param im: the resized image
  128. :param max_side_len: limit of max image size to avoid out of memory in gpu
  129. :return: the resized image and the resize ratio
  130. '''
  131. h, w, _ = im.shape
  132. resize_w = w
  133. resize_h = h
  134. # limit the max side
  135. if max(resize_h, resize_w) > max_side_len:
  136. ratio = float(
  137. max_side_len) / resize_h if resize_h > resize_w else float(
  138. max_side_len) / resize_w
  139. else:
  140. ratio = 1.
  141. resize_h = int(resize_h * ratio)
  142. resize_w = int(resize_w * ratio)
  143. resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32
  144. resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32
  145. resize_h = max(32, resize_h)
  146. resize_w = max(32, resize_w)
  147. im = cv2.resize(im, (int(resize_w), int(resize_h)))
  148. #im = cv2.resize(im, (512, 512))
  149. ratio_h = resize_h / float(h)
  150. ratio_w = resize_w / float(w)
  151. _ratio = np.array([ratio_h, ratio_w]).reshape(-1, 2)
  152. return im, _ratio