det.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import cv2
  16. import numpy as np
  17. from ...utils.io import ImageReader
  18. from ..base import BaseComponent
  19. def restructured_boxes(boxes, labels, img_size):
  20. box_list = []
  21. w, h = img_size
  22. for box in boxes:
  23. xmin, ymin, xmax, ymax = list(map(int, box[2:]))
  24. xmin = max(0, xmin)
  25. ymin = max(0, ymin)
  26. xmax = min(w, xmax)
  27. ymax = min(h, ymax)
  28. box_list.append(
  29. {
  30. "cls_id": int(box[0]),
  31. "label": labels[int(box[0])],
  32. "score": float(box[1]),
  33. "coordinate": [xmin, ymin, xmax, ymax],
  34. }
  35. )
  36. return box_list
  37. def rotate_point(pt, angle_rad):
  38. """Rotate a point by an angle.
  39. Args:
  40. pt (list[float]): 2 dimensional point to be rotated
  41. angle_rad (float): rotation angle by radian
  42. Returns:
  43. list[float]: Rotated point.
  44. """
  45. assert len(pt) == 2
  46. sn, cs = np.sin(angle_rad), np.cos(angle_rad)
  47. new_x = pt[0] * cs - pt[1] * sn
  48. new_y = pt[0] * sn + pt[1] * cs
  49. rotated_pt = [new_x, new_y]
  50. return rotated_pt
  51. def _get_3rd_point(a, b):
  52. """To calculate the affine matrix, three pairs of points are required. This
  53. function is used to get the 3rd point, given 2D points a & b.
  54. The 3rd point is defined by rotating vector `a - b` by 90 degrees
  55. anticlockwise, using b as the rotation center.
  56. Args:
  57. a (np.ndarray): point(x,y)
  58. b (np.ndarray): point(x,y)
  59. Returns:
  60. np.ndarray: The 3rd point.
  61. """
  62. assert len(a) == 2
  63. assert len(b) == 2
  64. direction = a - b
  65. third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
  66. return third_pt
  67. def get_affine_transform(
  68. center, input_size, rot, output_size, shift=(0.0, 0.0), inv=False
  69. ):
  70. """Get the affine transform matrix, given the center/scale/rot/output_size.
  71. Args:
  72. center (np.ndarray[2, ]): Center of the bounding box (x, y).
  73. scale (np.ndarray[2, ]): Scale of the bounding box
  74. wrt [width, height].
  75. rot (float): Rotation angle (degree).
  76. output_size (np.ndarray[2, ]): Size of the destination heatmaps.
  77. shift (0-100%): Shift translation ratio wrt the width/height.
  78. Default (0., 0.).
  79. inv (bool): Option to inverse the affine transform direction.
  80. (inv=False: src->dst or inv=True: dst->src)
  81. Returns:
  82. np.ndarray: The transform matrix.
  83. """
  84. assert len(center) == 2
  85. assert len(output_size) == 2
  86. assert len(shift) == 2
  87. if not isinstance(input_size, (np.ndarray, list)):
  88. input_size = np.array([input_size, input_size], dtype=np.float32)
  89. scale_tmp = input_size
  90. shift = np.array(shift)
  91. src_w = scale_tmp[0]
  92. dst_w = output_size[0]
  93. dst_h = output_size[1]
  94. rot_rad = np.pi * rot / 180
  95. src_dir = rotate_point([0.0, src_w * -0.5], rot_rad)
  96. dst_dir = np.array([0.0, dst_w * -0.5])
  97. src = np.zeros((3, 2), dtype=np.float32)
  98. src[0, :] = center + scale_tmp * shift
  99. src[1, :] = center + src_dir + scale_tmp * shift
  100. src[2, :] = _get_3rd_point(src[0, :], src[1, :])
  101. dst = np.zeros((3, 2), dtype=np.float32)
  102. dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
  103. dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
  104. dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
  105. if inv:
  106. trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
  107. else:
  108. trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
  109. return trans
  110. class WarpAffine(BaseComponent):
  111. """Warp affine the image"""
  112. INPUT_KEYS = ["img"]
  113. OUTPUT_KEYS = ["img", "img_size", "scale_factors"]
  114. DEAULT_INPUTS = {"img": "img"}
  115. DEAULT_OUTPUTS = {
  116. "img": "img",
  117. "img_size": "img_size",
  118. "scale_factors": "scale_factors",
  119. }
  120. def __init__(
  121. self,
  122. keep_res=False,
  123. pad=31,
  124. input_h=512,
  125. input_w=512,
  126. scale=0.4,
  127. shift=0.1,
  128. down_ratio=4,
  129. ):
  130. super().__init__()
  131. self.keep_res = keep_res
  132. self.pad = pad
  133. self.input_h = input_h
  134. self.input_w = input_w
  135. self.scale = scale
  136. self.shift = shift
  137. self.down_ratio = down_ratio
  138. def apply(self, img):
  139. img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
  140. h, w = img.shape[:2]
  141. if self.keep_res:
  142. # True in detection eval/infer
  143. input_h = (h | self.pad) + 1
  144. input_w = (w | self.pad) + 1
  145. s = np.array([input_w, input_h], dtype=np.float32)
  146. c = np.array([w // 2, h // 2], dtype=np.float32)
  147. else:
  148. # False in centertrack eval_mot/eval_mot
  149. s = max(h, w) * 1.0
  150. input_h, input_w = self.input_h, self.input_w
  151. c = np.array([w / 2.0, h / 2.0], dtype=np.float32)
  152. trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
  153. img = cv2.resize(img, (w, h))
  154. inp = cv2.warpAffine(
  155. img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR
  156. )
  157. if not self.keep_res:
  158. out_h = input_h // self.down_ratio
  159. out_w = input_w // self.down_ratio
  160. trans_output = get_affine_transform(c, s, 0, [out_w, out_h])
  161. im_scale_w, im_scale_h = [input_w / w, input_h / h]
  162. return {
  163. "img": inp,
  164. "img_size": [inp.shape[1], inp.shape[0]],
  165. "scale_factors": [im_scale_w, im_scale_h],
  166. }
  167. class DetPostProcess(BaseComponent):
  168. """Save Result Transform"""
  169. INPUT_KEYS = ["input_path", "boxes", "img_size"]
  170. OUTPUT_KEYS = ["boxes"]
  171. DEAULT_INPUTS = {"boxes": "boxes", "img_size": "ori_img_size"}
  172. DEAULT_OUTPUTS = {"boxes": "boxes"}
  173. def __init__(self, threshold=0.5, labels=None):
  174. super().__init__()
  175. self.threshold = threshold
  176. self.labels = labels
  177. def apply(self, boxes, img_size):
  178. """apply"""
  179. expect_boxes = (boxes[:, 1] > self.threshold) & (boxes[:, 0] > -1)
  180. boxes = boxes[expect_boxes, :]
  181. boxes = restructured_boxes(boxes, self.labels, img_size)
  182. result = {"boxes": boxes}
  183. return result
  184. class CropByBoxes(BaseComponent):
  185. """Crop Image by Box"""
  186. YIELD_BATCH = False
  187. INPUT_KEYS = ["input_path", "boxes"]
  188. OUTPUT_KEYS = ["img", "box", "label"]
  189. DEAULT_INPUTS = {"input_path": "input_path", "boxes": "boxes"}
  190. DEAULT_OUTPUTS = {"img": "img", "box": "box", "label": "label"}
  191. def __init__(self):
  192. super().__init__()
  193. self._reader = ImageReader(backend="opencv")
  194. def apply(self, input_path, boxes):
  195. output_list = []
  196. img = self._reader.read(input_path)
  197. for bbox in boxes:
  198. label_id = bbox["cls_id"]
  199. box = bbox["coordinate"]
  200. label = bbox.get("label", label_id)
  201. xmin, ymin, xmax, ymax = [int(i) for i in box]
  202. img_crop = img[ymin:ymax, xmin:xmax]
  203. output_list.append({"img": img_crop, "box": box, "label": label})
  204. return output_list
  205. class DetPad(BaseComponent):
  206. INPUT_KEYS = "img"
  207. OUTPUT_KEYS = "img"
  208. DEAULT_INPUTS = {"img": "img"}
  209. DEAULT_OUTPUTS = {"img": "img"}
  210. def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
  211. """
  212. Pad image to a specified size.
  213. Args:
  214. size (list[int]): image target size
  215. fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
  216. """
  217. super().__init__()
  218. if isinstance(size, int):
  219. size = [size, size]
  220. self.size = size
  221. self.fill_value = fill_value
  222. def apply(self, img):
  223. im = img
  224. im_h, im_w = im.shape[:2]
  225. h, w = self.size
  226. if h == im_h and w == im_w:
  227. return {"img": im}
  228. canvas = np.ones((h, w, 3), dtype=np.float32)
  229. canvas *= np.array(self.fill_value, dtype=np.float32)
  230. canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
  231. return {"img": canvas}