det.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import cv2
  16. import numpy as np
  17. from ...utils.io import ImageReader
  18. from ..base import BaseComponent
  19. def restructured_boxes(boxes, labels, img_size):
  20. box_list = []
  21. w, h = img_size
  22. for box in boxes:
  23. xmin, ymin, xmax, ymax = box[2:]
  24. xmin = max(0, xmin)
  25. ymin = max(0, ymin)
  26. xmax = min(w, xmax)
  27. ymax = min(h, ymax)
  28. box_list.append(
  29. {
  30. "cls_id": int(box[0]),
  31. "label": labels[int(box[0])],
  32. "score": float(box[1]),
  33. "coordinate": [xmin, ymin, xmax, ymax],
  34. }
  35. )
  36. return box_list
  37. def restructured_rotated_boxes(boxes, labels, img_size):
  38. box_list = []
  39. w, h = img_size
  40. assert boxes.shape[1] == 10, 'The shape of rotated boxes should be [N, 10]'
  41. for box in boxes:
  42. x1, y1, x2, y2, x3, y3, x4, y4 = box[2:]
  43. x1 = min(max(0, x1), w)
  44. y1 = min(max(0, y1), h)
  45. x2 = min(max(0, x2), w)
  46. y2 = min(max(0, y2), h)
  47. x3 = min(max(0, x3), w)
  48. y3 = min(max(0, y3), h)
  49. x4 = min(max(0, x4), w)
  50. y4 = min(max(0, y4), h)
  51. box_list.append(
  52. {
  53. "cls_id": int(box[0]),
  54. "label": labels[int(box[0])],
  55. "score": float(box[1]),
  56. "coordinate": [x1, y1, x2, y2, x3, y3, x4, y4],
  57. }
  58. )
  59. return box_list
  60. def rotate_point(pt, angle_rad):
  61. """Rotate a point by an angle.
  62. Args:
  63. pt (list[float]): 2 dimensional point to be rotated
  64. angle_rad (float): rotation angle by radian
  65. Returns:
  66. list[float]: Rotated point.
  67. """
  68. assert len(pt) == 2
  69. sn, cs = np.sin(angle_rad), np.cos(angle_rad)
  70. new_x = pt[0] * cs - pt[1] * sn
  71. new_y = pt[0] * sn + pt[1] * cs
  72. rotated_pt = [new_x, new_y]
  73. return rotated_pt
  74. def _get_3rd_point(a, b):
  75. """To calculate the affine matrix, three pairs of points are required. This
  76. function is used to get the 3rd point, given 2D points a & b.
  77. The 3rd point is defined by rotating vector `a - b` by 90 degrees
  78. anticlockwise, using b as the rotation center.
  79. Args:
  80. a (np.ndarray): point(x,y)
  81. b (np.ndarray): point(x,y)
  82. Returns:
  83. np.ndarray: The 3rd point.
  84. """
  85. assert len(a) == 2
  86. assert len(b) == 2
  87. direction = a - b
  88. third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
  89. return third_pt
  90. def get_affine_transform(
  91. center, input_size, rot, output_size, shift=(0.0, 0.0), inv=False
  92. ):
  93. """Get the affine transform matrix, given the center/scale/rot/output_size.
  94. Args:
  95. center (np.ndarray[2, ]): Center of the bounding box (x, y).
  96. scale (np.ndarray[2, ]): Scale of the bounding box
  97. wrt [width, height].
  98. rot (float): Rotation angle (degree).
  99. output_size (np.ndarray[2, ]): Size of the destination heatmaps.
  100. shift (0-100%): Shift translation ratio wrt the width/height.
  101. Default (0., 0.).
  102. inv (bool): Option to inverse the affine transform direction.
  103. (inv=False: src->dst or inv=True: dst->src)
  104. Returns:
  105. np.ndarray: The transform matrix.
  106. """
  107. assert len(center) == 2
  108. assert len(output_size) == 2
  109. assert len(shift) == 2
  110. if not isinstance(input_size, (np.ndarray, list)):
  111. input_size = np.array([input_size, input_size], dtype=np.float32)
  112. scale_tmp = input_size
  113. shift = np.array(shift)
  114. src_w = scale_tmp[0]
  115. dst_w = output_size[0]
  116. dst_h = output_size[1]
  117. rot_rad = np.pi * rot / 180
  118. src_dir = rotate_point([0.0, src_w * -0.5], rot_rad)
  119. dst_dir = np.array([0.0, dst_w * -0.5])
  120. src = np.zeros((3, 2), dtype=np.float32)
  121. src[0, :] = center + scale_tmp * shift
  122. src[1, :] = center + src_dir + scale_tmp * shift
  123. src[2, :] = _get_3rd_point(src[0, :], src[1, :])
  124. dst = np.zeros((3, 2), dtype=np.float32)
  125. dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
  126. dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
  127. dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
  128. if inv:
  129. trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
  130. else:
  131. trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
  132. return trans
  133. class WarpAffine(BaseComponent):
  134. """Warp affine the image"""
  135. INPUT_KEYS = ["img"]
  136. OUTPUT_KEYS = ["img", "img_size", "scale_factors"]
  137. DEAULT_INPUTS = {"img": "img"}
  138. DEAULT_OUTPUTS = {
  139. "img": "img",
  140. "img_size": "img_size",
  141. "scale_factors": "scale_factors",
  142. }
  143. def __init__(
  144. self,
  145. keep_res=False,
  146. pad=31,
  147. input_h=512,
  148. input_w=512,
  149. scale=0.4,
  150. shift=0.1,
  151. down_ratio=4,
  152. ):
  153. super().__init__()
  154. self.keep_res = keep_res
  155. self.pad = pad
  156. self.input_h = input_h
  157. self.input_w = input_w
  158. self.scale = scale
  159. self.shift = shift
  160. self.down_ratio = down_ratio
  161. def apply(self, img):
  162. img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
  163. h, w = img.shape[:2]
  164. if self.keep_res:
  165. # True in detection eval/infer
  166. input_h = (h | self.pad) + 1
  167. input_w = (w | self.pad) + 1
  168. s = np.array([input_w, input_h], dtype=np.float32)
  169. c = np.array([w // 2, h // 2], dtype=np.float32)
  170. else:
  171. # False in centertrack eval_mot/eval_mot
  172. s = max(h, w) * 1.0
  173. input_h, input_w = self.input_h, self.input_w
  174. c = np.array([w / 2.0, h / 2.0], dtype=np.float32)
  175. trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
  176. img = cv2.resize(img, (w, h))
  177. inp = cv2.warpAffine(
  178. img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR
  179. )
  180. if not self.keep_res:
  181. out_h = input_h // self.down_ratio
  182. out_w = input_w // self.down_ratio
  183. trans_output = get_affine_transform(c, s, 0, [out_w, out_h])
  184. im_scale_w, im_scale_h = [input_w / w, input_h / h]
  185. return {
  186. "img": inp,
  187. "img_size": [inp.shape[1], inp.shape[0]],
  188. "scale_factors": [im_scale_w, im_scale_h],
  189. }
  190. class DetPostProcess(BaseComponent):
  191. """Save Result Transform"""
  192. INPUT_KEYS = ["input_path", "boxes", "img_size"]
  193. OUTPUT_KEYS = ["boxes"]
  194. DEAULT_INPUTS = {"boxes": "boxes", "img_size": "ori_img_size"}
  195. DEAULT_OUTPUTS = {"boxes": "boxes"}
  196. def __init__(self, threshold=0.5, labels=None):
  197. super().__init__()
  198. self.threshold = threshold
  199. self.labels = labels
  200. def apply(self, boxes, img_size):
  201. """apply"""
  202. expect_boxes = (boxes[:, 1] > self.threshold) & (boxes[:, 0] > -1)
  203. boxes = boxes[expect_boxes, :]
  204. if boxes.shape[1] == 6:
  205. """For Normal Object Detection"""
  206. boxes = restructured_boxes(boxes, self.labels, img_size)
  207. elif boxes.shape[1] == 10:
  208. """Adapt For Rotated Object Detection"""
  209. boxes = restructured_rotated_boxes(boxes, self.labels, img_size)
  210. else:
  211. """Unexpected Input Box Shape"""
  212. raise ValueError(
  213. f"The shape of boxes should be 6 or 10, instead of {boxes.shape[1]}"
  214. )
  215. result = {"boxes": boxes}
  216. return result
  217. class CropByBoxes(BaseComponent):
  218. """Crop Image by Box"""
  219. YIELD_BATCH = False
  220. INPUT_KEYS = ["input_path", "boxes"]
  221. OUTPUT_KEYS = ["img", "box", "label"]
  222. DEAULT_INPUTS = {"input_path": "input_path", "boxes": "boxes"}
  223. DEAULT_OUTPUTS = {"img": "img", "box": "box", "label": "label"}
  224. def __init__(self):
  225. super().__init__()
  226. self._reader = ImageReader(backend="opencv")
  227. def apply(self, input_path, boxes):
  228. output_list = []
  229. img = self._reader.read(input_path)
  230. for bbox in boxes:
  231. label_id = bbox["cls_id"]
  232. box = bbox["coordinate"]
  233. label = bbox.get("label", label_id)
  234. xmin, ymin, xmax, ymax = [int(i) for i in box]
  235. img_crop = img[ymin:ymax, xmin:xmax]
  236. output_list.append({"img": img_crop, "box": box, "label": label})
  237. return output_list
  238. class DetPad(BaseComponent):
  239. INPUT_KEYS = "img"
  240. OUTPUT_KEYS = "img"
  241. DEAULT_INPUTS = {"img": "img"}
  242. DEAULT_OUTPUTS = {"img": "img"}
  243. def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
  244. """
  245. Pad image to a specified size.
  246. Args:
  247. size (list[int]): image target size
  248. fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
  249. """
  250. super().__init__()
  251. if isinstance(size, int):
  252. size = [size, size]
  253. self.size = size
  254. self.fill_value = fill_value
  255. def apply(self, img):
  256. im = img
  257. im_h, im_w = im.shape[:2]
  258. h, w = self.size
  259. if h == im_h and w == im_w:
  260. return {"img": im}
  261. canvas = np.ones((h, w, 3), dtype=np.float32)
  262. canvas *= np.array(self.fill_value, dtype=np.float32)
  263. canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
  264. return {"img": canvas}