det.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import cv2
  16. import numpy as np
  17. from ...utils.io import ImageReader
  18. from ..base import BaseComponent
  19. def restructured_boxes(boxes, labels, img_size):
  20. box_list = []
  21. w, h = img_size
  22. for box in boxes:
  23. xmin, ymin, xmax, ymax = box[2:]
  24. xmin = max(0, xmin)
  25. ymin = max(0, ymin)
  26. xmax = min(w, xmax)
  27. ymax = min(h, ymax)
  28. box_list.append(
  29. {
  30. "cls_id": int(box[0]),
  31. "label": labels[int(box[0])],
  32. "score": float(box[1]),
  33. "coordinate": [xmin, ymin, xmax, ymax],
  34. }
  35. )
  36. return box_list
  37. def restructured_rotated_boxes(boxes, labels, img_size):
  38. box_list = []
  39. w, h = img_size
  40. assert boxes.shape[1] == 10, 'The shape of rotated boxes should be [N, 10]'
  41. for box in boxes:
  42. x1, y1, x2, y2, x3, y3, x4, y4 = box[2:]
  43. x1 = min(max(0, x1), w)
  44. y1 = min(max(0, y1), h)
  45. x2 = min(max(0, x2), w)
  46. y2 = min(max(0, y2), h)
  47. x3 = min(max(0, x3), w)
  48. y3 = min(max(0, y3), h)
  49. x4 = min(max(0, x4), w)
  50. y4 = min(max(0, y4), h)
  51. box_list.append(
  52. {
  53. "cls_id": int(box[0]),
  54. "label": labels[int(box[0])],
  55. "score": float(box[1]),
  56. "coordinate": [x1, y1, x2, y2, x3, y3, x4, y4],
  57. }
  58. )
  59. return box_list
  60. def rotate_point(pt, angle_rad):
  61. """Rotate a point by an angle.
  62. Args:
  63. pt (list[float]): 2 dimensional point to be rotated
  64. angle_rad (float): rotation angle by radian
  65. Returns:
  66. list[float]: Rotated point.
  67. """
  68. assert len(pt) == 2
  69. sn, cs = np.sin(angle_rad), np.cos(angle_rad)
  70. new_x = pt[0] * cs - pt[1] * sn
  71. new_y = pt[0] * sn + pt[1] * cs
  72. rotated_pt = [new_x, new_y]
  73. return rotated_pt
  74. def _get_3rd_point(a, b):
  75. """To calculate the affine matrix, three pairs of points are required. This
  76. function is used to get the 3rd point, given 2D points a & b.
  77. The 3rd point is defined by rotating vector `a - b` by 90 degrees
  78. anticlockwise, using b as the rotation center.
  79. Args:
  80. a (np.ndarray): point(x,y)
  81. b (np.ndarray): point(x,y)
  82. Returns:
  83. np.ndarray: The 3rd point.
  84. """
  85. assert len(a) == 2
  86. assert len(b) == 2
  87. direction = a - b
  88. third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
  89. return third_pt
  90. def get_affine_transform(
  91. center, input_size, rot, output_size, shift=(0.0, 0.0), inv=False
  92. ):
  93. """Get the affine transform matrix, given the center/scale/rot/output_size.
  94. Args:
  95. center (np.ndarray[2, ]): Center of the bounding box (x, y).
  96. scale (np.ndarray[2, ]): Scale of the bounding box
  97. wrt [width, height].
  98. rot (float): Rotation angle (degree).
  99. output_size (np.ndarray[2, ]): Size of the destination heatmaps.
  100. shift (0-100%): Shift translation ratio wrt the width/height.
  101. Default (0., 0.).
  102. inv (bool): Option to inverse the affine transform direction.
  103. (inv=False: src->dst or inv=True: dst->src)
  104. Returns:
  105. np.ndarray: The transform matrix.
  106. """
  107. assert len(center) == 2
  108. assert len(output_size) == 2
  109. assert len(shift) == 2
  110. if not isinstance(input_size, (np.ndarray, list)):
  111. input_size = np.array([input_size, input_size], dtype=np.float32)
  112. scale_tmp = input_size
  113. shift = np.array(shift)
  114. src_w = scale_tmp[0]
  115. dst_w = output_size[0]
  116. dst_h = output_size[1]
  117. rot_rad = np.pi * rot / 180
  118. src_dir = rotate_point([0.0, src_w * -0.5], rot_rad)
  119. dst_dir = np.array([0.0, dst_w * -0.5])
  120. src = np.zeros((3, 2), dtype=np.float32)
  121. src[0, :] = center + scale_tmp * shift
  122. src[1, :] = center + src_dir + scale_tmp * shift
  123. src[2, :] = _get_3rd_point(src[0, :], src[1, :])
  124. dst = np.zeros((3, 2), dtype=np.float32)
  125. dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
  126. dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
  127. dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
  128. if inv:
  129. trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
  130. else:
  131. trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
  132. return trans
  133. class WarpAffine(BaseComponent):
  134. """Warp affine the image"""
  135. INPUT_KEYS = ["img"]
  136. OUTPUT_KEYS = ["img", "img_size", "scale_factors"]
  137. DEAULT_INPUTS = {"img": "img"}
  138. DEAULT_OUTPUTS = {
  139. "img": "img",
  140. "img_size": "img_size",
  141. "scale_factors": "scale_factors",
  142. }
  143. def __init__(
  144. self,
  145. keep_res=False,
  146. pad=31,
  147. input_h=512,
  148. input_w=512,
  149. scale=0.4,
  150. shift=0.1,
  151. down_ratio=4,
  152. ):
  153. super().__init__()
  154. self.keep_res = keep_res
  155. self.pad = pad
  156. self.input_h = input_h
  157. self.input_w = input_w
  158. self.scale = scale
  159. self.shift = shift
  160. self.down_ratio = down_ratio
  161. def apply(self, img):
  162. img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
  163. h, w = img.shape[:2]
  164. if self.keep_res:
  165. # True in detection eval/infer
  166. input_h = (h | self.pad) + 1
  167. input_w = (w | self.pad) + 1
  168. s = np.array([input_w, input_h], dtype=np.float32)
  169. c = np.array([w // 2, h // 2], dtype=np.float32)
  170. else:
  171. # False in centertrack eval_mot/eval_mot
  172. s = max(h, w) * 1.0
  173. input_h, input_w = self.input_h, self.input_w
  174. c = np.array([w / 2.0, h / 2.0], dtype=np.float32)
  175. trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
  176. img = cv2.resize(img, (w, h))
  177. inp = cv2.warpAffine(
  178. img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR
  179. )
  180. if not self.keep_res:
  181. out_h = input_h // self.down_ratio
  182. out_w = input_w // self.down_ratio
  183. trans_output = get_affine_transform(c, s, 0, [out_w, out_h])
  184. im_scale_w, im_scale_h = [input_w / w, input_h / h]
  185. return {
  186. "img": inp,
  187. "img_size": [inp.shape[1], inp.shape[0]],
  188. "scale_factors": [im_scale_w, im_scale_h],
  189. }
  190. def compute_iou(box1, box2):
  191. x1 = max(box1[0], box2[0])
  192. y1 = max(box1[1], box2[1])
  193. x2 = min(box1[2], box2[2])
  194. y2 = min(box1[3], box2[3])
  195. inter_area = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
  196. box1_area = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
  197. box2_area = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)
  198. iou = inter_area / float(box1_area + box2_area - inter_area)
  199. return iou
  200. def is_box_mostly_inside(inner_box, outer_box, threshold=0.9):
  201. x1 = max(inner_box[0], outer_box[0])
  202. y1 = max(inner_box[1], outer_box[1])
  203. x2 = min(inner_box[2], outer_box[2])
  204. y2 = min(inner_box[3], outer_box[3])
  205. inter_area = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
  206. inner_box_area = (inner_box[2] - inner_box[0] + 1) * (inner_box[3] - inner_box[1] + 1)
  207. return (inter_area / inner_box_area) >= threshold
  208. def non_max_suppression(boxes, scores, iou_threshold):
  209. if len(boxes) == 0:
  210. return []
  211. x1 = boxes[:, 0]
  212. y1 = boxes[:, 1]
  213. x2 = boxes[:, 2]
  214. y2 = boxes[:, 3]
  215. areas = (x2 - x1 + 1) * (y2 - y1 + 1)
  216. order = scores.argsort()[::-1]
  217. keep = []
  218. while order.size > 0:
  219. i = order[0]
  220. keep.append(i)
  221. xx1 = np.maximum(x1[i], x1[order[1:]])
  222. yy1 = np.maximum(y1[i], y1[order[1:]])
  223. xx2 = np.minimum(x2[i], x2[order[1:]])
  224. yy2 = np.minimum(y2[i], y2[order[1:]])
  225. w = np.maximum(0.0, xx2 - xx1 + 1)
  226. h = np.maximum(0.0, yy2 - yy1 + 1)
  227. inter = w * h
  228. iou = inter / (areas[i] + areas[order[1:]] - inter)
  229. inds = np.where(iou <= iou_threshold)[0]
  230. order = order[inds + 1]
  231. return keep
  232. class DetPostProcess(BaseComponent):
  233. """Save Result Transform"""
  234. INPUT_KEYS = ["input_path", "boxes", "img_size"]
  235. OUTPUT_KEYS = ["boxes"]
  236. DEAULT_INPUTS = {"boxes": "boxes", "img_size": "ori_img_size"}
  237. DEAULT_OUTPUTS = {"boxes": "boxes"}
  238. def __init__(self, threshold=0.5, labels=None, layout_postprocess=False):
  239. super().__init__()
  240. self.threshold = threshold
  241. self.labels = labels
  242. self.layout_postprocess = layout_postprocess
  243. def apply(self, boxes, img_size):
  244. """apply"""
  245. if isinstance(self.threshold, float):
  246. expect_boxes = (boxes[:, 1] > self.threshold) & (boxes[:, 0] > -1)
  247. boxes = boxes[expect_boxes, :]
  248. elif isinstance(self.threshold, dict):
  249. category_filtered_boxes = []
  250. for cat_id in np.unique(boxes[:, 0]):
  251. category_boxes = boxes[boxes[:, 0] == cat_id]
  252. category_scores = category_boxes[:, 1]
  253. category_threshold = self.threshold.get(int(cat_id), 0.5)
  254. selected_indices = category_scores > category_threshold
  255. category_filtered_boxes.append(category_boxes[selected_indices])
  256. boxes = np.vstack(category_filtered_boxes) if category_filtered_boxes else np.array([])
  257. if self.layout_postprocess:
  258. filtered_boxes = []
  259. ### Layout postprocess for NMS
  260. for cat_id in np.unique(boxes[:, 0]):
  261. category_boxes = boxes[boxes[:, 0] == cat_id]
  262. category_scores = category_boxes[:, 1]
  263. if len(category_boxes) > 0:
  264. nms_indices = non_max_suppression(category_boxes[:, 2:], category_scores, 0.5)
  265. category_boxes = category_boxes[nms_indices]
  266. keep_boxes = []
  267. for i, box in enumerate(category_boxes):
  268. if all(not is_box_mostly_inside(box[2:], other_box[2:]) for j, other_box in enumerate(category_boxes) if i != j):
  269. keep_boxes.append(box)
  270. filtered_boxes.extend(keep_boxes)
  271. boxes = np.array(filtered_boxes)
  272. ### Layout postprocess for removing boxes inside image category box
  273. if self.labels and "image" in self.labels:
  274. image_cls_id = self.labels.index('image')
  275. if len(boxes) > 0:
  276. image_boxes = boxes[boxes[:, 0] == image_cls_id]
  277. other_boxes = boxes[boxes[:, 0] != image_cls_id]
  278. to_keep = []
  279. for box in other_boxes:
  280. keep = True
  281. for img_box in image_boxes:
  282. if (box[2] >= img_box[2] and box[3] >= img_box[3] and
  283. box[4] <= img_box[4] and box[5] <= img_box[5]):
  284. keep = False
  285. break
  286. if keep:
  287. to_keep.append(box)
  288. boxes = np.vstack([image_boxes, to_keep]) if to_keep else image_boxes
  289. ### Layout postprocess for overlaps
  290. final_boxes = []
  291. while len(boxes) > 0:
  292. current_box = boxes[0]
  293. current_score = current_box[1]
  294. overlaps = [current_box]
  295. non_overlaps = []
  296. for other_box in boxes[1:]:
  297. iou = compute_iou(current_box[2:], other_box[2:])
  298. if iou > 0.95:
  299. if other_box[1] > current_score:
  300. overlaps.append(other_box)
  301. else:
  302. non_overlaps.append(other_box)
  303. best_box = max(overlaps, key=lambda x: x[1])
  304. final_boxes.append(best_box)
  305. boxes = np.array(non_overlaps)
  306. boxes = np.array(final_boxes)
  307. if boxes.shape[1] == 6:
  308. """For Normal Object Detection"""
  309. boxes = restructured_boxes(boxes, self.labels, img_size)
  310. elif boxes.shape[1] == 10:
  311. """Adapt For Rotated Object Detection"""
  312. boxes = restructured_rotated_boxes(boxes, self.labels, img_size)
  313. else:
  314. """Unexpected Input Box Shape"""
  315. raise ValueError(
  316. f"The shape of boxes should be 6 or 10, instead of {boxes.shape[1]}"
  317. )
  318. result = {"boxes": boxes}
  319. return result
  320. class CropByBoxes(BaseComponent):
  321. """Crop Image by Box"""
  322. YIELD_BATCH = False
  323. INPUT_KEYS = ["input_path", "boxes"]
  324. OUTPUT_KEYS = ["img", "box", "label"]
  325. DEAULT_INPUTS = {"input_path": "input_path", "boxes": "boxes"}
  326. DEAULT_OUTPUTS = {"img": "img", "box": "box", "label": "label"}
  327. def __init__(self):
  328. super().__init__()
  329. self._reader = ImageReader(backend="opencv")
  330. def apply(self, input_path, boxes):
  331. output_list = []
  332. img = self._reader.read(input_path)
  333. for bbox in boxes:
  334. label_id = bbox["cls_id"]
  335. box = bbox["coordinate"]
  336. label = bbox.get("label", label_id)
  337. xmin, ymin, xmax, ymax = [int(i) for i in box]
  338. img_crop = img[ymin:ymax, xmin:xmax]
  339. output_list.append({"img": img_crop, "box": box, "label": label})
  340. return output_list
  341. class DetPad(BaseComponent):
  342. INPUT_KEYS = "img"
  343. OUTPUT_KEYS = "img"
  344. DEAULT_INPUTS = {"img": "img"}
  345. DEAULT_OUTPUTS = {"img": "img"}
  346. def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
  347. """
  348. Pad image to a specified size.
  349. Args:
  350. size (list[int]): image target size
  351. fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
  352. """
  353. super().__init__()
  354. if isinstance(size, int):
  355. size = [size, size]
  356. self.size = size
  357. self.fill_value = fill_value
  358. def apply(self, img):
  359. im = img
  360. im_h, im_w = im.shape[:2]
  361. h, w = self.size
  362. if h == im_h and w == im_w:
  363. return {"img": im}
  364. canvas = np.ones((h, w, 3), dtype=np.float32)
  365. canvas *= np.array(self.fill_value, dtype=np.float32)
  366. canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
  367. return {"img": canvas}