| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284 |
- # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import os
- import cv2
- import numpy as np
- from ...utils.io import ImageReader
- from ..base import BaseComponent
def restructured_boxes(boxes, labels, img_size):
    """Convert raw detector rows into labeled box dicts with clamped coords.

    Args:
        boxes: iterable of rows laid out as
            [cls_id, score, xmin, ymin, xmax, ymax].
        labels: sequence mapping a class id to its label name.
        img_size: (width, height); coordinates are clamped to this extent.

    Returns:
        list[dict]: one dict per box with keys "cls_id", "label",
        "score" and "coordinate" ([xmin, ymin, xmax, ymax]).
    """
    width, height = img_size
    results = []
    for row in boxes:
        cls_id = int(row[0])
        x1, y1, x2, y2 = (int(v) for v in row[2:])
        # Clamp the box to the image so crops/drawing never go out of range.
        coordinate = [
            max(0, x1),
            max(0, y1),
            min(width, x2),
            min(height, y2),
        ]
        results.append(
            {
                "cls_id": cls_id,
                "label": labels[cls_id],
                "score": float(row[1]),
                "coordinate": coordinate,
            }
        )
    return results
def rotate_point(pt, angle_rad):
    """Rotate a 2-D point about the origin.

    Args:
        pt (list[float]): the (x, y) point to rotate.
        angle_rad (float): rotation angle in radians.

    Returns:
        list[float]: the rotated (x, y) point.
    """
    assert len(pt) == 2
    x, y = pt
    sin_a = np.sin(angle_rad)
    cos_a = np.cos(angle_rad)
    return [x * cos_a - y * sin_a, x * sin_a + y * cos_a]
- def _get_3rd_point(a, b):
- """To calculate the affine matrix, three pairs of points are required. This
- function is used to get the 3rd point, given 2D points a & b.
- The 3rd point is defined by rotating vector `a - b` by 90 degrees
- anticlockwise, using b as the rotation center.
- Args:
- a (np.ndarray): point(x,y)
- b (np.ndarray): point(x,y)
- Returns:
- np.ndarray: The 3rd point.
- """
- assert len(a) == 2
- assert len(b) == 2
- direction = a - b
- third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
- return third_pt
def get_affine_transform(
    center, input_size, rot, output_size, shift=(0.0, 0.0), inv=False
):
    """Get the affine transform matrix, given center/input_size/rot/output_size.

    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        input_size (float | list[2] | np.ndarray[2, ]): Size of the source
            region wrt [width, height]; a scalar is treated as a square.
        rot (float): Rotation angle (degree).
        output_size (np.ndarray[2, ]): Size of the destination heatmaps.
        shift (tuple[float, float]): Shift translation ratio wrt the
            width/height. Default (0., 0.).
        inv (bool): Option to inverse the affine transform direction.
            (inv=False: src->dst or inv=True: dst->src)

    Returns:
        np.ndarray: The 2x3 transform matrix.
    """
    assert len(center) == 2
    assert len(output_size) == 2
    assert len(shift) == 2

    # Scalars become a square size. `tuple` is accepted as well: previously a
    # 2-tuple fell through and produced a malformed (2, 2) array here.
    if not isinstance(input_size, (np.ndarray, list, tuple)):
        input_size = np.array([input_size, input_size], dtype=np.float32)
    scale_tmp = input_size

    shift = np.array(shift)
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    # A reference direction vector in each space; only the source side is
    # rotated, so the matrix maps the rotated source onto an upright dst.
    rot_rad = np.pi * rot / 180
    src_dir = rotate_point([0.0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0.0, dst_w * -0.5])

    # Three corresponding point pairs fully determine the affine matrix:
    # the center, the center offset by the direction vector, and the third
    # point obtained by a 90-degree rotation (_get_3rd_point).
    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    src[2, :] = _get_3rd_point(src[0, :], src[1, :])
    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
    return trans
class WarpAffine(BaseComponent):
    """Warp affine the image"""

    INPUT_KEYS = ["img"]
    OUTPUT_KEYS = ["img", "img_size", "scale_factors"]
    DEAULT_INPUTS = {"img": "img"}
    DEAULT_OUTPUTS = {
        "img": "img",
        "img_size": "img_size",
        "scale_factors": "scale_factors",
    }

    def __init__(
        self,
        keep_res=False,
        pad=31,
        input_h=512,
        input_w=512,
        scale=0.4,
        shift=0.1,
        down_ratio=4,
    ):
        """
        Args:
            keep_res (bool): keep the original resolution (padded) instead of
                warping to a fixed (input_w, input_h).
            pad (int): padding mask used when keep_res is True; sizes are
                rounded up via ``(dim | pad) + 1``, so ``pad + 1`` should be a
                power of two.
            input_h (int): target height when keep_res is False.
            input_w (int): target width when keep_res is False.
            scale (float): currently unused; kept for interface compatibility.
            shift (float): currently unused; kept for interface compatibility.
            down_ratio (int): currently unused; kept for interface
                compatibility.
        """
        super().__init__()
        self.keep_res = keep_res
        self.pad = pad
        self.input_h = input_h
        self.input_w = input_w
        self.scale = scale
        self.shift = shift
        self.down_ratio = down_ratio

    def apply(self, img):
        """Warp the image and return it with its size and scale factors."""
        # Convert channel order RGB -> BGR before warping.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        h, w = img.shape[:2]

        if self.keep_res:
            # True in detection eval/infer: keep the resolution, rounded up
            # so both sides become multiples of (self.pad + 1).
            input_h = (h | self.pad) + 1
            input_w = (w | self.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
            c = np.array([w // 2, h // 2], dtype=np.float32)
        else:
            # False in centertrack eval_mot/eval_mot: fixed output size; the
            # source square spans the larger image side.
            s = max(h, w) * 1.0
            input_h, input_w = self.input_h, self.input_w
            c = np.array([w / 2.0, h / 2.0], dtype=np.float32)

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        # NOTE(review): the original resized `img` to its own (w, h) here —
        # a no-op — and, when keep_res was False, computed an output
        # transform that was never used. Both removed as dead work.
        inp = cv2.warpAffine(
            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR
        )

        im_scale_w, im_scale_h = [input_w / w, input_h / h]
        return {
            "img": inp,
            "img_size": [inp.shape[1], inp.shape[0]],
            "scale_factors": [im_scale_w, im_scale_h],
        }
class DetPostProcess(BaseComponent):
    """Filter raw detection rows by score and convert them to box dicts."""

    INPUT_KEYS = ["input_path", "boxes", "img_size"]
    OUTPUT_KEYS = ["boxes"]
    DEAULT_INPUTS = {"boxes": "boxes", "img_size": "ori_img_size"}
    DEAULT_OUTPUTS = {"boxes": "boxes"}

    def __init__(self, threshold=0.5, labels=None):
        """
        Args:
            threshold (float): minimum score a box must exceed to be kept.
            labels: sequence mapping class ids to label names.
        """
        super().__init__()
        self.threshold = threshold
        self.labels = labels

    def apply(self, boxes, img_size):
        """Keep boxes above the score threshold with a valid class id."""
        # Column 0 is the class id, column 1 the score; drop low scores and
        # the id == -1 placeholder rows in one boolean mask.
        keep = (boxes[:, 1] > self.threshold) & (boxes[:, 0] > -1)
        kept_boxes = boxes[keep, :]
        return {"boxes": restructured_boxes(kept_boxes, self.labels, img_size)}
class CropByBoxes(BaseComponent):
    """Crop one sub-image out of the source image per detection box."""

    YIELD_BATCH = False
    INPUT_KEYS = ["input_path", "boxes"]
    OUTPUT_KEYS = ["img", "box", "label"]
    DEAULT_INPUTS = {"input_path": "input_path", "boxes": "boxes"}
    DEAULT_OUTPUTS = {"img": "img", "box": "box", "label": "label"}

    def __init__(self):
        super().__init__()
        # OpenCV-backed reader for loading the source image from disk.
        self._reader = ImageReader(backend="opencv")

    def apply(self, input_path, boxes):
        """Read the image at input_path and crop each box out of it."""
        img = self._reader.read(input_path)
        crops = []
        for det in boxes:
            cls_id = det["cls_id"]
            coords = det["coordinate"]
            x1, y1, x2, y2 = (int(v) for v in coords)
            crops.append(
                {
                    "img": img[y1:y2, x1:x2],
                    "box": coords,
                    # Fall back to the numeric class id if no label name.
                    "label": det.get("label", cls_id),
                }
            )
        return crops
class DetPad(BaseComponent):
    """Pad an image with a fill color up to a fixed target size."""

    INPUT_KEYS = "img"
    OUTPUT_KEYS = "img"
    DEAULT_INPUTS = {"img": "img"}
    DEAULT_OUTPUTS = {"img": "img"}

    def __init__(self, size, fill_value=(114.0, 114.0, 114.0)):
        """
        Pad image to a specified size.

        Args:
            size (int | list[int]): target size as [height, width]; an int
                produces a square target.
            fill_value (Sequence[float]): RGB value of the pad area,
                default (114.0, 114.0, 114.0).
                (Changed from a mutable list default to a tuple; it is
                never mutated, so behavior is unchanged.)
        """
        super().__init__()
        if isinstance(size, int):
            size = [size, size]
        self.size = size
        self.fill_value = fill_value

    def apply(self, img):
        """Return the image padded bottom/right to self.size.

        NOTE(review): assumes the input is no larger than the target size;
        a larger image makes the paste below raise — confirm callers
        resize first.
        """
        im_h, im_w = img.shape[:2]
        h, w = self.size
        if h == im_h and w == im_w:
            # Already at the target size: pass through unchanged (and with
            # the original dtype, unlike the float32 padded path).
            return {"img": img}
        # Fill a float32 canvas with the pad color, then paste the image
        # into the top-left corner.
        canvas = np.full((h, w, 3), self.fill_value, dtype=np.float32)
        canvas[0:im_h, 0:im_w, :] = img.astype(np.float32)
        return {"img": canvas}
|