# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import cv2
import numpy as np

from ...utils.io import ImageReader
from ..base import BaseComponent


def restructured_boxes(boxes, labels, img_size):
    """Convert raw detection rows into a list of labeled box dicts.

    Args:
        boxes (np.ndarray): Array of shape (N, 6); each row is
            [cls_id, score, xmin, ymin, xmax, ymax].
        labels (list[str]): Class-id -> label-name lookup table.
        img_size (tuple[int, int]): Original image size as (width, height),
            used to clip box coordinates into the image.

    Returns:
        list[dict]: One dict per box with keys "cls_id", "label",
        "score" and "coordinate" ([xmin, ymin, xmax, ymax], clipped).
    """
    box_list = []
    w, h = img_size

    for box in boxes:
        xmin, ymin, xmax, ymax = list(map(int, box[2:]))
        # Clip the box to the image boundary.
        xmin = max(0, xmin)
        ymin = max(0, ymin)
        xmax = min(w, xmax)
        ymax = min(h, ymax)
        box_list.append(
            {
                "cls_id": int(box[0]),
                "label": labels[int(box[0])],
                "score": float(box[1]),
                "coordinate": [xmin, ymin, xmax, ymax],
            }
        )

    return box_list


def rotate_point(pt, angle_rad):
    """Rotate a point by an angle.

    Args:
        pt (list[float]): 2 dimensional point to be rotated
        angle_rad (float): rotation angle by radian

    Returns:
        list[float]: Rotated point.
    """
    assert len(pt) == 2
    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
    new_x = pt[0] * cs - pt[1] * sn
    new_y = pt[0] * sn + pt[1] * cs
    rotated_pt = [new_x, new_y]

    return rotated_pt


def _get_3rd_point(a, b):
    """To calculate the affine matrix, three pairs of points are required.

    This function is used to get the 3rd point, given 2D points a & b.
    The 3rd point is defined by rotating vector `a - b` by 90 degrees
    anticlockwise, using b as the rotation center.

    Args:
        a (np.ndarray): point(x,y)
        b (np.ndarray): point(x,y)

    Returns:
        np.ndarray: The 3rd point.
    """
    assert len(a) == 2
    assert len(b) == 2
    direction = a - b
    third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)

    return third_pt


def get_affine_transform(
    center, input_size, rot, output_size, shift=(0.0, 0.0), inv=False
):
    """Get the affine transform matrix, given the center/scale/rot/output_size.

    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        input_size (np.ndarray[2, ] | float): Size of the source region wrt
            [width, height]; a scalar is broadcast to a square size.
        rot (float): Rotation angle (degree).
        output_size (np.ndarray[2, ]): Size of the destination heatmaps.
        shift (0-100%): Shift translation ratio wrt the width/height.
            Default (0., 0.).
        inv (bool): Option to inverse the affine transform direction.
            (inv=False: src->dst or inv=True: dst->src)

    Returns:
        np.ndarray: The 2x3 transform matrix.
    """
    assert len(center) == 2
    assert len(output_size) == 2
    assert len(shift) == 2

    # A scalar input_size means a square source region.
    if not isinstance(input_size, (np.ndarray, list)):
        input_size = np.array([input_size, input_size], dtype=np.float32)
    scale_tmp = input_size

    shift = np.array(shift)
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    # Build three corresponding point pairs (center, rotated edge midpoint,
    # and a third point perpendicular to the first two) in src and dst space.
    rot_rad = np.pi * rot / 180
    src_dir = rotate_point([0.0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0.0, dst_w * -0.5])

    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    src[2, :] = _get_3rd_point(src[0, :], src[1, :])

    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


class WarpAffine(BaseComponent):
    """Warp affine the image"""

    INPUT_KEYS = ["img"]
    OUTPUT_KEYS = ["img", "img_size", "scale_factors"]
    # NOTE: "DEAULT" spelling is preserved — these attribute names are part of
    # the BaseComponent contract used elsewhere in the framework.
    DEAULT_INPUTS = {"img": "img"}
    DEAULT_OUTPUTS = {
        "img": "img",
        "img_size": "img_size",
        "scale_factors": "scale_factors",
    }

    def __init__(
        self,
        keep_res=False,
        pad=31,
        input_h=512,
        input_w=512,
        scale=0.4,
        shift=0.1,
        down_ratio=4,
    ):
        """Initialize the warp-affine component.

        Args:
            keep_res (bool): If True, keep the input resolution (padded up to
                a multiple of ``pad + 1``); otherwise warp to a fixed
                ``input_w`` x ``input_h`` canvas.
            pad (int): Alignment mask used when ``keep_res`` is True; the
                output side becomes ``(side | pad) + 1``.
            input_h (int): Fixed output height when ``keep_res`` is False.
            input_w (int): Fixed output width when ``keep_res`` is False.
            scale (float): Stored scale hyper-parameter (not used in apply).
            shift (float): Stored shift hyper-parameter (not used in apply).
            down_ratio (int): Feature-map downsampling ratio of the model.
        """
        super().__init__()
        self.keep_res = keep_res
        self.pad = pad
        self.input_h = input_h
        self.input_w = input_w
        self.scale = scale
        self.shift = shift
        self.down_ratio = down_ratio

    def apply(self, img):
        """Warp the input RGB image onto the network input canvas.

        Returns:
            dict: {"img": warped BGR image, "img_size": [w, h],
            "scale_factors": [w_scale, h_scale]}.
        """
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        h, w = img.shape[:2]

        if self.keep_res:
            # True in detection eval/infer: pad each side up to the next
            # multiple of (pad + 1) so strides divide evenly.
            input_h = (h | self.pad) + 1
            input_w = (w | self.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
            c = np.array([w // 2, h // 2], dtype=np.float32)
        else:
            # False in centertrack eval_mot: warp the longest side onto a
            # fixed square canvas.
            s = max(h, w) * 1.0
            input_h, input_w = self.input_h, self.input_w
            c = np.array([w / 2.0, h / 2.0], dtype=np.float32)

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        # Effectively a no-op (resizes to the image's own size); kept to match
        # the original upstream behavior.
        img = cv2.resize(img, (w, h))
        inp = cv2.warpAffine(
            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR
        )

        im_scale_w, im_scale_h = [input_w / w, input_h / h]

        return {
            "img": inp,
            "img_size": [inp.shape[1], inp.shape[0]],
            "scale_factors": [im_scale_w, im_scale_h],
        }


class DetPostProcess(BaseComponent):
    """Save Result Transform"""

    INPUT_KEYS = ["input_path", "boxes", "img_size"]
    OUTPUT_KEYS = ["boxes"]
    DEAULT_INPUTS = {"boxes": "boxes", "img_size": "ori_img_size"}
    DEAULT_OUTPUTS = {"boxes": "boxes"}

    def __init__(self, threshold=0.5, labels=None):
        """Initialize the post-processor.

        Args:
            threshold (float): Minimum score a box must exceed to be kept.
            labels (list[str] | None): Class-id -> label-name lookup.
        """
        super().__init__()
        self.threshold = threshold
        self.labels = labels

    def apply(self, boxes, img_size):
        """Filter low-score/invalid boxes and restructure them into dicts."""
        # Keep boxes above the score threshold with a valid (>= 0) class id.
        expect_boxes = (boxes[:, 1] > self.threshold) & (boxes[:, 0] > -1)
        boxes = boxes[expect_boxes, :]
        boxes = restructured_boxes(boxes, self.labels, img_size)
        result = {"boxes": boxes}

        return result


class CropByBoxes(BaseComponent):
    """Crop Image by Box"""

    YIELD_BATCH = False
    INPUT_KEYS = ["input_path", "boxes"]
    OUTPUT_KEYS = ["img", "box", "label"]
    DEAULT_INPUTS = {"input_path": "input_path", "boxes": "boxes"}
    DEAULT_OUTPUTS = {"img": "img", "box": "box", "label": "label"}

    def __init__(self):
        super().__init__()
        self._reader = ImageReader(backend="opencv")

    def apply(self, input_path, boxes):
        """Crop one sub-image per detection box from the source image.

        Args:
            input_path (str): Path of the image to read and crop.
            boxes (list[dict]): Box dicts as produced by
                ``restructured_boxes`` ("cls_id", "coordinate", "label").

        Returns:
            list[dict]: One {"img", "box", "label"} dict per box; "label"
            falls back to the class id when no label name is present.
        """
        output_list = []
        img = self._reader.read(input_path)
        for bbox in boxes:
            label_id = bbox["cls_id"]
            box = bbox["coordinate"]
            label = bbox.get("label", label_id)
            xmin, ymin, xmax, ymax = [int(i) for i in box]
            img_crop = img[ymin:ymax, xmin:xmax]
            output_list.append({"img": img_crop, "box": box, "label": label})

        return output_list


class DetPad(BaseComponent):
    """Pad the image to a fixed target size."""

    INPUT_KEYS = "img"
    OUTPUT_KEYS = "img"
    DEAULT_INPUTS = {"img": "img"}
    DEAULT_OUTPUTS = {"img": "img"}

    def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
        """
        Pad image to a specified size.
        Args:
            size (list[int]): image target size
            fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
        """
        super().__init__()
        if isinstance(size, int):
            size = [size, size]
        self.size = size
        self.fill_value = fill_value

    def apply(self, img):
        """Bottom/right-pad the image to ``self.size`` with ``fill_value``.

        NOTE(review): assumes the target size is >= the image size in both
        dimensions — a smaller target would fail on the slice assignment.
        The padded output is float32, while an exact-size input is returned
        with its original dtype (preserved upstream behavior).
        """
        im = img
        im_h, im_w = im.shape[:2]
        h, w = self.size
        if h == im_h and w == im_w:
            return {"img": im}

        canvas = np.ones((h, w, 3), dtype=np.float32)
        canvas *= np.array(self.fill_value, dtype=np.float32)
        canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
        return {"img": canvas}