@@ -13,6 +13,7 @@
# limitations under the License.

import os
+import cv2
import numpy as np

from ...utils.io import ImageReader

@@ -42,6 +43,163 @@ def restructured_boxes(boxes, labels, img_size):
    return box_list
+
+
+def rotate_point(pt, angle_rad):
+    """Rotate a 2D point about the origin by an angle.
+
+    Args:
+        pt (list[float]): 2-dimensional point to be rotated.
+        angle_rad (float): Rotation angle in radians.
+
+    Returns:
+        list[float]: The rotated point.
+    """
+    assert len(pt) == 2
+    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
+    # Standard 2D rotation: x' = x*cos - y*sin, y' = x*sin + y*cos.
+    new_x = pt[0] * cs - pt[1] * sn
+    new_y = pt[0] * sn + pt[1] * cs
+    rotated_pt = [new_x, new_y]
+
+    return rotated_pt
+
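+# Editor's illustrative sketch (not part of the original patch): rotating
+# the unit x-vector by pi/2 anticlockwise should land on the unit
+# y-vector, up to floating-point error.
+#
+#   >>> rotate_point([1.0, 0.0], np.pi / 2)   # ~ [0.0, 1.0]
+
+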
+def _get_3rd_point(a, b):
+    """To calculate the affine matrix, three pairs of points are required.
+    This function computes the 3rd point, given 2D points a & b.
+
+    The 3rd point is defined by rotating the vector `a - b` by 90 degrees
+    anticlockwise, using b as the rotation center.
+
+    Args:
+        a (np.ndarray): point (x, y)
+        b (np.ndarray): point (x, y)
+
+    Returns:
+        np.ndarray: The 3rd point.
+    """
+    assert len(a) == 2
+    assert len(b) == 2
+    # Rotating (x, y) by 90 degrees anticlockwise gives (-y, x).
+    direction = a - b
+    third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
+
+    return third_pt
+
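+# Editor's illustrative sketch (not part of the original patch): with
+# a = (1, 0) and b = (0, 0), rotating a - b by 90 degrees anticlockwise
+# about b yields (0, 1).
+#
+#   >>> _get_3rd_point(np.array([1.0, 0.0]), np.array([0.0, 0.0]))
+#   array([0., 1.])
+
+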
+def get_affine_transform(
+    center, input_size, rot, output_size, shift=(0.0, 0.0), inv=False
+):
+    """Get the affine transform matrix, given the
+    center/input_size/rot/output_size.
+
+    Args:
+        center (np.ndarray[2, ]): Center of the bounding box (x, y).
+        input_size (np.ndarray[2, ] | float): Size of the input region wrt
+            [width, height]; a scalar is broadcast to a square.
+        rot (float): Rotation angle (degrees).
+        output_size (np.ndarray[2, ]): Size of the destination heatmaps.
+        shift (tuple[float]): Shift translation ratio wrt the width/height,
+            in the range [0, 1). Default: (0.0, 0.0).
+        inv (bool): Option to invert the affine transform direction.
+            (inv=False: src->dst or inv=True: dst->src)
+
+    Returns:
+        np.ndarray: The 2x3 transform matrix.
+    """
+    assert len(center) == 2
+    assert len(output_size) == 2
+    assert len(shift) == 2
+    if not isinstance(input_size, (np.ndarray, list)):
+        input_size = np.array([input_size, input_size], dtype=np.float32)
+    scale_tmp = input_size
+
+    shift = np.array(shift)
+    src_w = scale_tmp[0]
+    dst_w = output_size[0]
+    dst_h = output_size[1]
+
+    rot_rad = np.pi * rot / 180
+    src_dir = rotate_point([0.0, src_w * -0.5], rot_rad)
+    dst_dir = np.array([0.0, dst_w * -0.5])
+
+    # Three corresponding point pairs define the affine transform: the box
+    # center, a rotated direction point, and a third point obtained by a
+    # 90-degree rotation (see _get_3rd_point).
+    src = np.zeros((3, 2), dtype=np.float32)
+    src[0, :] = center + scale_tmp * shift
+    src[1, :] = center + src_dir + scale_tmp * shift
+    src[2, :] = _get_3rd_point(src[0, :], src[1, :])
+
+    dst = np.zeros((3, 2), dtype=np.float32)
+    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
+    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
+    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
+
+    if inv:
+        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
+    else:
+        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+    return trans
+
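+# Editor's illustrative sketch (not part of the original patch): map a
+# 512x512 region centred at (256, 256) onto a 128x128 output with no
+# rotation; the result is a pure scale-and-translate 2x3 matrix.
+#
+#   >>> M = get_affine_transform(np.array([256.0, 256.0]), 512, 0, [128, 128])
+#   >>> M.shape
+#   (2, 3)
+
+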
+class WarpAffine(BaseComponent):
+    """Apply an affine warp to the image."""
+
+    INPUT_KEYS = ["img"]
+    OUTPUT_KEYS = ["img", "img_size", "scale_factors"]
+    DEAULT_INPUTS = {"img": "img"}
+    DEAULT_OUTPUTS = {
+        "img": "img",
+        "img_size": "img_size",
+        "scale_factors": "scale_factors",
+    }
+
+    def __init__(
+        self,
+        keep_res=False,
+        pad=31,
+        input_h=512,
+        input_w=512,
+        scale=0.4,
+        shift=0.1,
+        down_ratio=4,
+    ):
+        super().__init__()
+        self.keep_res = keep_res
+        self.pad = pad
+        self.input_h = input_h
+        self.input_w = input_w
+        self.scale = scale
+        self.shift = shift
+        self.down_ratio = down_ratio
+
+    def apply(self, img):
+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+
+        h, w = img.shape[:2]
+
+        if self.keep_res:
+            # True in detection eval/infer: keep the original resolution,
+            # rounded up to the next multiple of self.pad + 1 (e.g. 32
+            # when pad=31).
+            input_h = (h | self.pad) + 1
+            input_w = (w | self.pad) + 1
+            s = np.array([input_w, input_h], dtype=np.float32)
+            c = np.array([w // 2, h // 2], dtype=np.float32)
+        else:
+            # False in centertrack eval_mot: squash the longer side onto a
+            # fixed input_h x input_w canvas.
+            s = max(h, w) * 1.0
+            input_h, input_w = self.input_h, self.input_w
+            c = np.array([w / 2.0, h / 2.0], dtype=np.float32)
+
+        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
+        inp = cv2.warpAffine(
+            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR
+        )
+
+        if not self.keep_res:
+            # Affine transform at the down-sampled output resolution
+            # (not used by this component's return value).
+            out_h = input_h // self.down_ratio
+            out_w = input_w // self.down_ratio
+            trans_output = get_affine_transform(c, s, 0, [out_w, out_h])
+
+        im_scale_w, im_scale_h = [input_w / w, input_h / h]
+
+        return {
+            "img": inp,
+            "img_size": [inp.shape[1], inp.shape[0]],
+            "scale_factors": [im_scale_w, im_scale_h],
+        }
+
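+# Editor's illustrative sketch (not part of the original patch; assumes
+# BaseComponent needs no extra constructor arguments): with
+# keep_res=False, an 800x600 RGB frame is squashed onto the fixed
+# 512x512 canvas, and scale_factors holds [512/800, 512/600] for
+# mapping predictions back to the source image.
+#
+#   >>> warp = WarpAffine(keep_res=False, input_h=512, input_w=512)
+#   >>> out = warp.apply(np.zeros((600, 800, 3), dtype=np.uint8))
+#   >>> out["img"].shape, out["img_size"]
+#   ((512, 512, 3), [512, 512])
+
+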
class DetPostProcess(BaseComponent):
    """Save Result Transform"""