- # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import numpy as np
- from numpy import cos, sin, arctan, sqrt
- import cv2
- import copy
- from .....utils import logging
- def Homography(
- image,
- img_points,
- world_width,
- world_height,
- interpolation=cv2.INTER_CUBIC,
- ratio_width=1.0,
- ratio_height=1.0,
- ):
- _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
- expand_x = int(0.5 * world_width * (ratio_width - 1))
- expand_y = int(0.5 * world_height * (ratio_height - 1))
- pt_lefttop = [expand_x, expand_y]
- pt_righttop = [expand_x + world_width, expand_y]
- pt_rightbottom = [expand_x + world_width, expand_y + world_height]
- pt_leftbottom = [expand_x, expand_y + world_height]
- pts_std = np.float32([pt_lefttop, pt_righttop, pt_rightbottom, pt_leftbottom])
- img_crop_width = int(world_width * ratio_width)
- img_crop_height = int(world_height * ratio_height)
- M = cv2.getPerspectiveTransform(_points, pts_std)
- dst_img = cv2.warpPerspective(
- image,
- M,
- (img_crop_width, img_crop_height),
- borderMode=cv2.BORDER_CONSTANT, # alternatively cv2.BORDER_REPLICATE
- flags=interpolation,
- )
- return dst_img
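- # A minimal usage sketch (hypothetical corner values, not part of the original module):
- # given a quadrilateral in clockwise order starting at the top-left corner, Homography
- # warps it to an axis-aligned rectangle of the requested world size.
- #
- #   quad = [[10, 20], [210, 30], [205, 80], [8, 75]]  # hypothetical corners
- #   rectified = Homography(image, quad, world_width=200, world_height=50)
- #   # rectified.shape == (50, 200, 3) when ratio_width == ratio_height == 1.0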
- class PlanB:
- def __call__(
- self,
- image,
- points,
- curveTextRectifier,
- interpolation=cv2.INTER_LINEAR,
- ratio_width=1.0,
- ratio_height=1.0,
- loss_thresh=5.0,
- square=False,
- ):
- """
- Plan B using sub-image when it failed in original image
- :param image:
- :param points:
- :param curveTextRectifier: CurveTextRectifier
- :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
- :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
- :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
- :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
- :param square: crop square image or not. True or False. The default is False
- :return:
- """
- h, w = image.shape[:2]
- _points = np.array(points).reshape(-1, 2).astype(np.float32)
- x_min = int(np.min(_points[:, 0]))
- y_min = int(np.min(_points[:, 1]))
- x_max = int(np.max(_points[:, 0]))
- y_max = int(np.max(_points[:, 1]))
- dx = x_max - x_min
- dy = y_max - y_min
- max_d = max(dx, dy)
- mean_pt = np.mean(_points, 0)
- expand_x = (ratio_width - 1.0) * 0.5 * max_d
- expand_y = (ratio_height - 1.0) * 0.5 * max_d
- if square:
- x_min = np.clip(int(mean_pt[0] - max_d - expand_x), 0, w - 1)
- y_min = np.clip(int(mean_pt[1] - max_d - expand_y), 0, h - 1)
- x_max = np.clip(int(mean_pt[0] + max_d + expand_x), 0, w - 1)
- y_max = np.clip(int(mean_pt[1] + max_d + expand_y), 0, h - 1)
- else:
- x_min = np.clip(int(x_min - expand_x), 0, w - 1)
- y_min = np.clip(int(y_min - expand_y), 0, h - 1)
- x_max = np.clip(int(x_max + expand_x), 0, w - 1)
- y_max = np.clip(int(y_max + expand_y), 0, h - 1)
- new_image = image[y_min:y_max, x_min:x_max, :].copy()
- new_points = _points.copy()
- new_points[:, 0] -= x_min
- new_points[:, 1] -= y_min
- dst_img, loss = curveTextRectifier(
- new_image,
- new_points,
- interpolation,
- ratio_width,
- ratio_height,
- mode="calibration",
- )
- return dst_img, loss
- class CurveTextRectifier:
- """
- spatial transformer via monocular vision
- """
- def __init__(self):
- self.get_virtual_camera_parameter()
- def get_virtual_camera_parameter(self):
- vcam_thz = 0
- vcam_thx1 = 180
- vcam_thy = 180
- vcam_thx2 = 0
- vcam_x = 0
- vcam_y = 0
- vcam_z = 100
- radian = np.pi / 180
- angle_z = radian * vcam_thz
- angle_x1 = radian * vcam_thx1
- angle_y = radian * vcam_thy
- angle_x2 = radian * vcam_thx2
- optic_x = vcam_x
- optic_y = vcam_y
- optic_z = vcam_z
- fu = 100
- fv = 100
- matT = np.zeros((4, 4))
- matT[0, 0] = cos(angle_z) * cos(angle_y) - sin(angle_z) * sin(angle_x1) * sin(
- angle_y
- )
- matT[0, 1] = cos(angle_z) * sin(angle_y) * sin(angle_x2) - sin(angle_z) * (
- cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
- )
- matT[0, 2] = cos(angle_z) * sin(angle_y) * cos(angle_x2) + sin(angle_z) * (
- cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
- )
- matT[0, 3] = optic_x
- matT[1, 0] = sin(angle_z) * cos(angle_y) + cos(angle_z) * sin(angle_x1) * sin(
- angle_y
- )
- matT[1, 1] = sin(angle_z) * sin(angle_y) * sin(angle_x2) + cos(angle_z) * (
- cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2)
- )
- matT[1, 2] = sin(angle_z) * sin(angle_y) * cos(angle_x2) - cos(angle_z) * (
- cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2)
- )
- matT[1, 3] = optic_y
- matT[2, 0] = -cos(angle_x1) * sin(angle_y)
- matT[2, 1] = cos(angle_x1) * cos(angle_y) * sin(angle_x2) + sin(angle_x1) * cos(
- angle_x2
- )
- matT[2, 2] = cos(angle_x1) * cos(angle_y) * cos(angle_x2) - sin(angle_x1) * sin(
- angle_x2
- )
- matT[2, 3] = optic_z
- matT[3, 0] = 0
- matT[3, 1] = 0
- matT[3, 2] = 0
- matT[3, 3] = 1
- matS = np.zeros((4, 4))
- matS[2, 3] = 0.5
- matS[3, 2] = 0.5
- self.ifu = 1 / fu
- self.ifv = 1 / fv
- self.matT = matT
- self.matS = matS
- self.K = np.dot(matT.T, matS)
- self.K = np.dot(self.K, matT)
- def vertical_text_process(self, points, org_size):
- """
- change sequence amd process
- :param points:
- :param org_size:
- :return:
- """
- org_w, org_h = org_size
- _points = np.array(points).reshape(-1).tolist()
- _points = np.array(_points[2:] + _points[:2]).reshape(-1, 2)
- # convert to horizontal points
- adjusted_points = np.zeros(_points.shape, dtype=np.float32)
- adjusted_points[:, 0] = _points[:, 1]
- adjusted_points[:, 1] = org_h - _points[:, 0] - 1
- _image_coord, _world_coord, _new_image_size = self.horizontal_text_process(
- adjusted_points
- )
- # convert back to vertical points
- image_coord = _points.reshape(1, -1, 2)
- world_coord = np.zeros(_world_coord.shape, dtype=np.float32)
- world_coord[:, :, 0] = 0 - _world_coord[:, :, 1]
- world_coord[:, :, 1] = _world_coord[:, :, 0]
- world_coord[:, :, 2] = _world_coord[:, :, 2]
- new_image_size = (_new_image_size[1], _new_image_size[0])
- return image_coord, world_coord, new_image_size
- def horizontal_text_process(self, points):
- """
- get image coordinate and world coordinate
- :param points:
- :return:
- """
- poly = np.array(points).reshape(-1)
- dx_list = []
- dy_list = []
- for i in range(1, len(poly) // 2):
- xdx = poly[i * 2] - poly[(i - 1) * 2]
- xdy = poly[i * 2 + 1] - poly[(i - 1) * 2 + 1]
- d = sqrt(xdx**2 + xdy**2)
- dx_list.append(d)
- for i in range(0, len(poly) // 4):
- ydx = poly[i * 2] - poly[len(poly) - 1 - (i * 2 + 1)]
- ydy = poly[i * 2 + 1] - poly[len(poly) - 1 - (i * 2)]
- d = sqrt(ydx**2 + ydy**2)
- dy_list.append(d)
- dx_list = [
- (dx_list[i] + dx_list[len(dx_list) - 1 - i]) / 2
- for i in range(len(dx_list) // 2)
- ]
- height = np.around(np.mean(dy_list))
- rect_coord = [0, 0]
- for i in range(0, len(poly) // 4 - 1):
- x = rect_coord[-2]
- x += dx_list[i]
- y = 0
- rect_coord.append(x)
- rect_coord.append(y)
- rect_coord_half = copy.deepcopy(rect_coord)
- for i in range(0, len(poly) // 4):
- x = rect_coord_half[len(rect_coord_half) - 2 * i - 2]
- y = height
- rect_coord.append(x)
- rect_coord.append(y)
- np_rect_coord = np.array(rect_coord).reshape(-1, 2)
- x_min = np.min(np_rect_coord[:, 0])
- y_min = np.min(np_rect_coord[:, 1])
- x_max = np.max(np_rect_coord[:, 0])
- y_max = np.max(np_rect_coord[:, 1])
- new_image_size = (int(x_max - x_min + 0.5), int(y_max - y_min + 0.5))
- x_mean = (x_max - x_min) / 2
- y_mean = (y_max - y_min) / 2
- np_rect_coord[:, 0] -= x_mean
- np_rect_coord[:, 1] -= y_mean
- rect_coord = np_rect_coord.reshape(-1).tolist()
- rect_coord = np.array(rect_coord).reshape(-1, 2)
- world_coord = np.zeros((len(rect_coord), 3))
- world_coord[:, :2] = rect_coord
- image_coord = np.array(poly).reshape(1, -1, 2)
- world_coord = world_coord.reshape(1, -1, 3)
- return image_coord, world_coord, new_image_size
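- # Shape note (added for clarity): for a polygon with N points, horizontal_text_process
- # returns image_coord with shape (1, N, 2), world_coord with shape (1, N, 3) (z is always 0),
- # and new_image_size as the (width, height) of the flattened text rectangle.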
- def horizontal_text_estimate(self, points):
- """
- horizontal or vertical text
- :param points:
- :return:
- """
- pts = np.array(points).reshape(-1, 2)
- x_min = int(np.min(pts[:, 0]))
- y_min = int(np.min(pts[:, 1]))
- x_max = int(np.max(pts[:, 0]))
- y_max = int(np.max(pts[:, 1]))
- x = x_max - x_min
- y = y_max - y_min
- is_horizontal_text = True
- if y / x > 1.5: # vertical text condition
- is_horizontal_text = False
- return is_horizontal_text
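- # Example (hypothetical numbers): a box spanning 40 px in x and 90 px in y gives
- # y / x = 2.25 > 1.5, so the region is treated as vertical text and rotated before processing.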
- def virtual_camera_to_world(self, size):
- ifu, ifv = self.ifu, self.ifv
- K, matT = self.K, self.matT
- ppu = size[0] / 2 + 1e-6
- ppv = size[1] / 2 + 1e-6
- P = np.zeros((size[1], size[0], 3))
- lu = np.array([i for i in range(size[0])])
- lv = np.array([i for i in range(size[1])])
- u, v = np.meshgrid(lu, lv)
- yp = (v - ppv) * ifv
- xp = (u - ppu) * ifu
- angle_a = arctan(sqrt(xp * xp + yp * yp))
- angle_b = arctan(yp / xp)
- D0 = sin(angle_a) * cos(angle_b)
- D1 = sin(angle_a) * sin(angle_b)
- D2 = cos(angle_a)
- D0[xp <= 0] = -D0[xp <= 0]
- D1[xp <= 0] = -D1[xp <= 0]
- ratio_a = (
- K[0, 0] * D0 * D0
- + K[1, 1] * D1 * D1
- + K[2, 2] * D2 * D2
- + (K[0, 1] + K[1, 0]) * D0 * D1
- + (K[0, 2] + K[2, 0]) * D0 * D2
- + (K[1, 2] + K[2, 1]) * D1 * D2
- )
- ratio_b = (
- (K[0, 3] + K[3, 0]) * D0
- + (K[1, 3] + K[3, 1]) * D1
- + (K[2, 3] + K[3, 2]) * D2
- )
- ratio_c = K[3, 3] * np.ones(ratio_b.shape)
- delta = ratio_b * ratio_b - 4 * ratio_a * ratio_c
- t = np.zeros(delta.shape)
- t[ratio_a == 0] = -ratio_c[ratio_a == 0] / ratio_b[ratio_a == 0]
- t[ratio_a != 0] = (-ratio_b[ratio_a != 0] + sqrt(delta[ratio_a != 0])) / (
- 2 * ratio_a[ratio_a != 0]
- )
- t[delta < 0] = 0
- P[:, :, 0] = matT[0, 3] + t * (
- matT[0, 0] * D0 + matT[0, 1] * D1 + matT[0, 2] * D2
- )
- P[:, :, 1] = matT[1, 3] + t * (
- matT[1, 0] * D0 + matT[1, 1] * D1 + matT[1, 2] * D2
- )
- P[:, :, 2] = matT[2, 3] + t * (
- matT[2, 0] * D0 + matT[2, 1] * D1 + matT[2, 2] * D2
- )
- return P
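- # Note (added for clarity): P is an (H, W, 3) map assigning a 3D world point to every pixel of
- # the virtual camera, obtained by intersecting each back-projected viewing ray with the quadric
- # x^T K x = 0; with K = matT^T matS matT as built above, this is the world plane z = 0.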
- def world_to_image(self, image_size, world, intrinsic, distCoeffs, rotation, tvec):
- r11 = rotation[0, 0]
- r12 = rotation[0, 1]
- r13 = rotation[0, 2]
- r21 = rotation[1, 0]
- r22 = rotation[1, 1]
- r23 = rotation[1, 2]
- r31 = rotation[2, 0]
- r32 = rotation[2, 1]
- r33 = rotation[2, 2]
- t1 = tvec[0]
- t2 = tvec[1]
- t3 = tvec[2]
- k1 = distCoeffs[0]
- k2 = distCoeffs[1]
- p1 = distCoeffs[2]
- p2 = distCoeffs[3]
- k3 = distCoeffs[4]
- k4 = distCoeffs[5]
- k5 = distCoeffs[6]
- k6 = distCoeffs[7]
- if len(distCoeffs) > 8:
- s1 = distCoeffs[8]
- s2 = distCoeffs[9]
- s3 = distCoeffs[10]
- s4 = distCoeffs[11]
- else:
- s1 = s2 = s3 = s4 = 0
- if len(distCoeffs) > 12:
- tx = distCoeffs[12]
- ty = distCoeffs[13]
- else:
- tx = ty = 0
- fu = intrinsic[0, 0]
- fv = intrinsic[1, 1]
- ppu = intrinsic[0, 2]
- ppv = intrinsic[1, 2]
- cos_tx = cos(tx)
- cos_ty = cos(ty)
- sin_tx = sin(tx)
- sin_ty = sin(ty)
- tao11 = cos_ty * cos_tx * cos_ty + sin_ty * cos_tx * sin_ty
- tao12 = cos_ty * cos_tx * sin_ty * sin_tx - sin_ty * cos_tx * cos_ty * sin_tx
- tao13 = -cos_ty * cos_tx * sin_ty * cos_tx + sin_ty * cos_tx * cos_ty * cos_tx
- tao21 = -sin_tx * sin_ty
- tao22 = cos_ty * cos_tx * cos_tx + sin_tx * cos_ty * sin_tx
- tao23 = cos_ty * cos_tx * sin_tx - sin_tx * cos_ty * cos_tx
- P = np.zeros((image_size[1], image_size[0], 2))
- c3 = r31 * world[:, :, 0] + r32 * world[:, :, 1] + r33 * world[:, :, 2] + t3
- c1 = r11 * world[:, :, 0] + r12 * world[:, :, 1] + r13 * world[:, :, 2] + t1
- c2 = r21 * world[:, :, 0] + r22 * world[:, :, 1] + r23 * world[:, :, 2] + t2
- x1 = c1 / c3
- y1 = c2 / c3
- x12 = x1 * x1
- y12 = y1 * y1
- x1y1 = 2 * x1 * y1
- r2 = x12 + y12
- r4 = r2 * r2
- r6 = r2 * r4
- radial_distortion = (1 + k1 * r2 + k2 * r4 + k3 * r6) / (
- 1 + k4 * r2 + k5 * r4 + k6 * r6
- )
- x2 = (
- x1 * radial_distortion + p1 * x1y1 + p2 * (r2 + 2 * x12) + s1 * r2 + s2 * r4
- )
- y2 = (
- y1 * radial_distortion + p2 * x1y1 + p1 * (r2 + 2 * y12) + s3 * r2 + s4 * r4
- )
- x3 = tao11 * x2 + tao12 * y2 + tao13
- y3 = tao21 * x2 + tao22 * y2 + tao23
- P[:, :, 0] = fu * x3 + ppu
- P[:, :, 1] = fv * y3 + ppv
- P[c3 <= 0] = 0
- return P
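- # Note (added for clarity): this appears to be a vectorized re-implementation of OpenCV's
- # projection model (cf. cv2.projectPoints), covering the rational radial terms (k1..k6),
- # tangential terms (p1, p2), thin-prism terms (s1..s4) and a tilted-sensor term (tx, ty),
- # applied to a dense per-pixel world map instead of a sparse point list.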
- def spatial_transform(
- self, image_data, new_image_size, mtx, dist, rvecs, tvecs, interpolation
- ):
- rotation, _ = cv2.Rodrigues(rvecs)
- world_map = self.virtual_camera_to_world(new_image_size)
- image_map = self.world_to_image(
- new_image_size, world_map, mtx, dist, rotation, tvecs
- )
- image_map = image_map.astype(np.float32)
- dst = cv2.remap(
- image_data, image_map[:, :, 0], image_map[:, :, 1], interpolation
- )
- return dst
- def calibrate(self, org_size, image_coord, world_coord):
- """
- calibration
- :param org_size:
- :param image_coord:
- :param world_coord:
- :return:
- """
- # flag = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL | cv2.CALIB_THIN_PRISM_MODEL
- flag = cv2.CALIB_RATIONAL_MODEL
- flag2 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL
- flag3 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_THIN_PRISM_MODEL
- flag4 = (
- cv2.CALIB_RATIONAL_MODEL
- | cv2.CALIB_ZERO_TANGENT_DIST
- | cv2.CALIB_FIX_ASPECT_RATIO
- )
- flag5 = (
- cv2.CALIB_RATIONAL_MODEL
- | cv2.CALIB_TILTED_MODEL
- | cv2.CALIB_ZERO_TANGENT_DIST
- )
- flag6 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_FIX_ASPECT_RATIO
- flag_list = [flag2, flag3, flag4, flag5, flag6]
- ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
- world_coord.astype(np.float32),
- image_coord.astype(np.float32),
- org_size,
- None,
- None,
- flags=flag,
- )
- if ret > 2:
- # the re-projection error is large; retry with alternative distortion-model flags
- min_ret = ret
- for i, flag in enumerate(flag_list):
- _ret, _mtx, _dist, _rvecs, _tvecs = cv2.calibrateCamera(
- world_coord.astype(np.float32),
- image_coord.astype(np.float32),
- org_size,
- None,
- None,
- flags=flag,
- )
- if _ret < min_ret:
- min_ret = _ret
- ret, mtx, dist, rvecs, tvecs = _ret, _mtx, _dist, _rvecs, _tvecs
- return ret, mtx, dist, rvecs, tvecs
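- # Note (added for clarity): cv2.calibrateCamera returns the RMS re-projection error as `ret`;
- # downstream code treats this value as the rectification loss (see loss_thresh in AutoRectifier).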
- def dc_homo(
- self,
- img,
- img_points,
- obj_points,
- is_horizontal_text,
- interpolation=cv2.INTER_LINEAR,
- ratio_width=1.0,
- ratio_height=1.0,
- ):
- """
- divide and conquer: homography
- # ratio_width and ratio_height must be 1.0 here
- """
- _img_points = img_points.reshape(-1, 2)
- _obj_points = obj_points.reshape(-1, 3)
- homo_img_list = []
- width_list = []
- height_list = []
- # divide and conquer
- for i in range(len(_img_points) // 2 - 1):
- new_img_points = np.zeros((4, 2)).astype(np.float32)
- new_obj_points = np.zeros((4, 2)).astype(np.float32)
- new_img_points[0:2, :] = _img_points[i : (i + 2), :2]
- new_img_points[2:4, :] = _img_points[::-1, :][i : (i + 2), :2][::-1, :]
- new_obj_points[0:2, :] = _obj_points[i : (i + 2), :2]
- new_obj_points[2:4, :] = _obj_points[::-1, :][i : (i + 2), :2][::-1, :]
- if is_horizontal_text:
- world_width = np.abs(new_obj_points[1, 0] - new_obj_points[0, 0])
- world_height = np.abs(new_obj_points[3, 1] - new_obj_points[0, 1])
- else:
- world_width = np.abs(new_obj_points[1, 1] - new_obj_points[0, 1])
- world_height = np.abs(new_obj_points[3, 0] - new_obj_points[0, 0])
- homo_img = Homography(
- img,
- new_img_points,
- world_width,
- world_height,
- interpolation=interpolation,
- ratio_width=ratio_width,
- ratio_height=ratio_height,
- )
- homo_img_list.append(homo_img)
- _h, _w = homo_img.shape[:2]
- width_list.append(_w)
- height_list.append(_h)
- # stitching
- rectified_image = np.zeros((np.max(height_list), sum(width_list), 3)).astype(
- np.uint8
- )
- st = 0
- for homo_img, w, h in zip(homo_img_list, width_list, height_list):
- rectified_image[:h, st : st + w, :] = homo_img
- st += w
- if not is_horizontal_text:
- # vertical rotation
- rectified_image = np.rot90(rectified_image, 3)
- return rectified_image
- def Homography(
- self,
- image,
- img_points,
- world_width,
- world_height,
- interpolation=cv2.INTER_CUBIC,
- ratio_width=1.0,
- ratio_height=1.0,
- ):
- _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
- expand_x = int(0.5 * world_width * (ratio_width - 1))
- expand_y = int(0.5 * world_height * (ratio_height - 1))
- pt_lefttop = [expand_x, expand_y]
- pt_righttop = [expand_x + world_width, expand_y]
- pt_rightbottom = [expand_x + world_width, expand_y + world_height]
- pt_leftbottom = [expand_x, expand_y + world_height]
- pts_std = np.float32([pt_lefttop, pt_righttop, pt_rightbottom, pt_leftbottom])
- img_crop_width = int(world_width * ratio_width)
- img_crop_height = int(world_height * ratio_height)
- M = cv2.getPerspectiveTransform(_points, pts_std)
- dst_img = cv2.warpPerspective(
- image,
- M,
- (img_crop_width, img_crop_height),
- borderMode=cv2.BORDER_CONSTANT, # alternatively cv2.BORDER_REPLICATE
- flags=interpolation,
- )
- return dst_img
- def __call__(
- self,
- image_data,
- points,
- interpolation=cv2.INTER_LINEAR,
- ratio_width=1.0,
- ratio_height=1.0,
- mode="calibration",
- ):
- """
- spatial transform for a poly text
- :param image_data:
- :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
- :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
- :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
- :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
- :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
- :return:
- """
- org_h, org_w = image_data.shape[:2]
- org_size = (org_w, org_h)
- self.image = image_data
- is_horizontal_text = self.horizontal_text_estimate(points)
- if is_horizontal_text:
- image_coord, world_coord, new_image_size = self.horizontal_text_process(
- points
- )
- else:
- image_coord, world_coord, new_image_size = self.vertical_text_process(
- points, org_size
- )
- if mode.lower() == "calibration":
- ret, mtx, dist, rvecs, tvecs = self.calibrate(
- org_size, image_coord, world_coord
- )
- st_size = (
- int(new_image_size[0] * ratio_width),
- int(new_image_size[1] * ratio_height),
- )
- dst = self.spatial_transform(
- image_data, st_size, mtx, dist[0], rvecs[0], tvecs[0], interpolation
- )
- elif mode.lower() == "homography":
- # ratio_width and ratio_height must be 1.0 here; ret is set to a small dummy value
- # because the homography path does not produce a calibration loss
- ret = 0.01
- dst = self.dc_homo(
- image_data,
- image_coord,
- world_coord,
- is_horizontal_text,
- interpolation=interpolation,
- ratio_width=1.0,
- ratio_height=1.0,
- )
- else:
- raise ValueError(
- 'mode must be ["calibration", "homography"], but got {}'.format(mode)
- )
- return dst, ret
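- # A minimal usage sketch (hypothetical polygon, not part of the original module):
- #
- #   rectifier = CurveTextRectifier()
- #   poly = [x1, y1, x2, y2, ..., xn, yn]  # clockwise, starting at the top-left of the first char
- #   dst, loss = rectifier(image, poly, mode="calibration")
- #   # a small loss indicates a reliable rectification; callers fall back when it is large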
- class AutoRectifier:
- def __init__(self):
- self.npoints = 10
- self.curveTextRectifier = CurveTextRectifier()
- @staticmethod
- def get_rotate_crop_image(
- img, points, interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0
- ):
- """
- crop or homography
- :param img:
- :param points:
- :param interpolation:
- :param ratio_width:
- :param ratio_height:
- :return:
- """
- h, w = img.shape[:2]
- _points = np.array(points).reshape(-1, 2).astype(np.float32)
- if len(_points) != 4:
- x_min = int(np.min(_points[:, 0]))
- y_min = int(np.min(_points[:, 1]))
- x_max = int(np.max(_points[:, 0]))
- y_max = int(np.max(_points[:, 1]))
- dx = x_max - x_min
- dy = y_max - y_min
- expand_x = int(0.5 * dx * (ratio_width - 1))
- expand_y = int(0.5 * dy * (ratio_height - 1))
- x_min = np.clip(int(x_min - expand_x), 0, w - 1)
- y_min = np.clip(int(y_min - expand_y), 0, h - 1)
- x_max = np.clip(int(x_max + expand_x), 0, w - 1)
- y_max = np.clip(int(y_max + expand_y), 0, h - 1)
- dst_img = img[y_min:y_max, x_min:x_max, :].copy()
- else:
- img_crop_width = int(
- max(
- np.linalg.norm(_points[0] - _points[1]),
- np.linalg.norm(_points[2] - _points[3]),
- )
- )
- img_crop_height = int(
- max(
- np.linalg.norm(_points[0] - _points[3]),
- np.linalg.norm(_points[1] - _points[2]),
- )
- )
- dst_img = Homography(
- img,
- _points,
- img_crop_width,
- img_crop_height,
- interpolation,
- ratio_width,
- ratio_height,
- )
- return dst_img
- def visualize(self, image_data, points_list):
- visualization = image_data.copy()
- for box in points_list:
- box = np.array(box).reshape(-1, 2).astype(np.int32)
- cv2.drawContours(
- visualization, [np.array(box).reshape((-1, 1, 2))], -1, (0, 0, 255), 2
- )
- for i, p in enumerate(box):
- if i != 0:
- cv2.circle(
- visualization,
- tuple(p),
- radius=1,
- color=(255, 0, 0),
- thickness=2,
- )
- else:
- cv2.circle(
- visualization,
- tuple(p),
- radius=1,
- color=(255, 255, 0),
- thickness=2,
- )
- return visualization
- def __call__(
- self,
- image_data,
- points,
- interpolation=cv2.INTER_LINEAR,
- ratio_width=1.0,
- ratio_height=1.0,
- loss_thresh=5.0,
- mode="calibration",
- ):
- """
- rectification in strategies for a poly text
- :param image_data:
- :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
- :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
- :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
- :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
- :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
- :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
- :return:
- """
- _points = np.array(points).reshape(-1, 2)
- if len(_points) >= self.npoints and len(_points) % 2 == 0:
- try:
- curveTextRectifier = CurveTextRectifier()
- dst_img, loss = curveTextRectifier(
- image_data, points, interpolation, ratio_width, ratio_height, mode
- )
- if loss >= 2:
- # for robustness: a large loss means the region cannot be reconstructed reliably,
- # so also try the PlanB sub-image strategies and keep the best result
- img_list, loss_list = [dst_img], [loss]
- _dst_img, _loss = PlanB()(
- image_data,
- points,
- curveTextRectifier,
- interpolation,
- ratio_width,
- ratio_height,
- loss_thresh=loss_thresh,
- square=True,
- )
- img_list += [_dst_img]
- loss_list += [_loss]
- _dst_img, _loss = PlanB()(
- image_data,
- points,
- curveTextRectifier,
- interpolation,
- ratio_width,
- ratio_height,
- loss_thresh=loss_thresh,
- square=False,
- )
- img_list += [_dst_img]
- loss_list += [_loss]
- min_loss = min(loss_list)
- dst_img = img_list[loss_list.index(min_loss)]
- if min_loss >= loss_thresh:
- logging.warning(
- "calibration loss: {} is too large for the spatial transformer; falling back to get_rotate_crop_image".format(
- min_loss
- )
- )
- dst_img = self.get_rotate_crop_image(
- image_data, points, interpolation, ratio_width, ratio_height
- )
- except Exception as e:
- logging.warning(f"Exception caught: {e}")
- dst_img = self.get_rotate_crop_image(
- image_data, points, interpolation, ratio_width, ratio_height
- )
- else:
- dst_img = self.get_rotate_crop_image(
- image_data, _points, interpolation, ratio_width, ratio_height
- )
- return dst_img
- def run(
- self,
- image_data,
- points_list,
- interpolation=cv2.INTER_LINEAR,
- ratio_width=1.0,
- ratio_height=1.0,
- loss_thresh=5.0,
- mode="calibration",
- ):
- """
- run for texts in an image
- :param image_data: numpy.ndarray. The shape is [h, w, 3]
- :param points_list: [[x1,y1,x2,y2,x3,y3,...], [x1,y1,x2,y2,x3,y3,...], ...], clockwise order, (x1,y1) must be the top-left of first char.
- :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
- :param ratio_width: roi_image width expansion. It should not be smaller than 1.0
- :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
- :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
- :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
- :return: res: roi-image list, visualized_image: draw polys in original image
- """
- if image_data is None:
- raise ValueError("image_data must not be None")
- if not isinstance(points_list, list):
- raise ValueError("points_list must be a list")
- for points in points_list:
- if not isinstance(points, list):
- raise ValueError("each element of points_list must be a list")
- if ratio_width < 1.0 or ratio_height < 1.0:
- raise ValueError(
- "ratio_width and ratio_height cannot be smaller than 1, but got {}".format(
- (ratio_width, ratio_height)
- )
- )
- if mode.lower() != "calibration" and mode.lower() != "homography":
- raise ValueError(
- 'mode must be ["calibration", "homography"], but got {}'.format(mode)
- )
- if mode.lower() == "homography" and ratio_width != 1.0 and ratio_height != 1.0:
- raise ValueError(
- "ratio_width and ratio_height must be 1.0 when mode is homography, but got mode:{}, ratio:({},{})".format(
- mode, ratio_width, ratio_height
- )
- )
- res = []
- for points in points_list:
- rectified_img = self(
- image_data,
- points,
- interpolation,
- ratio_width,
- ratio_height,
- loss_thresh=loss_thresh,
- mode=mode,
- )
- res.append(rectified_img)
- # visualize
- visualized_image = self.visualize(image_data, points_list)
- return res, visualized_image
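- # A minimal end-to-end usage sketch (hypothetical file name and polygons, not part of the
- # original module):
- #
- #   import cv2
- #   rectifier = AutoRectifier()
- #   image = cv2.imread("curved_text.jpg")  # hypothetical input
- #   polys = [[x1, y1, x2, y2, ...]]        # flattened clockwise polygons, one list per text region
- #   crops, vis = rectifier.run(image, polys, mode="calibration")
- #   # polygons with at least 10 points go through calibration-based rectification;
- #   # shorter ones fall back to get_rotate_crop_image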