
predictor -> model

gaotingquan, 1 year ago
commit 00afa21e33
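
This commit renames the inference entry point create_predictor to create_model and moves paddlex.inference.predictors to paddlex.inference.models. A minimal usage sketch of the renamed API (the model name appears in this diff; the input image path is hypothetical):

    from paddlex import create_model  # formerly: from paddlex import create_predictor

    model = create_model("PP-OCRv4_mobile_seal_det")
    for res in model("doc_img.png"):  # "doc_img.png" is an illustrative input
        res.print()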

+ 1 - 2
paddlex/__init__.py

@@ -25,11 +25,10 @@ from .modules import (
     build_dataset_checker,
     build_trainer,
     build_evaluater,
-    build_predictor,
 )
 
 
-from .inference import create_predictor, create_pipeline
+from .inference import create_model, create_pipeline
 
 
 def _initialize():

+ 1 - 1
paddlex/inference/__init__.py

@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .predictors import create_predictor
+from .models import create_model
 from .pipelines import create_pipeline
 from .utils.pp_option import PaddlePredictorOption

+ 1 - 1
paddlex/inference/predictors/__init__.py → paddlex/inference/models/__init__.py

@@ -56,7 +56,7 @@ def _create_hp_predictor(
     )
 
 
-def create_predictor(
+def create_model(
     model: str, device: str = None, *args, use_hpip=False, hpi_params=None, **kwargs
 ) -> BasePredictor:
     model_dir = check_model(model)
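
Per the signature above, the device and the high-performance inference switches are keyword arguments, and check_model resolves a model name or local directory to a model_dir. A hedged sketch (the contents of hpi_params are not shown in this diff and are omitted):

    from paddlex.inference import create_model

    model = create_model(
        "PP-OCRv4_server_seal_det",  # resolved via check_model / official_models
        device="gpu",
        use_hpip=False,  # True would route through _create_hp_predictor
    )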

+ 0 - 0
paddlex/inference/predictors/base.py → paddlex/inference/models/base.py


+ 0 - 0
paddlex/inference/predictors/general_recognition.py → paddlex/inference/models/general_recognition.py


+ 0 - 0
paddlex/inference/predictors/image_classification.py → paddlex/inference/models/image_classification.py


+ 0 - 0
paddlex/inference/predictors/image_unwarping.py → paddlex/inference/models/image_unwarping.py


+ 0 - 0
paddlex/inference/predictors/instance_segmentation.py → paddlex/inference/models/instance_segmentation.py


+ 0 - 0
paddlex/inference/predictors/object_detection.py → paddlex/inference/models/object_detection.py


+ 1 - 1
paddlex/inference/predictors/official_models.py → paddlex/inference/models/official_models.py

@@ -254,4 +254,4 @@ class OfficialModelsDict(dict):
         return save_dir / f"{key}"
 
 
-official_models = OfficialModelsDict(OFFICIAL_MODELS)
+official_models = OfficialModelsDict(OFFICIAL_MODELS)

+ 0 - 0
paddlex/inference/predictors/semantic_segmentation.py → paddlex/inference/models/semantic_segmentation.py


+ 0 - 0
paddlex/inference/predictors/table_recognition.py → paddlex/inference/models/table_recognition.py


+ 0 - 0
paddlex/inference/predictors/text_detection.py → paddlex/inference/models/text_detection.py


+ 0 - 0
paddlex/inference/predictors/text_recognition.py → paddlex/inference/models/text_recognition.py


+ 0 - 0
paddlex/inference/predictors/ts.py → paddlex/inference/models/ts.py


+ 0 - 0
paddlex/inference/predictors/ts_cls.py → paddlex/inference/models/ts_cls.py


+ 0 - 0
paddlex/inference/predictors/ts_fc.py → paddlex/inference/models/ts_fc.py


+ 3 - 3
paddlex/inference/pipelines/base.py

@@ -16,7 +16,7 @@ from abc import ABC
 from typing import Any, Dict, Optional
 
 from ...utils.subclass_register import AutoRegisterABCMetaClass
-from ..predictors import create_predictor
+from ..models import create_model
 
 
 class BasePipeline(ABC, metaclass=AutoRegisterABCMetaClass):
@@ -34,5 +34,5 @@ class BasePipeline(ABC, metaclass=AutoRegisterABCMetaClass):
     def __call__(self, *args, **kwargs):
         yield from self.predict(*args, **kwargs)
 
-    def _create_predictor(self, *args, **kwargs):
-        return create_predictor(*args, **kwargs, **self._predictor_kwargs)
+    def _create_model(self, *args, **kwargs):
+        return create_model(*args, **kwargs, **self._predictor_kwargs)
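
Since __call__ yields from predict, a pipeline instance can be consumed directly as an iterator, and the renamed _create_model helper merges the pipeline-level _predictor_kwargs into every model it builds. A sketch, with pipeline construction as in the subclasses below:

    for res in pipeline("doc_img.png"):  # equivalent to iterating pipeline.predict(...)
        res.print()  # assuming results expose the same print() as model results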

+ 2 - 2
paddlex/inference/pipelines/ocr.py

@@ -31,8 +31,8 @@ class OCRPipeline(BasePipeline):
         predictor_kwargs=None,
     ):
         super().__init__(predictor_kwargs)
-        self._det_predict = self._create_predictor(det_model, device=device)
-        self._rec_predict = self._create_predictor(
+        self._det_predict = self._create_model(det_model, device=device)
+        self._rec_predict = self._create_model(
             rec_model, batch_size=rec_batch_size, device=device
         )
         self.is_curve = self._det_predict.model_name in [
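
A construction sketch based on the parameters visible in this hunk; the detection and recognition model names here are assumptions, not defaults taken from this diff:

    pipeline = OCRPipeline(
        det_model="PP-OCRv4_mobile_det",  # hypothetical detection model name
        rec_model="PP-OCRv4_mobile_rec",  # hypothetical recognition model name
        rec_batch_size=1,
        device="gpu",
    )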

+ 1 - 3
paddlex/inference/pipelines/single_model_pipeline.py

@@ -26,9 +26,7 @@ class SingleModelPipeline(BasePipeline):
 
     def __init__(self, model, batch_size=1, device="gpu", predictor_kwargs=None):
         super().__init__(predictor_kwargs)
-        self._predict = self._create_predictor(
-            model, batch_size=batch_size, device=device
-        )
+        self._predict = self._create_model(model, batch_size=batch_size, device=device)
 
     def predict(self, x):
         yield from self._predict(x)

+ 2 - 2
paddlex/inference/pipelines/table_recognition/table_recognition.py

@@ -38,7 +38,7 @@ class TableRecPipeline(BasePipeline):
     ):
         super().__init__(predictor_kwargs)
 
-        self.layout_predictor = self._create_predictor(
+        self.layout_predictor = self._create_model(
             model=layout_model, device=device, batch_size=batch_size
         )
 
@@ -50,7 +50,7 @@ class TableRecPipeline(BasePipeline):
             det_device=device,
             predictor_kwargs=predictor_kwargs,
         )
-        self.table_predictor = self._create_predictor(
+        self.table_predictor = self._create_model(
             model=table_model, device=device, batch_size=batch_size
         )
         self._crop_by_boxes = CropByBoxes()

+ 3 - 3
paddlex/modules/predictor.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from copy import deepcopy
-from ..inference.predictors import create_predictor
+from ..inference.models import create_model
 from ..inference.utils.pp_option import PaddlePredictorOption
 from ..utils.config import AttrDict
 
@@ -28,10 +28,10 @@ class Predictor(object):
         model = model_name if model_dir is None else model_dir
         self.input_path = predict_config.pop("input_path")
         pp_option = PaddlePredictorOption(**predict_config.pop("kernel_option", {}))
-        self.predictor = create_predictor(model, pp_option=pp_option, **predict_config)
+        self.model = create_model(model, pp_option=pp_option, **predict_config)
 
     def predict(self):
-        for res in self.predictor(self.input_path):
+        for res in self.model(self.input_path):
             res.print()
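
From the constructor above, the predict config carries an input_path plus an optional kernel_option dict that feeds PaddlePredictorOption, and the remaining keys are forwarded to create_model. A hedged sketch of that shape (the field names inside kernel_option are assumptions):

    predict_config = {
        "input_path": "doc_img.png",  # consumed by Predictor.predict
        "kernel_option": {"run_mode": "paddle"},  # hypothetical PaddlePredictorOption field
        "batch_size": 1,  # assumed pass-through to create_model
    }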
 
 

+ 0 - 115
paddlex/modules/text_detection/predictor/predictor.py

@@ -1,115 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from operator import le
-import os
-
-import numpy as np
-from . import transforms as T
-from ....utils import logging
-from ...base import BasePredictor
-from ...base.predictor.transforms import image_common
-from .keys import TextDetKeys as K
-from ..model_list import MODELS
-
-
-class TextDetPredictor(BasePredictor):
-    """TextDetPredictor"""
-
-    entities = MODELS
-
-    @classmethod
-    def get_input_keys(cls):
-        """get input keys"""
-        return [[K.IMAGE], [K.IM_PATH]]
-
-    @classmethod
-    def get_output_keys(cls):
-        """get output keys"""
-        return [K.PROB_MAP, K.SHAPE]
-
-    def _run(self, batch_input):
-        """_run"""
-        if len(batch_input) != 1:
-            raise ValueError(
-                f"For `{self.__class__.__name__}`, batch size can only be set to 1."
-            )
-        images = [data[K.IMAGE] for data in batch_input]
-        input_ = np.stack(images, axis=0)
-        if input_.ndim == 3:
-            input_ = input_[:, np.newaxis]
-        input_ = input_.astype(dtype=np.float32, copy=False)
-        outputs = self._predictor.predict([input_])
-
-        pred = batch_input
-        pred[0][K.PROB_MAP] = outputs
-
-        return pred
-
-    def _get_pre_transforms_from_config(self):
-        """get preprocess transforms"""
-
-        if self.model_name in ['PP-OCRv4_server_seal_det', 'PP-OCRv4_mobile_seal_det']:
-            limit_side_len = 736
-        else:
-            limit_side_len = 960
-    
-        return [
-            image_common.ReadImage(),
-            T.DetResizeForTest(limit_side_len=limit_side_len, limit_type="max"),
-            T.NormalizeImage(
-                mean=[0.485, 0.456, 0.406],
-                std=[0.229, 0.224, 0.225],
-                scale=1.0 / 255,
-                order="hwc",
-            ),
-            image_common.ToCHWImage(),
-        ]
-
-    def _get_post_transforms_from_config(self):
-        """get postprocess transforms"""
-        if self.model_name in ['PP-OCRv4_server_seal_det', 'PP-OCRv4_mobile_seal_det']:
-            task = 'poly'
-            post_transforms = [
-                T.DBPostProcess(
-                    thresh=0.2,
-                    box_thresh=0.6,
-                    max_candidates=1000,
-                    unclip_ratio=1.5,
-                    use_dilation=False,
-                    score_mode="fast",
-                    box_type="poly",
-                )
-            ]
-        else:
-            task = 'quad'
-            post_transforms = [
-                T.DBPostProcess(
-                    thresh=0.3,
-                    box_thresh=0.6,
-                    max_candidates=1000,
-                    unclip_ratio=1.5,
-                    use_dilation=False,
-                    score_mode="fast",
-                    box_type="quad",
-                )
-            ]
-        if not self.disable_print:
-            post_transforms.append(T.PrintResult())
-        if not self.disable_save:
-            post_transforms.append(
-                T.SaveTextDetResults(self.output, task),
-            )
-        return post_transforms

+ 0 - 987
paddlex/modules/text_detection/predictor/transforms.py

@@ -1,987 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import os
-import sys
-import cv2
-import copy
-import math
-import pyclipper
-import numpy as np
-from numpy.linalg import norm
-from PIL import Image
-from shapely.geometry import Polygon
-
-from ....utils import logging
-from ...base.predictor.io.writers import ImageWriter
-from ...base.predictor.io.readers import ImageReader
-from ...base.predictor import BaseTransform
-from .keys import TextDetKeys as K
-from .utils import AutoRectifier
-
-__all__ = [
-    "DetResizeForTest",
-    "NormalizeImage",
-    "DBPostProcess",
-    "SaveTextDetResults",
-    "PrintResult",
-]
-
-
-class DetResizeForTest(BaseTransform):
-    """DetResizeForTest"""
-
-    def __init__(self, **kwargs):
-        super(DetResizeForTest, self).__init__()
-        self.resize_type = 0
-        self.keep_ratio = False
-        if "image_shape" in kwargs:
-            self.image_shape = kwargs["image_shape"]
-            self.resize_type = 1
-            if "keep_ratio" in kwargs:
-                self.keep_ratio = kwargs["keep_ratio"]
-        elif "limit_side_len" in kwargs:
-            self.limit_side_len = kwargs["limit_side_len"]
-            self.limit_type = kwargs.get("limit_type", "min")
-        elif "resize_long" in kwargs:
-            self.resize_type = 2
-            self.resize_long = kwargs.get("resize_long", 960)
-        else:
-            self.limit_side_len = 736
-            self.limit_type = "min"
-
-    def apply(self, data):
-        """apply"""
-        img = data[K.IMAGE]
-        src_h, src_w, _ = img.shape
-        if sum([src_h, src_w]) < 64:
-            img = self.image_padding(img)
-
-        if self.resize_type == 0:
-            # img, shape = self.resize_image_type0(img)
-            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
-        elif self.resize_type == 2:
-            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
-        else:
-            # img, shape = self.resize_image_type1(img)
-            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
-        data[K.IMAGE] = img
-        data[K.SHAPE] = np.array([src_h, src_w, ratio_h, ratio_w])
-        return data
-
-    @classmethod
-    def get_input_keys(cls):
-        """get input keys"""
-
-        return [K.IMAGE]
-
-    @classmethod
-    def get_output_keys(cls):
-        """get output keys"""
-
-        return [K.IMAGE, K.SHAPE]
-
-    def image_padding(self, im, value=0):
-        """padding image"""
-        h, w, c = im.shape
-        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
-        im_pad[:h, :w, :] = im
-        return im_pad
-
-    def resize_image_type1(self, img):
-        """resize the image"""
-        resize_h, resize_w = self.image_shape
-        ori_h, ori_w = img.shape[:2]  # (h, w, c)
-        if self.keep_ratio is True:
-            resize_w = ori_w * resize_h / ori_h
-            N = math.ceil(resize_w / 32)
-            resize_w = N * 32
-        ratio_h = float(resize_h) / ori_h
-        ratio_w = float(resize_w) / ori_w
-        img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        # return img, np.array([ori_h, ori_w])
-        return img, [ratio_h, ratio_w]
-
-    def resize_image_type0(self, img):
-        """
-        resize image to a size multiple of 32 which is required by the network
-        args:
-            img(array): array with shape [h, w, c]
-        return(tuple):
-            img, (ratio_h, ratio_w)
-        """
-        limit_side_len = self.limit_side_len
-        h, w, c = img.shape
-
-        # limit the max side
-        if self.limit_type == "max":
-            if max(h, w) > limit_side_len:
-                if h > w:
-                    ratio = float(limit_side_len) / h
-                else:
-                    ratio = float(limit_side_len) / w
-            else:
-                ratio = 1.0
-        elif self.limit_type == "min":
-            if min(h, w) < limit_side_len:
-                if h < w:
-                    ratio = float(limit_side_len) / h
-                else:
-                    ratio = float(limit_side_len) / w
-            else:
-                ratio = 1.0
-        elif self.limit_type == "resize_long":
-            ratio = float(limit_side_len) / max(h, w)
-        else:
-            raise Exception("unsupported limit type")
-        resize_h = int(h * ratio)
-        resize_w = int(w * ratio)
-
-        resize_h = max(int(round(resize_h / 32) * 32), 32)
-        resize_w = max(int(round(resize_w / 32) * 32), 32)
-
-        try:
-            if int(resize_w) <= 0 or int(resize_h) <= 0:
-                return None, (None, None)
-            img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        except:
-            logging.info(img.shape, resize_w, resize_h)
-            sys.exit(0)
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-        return img, [ratio_h, ratio_w]
-
-    def resize_image_type2(self, img):
-        """resize image size"""
-        h, w, _ = img.shape
-
-        resize_w = w
-        resize_h = h
-
-        if resize_h > resize_w:
-            ratio = float(self.resize_long) / resize_h
-        else:
-            ratio = float(self.resize_long) / resize_w
-
-        resize_h = int(resize_h * ratio)
-        resize_w = int(resize_w * ratio)
-
-        max_stride = 128
-        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
-        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
-        img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-
-        return img, [ratio_h, ratio_w]
-
-
-class NormalizeImage(BaseTransform):
-    """normalize image such as substract mean, divide std"""
-
-    def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs):
-        if isinstance(scale, str):
-            scale = eval(scale)
-        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
-        mean = mean if mean is not None else [0.485, 0.456, 0.406]
-        std = std if std is not None else [0.229, 0.224, 0.225]
-
-        shape = (3, 1, 1) if order == "chw" else (1, 1, 3)
-        self.mean = np.array(mean).reshape(shape).astype("float32")
-        self.std = np.array(std).reshape(shape).astype("float32")
-
-    def apply(self, data):
-        """apply"""
-        img = data[K.IMAGE]
-        from PIL import Image
-
-        if isinstance(img, Image.Image):
-            img = np.array(img)
-        assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage"
-        data[K.IMAGE] = (img.astype("float32") * self.scale - self.mean) / self.std
-        return data
-
-    @classmethod
-    def get_input_keys(cls):
-        """get input keys"""
-        return [K.IMAGE]
-
-    @classmethod
-    def get_output_keys(cls):
-        """get output keys"""
-        return [K.IMAGE]
-
-
-class DBPostProcess(BaseTransform):
-    """
-    The post process for Differentiable Binarization (DB).
-    """
-
-    def __init__(
-        self,
-        thresh=0.3,
-        box_thresh=0.7,
-        max_candidates=1000,
-        unclip_ratio=2.0,
-        use_dilation=False,
-        score_mode="fast",
-        box_type="quad",
-        **kwargs
-    ):
-        self.thresh = thresh
-        self.box_thresh = box_thresh
-        self.max_candidates = max_candidates
-        self.unclip_ratio = unclip_ratio
-        self.min_size = 3
-        self.score_mode = score_mode
-        self.box_type = box_type
-        assert score_mode in [
-            "slow",
-            "fast",
-        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)
-
-        self.dilation_kernel = None if not use_dilation else np.array([[1, 1], [1, 1]])
-
-    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
-        """_bitmap: single map with shape (1, H, W), whose values are binarized as {0, 1}"""
-
-        bitmap = _bitmap
-        height, width = bitmap.shape
-
-        boxes = []
-        scores = []
-
-        contours, _ = cv2.findContours(
-            (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
-        )
-
-        for contour in contours[: self.max_candidates]:
-            epsilon = 0.002 * cv2.arcLength(contour, True)
-            approx = cv2.approxPolyDP(contour, epsilon, True)
-            points = approx.reshape((-1, 2))
-            if points.shape[0] < 4:
-                continue
-
-            score = self.box_score_fast(pred, points.reshape(-1, 2))
-            if self.box_thresh > score:
-                continue
-
-            if points.shape[0] > 2:
-                box = self.unclip(points, self.unclip_ratio)
-                if len(box) > 1:
-                    continue
-            else:
-                continue
-            box = box.reshape(-1, 2)
-
-            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
-            if sside < self.min_size + 2:
-                continue
-
-            box = np.array(box)
-            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
-            box[:, 1] = np.clip(
-                np.round(box[:, 1] / height * dest_height), 0, dest_height
-            )
-            boxes.append(box.tolist())
-            scores.append(score)
-        return boxes, scores
-
-    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
-        """_bitmap: single map with shape (1, H, W), whose values are binarized as {0, 1}"""
-
-        bitmap = _bitmap
-        height, width = bitmap.shape
-
-        outs = cv2.findContours(
-            (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
-        )
-        if len(outs) == 3:
-            img, contours, _ = outs[0], outs[1], outs[2]
-        elif len(outs) == 2:
-            contours, _ = outs[0], outs[1]
-
-        num_contours = min(len(contours), self.max_candidates)
-
-        boxes = []
-        scores = []
-        for index in range(num_contours):
-            contour = contours[index]
-            points, sside = self.get_mini_boxes(contour)
-            if sside < self.min_size:
-                continue
-            points = np.array(points)
-            if self.score_mode == "fast":
-                score = self.box_score_fast(pred, points.reshape(-1, 2))
-            else:
-                score = self.box_score_slow(pred, contour)
-            if self.box_thresh > score:
-                continue
-
-            box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2)
-            box, sside = self.get_mini_boxes(box)
-            if sside < self.min_size + 2:
-                continue
-            box = np.array(box)
-
-            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
-            box[:, 1] = np.clip(
-                np.round(box[:, 1] / height * dest_height), 0, dest_height
-            )
-            boxes.append(box.astype(np.int16))
-            scores.append(score)
-        return np.array(boxes, dtype=np.int16), scores
-
-    def unclip(self, box, unclip_ratio):
-        """unclip"""
-        poly = Polygon(box)
-        distance = poly.area * unclip_ratio / poly.length
-        offset = pyclipper.PyclipperOffset()
-        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-        expanded = np.array(offset.Execute(distance))
-        return expanded
-
-    def get_mini_boxes(self, contour):
-        """get mini boxes"""
-        bounding_box = cv2.minAreaRect(contour)
-        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
-
-        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
-        if points[1][1] > points[0][1]:
-            index_1 = 0
-            index_4 = 1
-        else:
-            index_1 = 1
-            index_4 = 0
-        if points[3][1] > points[2][1]:
-            index_2 = 2
-            index_3 = 3
-        else:
-            index_2 = 3
-            index_3 = 2
-
-        box = [points[index_1], points[index_2], points[index_3], points[index_4]]
-        return box, min(bounding_box[1])
-
-    def box_score_fast(self, bitmap, _box):
-        """box_score_fast: use bbox mean score as the mean score"""
-        h, w = bitmap.shape[:2]
-        box = _box.copy()
-        xmin = np.clip(np.floor(box[:, 0].min()).astype("int"), 0, w - 1)
-        xmax = np.clip(np.ceil(box[:, 0].max()).astype("int"), 0, w - 1)
-        ymin = np.clip(np.floor(box[:, 1].min()).astype("int"), 0, h - 1)
-        ymax = np.clip(np.ceil(box[:, 1].max()).astype("int"), 0, h - 1)
-
-        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
-        box[:, 0] = box[:, 0] - xmin
-        box[:, 1] = box[:, 1] - ymin
-        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
-        return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]
-
-    def box_score_slow(self, bitmap, contour):
-        """box_score_slow: use polyon mean score as the mean score"""
-        h, w = bitmap.shape[:2]
-        contour = contour.copy()
-        contour = np.reshape(contour, (-1, 2))
-
-        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
-        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
-        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
-        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)
-
-        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
-
-        contour[:, 0] = contour[:, 0] - xmin
-        contour[:, 1] = contour[:, 1] - ymin
-
-        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
-        return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]
-
-    def apply(self, data):
-        """apply"""
-        pred = data[K.PROB_MAP]
-        shape_list = [data[K.SHAPE]]
-        pred = pred[0][:, 0, :, :]
-        segmentation = pred > self.thresh
-
-        boxes_batch = []
-        for batch_index in range(pred.shape[0]):
-            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
-            if self.dilation_kernel is not None:
-                mask = cv2.dilate(
-                    np.array(segmentation[batch_index]).astype(np.uint8),
-                    self.dilation_kernel,
-                )
-            else:
-                mask = segmentation[batch_index]
-            if self.box_type == "poly":
-                boxes, scores = self.polygons_from_bitmap(
-                    pred[batch_index], mask, src_w, src_h
-                )
-            elif self.box_type == "quad":
-                boxes, scores = self.boxes_from_bitmap(
-                    pred[batch_index], mask, src_w, src_h
-                )
-            else:
-                raise ValueError("box_type can only be one of ['quad', 'poly']")
-
-            data[K.DT_POLYS] = boxes
-            data[K.DT_SCORES] = scores
-
-        return data
-
-    @classmethod
-    def get_input_keys(cls):
-        """get input keys"""
-        return [K.PROB_MAP]
-
-    @classmethod
-    def get_output_keys(cls):
-        """get output keys"""
-        return [K.DT_POLYS, K.DT_SCORES]
-
-
-class CropByPolys(BaseTransform):
-    """Crop Image by Polys"""
-
-    def __init__(self, det_box_type="quad"):
-        super().__init__()
-        self.det_box_type = det_box_type
-
-    def apply(self, data):
-        """apply"""
-        ori_im = data[K.ORI_IM]
-        if self.det_box_type == "quad":
-            dt_boxes = self.sorted_boxes(data[K.DT_POLYS])
-            dt_boxes = np.array(dt_boxes)
-            img_crop_list = []
-            for bno in range(len(dt_boxes)):
-                tmp_box = copy.deepcopy(dt_boxes[bno])
-                img_crop = self.get_minarea_rect_crop(ori_im, tmp_box)
-                img_crop_list.append(img_crop)
-        elif self.det_box_type == "poly":
-            img_crop_list = []
-            dt_boxes = data[K.DT_POLYS]
-            for bno in range(len(dt_boxes)):
-                tmp_box = copy.deepcopy(dt_boxes[bno])
-                img_crop = self.get_poly_rect_crop(ori_im.copy(), tmp_box)
-                img_crop_list.append(img_crop)
-        else:
-            raise NotImplementedError
-        data[K.SUB_IMGS] = img_crop_list
-        return data
-
-    @classmethod
-    def get_input_keys(cls):
-        """get input keys"""
-        return [K.IM_PATH, K.DT_POLYS]
-
-    @classmethod
-    def get_output_keys(cls):
-        """get output keys"""
-        return [K.SUB_IMGS]
-
-    def sorted_boxes(self, dt_boxes):
-        """
-        Sort text boxes in order from top to bottom, left to right
-        args:
-            dt_boxes(array):detected text boxes with shape [4, 2]
-        return:
-            sorted boxes(array) with shape [4, 2]
-        """
-        dt_boxes = np.array(dt_boxes)
-        num_boxes = dt_boxes.shape[0]
-        sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
-        _boxes = list(sorted_boxes)
-
-        for i in range(num_boxes - 1):
-            for j in range(i, -1, -1):
-                if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and (
-                    _boxes[j + 1][0][0] < _boxes[j][0][0]
-                ):
-                    tmp = _boxes[j]
-                    _boxes[j] = _boxes[j + 1]
-                    _boxes[j + 1] = tmp
-                else:
-                    break
-        return _boxes
-
-    def get_minarea_rect_crop(self, img, points):
-        """get_minarea_rect_crop"""
-        bounding_box = cv2.minAreaRect(np.array(points).astype(np.int32))
-        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
-
-        index_a, index_b, index_c, index_d = 0, 1, 2, 3
-        if points[1][1] > points[0][1]:
-            index_a = 0
-            index_d = 1
-        else:
-            index_a = 1
-            index_d = 0
-        if points[3][1] > points[2][1]:
-            index_b = 2
-            index_c = 3
-        else:
-            index_b = 3
-            index_c = 2
-
-        box = [points[index_a], points[index_b], points[index_c], points[index_d]]
-        crop_img = self.get_rotate_crop_image(img, np.array(box))
-        return crop_img
-
-
-    def get_rotate_crop_image(self, img, points):
-        """
-        img_height, img_width = img.shape[0:2]
-        left = int(np.min(points[:, 0]))
-        right = int(np.max(points[:, 0]))
-        top = int(np.min(points[:, 1]))
-        bottom = int(np.max(points[:, 1]))
-        img_crop = img[top:bottom, left:right, :].copy()
-        points[:, 0] = points[:, 0] - left
-        points[:, 1] = points[:, 1] - top
-        """
-        assert len(points) == 4, "shape of points must be 4*2"
-        img_crop_width = int(
-            max(
-                np.linalg.norm(points[0] - points[1]),
-                np.linalg.norm(points[2] - points[3]),
-            )
-        )
-        img_crop_height = int(
-            max(
-                np.linalg.norm(points[0] - points[3]),
-                np.linalg.norm(points[1] - points[2]),
-            )
-        )
-        pts_std = np.float32(
-            [
-                [0, 0],
-                [img_crop_width, 0],
-                [img_crop_width, img_crop_height],
-                [0, img_crop_height],
-            ]
-        )
-        M = cv2.getPerspectiveTransform(points, pts_std)
-        dst_img = cv2.warpPerspective(
-            img,
-            M,
-            (img_crop_width, img_crop_height),
-            borderMode=cv2.BORDER_REPLICATE,
-            flags=cv2.INTER_CUBIC,
-        )
-        dst_img_height, dst_img_width = dst_img.shape[0:2]
-        if dst_img_height * 1.0 / dst_img_width >= 1.5:
-            dst_img = np.rot90(dst_img)
-        return dst_img
-
-    def reorder_poly_edge(self, points):
-        """Get the respective points composing head edge, tail edge, top
-        sideline and bottom sideline.
-
-        Args:
-            points (ndarray): The points composing a text polygon.
-
-        Returns:
-            head_edge (ndarray): The two points composing the head edge of text
-                polygon.
-            tail_edge (ndarray): The two points composing the tail edge of text
-                polygon.
-            top_sideline (ndarray): The points composing top curved sideline of
-                text polygon.
-            bot_sideline (ndarray): The points composing bottom curved sideline
-                of text polygon.
-        """
-
-        assert points.ndim == 2
-        assert points.shape[0] >= 4
-        assert points.shape[1] == 2
-
-        orientation_thr = 2.0  # an empirical hyperparameter
-
-        head_inds, tail_inds = self.find_head_tail(points, orientation_thr)
-        head_edge, tail_edge = points[head_inds], points[tail_inds]
-
-
-        pad_points = np.vstack([points, points])
-        if tail_inds[1] < 1:
-            tail_inds[1] = len(points)
-        sideline1 = pad_points[head_inds[1]:tail_inds[1]]
-        sideline2 = pad_points[tail_inds[1]:(head_inds[1] + len(points))]
-        return head_edge, tail_edge, sideline1, sideline2
-
-    def vector_slope(self, vec):
-        assert len(vec) == 2
-        return abs(vec[1] / (vec[0] + 1e-8)) 
-
-    def find_head_tail(self, points, orientation_thr):
-        """Find the head edge and tail edge of a text polygon.
-
-        Args:
-            points (ndarray): The points composing a text polygon.
-            orientation_thr (float): The threshold for distinguishing between
-                head edge and tail edge among the horizontal and vertical edges
-                of a quadrangle.
-
-        Returns:
-            head_inds (list): The indexes of two points composing head edge.
-            tail_inds (list): The indexes of two points composing tail edge.
-        """
-
-        assert points.ndim == 2
-        assert points.shape[0] >= 4
-        assert points.shape[1] == 2
-        assert isinstance(orientation_thr, float)
-
-        if len(points) > 4:
-            pad_points = np.vstack([points, points[0]])
-            edge_vec = pad_points[1:] - pad_points[:-1]
-
-            theta_sum = []
-            adjacent_vec_theta = []
-            for i, edge_vec1 in enumerate(edge_vec):
-                adjacent_ind = [x % len(edge_vec) for x in [i - 1, i + 1]]
-                adjacent_edge_vec = edge_vec[adjacent_ind]
-                temp_theta_sum = np.sum(
-                    self.vector_angle(edge_vec1, adjacent_edge_vec))
-                temp_adjacent_theta = self.vector_angle(adjacent_edge_vec[0],
-                                                        adjacent_edge_vec[1])
-                theta_sum.append(temp_theta_sum)
-                adjacent_vec_theta.append(temp_adjacent_theta)
-            theta_sum_score = np.array(theta_sum) / np.pi
-            adjacent_theta_score = np.array(adjacent_vec_theta) / np.pi
-            poly_center = np.mean(points, axis=0)
-            edge_dist = np.maximum(
-                norm(
-                    pad_points[1:] - poly_center, axis=-1),
-                norm(
-                    pad_points[:-1] - poly_center, axis=-1))
-            dist_score = edge_dist / np.max(edge_dist)
-            position_score = np.zeros(len(edge_vec))
-            score = 0.5 * theta_sum_score + 0.15 * adjacent_theta_score
-            score += 0.35 * dist_score
-            if len(points) % 2 == 0:
-                position_score[(len(score) // 2 - 1)] += 1
-                position_score[-1] += 1
-            score += 0.1 * position_score
-            pad_score = np.concatenate([score, score])
-            score_matrix = np.zeros((len(score), len(score) - 3))
-            x = np.arange(len(score) - 3) / float(len(score) - 4)
-            gaussian = 1. / (np.sqrt(2. * np.pi) * 0.5) * np.exp(-np.power(
-                (x - 0.5) / 0.5, 2.) / 2)
-            gaussian = gaussian / np.max(gaussian)
-            for i in range(len(score)):
-                score_matrix[i, :] = score[i] + pad_score[(i + 2):(i + len(
-                    score) - 1)] * gaussian * 0.3
-
-            head_start, tail_increment = np.unravel_index(score_matrix.argmax(),
-                                                            score_matrix.shape)
-            tail_start = (head_start + tail_increment + 2) % len(points)
-            head_end = (head_start + 1) % len(points)
-            tail_end = (tail_start + 1) % len(points)
-
-            if head_end > tail_end:
-                head_start, tail_start = tail_start, head_start
-                head_end, tail_end = tail_end, head_end
-            head_inds = [head_start, head_end]
-            tail_inds = [tail_start, tail_end]
-        else:
-            if self.vector_slope(points[1] - points[0]) + self.vector_slope(
-                    points[3] - points[2]) < self.vector_slope(
-                        points[2] - points[1]) + self.vector_slope(points[0] - points[3]):
-                horizontal_edge_inds = [[0, 1], [2, 3]]
-                vertical_edge_inds = [[3, 0], [1, 2]]
-            else:
-                horizontal_edge_inds = [[3, 0], [1, 2]]
-                vertical_edge_inds = [[0, 1], [2, 3]]
-
-            vertical_len_sum = norm(points[vertical_edge_inds[0][0]] - points[
-                vertical_edge_inds[0][1]]) + norm(points[vertical_edge_inds[1][
-                    0]] - points[vertical_edge_inds[1][1]])
-            horizontal_len_sum = norm(points[horizontal_edge_inds[0][
-                0]] - points[horizontal_edge_inds[0][1]]) + norm(points[
-                    horizontal_edge_inds[1][0]] - points[horizontal_edge_inds[1]
-                                                            [1]])
-
-            if vertical_len_sum > horizontal_len_sum * orientation_thr:
-                head_inds = horizontal_edge_inds[0]
-                tail_inds = horizontal_edge_inds[1]
-            else:
-                head_inds = vertical_edge_inds[0]
-                tail_inds = vertical_edge_inds[1]
-
-        return head_inds, tail_inds
-
-    def vector_angle(self, vec1, vec2):
-        if vec1.ndim > 1:
-            unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8).reshape((-1, 1))
-        else:
-            unit_vec1 = vec1 / (norm(vec1, axis=-1) + 1e-8)
-        if vec2.ndim > 1:
-            unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8).reshape((-1, 1))
-        else:
-            unit_vec2 = vec2 / (norm(vec2, axis=-1) + 1e-8)
-        return np.arccos(np.clip(np.sum(unit_vec1 * unit_vec2, axis=-1), -1.0, 1.0))
-
-
-    def get_minarea_rect(self, img, points):
-        bounding_box = cv2.minAreaRect(points)
-        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
-
-        index_a, index_b, index_c, index_d = 0, 1, 2, 3
-        if points[1][1] > points[0][1]:
-            index_a = 0
-            index_d = 1
-        else:
-            index_a = 1
-            index_d = 0
-        if points[3][1] > points[2][1]:
-            index_b = 2
-            index_c = 3
-        else:
-            index_b = 3
-            index_c = 2
-
-        box = [points[index_a], points[index_b], points[index_c], points[index_d]]
-        crop_img = self.get_rotate_crop_image(img, np.array(box))
-        return crop_img, box
-
-    def sample_points_on_bbox_bp(self, line, n=50):
-        """Resample n points on a line.
-
-        Args:
-            line (ndarray): The points composing a line.
-            n (int): The resampled points number.
-
-        Returns:
-            resampled_line (ndarray): The points composing the resampled line.
-        """
-        from numpy.linalg import norm
-        # sanity-check the validity of the input arguments
-        assert line.ndim == 2
-        assert line.shape[0] >= 2
-        assert line.shape[1] == 2
-        assert isinstance(n, int)
-        assert n > 0
-
-        length_list = [
-            norm(line[i + 1] - line[i]) for i in range(len(line) - 1)
-        ]
-        total_length = sum(length_list)
-        length_cumsum = np.cumsum([0.0] + length_list)
-        delta_length = total_length / (float(n) + 1e-8)
-        current_edge_ind = 0
-        resampled_line = [line[0]]
-
-        for i in range(1, n):
-            current_line_len = i * delta_length
-            while current_edge_ind + 1 < len(
-                    length_cumsum) and current_line_len >= length_cumsum[
-                        current_edge_ind + 1]:
-                current_edge_ind += 1
-            current_edge_end_shift = current_line_len - length_cumsum[
-                current_edge_ind]
-            if current_edge_ind >= len(length_list):
-                break
-            end_shift_ratio = current_edge_end_shift / length_list[
-                current_edge_ind]
-            current_point = line[current_edge_ind] + (line[current_edge_ind + 1]
-                                                    - line[current_edge_ind]
-                                                    ) * end_shift_ratio
-            resampled_line.append(current_point)
-        resampled_line.append(line[-1])
-        resampled_line = np.array(resampled_line)
-        return resampled_line
-
-    def sample_points_on_bbox(self, line, n=50):
-        """Resample n points on a line.
-
-        Args:
-            line (ndarray): The points composing a line.
-            n (int): The resampled points number.
-
-        Returns:
-            resampled_line (ndarray): The points composing the resampled line.
-        """
-        assert line.ndim == 2
-        assert line.shape[0] >= 2
-        assert line.shape[1] == 2
-        assert isinstance(n, int)
-        assert n > 0
-
-        length_list = [
-            norm(line[i + 1] - line[i]) for i in range(len(line) - 1)
-        ]
-        total_length = sum(length_list)
-        mean_length = total_length / (len(length_list) + 1e-8)
-        group = [[0]]
-        for i in range(len(length_list)):
-            point_id = i+1
-            if length_list[i] < 0.9 * mean_length:
-                for g in group:
-                    if i in g:
-                        g.append(point_id)
-                        break
-            else:
-                g = [point_id]
-                group.append(g)
-
-        top_tail_len = norm(line[0] - line[-1])
-        if top_tail_len < 0.9 * mean_length:
-            group[0].extend(g)
-            group.remove(g)
-        mean_positions = []  
-        for indices in group:  
-            x_sum = 0  
-            y_sum = 0  
-            for index in indices:  
-                x, y = line[index]  
-                x_sum += x  
-                y_sum += y  
-            num_points = len(indices)  
-            mean_x = x_sum / num_points  
-            mean_y = y_sum / num_points  
-            mean_positions.append((mean_x, mean_y)) 
-        resampled_line = np.array(mean_positions)
-        return resampled_line
-
-    def get_poly_rect_crop(self, img, points):
-        '''
-            Rectify and crop irregular, curved text regions using the polygon.
-            args: img: the image, as an ndarray
-            points: polygon vertex coordinates with shape N*2, as an ndarray
-            return: the rectified image crop, as an ndarray
-        '''
-        points = np.array(points).astype(np.int32).reshape(-1, 2)
-        temp_crop_img, temp_box = self.get_minarea_rect(img, points)
-        # compute the IoU between the minimum-area rectangle and the polygon
-        def get_union(pD, pG):
-            return Polygon(pD).union(Polygon(pG)).area
-
-        def get_intersection_over_union(pD, pG):
-            return get_intersection(pD, pG) / (get_union(pD, pG)+ 1e-10)
-
-        def get_intersection(pD, pG):
-            return Polygon(pD).intersection(Polygon(pG)).area
-
-        cal_IoU = get_intersection_over_union(points, temp_box)
-
-        if cal_IoU >= 0.7:
-            points = self.sample_points_on_bbox_bp(points, 31)
-            return temp_crop_img
-
-        points_sample = self.sample_points_on_bbox(points)
-        points_sample = points_sample.astype(np.int32)
-        head_edge, tail_edge, top_line, bot_line = self.reorder_poly_edge(points_sample)
-
-        resample_top_line = self.sample_points_on_bbox_bp(top_line, 15)
-        resample_bot_line = self.sample_points_on_bbox_bp(bot_line, 15)
-
-        sideline_mean_shift = np.mean(
-            resample_top_line, axis=0) - np.mean(
-                resample_bot_line, axis=0)
-        if sideline_mean_shift[1] > 0:
-            resample_bot_line, resample_top_line = resample_top_line, resample_bot_line
-        rectifier = AutoRectifier()
-        new_points = np.concatenate([resample_top_line, resample_bot_line])
-        new_points_list = list(new_points.astype(np.float32).reshape(1, -1).tolist())
-
-        if len(img.shape) == 2:
-            img = np.stack((img,)*3, axis=-1)
-        img_crop, image = rectifier.run(img, new_points_list, mode='homography')
-        return img_crop[0]
-
-
-class SaveTextDetResults(BaseTransform):
-    """Save Text Det Results"""
-
-    def __init__(self, save_dir, task='quad'):
-        super().__init__()
-        self.save_dir = save_dir
-        self.task = task
-        # Use the OpenCV backend to save the visualization results
-        self._writer = ImageWriter(backend="opencv")
-
-    def apply(self, data):
-        """apply"""
-        if self.save_dir is None:
-            logging.warning(
-                "The `save_dir` has been set to None, so the text detection result won't to be saved."
-            )
-            return data
-        fn = os.path.basename(data["input_path"])
-        save_path = os.path.join(self.save_dir, fn)
-        bbox_res = data[K.DT_POLYS]
-        if self.task == "quad":
-            vis_img = self.draw_rectangle(data[K.IM_PATH], bbox_res)
-        else:
-            vis_img = self.draw_polyline(data[K.IM_PATH], bbox_res)
-        self._writer.write(save_path, vis_img)
-        return data
-
-    @classmethod
-    def get_input_keys(cls):
-        """get input keys"""
-        return [K.IM_PATH, K.DT_POLYS, K.DT_SCORES]
-
-    @classmethod
-    def get_output_keys(cls):
-        """get output keys"""
-        return []
-
-    def draw_rectangle(self, img_path, boxes):
-        """draw rectangle"""
-        boxes = np.array(boxes)
-        img = cv2.imread(img_path)
-        img_show = img.copy()
-        for box in boxes.astype(int):
-            box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64)
-            cv2.polylines(img_show, [box], True, (0, 0, 255), 2)
-        return img_show
-    
-    def draw_polyline(self, img_path, boxes):
-        """draw polyline"""
-        img = cv2.imread(img_path)
-        img_show = img.copy()
-        for box in boxes:
-            box = np.array(box).astype(int)
-            box = np.reshape(box, [-1, 1, 2]).astype(np.int64)
-            cv2.polylines(img_show, [box], True, (0, 0, 255), 2)
-        return img_show
-
-
-class PrintResult(BaseTransform):
-    """Print Result Transform"""
-
-    def apply(self, data):
-        """apply"""
-        logging.info("The prediction result is:")
-        logging.info(data[K.DT_POLYS])
-        return data
-
-    @classmethod
-    def get_input_keys(cls):
-        """get input keys"""
-        return [K.DT_SCORES]
-
-    @classmethod
-    def get_output_keys(cls):
-        """get output keys"""
-        return []
-
-    # DT_SCORES = 'dt_scores'
-    # DT_POLYS = 'dt_polys'
-
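
One detail worth keeping from the deleted DBPostProcess: box expansion follows the DB unclip rule, offsetting each polygon outward by distance = area * unclip_ratio / perimeter. A small worked check using the same shapely dependency as the deleted code (values illustrative):

    from shapely.geometry import Polygon

    quad = [(0, 0), (100, 0), (100, 20), (0, 20)]  # a 100x20 text box
    poly = Polygon(quad)
    distance = poly.area * 1.5 / poly.length  # 2000 * 1.5 / 240 = 12.5 px outward
    print(distance)

Likewise, DetResizeForTest with limit_type="max" and limit_side_len=960 maps a 1080x1920 input to 544x960: the ratio 960/1920 = 0.5 gives 540x960, then each side is rounded to the nearest multiple of 32.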
-

+ 0 - 698
paddlex/modules/text_detection/predictor/utils.py

@@ -1,698 +0,0 @@
-import os, sys
-import numpy as np
-from numpy import cos, sin, arctan, sqrt
-import cv2
-import copy
-import time
-
-def Homography(image, img_points, world_width, world_height,
-               interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0):
-    """
-    Warp the image to a new view via a perspective (homography) transform and return the transformed image.
-
-    Args:
-        image (np.ndarray): The input image, as a numpy array.
-        img_points (List[Tuple[int, int]]): Coordinates of four points on the image, ordered top-left, top-right, bottom-right, bottom-left.
-        world_width (int): Width of the transformed image in world coordinates.
-        world_height (int): Height of the transformed image in world coordinates.
-        interpolation (int, optional): Interpolation method; defaults to cv2.INTER_CUBIC.
-        ratio_width (float, optional): Scaling ratio of the transformed image along the x axis; defaults to 1.0.
-        ratio_height (float, optional): Scaling ratio of the transformed image along the y axis; defaults to 1.0.
-
-    Returns:
-        np.ndarray: The transformed image, as a numpy array.
-    
-    """
-    _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
-
-    expand_x = int(0.5 * world_width * (ratio_width - 1))
-    expand_y = int(0.5 * world_height * (ratio_height - 1))
-
-    pt_lefttop = [expand_x, expand_y]
-    pt_righttop = [expand_x + world_width, expand_y]
-    pt_leftbottom = [expand_x + world_width, expand_y + world_height]
-    pt_rightbottom = [expand_x, expand_y + world_height]
-
-    pts_std = np.float32([pt_lefttop, pt_righttop,
-                          pt_leftbottom, pt_rightbottom])
-
-    img_crop_width = int(world_width * ratio_width)
-    img_crop_height = int(world_height * ratio_height)
-
-    M = cv2.getPerspectiveTransform(_points, pts_std)
-
-    dst_img = cv2.warpPerspective(
-        image,
-        M, (img_crop_width, img_crop_height),
-        borderMode=cv2.BORDER_CONSTANT,  # BORDER_CONSTANT BORDER_REPLICATE
-        flags=interpolation)
-
-    return dst_img
-
-
-class CurveTextRectifier:
-    """
-    spatial transformer via monocular vision
-    """
-    def __init__(self):
-        self.get_virtual_camera_parameter()
-
-
-    def get_virtual_camera_parameter(self):
-        vcam_thz = 0
-        vcam_thx1 = 180
-        vcam_thy = 180
-        vcam_thx2 = 0
-
-        vcam_x = 0
-        vcam_y = 0
-        vcam_z = 100
-
-        radian = np.pi / 180
-
-        angle_z = radian * vcam_thz
-        angle_x1 = radian * vcam_thx1
-        angle_y = radian * vcam_thy
-        angle_x2 = radian * vcam_thx2
-
-        optic_x = vcam_x
-        optic_y = vcam_y
-        optic_z = vcam_z
-
-        fu = 100
-        fv = 100
-
-        matT = np.zeros((4, 4))
-        matT[0, 0] = cos(angle_z) * cos(angle_y) - sin(angle_z) * sin(angle_x1) * sin(angle_y)
-        matT[0, 1] = cos(angle_z) * sin(angle_y) * sin(angle_x2) - sin(angle_z) * (
-                    cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2))
-        matT[0, 2] = cos(angle_z) * sin(angle_y) * cos(angle_x2) + sin(angle_z) * (
-                    cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2))
-        matT[0, 3] = optic_x
-        matT[1, 0] = sin(angle_z) * cos(angle_y) + cos(angle_z) * sin(angle_x1) * sin(angle_y)
-        matT[1, 1] = sin(angle_z) * sin(angle_y) * sin(angle_x2) + cos(angle_z) * (
-                    cos(angle_x1) * cos(angle_x2) - sin(angle_x1) * cos(angle_y) * sin(angle_x2))
-        matT[1, 2] = sin(angle_z) * sin(angle_y) * cos(angle_x2) - cos(angle_z) * (
-                    cos(angle_x1) * sin(angle_x2) + sin(angle_x1) * cos(angle_y) * cos(angle_x2))
-        matT[1, 3] = optic_y
-        matT[2, 0] = -cos(angle_x1) * sin(angle_y)
-        matT[2, 1] = cos(angle_x1) * cos(angle_y) * sin(angle_x2) + sin(angle_x1) * cos(angle_x2)
-        matT[2, 2] = cos(angle_x1) * cos(angle_y) * cos(angle_x2) - sin(angle_x1) * sin(angle_x2)
-        matT[2, 3] = optic_z
-        matT[3, 0] = 0
-        matT[3, 1] = 0
-        matT[3, 2] = 0
-        matT[3, 3] = 1
-
-        matS = np.zeros((4, 4))
-        matS[2, 3] = 0.5
-        matS[3, 2] = 0.5
-
-        self.ifu = 1 / fu
-        self.ifv = 1 / fv
-
-        self.matT = matT
-        self.matS = matS
-        self.K = np.dot(matT.T, matS)
-        self.K = np.dot(self.K, matT)
-
-
-    def vertical_text_process(self, points, org_size):
-        """
-        change the point sequence and process
-        :param points:
-        :param org_size:
-        :return:
-        """
-        org_w, org_h = org_size
-        _points = np.array(points).reshape(-1).tolist()
-        _points = np.array(_points[2:] + _points[:2]).reshape(-1, 2)
-
-        # convert to horizontal points
-        adjusted_points = np.zeros(_points.shape, dtype=np.float32)
-        adjusted_points[:, 0] = _points[:, 1]
-        adjusted_points[:, 1] = org_h - _points[:, 0] - 1
-
-        _image_coord, _world_coord, _new_image_size = self.horizontal_text_process(adjusted_points)
-
-        # # convert to vertical points back
-        image_coord = _points.reshape(1, -1, 2)
-        world_coord = np.zeros(_world_coord.shape, dtype=np.float32)
-        world_coord[:, :, 0] = 0 - _world_coord[:, :, 1]
-        world_coord[:, :, 1] = _world_coord[:, :, 0]
-        world_coord[:, :, 2] = _world_coord[:, :, 2]
-        new_image_size = (_new_image_size[1], _new_image_size[0])
-
-        return image_coord, world_coord, new_image_size
-
-
-    def horizontal_text_process(self, points):
-        """
-        get image coordinate and world coordinate
-        :param points:
-        :return:
-        """
-        poly = np.array(points).reshape(-1)
-
-        dx_list = []
-        dy_list = []
-        for i in range(1, len(poly) // 2):
-            xdx = poly[i * 2] - poly[(i - 1) * 2]
-            xdy = poly[i * 2 + 1] - poly[(i - 1) * 2 + 1]
-            d = sqrt(xdx ** 2 + xdy ** 2)
-            dx_list.append(d)
-
-        for i in range(0, len(poly) // 4):
-            ydx = poly[i * 2] - poly[len(poly) - 1 - (i * 2 + 1)]
-            ydy = poly[i * 2 + 1] - poly[len(poly) - 1 - (i * 2)]
-            d = sqrt(ydx ** 2 + ydy ** 2)
-            dy_list.append(d)
-
-        dx_list = [(dx_list[i] + dx_list[len(dx_list) - 1 - i]) / 2 for i in range(len(dx_list) // 2)]
-
-        height = np.around(np.mean(dy_list))
-
-        rect_coord = [0, 0]
-        for i in range(0, len(poly) // 4 - 1):
-            x = rect_coord[-2]
-            x += dx_list[i]
-            y = 0
-            rect_coord.append(x)
-            rect_coord.append(y)
-
-        rect_coord_half = copy.deepcopy(rect_coord)
-        for i in range(0, len(poly) // 4):
-            x = rect_coord_half[len(rect_coord_half) - 2 * i - 2]
-            y = height
-            rect_coord.append(x)
-            rect_coord.append(y)
-
-        np_rect_coord = np.array(rect_coord).reshape(-1, 2)
-        x_min = np.min(np_rect_coord[:, 0])
-        y_min = np.min(np_rect_coord[:, 1])
-        x_max = np.max(np_rect_coord[:, 0])
-        y_max = np.max(np_rect_coord[:, 1])
-        new_image_size = (int(x_max - x_min + 0.5), int(y_max - y_min + 0.5))
-        x_mean = (x_max - x_min) / 2
-        y_mean = (y_max - y_min) / 2
-        np_rect_coord[:, 0] -= x_mean
-        np_rect_coord[:, 1] -= y_mean
-        rect_coord = np_rect_coord.reshape(-1).tolist()
-
-        rect_coord = np.array(rect_coord).reshape(-1, 2)
-        world_coord = np.ones((len(rect_coord), 3)) * 0
-
-        world_coord[:, :2] = rect_coord
-
-        image_coord = np.array(poly).reshape(1, -1, 2)
-        world_coord = world_coord.reshape(1, -1, 3)
-
-        return image_coord, world_coord, new_image_size
-
-
-    def horizontal_text_estimate(self, points):
-        """
-        horizontal or vertical text
-        :param points:
-        :return:
-        """
-        pts = np.array(points).reshape(-1, 2)
-        x_min = int(np.min(pts[:, 0]))
-        y_min = int(np.min(pts[:, 1]))
-        x_max = int(np.max(pts[:, 0]))
-        y_max = int(np.max(pts[:, 1]))
-        x = x_max - x_min
-        y = y_max - y_min
-        is_horizontal_text = True
-        if y / x > 1.5: # vertical text condition
-            is_horizontal_text = False
-        return is_horizontal_text
-
-
-    def virtual_camera_to_world(self, size):
-        ifu, ifv = self.ifu, self.ifv
-        K, matT = self.K, self.matT
-
-        ppu = size[0] / 2 + 1e-6
-        ppv = size[1] / 2 + 1e-6
-
-        P = np.zeros((size[1], size[0], 3))
-
-        lu = np.array([i for i in range(size[0])])
-        lv = np.array([i for i in range(size[1])])
-        u, v = np.meshgrid(lu, lv)
-
-        yp = (v - ppv) * ifv
-        xp = (u - ppu) * ifu
-        angle_a = arctan(sqrt(xp * xp + yp * yp))
-        angle_b = arctan(yp / xp)
-
-        D0 = sin(angle_a) * cos(angle_b)
-        D1 = sin(angle_a) * sin(angle_b)
-        D2 = cos(angle_a)
-
-        D0[xp <= 0] = -D0[xp <= 0]
-        D1[xp <= 0] = -D1[xp <= 0]
-
-        ratio_a = K[0, 0] * D0 * D0 + K[1, 1] * D1 * D1 + K[2, 2] * D2 * D2 + \
-                  (K[0, 1] + K[1, 0]) * D0 * D1 + (K[0, 2] + K[2, 0]) * D0 * D2 + (K[1, 2] + K[2, 1]) * D1 * D2
-        ratio_b = (K[0, 3] + K[3, 0]) * D0 + (K[1, 3] + K[3, 1]) * D1 + (K[2, 3] + K[3, 2]) * D2
-        ratio_c = K[3, 3] * np.ones(ratio_b.shape)
-
-        delta = ratio_b * ratio_b - 4 * ratio_a * ratio_c
-        t = np.zeros(delta.shape)
-        t[ratio_a == 0] = -ratio_c[ratio_a == 0] / ratio_b[ratio_a == 0]
-        t[ratio_a != 0] = (-ratio_b[ratio_a != 0] + sqrt(delta[ratio_a != 0])) / (2 * ratio_a[ratio_a != 0])
-        t[delta < 0] = 0
-
-        P[:, :, 0] = matT[0, 3] + t * (matT[0, 0] * D0 + matT[0, 1] * D1 + matT[0, 2] * D2)
-        P[:, :, 1] = matT[1, 3] + t * (matT[1, 0] * D0 + matT[1, 1] * D1 + matT[1, 2] * D2)
-        P[:, :, 2] = matT[2, 3] + t * (matT[2, 0] * D0 + matT[2, 1] * D1 + matT[2, 2] * D2)
-
-        return P
-
-
-    def world_to_image(self, image_size, world, intrinsic, distCoeffs, rotation, tvec):
-        r11 = rotation[0, 0]
-        r12 = rotation[0, 1]
-        r13 = rotation[0, 2]
-        r21 = rotation[1, 0]
-        r22 = rotation[1, 1]
-        r23 = rotation[1, 2]
-        r31 = rotation[2, 0]
-        r32 = rotation[2, 1]
-        r33 = rotation[2, 2]
-
-        t1 = tvec[0]
-        t2 = tvec[1]
-        t3 = tvec[2]
-
-        k1 = distCoeffs[0]
-        k2 = distCoeffs[1]
-        p1 = distCoeffs[2]
-        p2 = distCoeffs[3]
-        k3 = distCoeffs[4]
-        k4 = distCoeffs[5]
-        k5 = distCoeffs[6]
-        k6 = distCoeffs[7]
-
-        if len(distCoeffs) > 8:
-            s1 = distCoeffs[8]
-            s2 = distCoeffs[9]
-            s3 = distCoeffs[10]
-            s4 = distCoeffs[11]
-        else:
-            s1 = s2 = s3 = s4 = 0
-
-        if len(distCoeffs) > 12:
-            tx = distCoeffs[12]
-            ty = distCoeffs[13]
-        else:
-            tx = ty = 0
-
-        fu = intrinsic[0, 0]
-        fv = intrinsic[1, 1]
-        ppu = intrinsic[0, 2]
-        ppv = intrinsic[1, 2]
-
-        cos_tx = cos(tx)
-        cos_ty = cos(ty)
-        sin_tx = sin(tx)
-        sin_ty = sin(ty)
-
-        # tilted-sensor correction terms, following OpenCV's tilted distortion model
-        tao11 = cos_ty * cos_tx * cos_ty + sin_ty * cos_tx * sin_ty
-        tao12 = cos_ty * cos_tx * sin_ty * sin_tx - sin_ty * cos_tx * cos_ty * sin_tx
-        tao13 = -cos_ty * cos_tx * sin_ty * cos_tx + sin_ty * cos_tx * cos_ty * cos_tx
-        tao21 = -sin_tx * sin_ty
-        tao22 = cos_ty * cos_tx * cos_tx + sin_tx * cos_ty * sin_tx
-        tao23 = cos_ty * cos_tx * sin_tx - sin_tx * cos_ty * cos_tx
-
-        P = np.zeros((image_size[1], image_size[0], 2))
-
-        c3 = r31 * world[:, :, 0] + r32 * world[:, :, 1] + r33 * world[:, :, 2] + t3
-        c1 = r11 * world[:, :, 0] + r12 * world[:, :, 1] + r13 * world[:, :, 2] + t1
-        c2 = r21 * world[:, :, 0] + r22 * world[:, :, 1] + r23 * world[:, :, 2] + t2
-
-        x1 = c1 / c3
-        y1 = c2 / c3
-        x12 = x1 * x1
-        y12 = y1 * y1
-        x1y1 = 2 * x1 * y1
-        r2 = x12 + y12
-        r4 = r2 * r2
-        r6 = r2 * r4
-
-        # rational radial distortion combined with tangential and thin-prism terms
-        radial_distortion = (1 + k1 * r2 + k2 * r4 + k3 * r6) / (1 + k4 * r2 + k5 * r4 + k6 * r6)
-        x2 = x1 * radial_distortion + p1 * x1y1 + p2 * (r2 + 2 * x12) + s1 * r2 + s2 * r4
-        y2 = y1 * radial_distortion + p2 * x1y1 + p1 * (r2 + 2 * y12) + s3 * r2 + s4 * r4
-
-        x3 = tao11 * x2 + tao12 * y2 + tao13
-        y3 = tao21 * x2 + tao22 * y2 + tao23
-
-        P[:, :, 0] = fu * x3 + ppu
-        P[:, :, 1] = fv * y3 + ppv
-        P[c3 <= 0] = 0  # zero out pixels whose world points lie behind the camera
-
-        return P
-
-
-    def spatial_transform(self, image_data, new_image_size, mtx, dist, rvecs, tvecs, interpolation):
-        rotation, _ = cv2.Rodrigues(rvecs)
-        world_map = self.virtual_camera_to_world(new_image_size)
-        image_map = self.world_to_image(new_image_size, world_map, mtx, dist, rotation, tvecs)
-        image_map = image_map.astype(np.float32)
-        dst = cv2.remap(image_data, image_map[:, :, 0], image_map[:, :, 1], interpolation)
-        return dst
-
-
-    def calibrate(self, org_size, image_coord, world_coord):
-        """
-        calibration
-        :param org_size:
-        :param image_coord:
-        :param world_coord:
-        :return:
-        """
-        # flag = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL  | cv2.CALIB_THIN_PRISM_MODEL
-        flag = cv2.CALIB_RATIONAL_MODEL
-        flag2 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL
-        flag3 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_THIN_PRISM_MODEL
-        flag4 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_ZERO_TANGENT_DIST | cv2.CALIB_FIX_ASPECT_RATIO
-        flag5 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_TILTED_MODEL | cv2.CALIB_ZERO_TANGENT_DIST
-        flag6 = cv2.CALIB_RATIONAL_MODEL | cv2.CALIB_FIX_ASPECT_RATIO
-        flag_list = [flag2, flag3, flag4, flag5, flag6]
-
-        ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(world_coord.astype(np.float32),
-                                                                image_coord.astype(np.float32),
-                                                                org_size,
-                                                                None,
-                                                                None,
-                                                                flags=flag)
-        if ret > 2:
-            # strategies
-            min_ret = ret
-            for i, flag in enumerate(flag_list):
-                _ret, _mtx, _dist, _rvecs, _tvecs = cv2.calibrateCamera(world_coord.astype(np.float32),
-                                                                   image_coord.astype(np.float32),
-                                                                   org_size,
-                                                                   None,
-                                                                   None,
-                                                                   flags=flag)
-                if _ret < min_ret:
-                    min_ret = _ret
-                    ret, mtx, dist, rvecs, tvecs = _ret, _mtx, _dist, _rvecs, _tvecs
-
-        return ret, mtx, dist, rvecs, tvecs
-
-
-    def dc_homo(self, img, img_points, obj_points, is_horizontal_text, interpolation=cv2.INTER_LINEAR,
-                ratio_width=1.0, ratio_height=1.0):
-        """
-        divide and conquer: homography
-        # ratio_width and ratio_height must be 1.0 here
-        """
-        _img_points = img_points.reshape(-1, 2)
-        _obj_points = obj_points.reshape(-1, 3)
-
-        homo_img_list = []
-        width_list = []
-        height_list = []
-        # divide and conquer
-        for i in range(len(_img_points) // 2 - 1):
-            new_img_points = np.zeros((4, 2)).astype(np.float32)
-            new_obj_points = np.zeros((4, 2)).astype(np.float32)
-
-            new_img_points[0:2, :] = _img_points[i:(i + 2), :2]
-            new_img_points[2:4, :] = _img_points[::-1, :][i:(i + 2), :2][::-1, :]
-
-            new_obj_points[0:2, :] = _obj_points[i:(i + 2), :2]
-            new_obj_points[2:4, :] = _obj_points[::-1, :][i:(i + 2), :2][::-1, :]
-
-            if is_horizontal_text:
-                world_width = np.abs(new_obj_points[1, 0] - new_obj_points[0, 0])
-                world_height = np.abs(new_obj_points[3, 1] - new_obj_points[0, 1])
-            else:
-                world_width = np.abs(new_obj_points[1, 1] - new_obj_points[0, 1])
-                world_height = np.abs(new_obj_points[3, 0] - new_obj_points[0, 0])
-
-            homo_img = self.Homography(img, new_img_points, world_width, world_height,
-                                       interpolation=interpolation,
-                                       ratio_width=ratio_width, ratio_height=ratio_height)
-
-            homo_img_list.append(homo_img)
-            _h, _w = homo_img.shape[:2]
-            width_list.append(_w)
-            height_list.append(_h)
-
-        # stitching
-        rectified_image = np.zeros((np.max(height_list), sum(width_list), 3)).astype(np.uint8)
-
-        st = 0
-        for (homo_img, w, h) in zip(homo_img_list, width_list, height_list):
-            rectified_image[:h, st:st + w, :] = homo_img
-            st += w
-
-        if not is_horizontal_text:
-            # vertical rotation
-            rectified_image = np.rot90(rectified_image, 3)
-
-        return rectified_image
-
-    def Homography(self, image, img_points, world_width, world_height,
-                interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0):
-        """
-        将图像透视变换到新的视角,返回变换后的图像。
-        
-        Args:
-            image (np.ndarray): 输入的图像,应为numpy数组类型。
-            img_points (List[Tuple[int, int]]): 图像上的四个点的坐标,顺序为左上角、右上角、右下角、左下角。
-            world_width (int): 变换后图像在世界坐标系中的宽度。
-            world_height (int): 变换后图像在世界坐标系中的高度。
-            interpolation (int, optional): 插值方式,默认为cv2.INTER_CUBIC。
-            ratio_width (float, optional): 变换后图像在x轴上的缩放比例,默认为1.0。
-            ratio_height (float, optional): 变换后图像在y轴上的缩放比例,默认为1.0。
-        
-        Returns:
-            np.ndarray: 变换后的图像,为numpy数组类型。
-        
-        """
-        _points = np.array(img_points).reshape(-1, 2).astype(np.float32)
-
-        expand_x = int(0.5 * world_width * (ratio_width - 1))
-        expand_y = int(0.5 * world_height * (ratio_height - 1))
-
-        pt_lefttop = [expand_x, expand_y]
-        pt_righttop = [expand_x + world_width, expand_y]
-        pt_rightbottom = [expand_x + world_width, expand_y + world_height]
-        pt_leftbottom = [expand_x, expand_y + world_height]
-
-        pts_std = np.float32([pt_lefttop, pt_righttop,
-                            pt_rightbottom, pt_leftbottom])
-
-        img_crop_width = int(world_width * ratio_width)
-        img_crop_height = int(world_height * ratio_height)
-
-        M = cv2.getPerspectiveTransform(_points, pts_std)
-
-        dst_img = cv2.warpPerspective(
-            image,
-            M, (img_crop_width, img_crop_height),
-            borderMode=cv2.BORDER_CONSTANT,  # BORDER_CONSTANT BORDER_REPLICATE
-            flags=interpolation)
-
-        return dst_img
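
For reference, the core of the method above is cv2.getPerspectiveTransform followed by cv2.warpPerspective. A self-contained sketch (not part of this diff; the corner coordinates and image path are made up for illustration):

import cv2
import numpy as np

# hypothetical source quadrilateral, ordered top-left, top-right, bottom-right, bottom-left
src = np.float32([[120, 80], [360, 90], [355, 150], [118, 142]])
world_w, world_h = 240, 60
dst_pts = np.float32([[0, 0], [world_w, 0], [world_w, world_h], [0, world_h]])

img = cv2.imread("sample.jpg")  # assumed input image
M = cv2.getPerspectiveTransform(src, dst_pts)
warped = cv2.warpPerspective(img, M, (world_w, world_h),
                             borderMode=cv2.BORDER_CONSTANT,
                             flags=cv2.INTER_CUBIC)
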
-
-
-    def __call__(self, image_data, points, interpolation=cv2.INTER_LINEAR, ratio_width=1.0, ratio_height=1.0, mode='calibration'):
-        """
-        spatial transform for a poly text
-        :param image_data:
-        :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
-        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
-        :param ratio_width:  roi_image width expansion. It should not be smaller than 1.0
-        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
-        :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
-        :return:
-        """
-        org_h, org_w = image_data.shape[:2]
-        org_size = (org_w, org_h)
-        self.image = image_data
-
-        is_horizontal_text = self.horizontal_text_estimate(points)
-        if is_horizontal_text:
-            image_coord, world_coord, new_image_size = self.horizontal_text_process(points)
-        else:
-            image_coord, world_coord, new_image_size = self.vertical_text_process(points, org_size)
-
-        if mode.lower() == 'calibration':
-            ret, mtx, dist, rvecs, tvecs = self.calibrate(org_size, image_coord, world_coord)
-
-            st_size = (int(new_image_size[0]*ratio_width), int(new_image_size[1]*ratio_height))
-            dst = self.spatial_transform(image_data, st_size, mtx, dist[0], rvecs[0], tvecs[0], interpolation)
-        elif mode.lower() == 'homography':
-            # ratio_width and ratio_height must be 1.0 here; ret is set to a small
-            # dummy loss (0.01) since homography mode reports no calibration loss
-            ret = 0.01
-            dst = self.dc_homo(image_data, image_coord, world_coord, is_horizontal_text,
-                               interpolation=interpolation, ratio_width=1.0, ratio_height=1.0)
-        else:
-            raise ValueError('mode must be ["calibration", "homography"], but got {}'.format(mode))
-
-        return dst, ret
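
A short call sketch for the rectifier above (illustrative only; the image path and polygon coordinates are made up, and the class is assumed importable from this module):

import cv2

image_data = cv2.imread("curved_text.jpg")  # assumed input image
# hypothetical 10-point polygon around a curved word, clockwise from top-left
points = [10, 40, 60, 30, 110, 28, 160, 33, 210, 45,
          205, 90, 158, 78, 108, 72, 58, 75, 12, 85]
rectifier = CurveTextRectifier()
flat, loss = rectifier(image_data, points, mode='calibration')
flat_h, _ = rectifier(image_data, points, mode='homography')  # ratios must stay 1.0
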
-
-
-class AutoRectifier:
-    def __init__(self):
-        self.npoints = 10
-        self.curveTextRectifier = CurveTextRectifier()
-
-    @staticmethod
-    def get_rotate_crop_image(img, points, interpolation=cv2.INTER_CUBIC, ratio_width=1.0, ratio_height=1.0):
-        """
-        crop or homography
-        :param img:
-        :param points:
-        :param interpolation:
-        :param ratio_width:
-        :param ratio_height:
-        :return:
-        """
-        h, w = img.shape[:2]
-        _points = np.array(points).reshape(-1, 2).astype(np.float32)
-
-        if len(_points) != 4:
-            x_min = int(np.min(_points[:, 0]))
-            y_min = int(np.min(_points[:, 1]))
-            x_max = int(np.max(_points[:, 0]))
-            y_max = int(np.max(_points[:, 1]))
-            dx = x_max - x_min
-            dy = y_max - y_min
-            expand_x = int(0.5 * dx * (ratio_width - 1))
-            expand_y = int(0.5 * dy * (ratio_height - 1))
-            x_min = np.clip(int(x_min - expand_x), 0, w - 1)
-            y_min = np.clip(int(y_min - expand_y), 0, h - 1)
-            x_max = np.clip(int(x_max + expand_x), 0, w - 1)
-            y_max = np.clip(int(y_max + expand_y), 0, h - 1)
-
-            dst_img = img[y_min:y_max, x_min:x_max, :].copy()
-        else:
-            img_crop_width = int(
-                max(
-                    np.linalg.norm(_points[0] - _points[1]),
-                    np.linalg.norm(_points[2] - _points[3])))
-            img_crop_height = int(
-                max(
-                    np.linalg.norm(_points[0] - _points[3]),
-                    np.linalg.norm(_points[1] - _points[2])))
-
-            dst_img = Homography(img, _points, img_crop_width, img_crop_height, interpolation, ratio_width, ratio_height)
-
-        return dst_img
-
-
-    def visualize(self, image_data, points_list):
-        visualization = image_data.copy()
-
-        for box in points_list:
-            box = np.array(box).reshape(-1, 2).astype(np.int32)
-            cv2.drawContours(visualization, [np.array(box).reshape((-1, 1, 2))], -1, (0, 0, 255), 2)
-            for i, p in enumerate(box):
-                if i != 0:
-                    cv2.circle(visualization, tuple(p), radius=1, color=(255, 0, 0), thickness=2)
-                else:
-                    cv2.circle(visualization, tuple(p), radius=1, color=(255, 255, 0), thickness=2)
-        return visualization
-
-
-    def __call__(self, image_data, points, interpolation=cv2.INTER_LINEAR,
-                 ratio_width=1.0, ratio_height=1.0, loss_thresh=5.0, mode='calibration'):
-        """
-        rectification in strategies for a poly text
-        :param image_data:
-        :param points: [x1,y1,x2,y2,x3,y3,...], clockwise order, (x1,y1) must be the top-left of first char.
-        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
-        :param ratio_width:  roi_image width expansion. It should not be smaller than 1.0
-        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
-        :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
-        :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
-        :return:
-        """
-        _points = np.array(points).reshape(-1, 2)
-        if len(_points) >= self.npoints and len(_points) % 2 == 0:
-            try:
-                curveTextRectifier = CurveTextRectifier()
-
-                dst_img, loss = curveTextRectifier(image_data, points, interpolation, ratio_width, ratio_height, mode)
-                if loss >= 2:
-                    # for robustness: a large loss means the region cannot be
-                    # reconstructed correctly, so try alternative strategies
-                    img_list, loss_list = [dst_img], [loss]
-                    _dst_img, _loss = PlanB()(image_data, points, curveTextRectifier,
-                                              interpolation, ratio_width, ratio_height,
-                                              loss_thresh=loss_thresh,
-                                              square=True)
-                    img_list += [_dst_img]
-                    loss_list += [_loss]
-
-                    _dst_img, _loss = PlanB()(image_data, points, curveTextRectifier,
-                                              interpolation, ratio_width, ratio_height,
-                                              loss_thresh=loss_thresh, square=False)
-                    img_list += [_dst_img]
-                    loss_list += [_loss]
-
-                    min_loss = min(loss_list)
-                    dst_img = img_list[loss_list.index(min_loss)]
-
-                    if min_loss >= loss_thresh:
-                        print('calibration loss {} is too large for the spatial transformer; falling back to get_rotate_crop_image'.format(min_loss))
-                        dst_img = self.get_rotate_crop_image(image_data, points, interpolation, ratio_width, ratio_height)
-            except Exception as e:
-                print(e)
-                dst_img = self.get_rotate_crop_image(image_data, points, interpolation, ratio_width, ratio_height)
-        else:
-            dst_img = self.get_rotate_crop_image(image_data, _points, interpolation, ratio_width, ratio_height)
-
-        return dst_img
-
-
-    def run(self, image_data, points_list, interpolation=cv2.INTER_LINEAR,
-            ratio_width=1.0, ratio_height=1.0, loss_thresh=5.0, mode='calibration'):
-        """
-        run for texts in an image
-        :param image_data: numpy.ndarray. The shape is [h, w, 3]
-        :param points_list: [[x1,y1,x2,y2,x3,y3,...], [x1,y1,x2,y2,x3,y3,...], ...], clockwise order, (x1,y1) must be the top-left of first char.
-        :param interpolation: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
-        :param ratio_width:  roi_image width expansion. It should not be smaller than 1.0
-        :param ratio_height: roi_image height expansion. It should not be smaller than 1.0
-        :param loss_thresh: if loss greater than loss_thresh --> get_rotate_crop_image
-        :param mode: 'calibration' or 'homography'. when homography, ratio_width and ratio_height must be 1.0
-        :return: res: roi-image list, visualized_image: draw polys in original image
-        """
-        if image_data is None:
-            raise ValueError('image_data cannot be None')
-        if not isinstance(points_list, list):
-            raise ValueError('points_list must be a list')
-        for points in points_list:
-            if not isinstance(points, list):
-                raise ValueError('each element of points_list must be a list')
-
-        if ratio_width < 1.0 or ratio_height < 1.0:
-            raise ValueError('ratio_width and ratio_height cannot be smaller than 1, but got ({}, {})'.format(ratio_width, ratio_height))
-
-        if mode.lower() != 'calibration' and mode.lower() != 'homography':
-            raise ValueError('mode must be ["calibration", "homography"], but got {}'.format(mode))
-
-        if mode.lower() == 'homography' and (ratio_width != 1.0 or ratio_height != 1.0):
-            raise ValueError('ratio_width and ratio_height must be 1.0 when mode is homography, but got mode:{}, ratio:({},{})'.format(mode, ratio_width, ratio_height))
-
-        res = []
-        for points in points_list:
-            rectified_img = self(image_data, points, interpolation, ratio_width, ratio_height,
-                                 loss_thresh=loss_thresh, mode=mode)
-            res.append(rectified_img)
-
-        # visualize
-        visualized_image = self.visualize(image_data, points_list)
-
-        return res, visualized_image
-
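
Putting it together, a minimal usage sketch for AutoRectifier (not part of this diff; the image path and the detection polygon are placeholders):

import cv2

image = cv2.imread("doc_photo.jpg")  # assumed input photo with curved text
polys = [[10, 40, 60, 30, 110, 28, 160, 33, 210, 45,
          205, 90, 158, 78, 108, 72, 58, 75, 12, 85]]
rectifier = AutoRectifier()
crops, vis = rectifier.run(image, polys, mode='calibration')
cv2.imwrite("visualization.jpg", vis)
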

+ 0 - 175
paddlex/pipelines/OCR/pipeline.py

@@ -1,175 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import cv2
-
-from ..base import BasePipeline
-from ...modules.text_detection.model_list import MODELS as text_det_models
-from ...modules.text_recognition.model_list import MODELS as text_rec_models
-from ...modules import create_model, PaddleInferenceOption
-from ...modules.text_detection import transforms as text_det_T
-from .utils import draw_ocr_box_txt
-
-
-class OCRPipeline(BasePipeline):
-    """OCR Pipeline"""
-
-    entities = "OCR"
-
-    def __init__(
-        self,
-        text_det_model_name=None,
-        text_rec_model_name=None,
-        text_det_model_dir=None,
-        text_rec_model_dir=None,
-        text_det_kernel_option=None,
-        text_rec_kernel_option=None,
-        output="./",
-        device="gpu",
-        **kwargs,
-    ):
-        super().__init__(**kwargs)
-        self.text_det_model_name = text_det_model_name
-        self.text_rec_model_name = text_rec_model_name
-        self.text_det_model_dir = text_det_model_dir
-        self.text_rec_model_dir = text_rec_model_dir
-        self.output = output
-        self.device = device
-        self.text_det_kernel_option = text_det_kernel_option
-        self.text_rec_kernel_option = text_rec_kernel_option
-        if self.text_det_model_name in ['PP-OCRv4_server_seal_det', 'PP-OCRv4_mobile_seal_det']:
-            self.task = "poly"
-        else:
-            self.task = "quad"
-        
-        if (
-            self.text_det_model_name is not None
-            and self.text_rec_model_name is not None
-        ):
-            self.load_model()
-
-    def check_model_name(self):
-        """check that model name is valid"""
-        assert (
-            self.text_det_model_name in text_det_models
-        ), f"Invalid text detection model name ({self.text_det_model_name}). \
-Supported models: {text_det_models}."
-
-        assert (
-            self.text_rec_model_name in text_rec_models
-        ), f"Invalid text recognition model name ({self.text_rec_model_name}). \
-Supported models: {text_rec_models}."
-
-    def load_model(self):
-        """load model predictor"""
-        self.check_model_name()
-        text_det_kernel_option = (
-            self.get_kernel_option()
-            if self.text_det_kernel_option is None
-            else self.text_det_kernel_option
-        )
-        text_rec_kernel_option = (
-            self.get_kernel_option()
-            if self.text_rec_kernel_option is None
-            else self.text_rec_kernel_option
-        )
-        if self.task == "poly":
-            text_det_post_transforms = [
-                text_det_T.DBPostProcess(
-                    thresh=0.2,
-                    box_thresh=0.6,
-                    max_candidates=1000,
-                    unclip_ratio=1.5,
-                    use_dilation=False,
-                    score_mode="fast",
-                    box_type="poly",
-                ),
-                # TODO
-                text_det_T.CropByPolys(det_box_type="poly"),
-            ]
-        else:
-            text_det_post_transforms = [
-                text_det_T.DBPostProcess(
-                    thresh=0.3,
-                    box_thresh=0.6,
-                    max_candidates=1000,
-                    unclip_ratio=1.5,
-                    use_dilation=False,
-                    score_mode="fast",
-                    box_type="quad",
-                ),
-                # TODO
-                text_det_T.CropByPolys(det_box_type="quad"),
-            ]
-
-        self.text_det_model = create_model(
-            self.text_det_model_name,
-            self.text_det_model_dir,
-            kernel_option=text_det_kernel_option,
-            post_transforms=text_det_post_transforms,
-        )
-        self.text_rec_model = create_model(
-            self.text_rec_model_name,
-            self.text_rec_model_dir,
-            kernel_option=text_rec_kernel_option,
-            disable_print=self.disable_print,
-            disable_save=self.disable_save,
-        )
-
-    def predict(self, input):
-        """predict"""
-        result = self.text_det_model.predict(input)
-        all_rec_result = []
-        for i, img in enumerate(result["sub_imgs"]):
-            rec_result = self.text_rec_model.predict({"image": img})
-            all_rec_result.append(rec_result["rec_text"][0])
-        result["rec_text"] = all_rec_result
-
-        if self.output is not None:
-            draw_img = draw_ocr_box_txt(
-                result["original_image"], result["dt_polys"], result["rec_text"]
-            )
-            fn = os.path.basename(result["input_path"])
-            cv2.imwrite(
-                os.path.join(self.output, fn),
-                draw_img[:, :, ::-1],
-            )
-
-        return result
-
-    def update_model(self, model_name_list, model_dir_list):
-        """update model
-
-        Args:
-            model_name_list (list): list of model name.
-            model_dir_list (list): list of model directory.
-        """
-        assert len(model_name_list) == 2
-        self.text_det_model_name = model_name_list[0]
-        self.text_rec_model_name = model_name_list[1]
-        if model_dir_list:
-            assert len(model_dir_list) == 2
-            self.text_det_model_dir = model_dir_list[0]
-            self.text_rec_model_dir = model_dir_list[1]
-
-    def get_kernel_option(self):
-        """get kernel option"""
-        kernel_option = PaddleInferenceOption()
-        kernel_option.set_device(self.device)
-        return kernel_option
-
-    def get_input_keys(self):
-        """get dict keys of input argument input"""
-        return self.text_det_model.get_input_keys()

+ 0 - 148
paddlex/pipelines/OCR/utils.py

@@ -1,148 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import PIL
-from PIL import Image, ImageDraw, ImageFont
-import cv2
-import numpy as np
-import random
-import math
-import copy
-
-from ...utils.fonts import PINGFANG_FONT_FILE_PATH
-
-
-def get_minarea_rect(points):
-    bounding_box = cv2.minAreaRect(points)
-    points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
-
-    index_a, index_b, index_c, index_d = 0, 1, 2, 3
-    if points[1][1] > points[0][1]:
-        index_a = 0
-        index_d = 1
-    else:
-        index_a = 1
-        index_d = 0
-    if points[3][1] > points[2][1]:
-        index_b = 2
-        index_c = 3
-    else:
-        index_b = 3
-        index_c = 2
-
-    box = np.array([points[index_a], points[index_b], points[index_c], points[index_d]]).astype(np.int32)
-
-    return box
-
-def draw_ocr_box_txt(
-    img,
-    boxes,
-    txts=None,
-    scores=None,
-    drop_score=0.5,
-    font_path=PINGFANG_FONT_FILE_PATH,
-):
-    """draw ocr result"""
-    image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
-    h, w = image.height, image.width
-    img_left = image.copy()
-    img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
-    random.seed(0)
-
-    draw_left = ImageDraw.Draw(img_left)
-    if txts is None or len(txts) != len(boxes):
-        txts = [None] * len(boxes)
-    for idx, (box, txt) in enumerate(zip(boxes, txts)):
-        try:
-            if scores is not None and scores[idx] < drop_score:
-                continue
-            color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
-            box = np.array(box)
-            if len(box) > 4:
-                pts = [(x, y) for x, y in box.tolist()]
-                draw_left.polygon(pts, outline=color, width=8)
-                box = get_minarea_rect(box)
-                height = int(0.5 * (max(box[:,1]) - min(box[:,1])))
-                box[:2,1] = np.mean(box[:,1])
-                box[2:,1] = np.mean(box[:,1]) + min(20, height)
-            draw_left.polygon(box, fill=color)
-            img_right_text = draw_box_txt_fine((w, h), box, txt, font_path)
-            pts = np.array(box, np.int32).reshape((-1, 1, 2))
-            cv2.polylines(img_right_text, [pts], True, color, 1)
-            img_right = cv2.bitwise_and(img_right, img_right_text)
-        except Exception:
-            continue
-
-    img_left = Image.blend(image, img_left, 0.5)
-    img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
-    img_show.paste(img_left, (0, 0, w, h))
-    img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
-    return np.array(img_show)
-
-
-def draw_box_txt_fine(img_size, box, txt, font_path=PINGFANG_FONT_FILE_PATH):
-    """draw box text"""
-    box_height = int(
-        math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
-    )
-    box_width = int(
-        math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
-    )
-
-    if box_height > 2 * box_width and box_height > 30:
-        img_text = Image.new("RGB", (box_height, box_width), (255, 255, 255))
-        draw_text = ImageDraw.Draw(img_text)
-        if txt:
-            font = create_font(txt, (box_height, box_width), font_path)
-            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
-        img_text = img_text.transpose(Image.ROTATE_270)
-    else:
-        img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
-        draw_text = ImageDraw.Draw(img_text)
-        if txt:
-            font = create_font(txt, (box_width, box_height), font_path)
-            draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
-
-    pts1 = np.float32(
-        [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
-    )
-    pts2 = np.array(box, dtype=np.float32)
-    M = cv2.getPerspectiveTransform(pts1, pts2)
-
-    img_text = np.array(img_text, dtype=np.uint8)
-    img_right_text = cv2.warpPerspective(
-        img_text,
-        M,
-        img_size,
-        flags=cv2.INTER_NEAREST,
-        borderMode=cv2.BORDER_CONSTANT,
-        borderValue=(255, 255, 255),
-    )
-    return img_right_text
-
-
-def create_font(txt, sz, font_path=PINGFANG_FONT_FILE_PATH):
-    """create font"""
-    font_size = int(sz[1] * 0.8)
-    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
-    if int(PIL.__version__.split(".")[0]) < 10:
-        length = font.getsize(txt)[0]
-    else:
-        length = font.getlength(txt)
-
-    if length > sz[0]:
-        font_size = int(font_size * sz[0] / length)
-        font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
-    return font
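
A usage sketch for the visualization helpers above (illustrative; the boxes, texts, and scores are made up):

import cv2

img = cv2.imread("page.jpg")  # assumed BGR input, as draw_ocr_box_txt expects
boxes = [[[60, 40], [300, 40], [300, 90], [60, 90]]]  # one quad, clockwise
txts = ["hello paddle"]
canvas = draw_ocr_box_txt(img, boxes, txts, scores=[0.98], drop_score=0.5)
cv2.imwrite("ocr_vis.jpg", canvas[:, :, ::-1])  # output is RGB; flip back to BGR
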