ソースを参照

deprecate NEW_PREDICTOR & USE_NEW_INFERENCE flags (#2932)

change to new models and new pipelines by default
Tingquan Gao 9 ヶ月 前
コミット
5fb83f71f3
100 ファイル変更 163 行追加 1218 行削除
  1. 2 11
      paddlex/inference/__init__.py
  2. 33 12
      paddlex/inference/models/__init__.py
  3. 0 87
      paddlex/inference/models/anomaly_detection.py
  4. 0 0
      paddlex/inference/models/anomaly_detection/__init__.py
  5. 0 0
      paddlex/inference/models/anomaly_detection/predictor.py
  6. 0 0
      paddlex/inference/models/anomaly_detection/processors.py
  7. 0 0
      paddlex/inference/models/anomaly_detection/result.py
  8. 2 2
      paddlex/inference/models/base/__init__.py
  9. 0 76
      paddlex/inference/models/base/base_predictor.py
  10. 0 130
      paddlex/inference/models/base/basic_predictor.py
  11. 0 0
      paddlex/inference/models/base/pp_infer/__init__.py
  12. 0 0
      paddlex/inference/models/base/pp_infer/base_infer.py
  13. 0 0
      paddlex/inference/models/base/predictor/__init__.py
  14. 0 0
      paddlex/inference/models/base/predictor/base_predictor.py
  15. 0 0
      paddlex/inference/models/base/predictor/basic_predictor.py
  16. 0 0
      paddlex/inference/models/common/__init__.py
  17. 0 0
      paddlex/inference/models/common/static_infer.py
  18. 0 0
      paddlex/inference/models/common/tokenizer/__init__.py
  19. 35 9
      paddlex/inference/models/common/tokenizer/bert_tokenizer.py
  20. 0 0
      paddlex/inference/models/common/tokenizer/gpt_tokenizer.py
  21. 0 0
      paddlex/inference/models/common/tokenizer/tokenizer_utils.py
  22. 0 0
      paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py
  23. 0 0
      paddlex/inference/models/common/tokenizer/utils.py
  24. 0 0
      paddlex/inference/models/common/tokenizer/vocab.py
  25. 0 0
      paddlex/inference/models/common/ts/__init__.py
  26. 0 0
      paddlex/inference/models/common/ts/funcs.py
  27. 0 0
      paddlex/inference/models/common/ts/processors.py
  28. 0 0
      paddlex/inference/models/common/vision/__init__.py
  29. 0 0
      paddlex/inference/models/common/vision/funcs.py
  30. 0 0
      paddlex/inference/models/common/vision/processors.py
  31. 0 0
      paddlex/inference/models/face_feature/__init__.py
  32. 0 0
      paddlex/inference/models/face_feature/predictor.py
  33. 0 21
      paddlex/inference/models/face_recognition.py
  34. 0 55
      paddlex/inference/models/formula_recognition.py
  35. 0 0
      paddlex/inference/models/formula_recognition/__init__.py
  36. 0 0
      paddlex/inference/models/formula_recognition/predictor.py
  37. 0 0
      paddlex/inference/models/formula_recognition/processors.py
  38. 0 0
      paddlex/inference/models/formula_recognition/result.py
  39. 0 99
      paddlex/inference/models/general_recognition.py
  40. 0 101
      paddlex/inference/models/image_classification.py
  41. 0 0
      paddlex/inference/models/image_classification/__init__.py
  42. 0 0
      paddlex/inference/models/image_classification/predictor.py
  43. 0 0
      paddlex/inference/models/image_classification/processors.py
  44. 0 0
      paddlex/inference/models/image_classification/result.py
  45. 0 0
      paddlex/inference/models/image_feature/__init__.py
  46. 0 0
      paddlex/inference/models/image_feature/predictor.py
  47. 0 0
      paddlex/inference/models/image_feature/processors.py
  48. 0 0
      paddlex/inference/models/image_feature/result.py
  49. 0 0
      paddlex/inference/models/image_multilabel_classification/__init__.py
  50. 0 0
      paddlex/inference/models/image_multilabel_classification/predictor.py
  51. 0 0
      paddlex/inference/models/image_multilabel_classification/processors.py
  52. 0 0
      paddlex/inference/models/image_multilabel_classification/result.py
  53. 0 43
      paddlex/inference/models/image_unwarping.py
  54. 0 0
      paddlex/inference/models/image_unwarping/__init__.py
  55. 0 0
      paddlex/inference/models/image_unwarping/predictor.py
  56. 0 0
      paddlex/inference/models/image_unwarping/processors.py
  57. 0 0
      paddlex/inference/models/image_unwarping/result.py
  58. 0 66
      paddlex/inference/models/instance_segmentation.py
  59. 0 0
      paddlex/inference/models/instance_segmentation/__init__.py
  60. 0 0
      paddlex/inference/models/instance_segmentation/predictor.py
  61. 0 0
      paddlex/inference/models/instance_segmentation/processors.py
  62. 0 0
      paddlex/inference/models/instance_segmentation/result.py
  63. 0 0
      paddlex/inference/models/keypoint_detection/__init__.py
  64. 0 0
      paddlex/inference/models/keypoint_detection/predictor.py
  65. 0 0
      paddlex/inference/models/keypoint_detection/processors.py
  66. 0 0
      paddlex/inference/models/keypoint_detection/result.py
  67. 0 33
      paddlex/inference/models/multilabel_classification.py
  68. 0 0
      paddlex/inference/models/multilingual_speech_recognition/__init__.py
  69. 0 0
      paddlex/inference/models/multilingual_speech_recognition/predictor.py
  70. 0 0
      paddlex/inference/models/multilingual_speech_recognition/processors.py
  71. 0 0
      paddlex/inference/models/multilingual_speech_recognition/result.py
  72. 0 130
      paddlex/inference/models/object_detection.py
  73. 0 0
      paddlex/inference/models/object_detection/__init__.py
  74. 0 0
      paddlex/inference/models/object_detection/predictor.py
  75. 41 25
      paddlex/inference/models/object_detection/processors.py
  76. 0 0
      paddlex/inference/models/object_detection/result.py
  77. 0 0
      paddlex/inference/models/object_detection/utils.py
  78. 0 0
      paddlex/inference/models/open_vocabulary_detection/__init__.py
  79. 13 10
      paddlex/inference/models/open_vocabulary_detection/predictor.py
  80. 1 1
      paddlex/inference/models/open_vocabulary_detection/processors/__init__.py
  81. 0 0
      paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py
  82. 0 0
      paddlex/inference/models/open_vocabulary_segmentation/__init__.py
  83. 6 10
      paddlex/inference/models/open_vocabulary_segmentation/predictor.py
  84. 15 0
      paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py
  85. 0 0
      paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py
  86. 15 0
      paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py
  87. 0 0
      paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py
  88. 0 86
      paddlex/inference/models/semantic_segmentation.py
  89. 0 0
      paddlex/inference/models/semantic_segmentation/__init__.py
  90. 0 0
      paddlex/inference/models/semantic_segmentation/predictor.py
  91. 0 0
      paddlex/inference/models/semantic_segmentation/processors.py
  92. 0 0
      paddlex/inference/models/semantic_segmentation/result.py
  93. 0 106
      paddlex/inference/models/table_recognition.py
  94. 0 0
      paddlex/inference/models/table_structure_recognition/__init__.py
  95. 0 0
      paddlex/inference/models/table_structure_recognition/predictor.py
  96. 0 0
      paddlex/inference/models/table_structure_recognition/processors.py
  97. 0 0
      paddlex/inference/models/table_structure_recognition/result.py
  98. 0 105
      paddlex/inference/models/text_detection.py
  99. 0 0
      paddlex/inference/models/text_detection/__init__.py
  100. 0 0
      paddlex/inference/models/text_detection/predictor.py

+ 2 - 11
paddlex/inference/__init__.py

@@ -13,16 +13,7 @@
 # limitations under the License.
 
 from ..utils import logging
-from ..utils.flags import USE_NEW_INFERENCE, NEW_PREDICTOR
 
-if USE_NEW_INFERENCE:
-    logging.warning("=" * 20 + " Using pipelines_new " + "=" * 20)
-    from .pipelines_new import create_pipeline, load_pipeline_config
-else:
-    from .pipelines import create_pipeline, load_pipeline_config
-if NEW_PREDICTOR:
-    logging.warning("=" * 20 + " Using models_new " + "=" * 20)
-    from .models_new import create_predictor
-else:
-    from .models import create_predictor
+from .pipelines import create_pipeline, load_pipeline_config
+from .models import create_predictor
 from .utils.pp_option import PaddlePredictorOption

+ 33 - 12
paddlex/inference/models/__init__.py

@@ -19,22 +19,35 @@ from typing import Any, Dict, Optional
 from ...utils import errors
 from ..utils.official_models import official_models
 from .base import BasePredictor, BasicPredictor
+
 from .image_classification import ClasPredictor
+from .object_detection import DetPredictor
+from .keypoint_detection import KptPredictor
 from .text_detection import TextDetPredictor
 from .text_recognition import TextRecPredictor
-from .table_recognition import TablePredictor
-from .object_detection import DetPredictor
+from .table_structure_recognition import TablePredictor
+from .formula_recognition import FormulaRecPredictor
 from .instance_segmentation import InstanceSegPredictor
 from .semantic_segmentation import SegPredictor
-from .general_recognition import ShiTuRecPredictor
-from .ts_fc import TSFcPredictor
-from .ts_ad import TSAdPredictor
-from .ts_cls import TSClsPredictor
+from .image_feature import ImageFeaturePredictor
+from .ts_forecasting import TSFcPredictor
+from .ts_anomaly_detection import TSAdPredictor
+from .ts_classification import TSClsPredictor
 from .image_unwarping import WarpPredictor
-from .multilabel_classification import MLClasPredictor
+from .image_multilabel_classification import MLClasPredictor
+from .face_feature import FaceFeaturePredictor
+from .open_vocabulary_detection import OVDetPredictor
+from .open_vocabulary_segmentation import OVSegPredictor
+
+
+# from .table_recognition import TablePredictor
+# from .general_recognition import ShiTuRecPredictor
 from .anomaly_detection import UadPredictor
-from .formula_recognition import LaTeXOCRPredictor
-from .face_recognition import FaceRecPredictor
+
+# from .face_recognition import FaceRecPredictor
+from .multilingual_speech_recognition import WhisperPredictor
+from .video_classification import VideoClasPredictor
+from .video_detection import VideoDetPredictor
 
 
 def _create_hp_predictor(
@@ -63,7 +76,8 @@ def _create_hp_predictor(
 
 
 def create_predictor(
-    model: str,
+    model_name: str,
+    model_dir: Optional[str] = None,
     device=None,
     pp_option=None,
     use_hpip: bool = False,
@@ -71,9 +85,16 @@ def create_predictor(
     *args,
     **kwargs,
 ) -> BasePredictor:
-    model_dir = check_model(model)
+    if model_dir is None:
+        model_dir = check_model(model_name)
+    else:
+        assert Path(model_dir).exists(), f"{model_dir} is not exists!"
+        model_dir = Path(model_dir)
     config = BasePredictor.load_config(model_dir)
-    model_name = config["Global"]["model_name"]
+    assert (
+        model_name == config["Global"]["model_name"]
+    ), f"Model name mismatch,please input the correct model dir."
+
     if use_hpip:
         return _create_hp_predictor(
             model_name=model_name,

+ 0 - 87
paddlex/inference/models/anomaly_detection.py

@@ -1,87 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from ...utils.func_register import FuncRegister
-from ...modules.anomaly_detection.model_list import MODELS
-from ..components import *
-from ..results import SegResult
-from ..utils.process_hook import batchable_method
-from .base import BasicPredictor
-
-
-class UadPredictor(BasicPredictor):
-
-    entities = MODELS
-
-    _FUNC_MAP = {}
-    register = FuncRegister(_FUNC_MAP)
-
-    def _build_components(self):
-        self._add_component(ReadImage(format="RGB"))
-        for cfg in self.config["Deploy"]["transforms"]:
-            tf_key = cfg["type"]
-            func = self._FUNC_MAP.get(tf_key)
-            cfg.pop("type")
-            args = cfg
-            op = func(self, **args) if args else func(self)
-            self._add_component(op)
-        self._add_component(ToCHWImage())
-        predictor = ImagePredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        self._add_component(predictor)
-        self._add_component(Map_to_mask())
-
-    @register("Resize")
-    def build_resize(
-        self, target_size, keep_ratio=False, size_divisor=None, interp="LINEAR"
-    ):
-        assert target_size
-        op = Resize(
-            target_size=target_size,
-            keep_ratio=keep_ratio,
-            size_divisor=size_divisor,
-            interp=interp,
-        )
-        return op
-
-    @register("ResizeByLong")
-    def build_resizebylong(self, long_size):
-        assert long_size
-        return ResizeByLong(
-            target_long_edge=long_size, size_divisor=size_divisor, interp=interp
-        )
-
-    @register("ResizeByShort")
-    def build_resizebylong(self, short_size):
-        assert short_size
-        return ResizeByLong(
-            target_long_edge=short_size, size_divisor=size_divisor, interp=interp
-        )
-
-    @register("Normalize")
-    def build_normalize(
-        self,
-        mean=0.5,
-        std=0.5,
-    ):
-        return Normalize(mean=mean, std=std)
-
-    def _pack_res(self, single):
-        keys = ["input_path", "pred"]
-        return SegResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/anomaly_detection/__init__.py → paddlex/inference/models/anomaly_detection/__init__.py


+ 0 - 0
paddlex/inference/models_new/anomaly_detection/predictor.py → paddlex/inference/models/anomaly_detection/predictor.py


+ 0 - 0
paddlex/inference/models_new/anomaly_detection/processors.py → paddlex/inference/models/anomaly_detection/processors.py


+ 0 - 0
paddlex/inference/models_new/anomaly_detection/result.py → paddlex/inference/models/anomaly_detection/result.py


+ 2 - 2
paddlex/inference/models/base/__init__.py

@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .base_predictor import BasePredictor
-from .basic_predictor import BasicPredictor
+from .pp_infer import BaseInfer
+from .predictor import BasePredictor, BasicPredictor

+ 0 - 76
paddlex/inference/models/base/base_predictor.py

@@ -1,76 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import yaml
-import codecs
-from pathlib import Path
-from abc import abstractmethod
-
-from ...components.base import BaseComponent
-from ...utils.process_hook import generatorable_method
-
-
-class BasePredictor(BaseComponent):
-
-    KEEP_INPUT = False
-    YIELD_BATCH = False
-
-    INPUT_KEYS = "input"
-    DEAULT_INPUTS = {"input": "input"}
-    OUTPUT_KEYS = "result"
-    DEAULT_OUTPUTS = {"result": "result"}
-
-    MODEL_FILE_PREFIX = "inference"
-
-    def __init__(self, model_dir, config=None):
-        super().__init__()
-        self.model_dir = Path(model_dir)
-        self.config = config if config else self.load_config(self.model_dir)
-
-        # alias predict() to the __call__()
-        self.predict = self.__call__
-
-        self.benchmark = None
-
-    def __call__(self, input, **kwargs):
-        self.set_predictor(**kwargs)
-        for res in super().__call__(input):
-            yield res["result"]
-
-    @property
-    def config_path(self):
-        return self.get_config_path(self.model_dir)
-
-    @property
-    def model_name(self) -> str:
-        return self.config["Global"]["model_name"]
-
-    @abstractmethod
-    def apply(self, input):
-        raise NotImplementedError
-
-    @abstractmethod
-    def set_predictor(self):
-        raise NotImplementedError
-
-    @classmethod
-    def get_config_path(cls, model_dir):
-        return model_dir / f"{cls.MODEL_FILE_PREFIX}.yml"
-
-    @classmethod
-    def load_config(cls, model_dir):
-        config_path = cls.get_config_path(model_dir)
-        with codecs.open(config_path, "r", "utf-8") as file:
-            dic = yaml.load(file, Loader=yaml.FullLoader)
-        return dic

+ 0 - 130
paddlex/inference/models/base/basic_predictor.py

@@ -1,130 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from abc import abstractmethod
-import inspect
-
-from ....utils.subclass_register import AutoRegisterABCMetaClass
-from ....utils.flags import (
-    INFER_BENCHMARK,
-    INFER_BENCHMARK_WARMUP,
-)
-from ....utils import logging
-from ...components.base import BaseComponent, ComponentsEngine
-from ...utils.pp_option import PaddlePredictorOption
-from ...utils.process_hook import generatorable_method
-from ...utils.benchmark import Benchmark
-from .base_predictor import BasePredictor
-
-
-class BasicPredictor(
-    BasePredictor,
-    metaclass=AutoRegisterABCMetaClass,
-):
-
-    __is_base = True
-
-    def __init__(self, model_dir, config=None, device=None, pp_option=None):
-        super().__init__(model_dir=model_dir, config=config)
-        if not pp_option:
-            pp_option = PaddlePredictorOption(model_name=self.model_name)
-        if device:
-            pp_option.device = device
-        trt_dynamic_shapes = (
-            self.config.get("Hpi", {})
-            .get("backend_configs", {})
-            .get("paddle_infer", {})
-            .get("trt_dynamic_shapes", None)
-        )
-        if trt_dynamic_shapes:
-            pp_option.trt_dynamic_shapes = trt_dynamic_shapes
-        self.pp_option = pp_option
-
-        self.components = {}
-        self._build_components()
-        self.engine = ComponentsEngine(self.components)
-        logging.debug(f"{self.__class__.__name__}: {self.model_dir}")
-
-        if INFER_BENCHMARK:
-            self.benchmark = Benchmark(self.components)
-
-    def __call__(self, input, **kwargs):
-        self.set_predictor(**kwargs)
-        if self.benchmark:
-            self.benchmark.start()
-            if INFER_BENCHMARK_WARMUP > 0:
-                output = super().__call__(input)
-                warmup_num = 0
-                for _ in range(INFER_BENCHMARK_WARMUP):
-                    try:
-                        next(output)
-                        warmup_num += 1
-                    except StopIteration:
-                        logging.warning(
-                            f"There are only {warmup_num} batches in input data, but `INFER_BENCHMARK_WARMUP` has been set to {INFER_BENCHMARK_WARMUP}."
-                        )
-                        break
-                self.benchmark.warmup_stop(warmup_num)
-            output = list(super().__call__(input))
-            self.benchmark.collect(len(output))
-        else:
-            yield from super().__call__(input)
-
-    def apply(self, input):
-        """predict"""
-        yield from self._generate_res(self.engine(input))
-
-    @generatorable_method
-    def _generate_res(self, batch_data):
-        return [{"result": self._pack_res(data)} for data in batch_data]
-
-    def _add_component(self, cmps):
-        if not isinstance(cmps, list):
-            cmps = [cmps]
-
-        for cmp in cmps:
-            if not isinstance(cmp, (list, tuple)):
-                key = cmp.name
-            else:
-                assert len(cmp) == 2
-                key = cmp[0]
-                cmp = cmp[1]
-            assert isinstance(key, str)
-            assert isinstance(cmp, BaseComponent)
-            assert (
-                key not in self.components
-            ), f"The key ({key}) has been used: {self.components}!"
-            self.components[key] = cmp
-
-    def set_predictor(self, batch_size=None, device=None, pp_option=None):
-        if batch_size:
-            self.components["ReadCmp"].batch_size = batch_size
-
-            self.pp_option.batch_size = batch_size
-        if device and device != self.pp_option.device:
-            self.pp_option.device = device
-        if pp_option and pp_option != self.pp_option:
-            self.pp_option = pp_option
-
-    def _has_setter(self, attr):
-        prop = getattr(self.__class__, attr, None)
-        return isinstance(prop, property) and prop.fset is not None
-
-    @abstractmethod
-    def _build_components(self):
-        raise NotImplementedError
-
-    @abstractmethod
-    def _pack_res(self, data):
-        raise NotImplementedError

+ 0 - 0
paddlex/inference/models_new/base/pp_infer/__init__.py → paddlex/inference/models/base/pp_infer/__init__.py


+ 0 - 0
paddlex/inference/models_new/base/pp_infer/base_infer.py → paddlex/inference/models/base/pp_infer/base_infer.py


+ 0 - 0
paddlex/inference/models_new/base/predictor/__init__.py → paddlex/inference/models/base/predictor/__init__.py


+ 0 - 0
paddlex/inference/models_new/base/predictor/base_predictor.py → paddlex/inference/models/base/predictor/base_predictor.py


+ 0 - 0
paddlex/inference/models_new/base/predictor/basic_predictor.py → paddlex/inference/models/base/predictor/basic_predictor.py


+ 0 - 0
paddlex/inference/models_new/common/__init__.py → paddlex/inference/models/common/__init__.py


+ 0 - 0
paddlex/inference/models_new/common/static_infer.py → paddlex/inference/models/common/static_infer.py


+ 0 - 0
paddlex/inference/models_new/common/tokenizer/__init__.py → paddlex/inference/models/common/tokenizer/__init__.py


+ 35 - 9
paddlex/inference/models_new/common/tokenizer/bert_tokenizer.py → paddlex/inference/models/common/tokenizer/bert_tokenizer.py

@@ -50,7 +50,13 @@ class BasicTokenizer(object):
             value for `lowercase` (as in the original BERT).
     """
 
-    def __init__(self, do_lower_case=True, never_split=None, tokenize_chinese_chars=True, strip_accents=None):
+    def __init__(
+        self,
+        do_lower_case=True,
+        never_split=None,
+        tokenize_chinese_chars=True,
+        strip_accents=None,
+    ):
         """Constructs a BasicTokenizer."""
         if never_split is None:
             never_split = []
@@ -81,7 +87,11 @@ class BasicTokenizer(object):
                 '''
         """
         text = convert_to_unicode(text)
-        never_split = self.never_split.union(set(never_split)) if never_split else self.never_split
+        never_split = (
+            self.never_split.union(set(never_split))
+            if never_split
+            else self.never_split
+        )
         text = self._clean_text(text)
 
         if self.tokenize_chinese_chars:
@@ -423,7 +433,9 @@ class BertTokenizer(PretrainedTokenizer):
             raise ValueError(
                 "Can't find a vocabulary file at path '{}'. To load the "
                 "vocabulary from a pretrained model please use "
-                "`tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
+                "`tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(
+                    vocab_file
+                )
             )
         self.do_lower_case = do_lower_case
         self.vocab = self.load_vocabulary(vocab_file, unk_token=unk_token)
@@ -435,7 +447,9 @@ class BertTokenizer(PretrainedTokenizer):
                 tokenize_chinese_chars=tokenize_chinese_chars,
                 strip_accents=strip_accents,
             )
-        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab, unk_token=unk_token)
+        self.wordpiece_tokenizer = WordpieceTokenizer(
+            vocab=self.vocab, unk_token=unk_token
+        )
 
     @property
     def vocab_size(self):
@@ -463,7 +477,9 @@ class BertTokenizer(PretrainedTokenizer):
         """
         split_tokens = []
         if self.do_basic_tokenize:
-            for token in self.basic_tokenizer.tokenize(text, never_split=self.all_special_tokens):
+            for token in self.basic_tokenizer.tokenize(
+                text, never_split=self.all_special_tokens
+            ):
                 # If the token is part of the never_split set
                 if token in self.basic_tokenizer.never_split:
                     split_tokens.append(token)
@@ -518,7 +534,11 @@ class BertTokenizer(PretrainedTokenizer):
         """
         token_ids_0 = []
         token_ids_1 = []
-        return len(self.build_inputs_with_special_tokens(token_ids_0, token_ids_1 if pair else None))
+        return len(
+            self.build_inputs_with_special_tokens(
+                token_ids_0, token_ids_1 if pair else None
+            )
+        )
 
     def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
         """
@@ -545,7 +565,9 @@ class BertTokenizer(PretrainedTokenizer):
         _sep = [self.sep_token_id]
         return _cls + token_ids_0 + _sep + token_ids_1 + _sep
 
-    def build_offset_mapping_with_special_tokens(self, offset_mapping_0, offset_mapping_1=None):
+    def build_offset_mapping_with_special_tokens(
+        self, offset_mapping_0, offset_mapping_1=None
+    ):
         """
         Build offset map from a pair of offset map by concatenating and adding offsets of special tokens.
 
@@ -595,7 +617,9 @@ class BertTokenizer(PretrainedTokenizer):
             return len(_cls + token_ids_0 + _sep) * [0]
         return len(_cls + token_ids_0 + _sep) * [0] + len(token_ids_1 + _sep) * [1]
 
-    def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_special_tokens=False):
+    def get_special_tokens_mask(
+        self, token_ids_0, token_ids_1=None, already_has_special_tokens=False
+    ):
         """
         Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
         special tokens using the tokenizer ``encode`` methods.
@@ -618,7 +642,9 @@ class BertTokenizer(PretrainedTokenizer):
                     "You should not supply a second sequence if the provided sequence of "
                     "ids is already formatted with special tokens for the model."
                 )
-            return list(map(lambda x: 1 if x in self.all_special_ids else 0, token_ids_0))
+            return list(
+                map(lambda x: 1 if x in self.all_special_ids else 0, token_ids_0)
+            )
 
         if token_ids_1 is not None:
             return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]

+ 0 - 0
paddlex/inference/models_new/common/tokenizer/gpt_tokenizer.py → paddlex/inference/models/common/tokenizer/gpt_tokenizer.py


+ 0 - 0
paddlex/inference/models_new/common/tokenizer/tokenizer_utils.py → paddlex/inference/models/common/tokenizer/tokenizer_utils.py


+ 0 - 0
paddlex/inference/models_new/common/tokenizer/tokenizer_utils_base.py → paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py


+ 0 - 0
paddlex/inference/models_new/common/tokenizer/utils.py → paddlex/inference/models/common/tokenizer/utils.py


+ 0 - 0
paddlex/inference/models_new/common/tokenizer/vocab.py → paddlex/inference/models/common/tokenizer/vocab.py


+ 0 - 0
paddlex/inference/models_new/common/ts/__init__.py → paddlex/inference/models/common/ts/__init__.py


+ 0 - 0
paddlex/inference/models_new/common/ts/funcs.py → paddlex/inference/models/common/ts/funcs.py


+ 0 - 0
paddlex/inference/models_new/common/ts/processors.py → paddlex/inference/models/common/ts/processors.py


+ 0 - 0
paddlex/inference/models_new/common/vision/__init__.py → paddlex/inference/models/common/vision/__init__.py


+ 0 - 0
paddlex/inference/models_new/common/vision/funcs.py → paddlex/inference/models/common/vision/funcs.py


+ 0 - 0
paddlex/inference/models_new/common/vision/processors.py → paddlex/inference/models/common/vision/processors.py


+ 0 - 0
paddlex/inference/models_new/face_feature/__init__.py → paddlex/inference/models/face_feature/__init__.py


+ 0 - 0
paddlex/inference/models_new/face_feature/predictor.py → paddlex/inference/models/face_feature/predictor.py


+ 0 - 21
paddlex/inference/models/face_recognition.py

@@ -1,21 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ...modules.face_recognition.model_list import MODELS
-from .general_recognition import ShiTuRecPredictor
-
-
-class FaceRecPredictor(ShiTuRecPredictor):
-
-    entities = MODELS

+ 0 - 55
paddlex/inference/models/formula_recognition.py

@@ -1,55 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from ...modules.formula_recognition.model_list import MODELS
-from ..components import *
-from ..results import FormulaRecResult
-from .base import BasicPredictor
-
-
-class LaTeXOCRPredictor(BasicPredictor):
-
-    entities = MODELS
-
-    def _build_components(self):
-        self._add_component(
-            [
-                ReadImage(format="RGB"),
-                LaTeXOCRReisizeNormImg(),
-            ]
-        )
-
-        predictor = ImagePredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        self._add_component(predictor)
-
-        op = self.build_postprocess(**self.config["PostProcess"])
-        self._add_component(op)
-
-    def build_postprocess(self, **kwargs):
-        if kwargs.get("name") == "LaTeXOCRDecode":
-            return LaTeXOCRDecode(
-                character_list=kwargs.get("character_dict"),
-            )
-        else:
-            raise Exception()
-
-    def _pack_res(self, single):
-        keys = ["input_path", "rec_text"]
-        return FormulaRecResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/formula_recognition/__init__.py → paddlex/inference/models/formula_recognition/__init__.py


+ 0 - 0
paddlex/inference/models_new/formula_recognition/predictor.py → paddlex/inference/models/formula_recognition/predictor.py


+ 0 - 0
paddlex/inference/models_new/formula_recognition/processors.py → paddlex/inference/models/formula_recognition/processors.py


+ 0 - 0
paddlex/inference/models_new/formula_recognition/result.py → paddlex/inference/models/formula_recognition/result.py


+ 0 - 99
paddlex/inference/models/general_recognition.py

@@ -1,99 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from ...utils.func_register import FuncRegister
-from ...modules.general_recognition.model_list import MODELS
-from ..components import *
-from ..results import BaseResult
-from .base import BasicPredictor
-
-
-class ShiTuRecPredictor(BasicPredictor):
-
-    entities = MODELS
-
-    _FUNC_MAP = {}
-    register = FuncRegister(_FUNC_MAP)
-
-    def _build_components(self):
-        self._add_component(ReadImage(format="RGB"))
-        for cfg in self.config["PreProcess"]["transform_ops"]:
-            tf_key = list(cfg.keys())[0]
-            func = self._FUNC_MAP[tf_key]
-            args = cfg.get(tf_key, {})
-            op = func(self, **args) if args else func(self)
-            self._add_component(op)
-
-        predictor = ImagePredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        self._add_component(predictor)
-
-        post_processes = self.config["PostProcess"]
-        for key in post_processes:
-            func = self._FUNC_MAP.get(key)
-            args = post_processes.get(key, {})
-            op = func(self, **args) if args else func(self)
-            self._add_component(op)
-
-    @register("ResizeImage")
-    # TODO(gaotingquan): backend & interpolation
-    def build_resize(
-        self,
-        resize_short=None,
-        size=None,
-        backend="cv2",
-        interpolation="LINEAR",
-        return_numpy=False,
-    ):
-        assert resize_short or size
-        if resize_short:
-            op = ResizeByShort(
-                target_short_edge=resize_short, size_divisor=None, interp="LINEAR"
-            )
-        else:
-            op = Resize(target_size=size)
-        return op
-
-    @register("CropImage")
-    def build_crop(self, size=224):
-        return Crop(crop_size=size)
-
-    @register("NormalizeImage")
-    def build_normalize(
-        self,
-        mean=[0.485, 0.456, 0.406],
-        std=[0.229, 0.224, 0.225],
-        scale=1 / 255,
-        order="",
-        channel_num=3,
-    ):
-        assert channel_num == 3
-        return Normalize(mean=mean, std=std)
-
-    @register("ToCHWImage")
-    def build_to_chw(self):
-        return ToCHWImage()
-
-    @register("NormalizeFeatures")
-    def build_normalize_features(self):
-        return NormalizeFeatures()
-
-    def _pack_res(self, data):
-        keys = ["input_path", "feature"]
-        return BaseResult({key: data[key] for key in keys})

+ 0 - 101
paddlex/inference/models/image_classification.py

@@ -1,101 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from ...utils.func_register import FuncRegister
-from ...modules.image_classification.model_list import MODELS
-from ..components import *
-from ..results import TopkResult
-from .base import BasicPredictor
-
-
-class ClasPredictor(BasicPredictor):
-
-    entities = [*MODELS]
-
-    _FUNC_MAP = {}
-    register = FuncRegister(_FUNC_MAP)
-
-    def _build_components(self):
-        self._add_component(ReadImage(format="RGB"))
-        for cfg in self.config["PreProcess"]["transform_ops"]:
-            tf_key = list(cfg.keys())[0]
-            func = self._FUNC_MAP[tf_key]
-            args = cfg.get(tf_key, {})
-            op = func(self, **args) if args else func(self)
-            self._add_component(op)
-
-        predictor = ImagePredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        self._add_component(predictor)
-
-        post_processes = self.config["PostProcess"]
-        for key in post_processes:
-            func = self._FUNC_MAP.get(key)
-            args = post_processes.get(key, {})
-            op = func(self, **args) if args else func(self)
-            self._add_component(op)
-
-    @register("ResizeImage")
-    # TODO(gaotingquan): backend & interpolation
-    def build_resize(
-        self, resize_short=None, size=None, backend="cv2", interpolation="LINEAR"
-    ):
-        assert resize_short or size
-        if resize_short:
-            op = ResizeByShort(
-                target_short_edge=resize_short, size_divisor=None, interp="LINEAR"
-            )
-        else:
-            op = Resize(target_size=size)
-        return op
-
-    @register("CropImage")
-    def build_crop(self, size=224):
-        return Crop(crop_size=size)
-
-    @register("NormalizeImage")
-    def build_normalize(
-        self,
-        mean=[0.485, 0.456, 0.406],
-        std=[0.229, 0.224, 0.225],
-        scale=1 / 255,
-        order="",
-        channel_num=3,
-    ):
-        assert channel_num == 3
-        assert order == ""
-        return Normalize(scale=scale, mean=mean, std=std)
-
-    @register("ToCHWImage")
-    def build_to_chw(self):
-        return ToCHWImage()
-
-    @register("Topk")
-    def build_topk(self, topk, label_list=None):
-        return Topk(topk=int(topk), class_ids=label_list)
-
-    @register("MultiLabelThreshOutput")
-    def build_threshoutput(self, threshold, label_list=None):
-        return MultiLabelThreshOutput(threshold=float(threshold), class_ids=label_list)
-
-    def _pack_res(self, single):
-        keys = ["input_path", "class_ids", "scores"]
-        if "label_names" in single:
-            keys.append("label_names")
-        return TopkResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/image_classification/__init__.py → paddlex/inference/models/image_classification/__init__.py


+ 0 - 0
paddlex/inference/models_new/image_classification/predictor.py → paddlex/inference/models/image_classification/predictor.py


+ 0 - 0
paddlex/inference/models_new/image_classification/processors.py → paddlex/inference/models/image_classification/processors.py


+ 0 - 0
paddlex/inference/models_new/image_classification/result.py → paddlex/inference/models/image_classification/result.py


+ 0 - 0
paddlex/inference/models_new/image_feature/__init__.py → paddlex/inference/models/image_feature/__init__.py


+ 0 - 0
paddlex/inference/models_new/image_feature/predictor.py → paddlex/inference/models/image_feature/predictor.py


+ 0 - 0
paddlex/inference/models_new/image_feature/processors.py → paddlex/inference/models/image_feature/processors.py


+ 0 - 0
paddlex/inference/models_new/image_feature/result.py → paddlex/inference/models/image_feature/result.py


+ 0 - 0
paddlex/inference/models_new/image_multilabel_classification/__init__.py → paddlex/inference/models/image_multilabel_classification/__init__.py


+ 0 - 0
paddlex/inference/models_new/image_multilabel_classification/predictor.py → paddlex/inference/models/image_multilabel_classification/predictor.py


+ 0 - 0
paddlex/inference/models_new/image_multilabel_classification/processors.py → paddlex/inference/models/image_multilabel_classification/processors.py


+ 0 - 0
paddlex/inference/models_new/image_multilabel_classification/result.py → paddlex/inference/models/image_multilabel_classification/result.py


+ 0 - 43
paddlex/inference/models/image_unwarping.py

@@ -1,43 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ...modules.image_unwarping.model_list import MODELS
-from ..components import *
-from ..results import DocTrResult
-from .base import BasicPredictor
-
-
-class WarpPredictor(BasicPredictor):
-
-    entities = MODELS
-
-    def _build_components(self):
-        self._add_component(
-            [
-                ReadImage(format="RGB"),
-                Normalize(mean=0.0, std=1.0, scale=1.0 / 255),
-                ToCHWImage(),
-            ]
-        )
-
-        predictor = ImagePredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        self._add_component([predictor, DocTrPostProcess()])
-
-    def _pack_res(self, single):
-        keys = ["input_path", "doctr_img"]
-        return DocTrResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/image_unwarping/__init__.py → paddlex/inference/models/image_unwarping/__init__.py


+ 0 - 0
paddlex/inference/models_new/image_unwarping/predictor.py → paddlex/inference/models/image_unwarping/predictor.py


+ 0 - 0
paddlex/inference/models_new/image_unwarping/processors.py → paddlex/inference/models/image_unwarping/processors.py


+ 0 - 0
paddlex/inference/models_new/image_unwarping/result.py → paddlex/inference/models/image_unwarping/result.py


+ 0 - 66
paddlex/inference/models/instance_segmentation.py

@@ -1,66 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from .object_detection import DetPredictor
-from ...utils.func_register import FuncRegister
-from ...modules.instance_segmentation.model_list import MODELS
-from ..components import *
-from ..results import InstanceSegResult
-
-
-class InstanceSegPredictor(DetPredictor):
-
-    entities = MODELS
-
-    def _build_components(self):
-        self._add_component(ReadImage(format="RGB"))
-        for cfg in self.config["Preprocess"]:
-            tf_key = cfg["type"]
-            func = self._FUNC_MAP[tf_key]
-            cfg.pop("type")
-            args = cfg
-            op = func(self, **args) if args else func(self)
-            self._add_component(op)
-
-        predictor = ImageDetPredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        model_names = ["RT-DETR", "SOLOv2", "RCNN", "YOLO"]
-        if any(name in self.model_name for name in model_names):
-            predictor.set_inputs(
-                {"img": "img", "scale_factors": "scale_factors", "img_size": "img_size"}
-            )
-
-        postprecss = InstanceSegPostProcess(
-            threshold=self.config["draw_threshold"],
-            labels=self.config["label_list"],
-        )
-
-        if "SOLOv2" in self.model_name:
-            postprecss.set_inputs(
-                {
-                    "class_id": "class_id",
-                    "masks": "masks",
-                    "img_size": "img_size",
-                }
-            )
-        self._add_component([predictor, postprecss])
-
-    def _pack_res(self, single):
-        keys = ["input_path", "boxes", "masks"]
-        return InstanceSegResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/instance_segmentation/__init__.py → paddlex/inference/models/instance_segmentation/__init__.py


+ 0 - 0
paddlex/inference/models_new/instance_segmentation/predictor.py → paddlex/inference/models/instance_segmentation/predictor.py


+ 0 - 0
paddlex/inference/models_new/instance_segmentation/processors.py → paddlex/inference/models/instance_segmentation/processors.py


+ 0 - 0
paddlex/inference/models_new/instance_segmentation/result.py → paddlex/inference/models/instance_segmentation/result.py


+ 0 - 0
paddlex/inference/models_new/keypoint_detection/__init__.py → paddlex/inference/models/keypoint_detection/__init__.py


+ 0 - 0
paddlex/inference/models_new/keypoint_detection/predictor.py → paddlex/inference/models/keypoint_detection/predictor.py


+ 0 - 0
paddlex/inference/models_new/keypoint_detection/processors.py → paddlex/inference/models/keypoint_detection/processors.py


+ 0 - 0
paddlex/inference/models_new/keypoint_detection/result.py → paddlex/inference/models/keypoint_detection/result.py


+ 0 - 33
paddlex/inference/models/multilabel_classification.py

@@ -1,33 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from ...utils.func_register import FuncRegister
-from ...modules.multilabel_classification.model_list import MODELS
-from ..components import *
-from ..results import MLClassResult
-from ..utils.process_hook import batchable_method
-from .image_classification import ClasPredictor
-
-
-class MLClasPredictor(ClasPredictor):
-
-    entities = [*MODELS]
-
-    def _pack_res(self, single):
-        keys = ["input_path", "class_ids", "scores"]
-        if "label_names" in single:
-            keys.append("label_names")
-        return MLClassResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/multilingual_speech_recognition/__init__.py → paddlex/inference/models/multilingual_speech_recognition/__init__.py


+ 0 - 0
paddlex/inference/models_new/multilingual_speech_recognition/predictor.py → paddlex/inference/models/multilingual_speech_recognition/predictor.py


+ 0 - 0
paddlex/inference/models_new/multilingual_speech_recognition/processors.py → paddlex/inference/models/multilingual_speech_recognition/processors.py


+ 0 - 0
paddlex/inference/models_new/multilingual_speech_recognition/result.py → paddlex/inference/models/multilingual_speech_recognition/result.py


+ 0 - 130
paddlex/inference/models/object_detection.py

@@ -1,130 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from ...utils.func_register import FuncRegister
-from ...modules.object_detection.model_list import MODELS
-from ..components import *
-from ..results import DetResult
-from .base import BasicPredictor
-
-
-class DetPredictor(BasicPredictor):
-
-    entities = MODELS
-
-    _FUNC_MAP = {}
-    register = FuncRegister(_FUNC_MAP)
-
-    def _build_components(self):
-        self._add_component(ReadImage(format="RGB"))
-        for cfg in self.config["Preprocess"]:
-            tf_key = cfg["type"]
-            func = self._FUNC_MAP[tf_key]
-            cfg.pop("type")
-            args = cfg
-            op = func(self, **args) if args else func(self)
-            self._add_component(op)
-
-        predictor = ImageDetPredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        model_names = ["DETR", "RCNN", "YOLOv3", "CenterNet", "PP-DocLayout-L"]
-        if any(name in self.model_name for name in model_names):
-            predictor.set_inputs(
-                {
-                    "img": "img",
-                    "scale_factors": "scale_factors",
-                    "img_size": "img_size",
-                }
-            )
-
-        if self.model_name in ["BlazeFace", "BlazeFace-FPN-SSH"]:
-            predictor.set_inputs(
-                {
-                    "img": "img",
-                    "img_size": "img_size",
-                }
-            )
-
-        self._add_component(
-            [
-                predictor,
-                DetPostProcess(
-                    threshold=self.config["draw_threshold"],
-                    labels=self.config["label_list"],
-                    layout_postprocess=self.config.get("layout_postprocess", False),
-                ),
-            ]
-        )
-
-    @register("Resize")
-    def build_resize(self, target_size, keep_ratio=False, interp=2):
-        assert target_size
-        if isinstance(interp, int):
-            interp = {
-                0: "NEAREST",
-                1: "LINEAR",
-                2: "CUBIC",
-                3: "AREA",
-                4: "LANCZOS4",
-            }[interp]
-        op = Resize(target_size=target_size[::-1], keep_ratio=keep_ratio, interp=interp)
-        return op
-
-    @register("NormalizeImage")
-    def build_normalize(
-        self,
-        norm_type=None,
-        mean=[0.485, 0.456, 0.406],
-        std=[0.229, 0.224, 0.225],
-        is_scale=True,
-    ):
-        if is_scale:
-            scale = 1.0 / 255.0
-        else:
-            scale = 1
-        if not norm_type or norm_type == "none":
-            norm_type = "mean_std"
-        if norm_type != "mean_std":
-            mean = 0
-            std = 1
-        return Normalize(scale=scale, mean=mean, std=std)
-
-    @register("Permute")
-    def build_to_chw(self):
-        return ToCHWImage()
-
-    @register("Pad")
-    def build_pad(self, fill_value=None, size=None):
-        if fill_value is None:
-            fill_value = [127.5, 127.5, 127.5]
-        if size is None:
-            size = [3, 640, 640]
-        return DetPad(size=size, fill_value=fill_value)
-
-    @register("PadStride")
-    def build_pad_stride(self, stride=32):
-        return PadStride(stride=stride)
-
-    @register("WarpAffine")
-    def build_warp_affine(self, input_h=512, input_w=512, keep_res=True):
-        return WarpAffine(input_h=input_h, input_w=input_w, keep_res=keep_res)
-
-    def _pack_res(self, single):
-        keys = ["input_path", "boxes"]
-        return DetResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/object_detection/__init__.py → paddlex/inference/models/object_detection/__init__.py


+ 0 - 0
paddlex/inference/models_new/object_detection/predictor.py → paddlex/inference/models/object_detection/predictor.py


+ 41 - 25
paddlex/inference/models_new/object_detection/processors.py → paddlex/inference/models/object_detection/processors.py

@@ -424,6 +424,7 @@ class WarpAffine:
 
         return datas
 
+
 def restructured_boxes(
     boxes: ndarray, labels: List[str], img_size: Tuple[int, int]
 ) -> Boxes:
@@ -498,20 +499,21 @@ def restructured_rotated_boxes(
 
     return box_list
 
+
 def unclip_boxes(boxes, unclip_ratio=None):
     """
     Expand bounding boxes from (x1, y1, x2, y2) format using an unclipping ratio.
-    
+
     Parameters:
     - boxes: np.ndarray of shape (N, 4), where each row is (x1, y1, x2, y2).
     - unclip_ratio: tuple of (width_ratio, height_ratio), optional.
-    
+
     Returns:
     - expanded_boxes: np.ndarray of shape (N, 4), where each row is (x1, y1, x2, y2).
     """
     if unclip_ratio is None:
         return boxes
-    
+
     widths = boxes[:, 4] - boxes[:, 2]
     heights = boxes[:, 5] - boxes[:, 3]
 
@@ -524,7 +526,9 @@ def unclip_boxes(boxes, unclip_ratio=None):
     new_y1 = center_y - new_h / 2
     new_x2 = center_x + new_w / 2
     new_y2 = center_y + new_h / 2
-    expanded_boxes = np.column_stack((boxes[:, 0], boxes[:, 1], new_x1, new_y1, new_x2, new_y2))
+    expanded_boxes = np.column_stack(
+        (boxes[:, 0], boxes[:, 1], new_x1, new_y1, new_x2, new_y2)
+    )
 
     return expanded_boxes
 
@@ -552,6 +556,7 @@ def iou(box1, box2):
 
     return iou_value
 
+
 def nms(boxes, iou_same=0.6, iou_diff=0.95):
     """Perform Non-Maximum Suppression (NMS) with different IoU thresholds for same and different classes."""
     # Extract class scores
@@ -585,6 +590,7 @@ def nms(boxes, iou_same=0.6, iou_diff=0.95):
         indices = filtered_indices
     return selected_boxes
 
+
 def is_contained(box1, box2):
     """Check if box1 is contained within box2."""
     _, _, x1, y1, x2, y2 = box1
@@ -600,6 +606,7 @@ def is_contained(box1, box2):
     iou = intersect_area / box1_area if box1_area > 0 else 0
     return iou >= 0.9
 
+
 def check_containment(boxes, formula_index=None):
     """Check containment relationships among boxes."""
     n = len(boxes)
@@ -627,10 +634,7 @@ class DetPostProcess:
     based on the input type (normal or rotated object detection).
     """
 
-    def __init__(
-        self,
-        labels: Optional[List[str]] = None
-    ) -> None:
+    def __init__(self, labels: Optional[List[str]] = None) -> None:
         """Initialize the DetPostProcess class.
 
         Args:
@@ -641,14 +645,15 @@ class DetPostProcess:
         super().__init__()
         self.labels = labels
 
-    def apply(self, 
-            boxes: ndarray, 
-            img_size: Tuple[int, int],
-            threshold: Union[float, dict], 
-            layout_nms: Optional[bool],
-            layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]], 
-            layout_merge_bboxes_mode: Optional[str]
-        ) -> Boxes:
+    def apply(
+        self,
+        boxes: ndarray,
+        img_size: Tuple[int, int],
+        threshold: Union[float, dict],
+        layout_nms: Optional[bool],
+        layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]],
+        layout_merge_bboxes_mode: Optional[str],
+    ) -> Boxes:
         """Apply post-processing to the detection boxes.
 
         Args:
@@ -666,7 +671,9 @@ class DetPostProcess:
             for cat_id in np.unique(boxes[:, 0]):
                 category_boxes = boxes[boxes[:, 0] == cat_id]
                 category_threshold = threshold.get(int(cat_id), 0.5)
-                selected_indices = (category_boxes[:, 1] > category_threshold) & (category_boxes[:, 0] > -1)
+                selected_indices = (category_boxes[:, 1] > category_threshold) & (
+                    category_boxes[:, 0] > -1
+                )
                 category_filtered_boxes.append(category_boxes[selected_indices])
             boxes = (
                 np.vstack(category_filtered_boxes)
@@ -681,30 +688,39 @@ class DetPostProcess:
             boxes = np.array(boxes[selected_indices])
 
         if layout_merge_bboxes_mode:
-            assert layout_merge_bboxes_mode in ["union", "large", "small"], \
-                f"The value of `layout_merge_bboxes_mode` must be one of ['union', 'large', 'small'], but got {layout_merge_bboxes_mode}"
+            assert layout_merge_bboxes_mode in [
+                "union",
+                "large",
+                "small",
+            ], f"The value of `layout_merge_bboxes_mode` must be one of ['union', 'large', 'small'], but got {layout_merge_bboxes_mode}"
 
             if layout_merge_bboxes_mode == "union":
                 pass
             else:
-                formula_index = self.labels.index("formula") if "formula" in self.labels else None
-                contains_other, contained_by_other = check_containment(boxes, formula_index)
+                formula_index = (
+                    self.labels.index("formula") if "formula" in self.labels else None
+                )
+                contains_other, contained_by_other = check_containment(
+                    boxes, formula_index
+                )
                 if layout_merge_bboxes_mode == "large":
                     boxes = boxes[contained_by_other == 0]
                 elif layout_merge_bboxes_mode == "small":
-                    boxes = boxes[(contains_other == 0) | (contained_by_other == 1)] 
+                    boxes = boxes[(contains_other == 0) | (contained_by_other == 1)]
 
         if layout_unclip_ratio:
             if isinstance(layout_unclip_ratio, float):
                 layout_unclip_ratio = (layout_unclip_ratio, layout_unclip_ratio)
             elif isinstance(layout_unclip_ratio, (tuple, list)):
-                assert len(layout_unclip_ratio) == 2, f"The length of `layout_unclip_ratio` should be 2."
+                assert (
+                    len(layout_unclip_ratio) == 2
+                ), f"The length of `layout_unclip_ratio` should be 2."
             else:
                 raise ValueError(
                     f"The type of `layout_unclip_ratio` must be float or Tuple[float, float], but got {type(layout_unclip_ratio)}."
                 )
             boxes = unclip_boxes(boxes, layout_unclip_ratio)
-        
+
         if boxes.shape[1] == 6:
             """For Normal Object Detection"""
             boxes = restructured_boxes(boxes, self.labels, img_size)
@@ -744,7 +760,7 @@ class DetPostProcess:
                 threshold,
                 layout_nms,
                 layout_unclip_ratio,
-                layout_merge_bboxes_mode
+                layout_merge_bboxes_mode,
             )
             outputs.append(boxes)
         return outputs

+ 0 - 0
paddlex/inference/models_new/object_detection/result.py → paddlex/inference/models/object_detection/result.py


+ 0 - 0
paddlex/inference/models_new/object_detection/utils.py → paddlex/inference/models/object_detection/utils.py


+ 0 - 0
paddlex/inference/models_new/open_vocabulary_detection/__init__.py → paddlex/inference/models/open_vocabulary_detection/__init__.py


+ 13 - 10
paddlex/inference/models_new/open_vocabulary_detection/predictor.py → paddlex/inference/models/open_vocabulary_detection/predictor.py

@@ -20,10 +20,7 @@ from ....utils.func_register import FuncRegister
 from ....modules.open_vocabulary_detection.model_list import MODELS
 from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
-from .processors import (
-    GroundingDINOProcessor,
-    GroundingDINOPostProcessor
-)
+from .processors import GroundingDINOProcessor, GroundingDINOPostProcessor
 from ..common import StaticInfer
 from ..base import BasicPredictor
 from ..object_detection.result import DetResult
@@ -36,7 +33,9 @@ class OVDetPredictor(BasicPredictor):
     _FUNC_MAP = {}
     register = FuncRegister(_FUNC_MAP)
 
-    def __init__(self, *args, thresholds: Optional[Union[Dict, float]] = None, **kwargs):
+    def __init__(
+        self, *args, thresholds: Optional[Union[Dict, float]] = None, **kwargs
+    ):
         """Initializes DetPredictor.
         Args:
             *args: Arbitrary positional arguments passed to the superclass.
@@ -76,11 +75,13 @@ class OVDetPredictor(BasicPredictor):
         )
 
         # build postprocess op
-        post_op = self.build_postprocess(pre_ops = pre_ops)
+        post_op = self.build_postprocess(pre_ops=pre_ops)
 
         return pre_ops, infer, post_op
 
-    def process(self, batch_data: List[Any], prompt: str, thresholds: Optional[dict] = None):
+    def process(
+        self, batch_data: List[Any], prompt: str, thresholds: Optional[dict] = None
+    ):
         """
         Process a batch of data through the preprocessing, inference, and postprocessing.
 
@@ -144,9 +145,11 @@ class OVDetPredictor(BasicPredictor):
             raise NotImplementedError
 
     @register("GroundingDINOProcessor")
-    def build_grounding_dino_preprocessor(self, text_max_words=256, target_size=(800, 1333)):
+    def build_grounding_dino_preprocessor(
+        self, text_max_words=256, target_size=(800, 1333)
+    ):
         return GroundingDINOProcessor(
             model_dir=self.model_dir,
             text_max_words=text_max_words,
-            target_size=target_size
-        )
+            target_size=target_size,
+        )

+ 1 - 1
paddlex/inference/models_new/open_vocabulary_detection/processors/__init__.py → paddlex/inference/models/open_vocabulary_detection/processors/__init__.py

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .groundingdino_processors import GroundingDINOProcessor, GroundingDINOPostProcessor
+from .groundingdino_processors import GroundingDINOProcessor, GroundingDINOPostProcessor

+ 0 - 0
paddlex/inference/models_new/open_vocabulary_detection/processors/groundingdino_processors.py → paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py


+ 0 - 0
paddlex/inference/models_new/open_vocabulary_segmentation/__init__.py → paddlex/inference/models/open_vocabulary_segmentation/__init__.py


+ 6 - 10
paddlex/inference/models_new/open_vocabulary_segmentation/predictor.py → paddlex/inference/models/open_vocabulary_segmentation/predictor.py

@@ -21,9 +21,7 @@ from ....utils.func_register import FuncRegister
 from ....modules.open_vocabulary_segmentation.model_list import MODELS
 from ...common.batch_sampler import ImageBatchSampler
 from ...common.reader import ReadImage
-from .processors import (
-    SAMProcessor
-)
+from .processors import SAMProcessor
 from ..common import StaticInfer
 from ..base import BasicPredictor
 from .results import SAMSegResult
@@ -104,7 +102,7 @@ class OVSegPredictor(BasicPredictor):
 
         # do infer
         batch_preds = self.infer(batch_inputs)
-        
+
         # postprocess
         masks = self.processor.postprocess(batch_preds)
 
@@ -116,9 +114,7 @@ class OVSegPredictor(BasicPredictor):
         }
 
     @register("SAMProcessor")
-    def build_sam_preprocessor(self, size=1024, mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]):
-        return SAMProcessor(
-            size=size,
-            img_mean=mean,
-            img_std=std
-        )
+    def build_sam_preprocessor(
+        self, size=1024, mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]
+    ):
+        return SAMProcessor(size=size, img_mean=mean, img_std=std)

+ 15 - 0
paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .sam_processer import SAMProcessor

+ 0 - 0
paddlex/inference/models_new/open_vocabulary_segmentation/processors/sam_processer.py → paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py


+ 15 - 0
paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py

@@ -0,0 +1,15 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .sam_result import SAMSegResult

+ 0 - 0
paddlex/inference/models_new/open_vocabulary_segmentation/results/sam_result.py → paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py


+ 0 - 86
paddlex/inference/models/semantic_segmentation.py

@@ -1,86 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from ...utils.func_register import FuncRegister
-from ...modules.semantic_segmentation.model_list import MODELS
-from ..components import *
-from ..results import SegResult
-from .base import BasicPredictor
-
-
-class SegPredictor(BasicPredictor):
-
-    entities = MODELS
-
-    _FUNC_MAP = {}
-    register = FuncRegister(_FUNC_MAP)
-
-    def _build_components(self):
-        self._add_component(ReadImage(format="RGB"))
-        self._add_component(ToCHWImage())
-        for cfg in self.config["Deploy"]["transforms"]:
-            tf_key = cfg["type"]
-            func = self._FUNC_MAP[tf_key]
-            cfg.pop("type")
-            args = cfg
-            op = func(self, **args) if args else func(self)
-            self._add_component(op)
-
-        predictor = ImagePredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        self._add_component(predictor)
-
-    @register("Resize")
-    def build_resize(
-        self, target_size, keep_ratio=False, size_divisor=None, interp="LINEAR"
-    ):
-        assert target_size
-        op = Resize(
-            target_size=target_size,
-            keep_ratio=keep_ratio,
-            size_divisor=size_divisor,
-            interp=interp,
-        )
-        return op
-
-    @register("ResizeByLong")
-    def build_resizebylong(self, long_size):
-        assert long_size
-        return ResizeByLong(
-            target_long_edge=long_size, size_divisor=size_divisor, interp=interp
-        )
-
-    @register("ResizeByShort")
-    def build_resizebylong(self, short_size):
-        assert short_size
-        return ResizeByLong(
-            target_long_edge=short_size, size_divisor=size_divisor, interp=interp
-        )
-
-    @register("Normalize")
-    def build_normalize(
-        self,
-        mean=0.5,
-        std=0.5,
-    ):
-        return Normalize(mean=mean, std=std)
-
-    def _pack_res(self, single):
-        keys = ["input_path", "pred"]
-        return SegResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/semantic_segmentation/__init__.py → paddlex/inference/models/semantic_segmentation/__init__.py


+ 0 - 0
paddlex/inference/models_new/semantic_segmentation/predictor.py → paddlex/inference/models/semantic_segmentation/predictor.py


+ 0 - 0
paddlex/inference/models_new/semantic_segmentation/processors.py → paddlex/inference/models/semantic_segmentation/processors.py


+ 0 - 0
paddlex/inference/models_new/semantic_segmentation/result.py → paddlex/inference/models/semantic_segmentation/result.py


+ 0 - 106
paddlex/inference/models/table_recognition.py

@@ -1,106 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import numpy as np
-
-from ...utils.func_register import FuncRegister
-from ...modules.table_recognition.model_list import MODELS
-from ..components import *
-from ..results import TableRecResult
-from .base import BasicPredictor
-
-
-class TablePredictor(BasicPredictor):
-    """table recognition predictor"""
-
-    entities = MODELS
-
-    _FUNC_MAP = {}
-    register = FuncRegister(_FUNC_MAP)
-
-    def _build_components(self):
-        for cfg in self.config["PreProcess"]["transform_ops"]:
-            tf_key = list(cfg.keys())[0]
-            func = self._FUNC_MAP[tf_key]
-            args = cfg.get(tf_key, {})
-            op = func(self, **args) if args else func(self)
-            if op:
-                self._add_component(op)
-
-        predictor = ImagePredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        self._add_component(predictor)
-
-        op = self.build_postprocess(
-            model_name=self.config["Global"]["model_name"], **self.config["PostProcess"]
-        )
-        self._add_component(op)
-
-    def build_postprocess(self, model_name, **kwargs):
-        if kwargs.get("name") == "TableLabelDecode":
-            return TableLabelDecode(
-                model_name=model_name,
-                merge_no_span_structure=kwargs.get("merge_no_span_structure"),
-                dict_character=kwargs.get("character_dict"),
-            )
-        else:
-            raise Exception()
-
-    @register("DecodeImage")
-    def build_readimg(self, channel_first=False, img_mode="BGR"):
-        assert channel_first is False
-        assert img_mode == "BGR"
-        return ReadImage(format=img_mode)
-
-    @register("TableLabelEncode")
-    def foo(self, *args, **kwargs):
-        return None
-
-    @register("TableBoxEncode")
-    def foo(self, *args, **kwargs):
-        return None
-
-    @register("ResizeTableImage")
-    def build_resize_table(self, max_len=488):
-        return ResizeByLong(target_long_edge=max_len)
-
-    @register("NormalizeImage")
-    def build_normalize(
-        self,
-        mean=[0.485, 0.456, 0.406],
-        std=[0.229, 0.224, 0.225],
-        scale=1 / 255,
-        order="hwc",
-    ):
-        return Normalize(mean=mean, std=std)
-
-    @register("PaddingTableImage")
-    def build_padding(self, size=[488, 448], pad_value=0):
-        return Pad(target_size=size[0], val=pad_value)
-
-    @register("ToCHWImage")
-    def build_to_chw(self):
-        return ToCHWImage()
-
-    @register("KeepKeys")
-    def foo(self, *args, **kwargs):
-        return None
-
-    def _pack_res(self, single):
-        keys = ["input_path", "bbox", "structure"]
-        return TableRecResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/table_structure_recognition/__init__.py → paddlex/inference/models/table_structure_recognition/__init__.py


+ 0 - 0
paddlex/inference/models_new/table_structure_recognition/predictor.py → paddlex/inference/models/table_structure_recognition/predictor.py


+ 0 - 0
paddlex/inference/models_new/table_structure_recognition/processors.py → paddlex/inference/models/table_structure_recognition/processors.py


+ 0 - 0
paddlex/inference/models_new/table_structure_recognition/result.py → paddlex/inference/models/table_structure_recognition/result.py


+ 0 - 105
paddlex/inference/models/text_detection.py

@@ -1,105 +0,0 @@
-# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-from ...utils.func_register import FuncRegister
-from ...modules.text_detection.model_list import MODELS
-from ..components import *
-from ..results import TextDetResult
-from .base import BasicPredictor
-
-
-class TextDetPredictor(BasicPredictor):
-
-    entities = MODELS
-
-    _FUNC_MAP = {}
-    register = FuncRegister(_FUNC_MAP)
-
-    def _build_components(self):
-        for cfg in self.config["PreProcess"]["transform_ops"]:
-            tf_key = list(cfg.keys())[0]
-            func = self._FUNC_MAP[tf_key]
-            args = cfg.get(tf_key, {})
-            op = func(self, **args) if args else func(self)
-            if op:
-                self._add_component(op)
-
-        predictor = ImagePredictor(
-            model_dir=self.model_dir,
-            model_prefix=self.MODEL_FILE_PREFIX,
-            option=self.pp_option,
-        )
-        self._add_component(predictor)
-
-        op = self.build_postprocess(**self.config["PostProcess"])
-        self._add_component(op)
-
-    @register("DecodeImage")
-    def build_readimg(self, channel_first, img_mode):
-        assert channel_first == False
-        return ReadImage(format=img_mode)
-
-    @register("DetResizeForTest")
-    def build_resize(self, **kwargs):
-        # TODO: align to PaddleOCR
-        if self.model_name in ("PP-OCRv4_server_det", "PP-OCRv4_mobile_det"):
-            resize_long = kwargs.get("resize_long", 960)
-            return DetResizeForTest(limit_side_len=resize_long, limit_type="max")
-        return DetResizeForTest(**kwargs)
-
-    @register("NormalizeImage")
-    def build_normalize(
-        self,
-        mean=[0.485, 0.456, 0.406],
-        std=[0.229, 0.224, 0.225],
-        scale=1 / 255,
-        order="",
-        channel_num=3,
-    ):
-        return NormalizeImage(
-            mean=mean, std=std, scale=scale, order=order, channel_num=channel_num
-        )
-
-    @register("ToCHWImage")
-    def build_to_chw(self):
-        return ToCHWImage()
-
-    def build_postprocess(self, **kwargs):
-        if kwargs.get("name") == "DBPostProcess":
-            return DBPostProcess(
-                thresh=kwargs.get("thresh", 0.3),
-                box_thresh=kwargs.get("box_thresh", 0.7),
-                max_candidates=kwargs.get("max_candidates", 1000),
-                unclip_ratio=kwargs.get("unclip_ratio", 2.0),
-                use_dilation=kwargs.get("use_dilation", False),
-                score_mode=kwargs.get("score_mode", "fast"),
-                box_type=kwargs.get("box_type", "quad"),
-            )
-
-        else:
-            raise Exception()
-
-    @register("DetLabelEncode")
-    def foo(self, *args, **kwargs):
-        return None
-
-    @register("KeepKeys")
-    def foo(self, *args, **kwargs):
-        return None
-
-    def _pack_res(self, single):
-        keys = ["input_path", "dt_polys", "dt_scores"]
-        return TextDetResult({key: single[key] for key in keys})

+ 0 - 0
paddlex/inference/models_new/text_detection/__init__.py → paddlex/inference/models/text_detection/__init__.py


+ 0 - 0
paddlex/inference/models_new/text_detection/predictor.py → paddlex/inference/models/text_detection/predictor.py


この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません