Browse Source

Support saving the page index in the result file name when the input is a PDF file (#2868)

* support saving the page index in the result file name when the input is a PDF file

* OCR pipeline supports PDF input

* use a random name for the saved result when there is no input file

* OCR-related pipelines support PDF input

* bugfix
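
For context, a minimal usage sketch of the behavior these changes target (assuming the PaddleX `create_pipeline` API; the input file and the exact saved file names are illustrative, not taken from the diff):

    from paddlex import create_pipeline

    # Feed a PDF directly to the OCR pipeline; each page is sampled as one
    # instance and yields one result.
    pipeline = create_pipeline(pipeline="OCR")
    for res in pipeline.predict("doc.pdf"):
        # Saved file names now carry the source stem plus the page index,
        # e.g. doc_0_*.png / doc_0_*.json for page 0, doc_1_* for page 1, ...
        res.save_to_img("./output/")
        res.save_to_json("./output/")

    # For ndarray inputs there is no source file name, so results fall back to a
    # generated "<timestamp>_<random-number>" stem instead.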
Tingquan Gao 10 months ago
parent
commit
4a42505cfe
33 changed files with 234 additions and 161 deletions
  1. +29 -8    paddlex/inference/common/batch_sampler/image_batch_sampler.py
  2. +0 -3     paddlex/inference/common/result/base_cv_result.py
  3. +15 -0    paddlex/inference/common/result/base_result.py
  4. +17 -18   paddlex/inference/common/result/mixin.py
  5. +3 -2     paddlex/inference/models_new/anomaly_detection/predictor.py
  6. +0 -2     paddlex/inference/models_new/common/static_infer.py
  7. +3 -2     paddlex/inference/models_new/face_feature/predictor.py
  8. +3 -2     paddlex/inference/models_new/formula_recognition/predictor.py
  9. +9 -0     paddlex/inference/models_new/formula_recognition/result.py
  10. +3 -2    paddlex/inference/models_new/image_classification/predictor.py
  11. +1 -0    paddlex/inference/models_new/image_classification/result.py
  12. +3 -2    paddlex/inference/models_new/image_feature/predictor.py
  13. +3 -2    paddlex/inference/models_new/image_multilabel_classification/predictor.py
  14. +3 -2    paddlex/inference/models_new/image_unwarping/predictor.py
  15. +3 -2    paddlex/inference/models_new/instance_segmentation/predictor.py
  16. +31 -22  paddlex/inference/models_new/object_detection/predictor.py
  17. +3 -2    paddlex/inference/models_new/semantic_segmentation/predictor.py
  18. +3 -2    paddlex/inference/models_new/table_structure_recognition/predictor.py
  19. +8 -2    paddlex/inference/models_new/table_structure_recognition/result.py
  20. +3 -2    paddlex/inference/models_new/text_detection/predictor.py
  21. +9 -2    paddlex/inference/models_new/text_detection/result.py
  22. +3 -2    paddlex/inference/models_new/text_recognition/predictor.py
  23. +4 -2    paddlex/inference/pipelines_new/attribute_recognition/pipeline.py
  24. +3 -8    paddlex/inference/pipelines_new/doc_preprocessor/pipeline.py
  25. +3 -8    paddlex/inference/pipelines_new/formula_recognition/pipeline.py
  26. +3 -8    paddlex/inference/pipelines_new/layout_parsing/pipeline.py
  27. +3 -8    paddlex/inference/pipelines_new/layout_parsing/pipeline_v2.py
  28. +3 -8    paddlex/inference/pipelines_new/ocr/pipeline.py
  29. +15 -2   paddlex/inference/pipelines_new/ocr/result.py
  30. +24 -18  paddlex/inference/pipelines_new/seal_recognition/pipeline.py
  31. +3 -8    paddlex/inference/pipelines_new/table_recognition/pipeline.py
  32. +3 -8    paddlex/inference/pipelines_new/table_recognition/pipeline_v2.py
  33. +15 -2   paddlex/inference/pipelines_new/table_recognition/result.py

+ 29 - 8
paddlex/inference/common/batch_sampler/image_batch_sampler.py

@@ -24,6 +24,26 @@ from ...utils.io import PDFReader
 from .base_batch_sampler import BaseBatchSampler


+class ImgInstance:
+    def __init__(self):
+        self.instances = []
+        self.input_paths = []
+        self.page_indexes = []
+
+    def append(self, instance, input_path, page_index):
+        self.instances.append(instance)
+        self.input_paths.append(input_path)
+        self.page_indexes.append(page_index)
+
+    def reset(self):
+        self.instances = []
+        self.input_paths = []
+        self.page_indexes = []
+
+    def __len__(self):
+        return len(self.instances)
+
+
 class ImageBatchSampler(BaseBatchSampler):

     SUFFIX = ["jpg", "png", "jpeg", "JPEG", "JPG", "bmp"]
@@ -60,24 +80,25 @@ class ImageBatchSampler(BaseBatchSampler):
         if not isinstance(inputs, list):
             inputs = [inputs]

-        batch = []
+        batch = {"instances": [], "input_paths": [], "page_indexes": []}
+        batch = ImgInstance()
         for input in inputs:
             if isinstance(input, np.ndarray):
-                batch.append(input)
+                batch.append(input, None, None)
                 if len(batch) == self.batch_size:
                     yield batch
-                    batch = []
+                    batch.reset()
             elif isinstance(input, str) and input.split(".")[-1] in ("PDF", "pdf"):
                 file_path = (
                     self._download_from_url(input)
                     if input.startswith("http")
                     else input
                 )
-                for page_img in self.pdf_reader.read(file_path):
-                    batch.append(page_img)
+                for page_idx, page_img in enumerate(self.pdf_reader.read(file_path)):
+                    batch.append(page_img, file_path, page_idx)
                     if len(batch) == self.batch_size:
                         yield batch
-                        batch = []
+                        batch.reset()
             elif isinstance(input, str):
                 file_path = (
                     self._download_from_url(input)
@@ -86,10 +107,10 @@ class ImageBatchSampler(BaseBatchSampler):
                 )
                 file_list = self._get_files_list(file_path)
                 for file_path in file_list:
-                    batch.append(file_path)
+                    batch.append(file_path, file_path, None)
                     if len(batch) == self.batch_size:
                         yield batch
-                        batch = []
+                        batch.reset()
             else:
                 logging.warning(
                     f"Not supported input data type! Only `numpy.ndarray` and `str` are supported! So has been ignored: {input}."

+ 0 - 3
paddlex/inference/common/result/base_cv_result.py

@@ -26,9 +26,6 @@ class BaseCVResult(BaseResult, ImgMixin):

         Args:
             data (dict): The initial data.
-
-        Raises:
-            AssertionError: If the required key (`BaseCVResult.INPUT_IMG_KEY`) are not found in the data.
         """
         super().__init__(data)
         ImgMixin.__init__(self, "pillow")

+ 15 - 0
paddlex/inference/common/result/base_result.py

@@ -13,6 +13,11 @@
 # limitations under the License.

 import inspect
+from pathlib import Path
+import time
+import random
+
+from ....utils import logging
 from .mixin import StrMixin, JsonMixin, ImgMixin


@@ -45,3 +50,13 @@ class BaseResult(dict, JsonMixin, StrMixin):
                 func(save_path=save_path)
             else:
                 func()
+
+    def _get_input_fn(self):
+        if (fp := self["input_path"]) is None:
+            timestamp = int(time.time())
+            random_number = random.randint(1000, 9999)
+            fp = f"{timestamp}_{random_number}"
+            logging.warning(
+                f"There is not input file name as reference for name of saved result file. So the saved result file would be named with timestamp and random number: `{fp}`."
+            )
+        return Path(fp).name

+ 17 - 18
paddlex/inference/common/result/mixin.py

@@ -144,9 +144,8 @@ class JsonMixin:
             return mime_type is not None and mime_type == "application/json"

         if not _is_json_file(save_path):
-            fp = Path(self["input_path"])
-            stem = fp.stem
-            suffix = fp.suffix
+            fn = Path(self._get_input_fn())
+            stem = fn.stem
             base_save_path = Path(save_path)
             for key in self.json:
                 save_path = base_save_path / f"{stem}_{key}.json"
@@ -247,9 +246,8 @@ class Base64Mixin:
             **kwargs: Additional keyword arguments that will be passed to the base64 writer.
         """
         if not str(save_path).lower().endswith((".b64")):
-            fp = Path(self["input_path"])
-            stem = fp.stem
-            suffix = fp.suffix
+            fn = Path(self._get_input_fn())
+            stem = fn.stem
             base_save_path = Path(save_path)
             for key in self.base64:
                 save_path = base_save_path / f"{stem}_{key}.b64"
@@ -312,9 +310,9 @@ class ImgMixin:
             return mime_type is not None and mime_type.startswith("image/")

         if not _is_image_file(save_path):
-            fp = Path(self["input_path"])
-            stem = fp.stem
-            suffix = fp.suffix
+            fn = Path(self._get_input_fn())
+            suffix = fn.suffix if _is_image_file(fn) else ".png"
+            stem = fn.stem
             base_save_path = Path(save_path)
             for key in self.img:
                 save_path = base_save_path / f"{stem}_{key}{suffix}"
@@ -380,8 +378,9 @@ class CSVMixin:
             return mime_type is not None and mime_type == "text/csv"

         if not _is_csv_file(save_path):
-            fp = Path(self["input_path"])
-            stem = fp.stem
+            fn = Path(self._get_input_fn())
+            fn = Path(self._get_input_fn())
+            stem = fn.stem
             base_save_path = Path(save_path)
             for key in self.csv:
                 save_path = base_save_path / f"{stem}_{key}.csv"
@@ -444,8 +443,8 @@ class HtmlMixin:
             return mime_type is not None and mime_type == "text/html"

         if not _is_html_file(save_path):
-            fp = Path(self["input_path"])
-            stem = fp.stem
+            fn = Path(self._get_input_fn())
+            stem = fn.stem
             base_save_path = Path(save_path)
             for key in self.html:
                 save_path = base_save_path / f"{stem}_{key}.html"
@@ -512,8 +511,8 @@ class XlsxMixin:
             )

         if not _is_xlsx_file(save_path):
-            fp = Path(self["input_path"])
-            stem = fp.stem
+            fn = Path(self._get_input_fn())
+            stem = fn.stem
             base_save_path = Path(save_path)
             for key in self.xlsx:
                 save_path = base_save_path / f"{stem}_{key}.xlsx"
@@ -578,9 +577,9 @@ class VideoMixin:
         video_writer = VideoWriter(backend=self._backend, *args, **kwargs)

         if not _is_video_file(save_path):
-            fp = Path(self["input_path"])
-            stem = fp.stem
-            suffix = fp.suffix
+            fn = Path(self._get_input_fn())
+            stem = fn.stem
+            suffix = fn.suffix if _is_video_file(fn) else ".mp4"
             base_save_path = Path(save_path)
             for key in self.video:
                 save_path = base_save_path / f"{stem}_{key}{suffix}"

+ 3 - 2
paddlex/inference/models_new/anomaly_detection/predictor.py

@@ -100,7 +100,7 @@ class UadPredictor(BasicPredictor):
         Returns:
             dict: A dictionary containing the input path, raw image, and predicted segmentation maps for every instance of the batch. Keys include 'input_path', 'input_img', and 'pred'.
         """
-        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data)
+        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data.instances)
         batch_imgs = self.preprocessors["Resize"](imgs=batch_raw_imgs)
         batch_imgs = self.preprocessors["Normalize"](imgs=batch_imgs)
         batch_imgs = self.preprocessors["ToCHW"](imgs=batch_imgs)
@@ -111,7 +111,8 @@ class UadPredictor(BasicPredictor):
             batch_preds = np.split(batch_preds[0], len(batch_data), axis=0)

         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "pred": batch_preds,
         }

+ 0 - 2
paddlex/inference/models_new/common/static_infer.py

@@ -110,8 +110,6 @@ class StaticInfer:
             self._update_option(option)

     def _reset(self) -> None:
-        if not self.option:
-            self.option = PaddlePredictorOption()
         logging.debug(f"Env: {self.option}")
         (
             predictor,

+ 3 - 2
paddlex/inference/models_new/face_feature/predictor.py

@@ -45,7 +45,7 @@ class FaceFeaturePredictor(ImageFeaturePredictor):
         Returns:
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data)
+        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data.instances)
         batch_imgs = self.preprocessors["Resize"](imgs=batch_raw_imgs)
         batch_imgs = self.preprocessors["Normalize"](imgs=batch_imgs)
         batch_imgs = self.preprocessors["ToCHW"](imgs=batch_imgs)
@@ -58,7 +58,8 @@ class FaceFeaturePredictor(ImageFeaturePredictor):
         features = self.postprocessors["NormalizeFeatures"](batch_preds)

         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "feature": features,
         }

+ 3 - 2
paddlex/inference/models_new/formula_recognition/predictor.py

@@ -85,7 +85,7 @@ class FormulaRecPredictor(BasicPredictor):
         return pre_tfs, infer, post_op

     def process(self, batch_data):
-        batch_raw_imgs = self.pre_tfs["Read"](imgs=batch_data)
+        batch_raw_imgs = self.pre_tfs["Read"](imgs=batch_data.instances)
         if self.model_name in ("LaTeX_OCR_rec"):
             batch_imgs = self.pre_tfs["MinMaxResize"](imgs=batch_raw_imgs)
             batch_imgs = self.pre_tfs["LatexTestTransform"](imgs=batch_imgs)
@@ -105,7 +105,8 @@ class FormulaRecPredictor(BasicPredictor):
         batch_preds = [p.reshape([-1]) for p in batch_preds[0]]
         rec_formula = self.post_op(batch_preds)
         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "rec_formula": rec_formula,
         }

+ 9 - 0
paddlex/inference/models_new/formula_recognition/result.py

@@ -33,6 +33,15 @@ from ....utils.file_interface import custom_open


 class FormulaRecResult(BaseCVResult):
+    def _get_input_fn(self):
+        fn = super()._get_input_fn()
+        if (page_idx := self["page_index"]) is not None:
+            fp = Path(fn)
+            stem, suffix = fp.stem, fp.suffix
+            return f"{stem}_{page_idx}{suffix}"
+        else:
+            return fn
+
     def _to_str(self, *args, **kwargs):
         data = copy.deepcopy(self)
         data.pop("input_img")

+ 3 - 2
paddlex/inference/models_new/image_classification/predictor.py

@@ -112,7 +112,7 @@ class ClasPredictor(BasicPredictor):
         Returns:
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data)
+        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data.instances)
         batch_imgs = self.preprocessors["Resize"](imgs=batch_raw_imgs)
         if "Crop" in self.preprocessors:
             batch_imgs = self.preprocessors["Crop"](imgs=batch_imgs)
@@ -124,7 +124,8 @@ class ClasPredictor(BasicPredictor):
             batch_preds, topk=topk or self.topk
         )
         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "class_ids": batch_class_ids,
             "scores": batch_scores,

+ 1 - 0
paddlex/inference/models_new/image_classification/result.py

@@ -24,6 +24,7 @@ from ...common.result import BaseCVResult, StrMixin, JsonMixin


 class TopkResult(BaseCVResult):
+
     def _to_str(self, *args, **kwargs):
         data = copy.deepcopy(self)
         data.pop("input_img")

+ 3 - 2
paddlex/inference/models_new/image_feature/predictor.py

@@ -107,7 +107,7 @@ class ImageFeaturePredictor(BasicPredictor):
         Returns:
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data)
+        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data.instances)
         batch_imgs = self.preprocessors["Resize"](imgs=batch_raw_imgs)
         batch_imgs = self.preprocessors["Normalize"](imgs=batch_imgs)
         batch_imgs = self.preprocessors["ToCHW"](imgs=batch_imgs)
@@ -115,7 +115,8 @@ class ImageFeaturePredictor(BasicPredictor):
         batch_preds = self.infer(x=x)
         features = self.postprocessors["NormalizeFeatures"](batch_preds)
         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "feature": features,
         }

+ 3 - 2
paddlex/inference/models_new/image_multilabel_classification/predictor.py

@@ -66,7 +66,7 @@ class MLClasPredictor(ClasPredictor):
         Returns:
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data)
+        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data.instances)
         batch_imgs = self.preprocessors["Resize"](imgs=batch_raw_imgs)
         batch_imgs = self.preprocessors["Normalize"](imgs=batch_imgs)
         batch_imgs = self.preprocessors["ToCHW"](imgs=batch_imgs)
@@ -79,7 +79,8 @@ class MLClasPredictor(ClasPredictor):
             threshold=self.threshold if threshold is None else threshold,
         )
         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "class_ids": batch_class_ids,
             "scores": batch_scores,

+ 3 - 2
paddlex/inference/models_new/image_unwarping/predictor.py

@@ -90,7 +90,7 @@ class WarpPredictor(BasicPredictor):
         Returns:
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data)
+        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data.instances)
         batch_imgs = self.preprocessors["Normalize"](imgs=batch_raw_imgs)
         batch_imgs = self.preprocessors["ToCHW"](imgs=batch_imgs)
         x = self.preprocessors["ToBatch"](imgs=batch_imgs)
@@ -98,7 +98,8 @@ class WarpPredictor(BasicPredictor):
         batch_warp_preds = self.postprocessors["DocTrPostProcess"](batch_preds)

         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "doctr_img": batch_warp_preds,
         }

+ 3 - 2
paddlex/inference/models_new/instance_segmentation/predictor.py

@@ -117,7 +117,7 @@ class InstanceSegPredictor(DetPredictor):
             dict: A dictionary containing the input path, raw image, box and mask
                 for every instance of the batch. Keys include 'input_path', 'input_img', 'boxes' and 'masks'.
         """
-        datas = batch_data
+        datas = batch_data.instances
         # preprocess
         for pre_op in self.pre_ops[:-1]:
             datas = pre_op(datas)
@@ -146,7 +146,8 @@ class InstanceSegPredictor(DetPredictor):
         )

         return {
-            "input_path": [data.get("img_path", None) for data in datas],
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": [data["ori_img"] for data in datas],
             "boxes": [result["boxes"] for result in boxes_masks],
             "masks": [result["masks"] for result in boxes_masks],

+ 31 - 22
paddlex/inference/models_new/object_detection/predictor.py

@@ -83,20 +83,26 @@ class DetPredictor(BasicPredictor):
                 raise ValueError(
                     f"The type of `img_size` must be int or Tuple[int, int], but got {type(img_size)}."
                 )
-        
+
         if layout_unclip_ratio is not None:
             if isinstance(layout_unclip_ratio, float):
                 layout_unclip_ratio = (layout_unclip_ratio, layout_unclip_ratio)
             elif isinstance(layout_unclip_ratio, (tuple, list)):
-                assert len(layout_unclip_ratio) == 2, f"The length of `layout_unclip_ratio` should be 2."
+                assert (
+                    len(layout_unclip_ratio) == 2
+                ), f"The length of `layout_unclip_ratio` should be 2."
             else:
                 raise ValueError(
                     f"The type of `layout_unclip_ratio` must be float or Tuple[float, float], but got {type(layout_unclip_ratio)}."
                 )
-        
+
         if layout_merge_bboxes_mode is not None:
-            assert layout_merge_bboxes_mode in ["union", "large", "small"], \
-                f"The value of `layout_merge_bboxes_mode` must be one of ['union', 'large', 'small'], but got {layout_merge_bboxes_mode}"
+            assert layout_merge_bboxes_mode in [
+                "union",
+                "large",
+                "small",
+            ], f"The value of `layout_merge_bboxes_mode` must be one of ['union', 'large', 'small'], but got {layout_merge_bboxes_mode}"
+
         self.img_size = img_size
         self.threshold = threshold
         self.layout_nms = layout_nms
@@ -197,13 +203,14 @@ class DetPredictor(BasicPredictor):
         else:
             return [{"boxes": np.array(res)} for res in pred_box]

-    def process(self, 
-            batch_data: List[Any], 
-            threshold: Optional[Union[float, dict]] = None,
-            layout_nms: Optional[bool] = None,
-            layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
-            layout_merge_bboxes_mode: Optional[str] = None,
-        ):
+    def process(
+        self,
+        batch_data: List[Any],
+        threshold: Optional[Union[float, dict]] = None,
+        layout_nms: bool = False,
+        layout_unclip_ratio: Optional[Union[float, Tuple[float, float]]] = None,
+        layout_merge_bboxes_mode: Optional[str] = None,
+    ):
         """
         Process a batch of data through the preprocessing, inference, and postprocessing.

@@ -218,7 +225,7 @@ class DetPredictor(BasicPredictor):
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names
                 for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        datas = batch_data
+        datas = batch_data.instances
         # preprocess
         for pre_op in self.pre_ops[:-1]:
             datas = pre_op(datas)
@@ -233,16 +240,18 @@ class DetPredictor(BasicPredictor):
         preds_list = self._format_output(batch_preds)
         # postprocess
         boxes = self.post_op(
-            preds_list, 
-            datas, 
-            threshold = threshold or self.threshold,
+            preds_list,
+            datas,
+            threshold=threshold or self.threshold,
             layout_nms=layout_nms or self.layout_nms,
             layout_unclip_ratio=layout_unclip_ratio or self.layout_unclip_ratio,
-            layout_merge_bboxes_mode=layout_merge_bboxes_mode or self.layout_merge_bboxes_mode
+            layout_merge_bboxes_mode=layout_merge_bboxes_mode
+            or self.layout_merge_bboxes_mode,
         )

         return {
-            "input_path": [data.get("img_path", None) for data in datas],
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": [data["ori_img"] for data in datas],
             "boxes": boxes,
         }
@@ -330,7 +339,7 @@ class DetPredictor(BasicPredictor):
         if self.layout_unclip_ratio is None:
             self.layout_unclip_ratio = self.config.get("layout_unclip_ratio", None)
         if self.layout_merge_bboxes_mode is None:
-            self.layout_merge_bboxes_mode = self.config.get("layout_merge_bboxes_mode", None)
-        return DetPostProcess(
-            labels=self.config["label_list"]
-        )
+            self.layout_merge_bboxes_mode = self.config.get(
+                "layout_merge_bboxes_mode", None
+            )
+        return DetPostProcess(labels=self.config["label_list"])

+ 3 - 2
paddlex/inference/models_new/semantic_segmentation/predictor.py

@@ -120,7 +120,7 @@ class SegPredictor(BasicPredictor):
         Returns:
             dict: A dictionary containing the input path, raw image, and predicted segmentation maps for every instance of the batch. Keys include 'input_path', 'input_img', and 'pred'.
         """
-        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data)
+        batch_raw_imgs = self.preprocessors["Read"](imgs=batch_data.instances)
         batch_imgs = self.preprocessors["Resize"](
             imgs=batch_raw_imgs, target_size=target_size
         )
@@ -135,7 +135,8 @@ class SegPredictor(BasicPredictor):
         batch_preds = self.postprocessers(batch_preds, batch_raw_imgs)

         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "pred": batch_preds,
         }

+ 3 - 2
paddlex/inference/models_new/table_structure_recognition/predictor.py

@@ -84,7 +84,7 @@ class TablePredictor(BasicPredictor):
         Returns:
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        batch_raw_imgs = self.preprocessors[0](imgs=batch_data)  # ReadImage
+        batch_raw_imgs = self.preprocessors[0](imgs=batch_data.instances)  # ReadImage
         ori_shapes = []
         for s in range(len(batch_raw_imgs)):
             ori_shapes.append([batch_raw_imgs[s].shape[1], batch_raw_imgs[s].shape[0]])
@@ -116,7 +116,8 @@ class TablePredictor(BasicPredictor):
             table_result_structure_score.append(table_result[i]["structure_score"])

         final_result = {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "bbox": table_result_bbox,
             "structure": table_result_structure,

+ 8 - 2
paddlex/inference/models_new/table_structure_recognition/result.py

@@ -25,8 +25,14 @@ from ...common.result import BaseCVResult, StrMixin, JsonMixin
 class TableRecResult(BaseCVResult):
     """SaveTableResults"""

-    def __init__(self, data):
-        super().__init__(data)
+    def _get_input_fn(self):
+        fn = super()._get_input_fn()
+        if (page_idx := self["page_index"]) is not None:
+            fp = Path(fn)
+            stem, suffix = fp.stem, fp.suffix
+            return f"{stem}_{page_idx}{suffix}"
+        else:
+            return fn

     def _to_img(self):
         image = self["input_img"]

+ 3 - 2
paddlex/inference/models_new/text_detection/predictor.py

@@ -95,7 +95,7 @@ class TextDetPredictor(BasicPredictor):
         unclip_ratio: Union[float, None] = None,
     ):

-        batch_raw_imgs = self.pre_tfs["Read"](imgs=batch_data)
+        batch_raw_imgs = self.pre_tfs["Read"](imgs=batch_data.instances)
         batch_imgs, batch_shapes = self.pre_tfs["Resize"](
             imgs=batch_raw_imgs,
             limit_side_len=limit_side_len or self.limit_side_len,
@@ -113,7 +113,8 @@ class TextDetPredictor(BasicPredictor):
             unclip_ratio=unclip_ratio or self.unclip_ratio,
         )
         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "dt_polys": polys,
             "dt_scores": scores,

+ 9 - 2
paddlex/inference/models_new/text_detection/result.py

@@ -15,14 +15,21 @@
 import copy
 import numpy as np
 import cv2
+from pathlib import Path

 from ...common.result import BaseCVResult, StrMixin, JsonMixin


 class TextDetResult(BaseCVResult):

-    def __init__(self, data):
-        super().__init__(data)
+    def _get_input_fn(self):
+        fn = super()._get_input_fn()
+        if (page_idx := self["page_index"]) is not None:
+            fp = Path(fn)
+            stem, suffix = fp.stem, fp.suffix
+            return f"{stem}_{page_idx}{suffix}"
+        else:
+        return fn

     def _to_img(self):
         """draw rectangle"""

+ 3 - 2
paddlex/inference/models_new/text_recognition/predictor.py

@@ -67,13 +67,14 @@ class TextRecPredictor(BasicPredictor):
         return pre_tfs, infer, post_op

     def process(self, batch_data):
-        batch_raw_imgs = self.pre_tfs["Read"](imgs=batch_data)
+        batch_raw_imgs = self.pre_tfs["Read"](imgs=batch_data.instances)
         batch_imgs = self.pre_tfs["ReisizeNorm"](imgs=batch_raw_imgs)
         x = self.pre_tfs["ToBatch"](imgs=batch_imgs)
         batch_preds = self.infer(x=x)
         texts, scores = self.post_op(batch_preds)
         return {
-            "input_path": batch_data,
+            "input_path": batch_data.input_paths,
+            "page_index": batch_data.page_indexes,
             "input_img": batch_raw_imgs,
             "rec_text": texts,
             "rec_score": scores,

+ 4 - 2
paddlex/inference/pipelines_new/attribute_recognition/pipeline.py

@@ -63,9 +63,11 @@ class AttributeRecPipeline(BasePipeline):
         det_threshold = self.det_threshold if det_threshold is None else det_threshold
         cls_threshold = self.cls_threshold if cls_threshold is None else cls_threshold
         for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            raw_imgs = self.img_reader(batch_data)
+            raw_imgs = self.img_reader(batch_data.instances)
             all_det_res = list(self.det_model(raw_imgs, threshold=det_threshold))
-            for input_data, raw_img, det_res in zip(batch_data, raw_imgs, all_det_res):
+            for input_data, raw_img, det_res in zip(
+                batch_data.instances, raw_imgs, all_det_res
+            ):
                 cls_res = self.get_cls_result(raw_img, det_res, cls_threshold)
                 yield self.get_final_result(input_data, raw_img, det_res, cls_res)


+ 3 - 8
paddlex/inference/pipelines_new/doc_preprocessor/pipeline.py

@@ -163,13 +163,7 @@ class DocPreprocessorPipeline(BasePipeline):
             yield {"error": "the input params for model settings are invalid!"}

         for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            if not isinstance(batch_data[0], str):
-                # TODO: add support input_pth for ndarray and pdf
-                input_path = f"{img_id}.jpg"
-            else:
-                input_path = batch_data[0]
-
-            image_array = self.img_reader(batch_data)[0]
+            image_array = self.img_reader(batch_data.instances)[0]

             if model_settings["use_doc_orientation_classify"]:
                 pred = next(self.doc_ori_classify_model(image_array))
@@ -185,7 +179,8 @@ class DocPreprocessorPipeline(BasePipeline):
                 output_img = rot_img

             single_img_res = {
-                "input_path": input_path,
+                "input_path": batch_data.input_paths[0],
+                "page_index": batch_data.page_indexes[0],
                 "input_img": image_array,
                 "model_settings": model_settings,
                 "angle": angle,

+ 3 - 8
paddlex/inference/pipelines_new/formula_recognition/pipeline.py

@@ -234,13 +234,7 @@ class FormulaRecognitionPipeline(BasePipeline):
             yield {"error": "the input params for model settings are invalid!"}

         for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            if not isinstance(batch_data[0], str):
-                # TODO: add support input_pth for ndarray and pdf
-                input_path = f"{img_id}.jpg"
-            else:
-                input_path = batch_data[0]
-
-            image_array = self.img_reader(batch_data)[0]
+            image_array = self.img_reader(batch_data.instances)[0]

             if model_settings["use_doc_preprocessor"]:
                 doc_preprocessor_res = next(
@@ -301,7 +295,8 @@ class FormulaRecognitionPipeline(BasePipeline):
                     formula_res_list[idx] = formula_rec_res

             single_img_res = {
-                "input_path": input_path,
+                "input_path": batch_data.input_paths[0],
+                "page_index": batch_data.page_indexes[0],
                 "layout_det_res": layout_det_res,
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "formula_res_list": formula_res_list,

+ 3 - 8
paddlex/inference/pipelines_new/layout_parsing/pipeline.py

@@ -293,13 +293,7 @@ class LayoutParsingPipeline(BasePipeline):
             yield {"error": "the input params for model settings are invalid!"}

         for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            if not isinstance(batch_data[0], str):
-                # TODO: add support input_pth for ndarray and pdf
-                input_path = f"{img_id}.jpg"
-            else:
-                input_path = batch_data[0]
-
-            image_array = self.img_reader(batch_data)[0]
+            image_array = self.img_reader(batch_data.instances)[0]

             if model_settings["use_doc_preprocessor"]:
                 doc_preprocessor_res = next(
@@ -393,7 +387,8 @@ class LayoutParsingPipeline(BasePipeline):
                 formula_res_list = []

             single_img_res = {
-                "input_path": input_path,
+                "input_path": batch_data.input_paths[0],
+                "page_index": batch_data.page_indexes[0],
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "layout_det_res": layout_det_res,
                 "overall_ocr_res": overall_ocr_res,

+ 3 - 8
paddlex/inference/pipelines_new/layout_parsing/pipeline_v2.py

@@ -309,13 +309,7 @@ class LayoutParsingPipelineV2(BasePipeline):
             yield {"error": "the input params for model settings are invalid!"}

         for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            if not isinstance(batch_data[0], str):
-                # TODO: add support input_pth for ndarray and pdf
-                input_path = f"{img_id}"
-            else:
-                input_path = batch_data[0]
-
-            image_array = self.img_reader(batch_data)[0]
+            image_array = self.img_reader(batch_data.instances)[0]

             if model_settings["use_doc_preprocessor"]:
                 doc_preprocessor_res = next(
@@ -452,7 +446,8 @@ class LayoutParsingPipelineV2(BasePipeline):
             ]

             single_img_res = {
-                "input_path": input_path,
+                "input_path": batch_data.input_paths[0],
+                "page_index": batch_data.page_indexes[0],
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "layout_det_res": layout_det_res,
                 "overall_ocr_res": overall_ocr_res,

+ 3 - 8
paddlex/inference/pipelines_new/ocr/pipeline.py

@@ -304,13 +304,7 @@ class OCRPipeline(BasePipeline):
             text_rec_score_thresh = self.text_rec_score_thresh

         for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            if not isinstance(batch_data[0], str):
-                # TODO: add support input_pth for ndarray and pdf
-                input_path = f"{img_id}.jpg"
-            else:
-                input_path = batch_data[0]
-
-            image_array = self.img_reader(batch_data)[0]
+            image_array = self.img_reader(batch_data.instances)[0]

             if model_settings["use_doc_preprocessor"]:
                 doc_preprocessor_res = next(
@@ -335,7 +329,8 @@ class OCRPipeline(BasePipeline):
             dt_polys = self._sort_boxes(dt_polys)

             single_img_res = {
-                "input_path": input_path,
+                "input_path": batch_data.input_paths[0],
+                "page_index": batch_data.page_indexes[0],
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "dt_polys": dt_polys,
                 "model_settings": model_settings,

+ 15 - 2
paddlex/inference/pipelines_new/ocr/result.py

@@ -29,6 +29,15 @@ from ...common.result import BaseCVResult, StrMixin, JsonMixin
 class OCRResult(BaseCVResult):
     """OCR result"""

+    def _get_input_fn(self):
+        fn = super()._get_input_fn()
+        if (page_idx := self["page_index"]) is not None:
+            fp = Path(fn)
+            stem, suffix = fp.stem, fp.suffix
+            return f"{stem}_{page_idx}{suffix}"
+        else:
+            return fn
+
     def get_minarea_rect(self, points: np.ndarray) -> np.ndarray:
         """
         Get the minimum area rectangle for the given points using OpenCV.
@@ -127,13 +136,15 @@
         """
         data = {}
         data["input_path"] = self["input_path"]
+        data["page_index"] = self["page_index"]
         data["model_settings"] = self["model_settings"]
         if self["model_settings"]["use_doc_preprocessor"]:
             data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
         data["dt_polys"] = self["dt_polys"]
         data["text_det_params"] = self["text_det_params"]
         data["text_type"] = self["text_type"]
-        data["textline_orientation_angles"] = self["textline_orientation_angles"]
+        if "textline_orientation_angles" in self:
+            data["textline_orientation_angles"] = self["textline_orientation_angles"]
         data["text_rec_score_thresh"] = self["text_rec_score_thresh"]
         data["rec_texts"] = self["rec_texts"]
         data["rec_scores"] = self["rec_scores"]
@@ -155,13 +166,15 @@
         """
         data = {}
         data["input_path"] = self["input_path"]
+        data["page_index"] = self["page_index"]
         data["model_settings"] = self["model_settings"]
         if self["model_settings"]["use_doc_preprocessor"]:
             data["doc_preprocessor_res"] = self["doc_preprocessor_res"].json["res"]
         data["dt_polys"] = self["dt_polys"]
         data["text_det_params"] = self["text_det_params"]
         data["text_type"] = self["text_type"]
-        data["textline_orientation_angles"] = self["textline_orientation_angles"]
+        if "textline_orientation_angles" in self:
+            data["textline_orientation_angles"] = self["textline_orientation_angles"]
         data["text_rec_score_thresh"] = self["text_rec_score_thresh"]
         data["text_rec_score_thresh"] = self["text_rec_score_thresh"]
         data["rec_texts"] = self["rec_texts"]
         data["rec_texts"] = self["rec_texts"]
         data["rec_scores"] = self["rec_scores"]
         data["rec_scores"] = self["rec_scores"]

+ 24 - 18
paddlex/inference/pipelines_new/seal_recognition/pipeline.py

@@ -75,11 +75,21 @@ class SealRecognitionPipeline(BasePipeline):
                 layout_kwargs["threshold"] = threshold
                 layout_kwargs["threshold"] = threshold
             if (layout_nms := layout_det_config.get("layout_nms", None)) is not None:
             if (layout_nms := layout_det_config.get("layout_nms", None)) is not None:
                 layout_kwargs["layout_nms"] = layout_nms
                 layout_kwargs["layout_nms"] = layout_nms
-            if (layout_unclip_ratio := layout_det_config.get("layout_unclip_ratio", None)) is not None:
+            if (
+                layout_unclip_ratio := layout_det_config.get(
+                    "layout_unclip_ratio", None
+                )
+            ) is not None:
                 layout_kwargs["layout_unclip_ratio"] = layout_unclip_ratio
                 layout_kwargs["layout_unclip_ratio"] = layout_unclip_ratio
-            if (layout_merge_bboxes_mode := layout_det_config.get("layout_merge_bboxes_mode", None)) is not None:
+            if (
+                layout_merge_bboxes_mode := layout_det_config.get(
+                    "layout_merge_bboxes_mode", None
+                )
+            ) is not None:
                 layout_kwargs["layout_merge_bboxes_mode"] = layout_merge_bboxes_mode
                 layout_kwargs["layout_merge_bboxes_mode"] = layout_merge_bboxes_mode
-            self.layout_det_model = self.create_model(layout_det_config, **layout_kwargs)
+            self.layout_det_model = self.create_model(
+                layout_det_config, **layout_kwargs
+            )
         seal_ocr_config = config.get("SubPipelines", {}).get(
             "SealOCR", {"pipeline_config_error": "config error for seal_ocr_pipeline!"}
         )
@@ -185,13 +195,7 @@ class SealRecognitionPipeline(BasePipeline):
             yield {"error": "the input params for model settings are invalid!"}
             yield {"error": "the input params for model settings are invalid!"}
 
 
         for img_id, batch_data in enumerate(self.batch_sampler(input)):
         for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            if not isinstance(batch_data[0], str):
-                # TODO: add support input_pth for ndarray and pdf
-                input_path = f"{img_id}.jpg"
-            else:
-                input_path = batch_data[0]
-
-            image_array = self.img_reader(batch_data)[0]
+            image_array = self.img_reader(batch_data.instances)[0]
 
             if model_settings["use_doc_preprocessor"]:
                 doc_preprocessor_res = next(
@@ -226,14 +230,15 @@ class SealRecognitionPipeline(BasePipeline):
                 seal_region_id += 1
             else:
                 if model_settings["use_layout_detection"]:
-                    layout_det_res = next(self.layout_det_model(
-                        doc_preprocessor_image,
-                        threshold=layout_threshold,
-                        layout_nms=layout_nms,
-                        layout_unclip_ratio=layout_unclip_ratio,
-                        layout_merge_bboxes_mode=layout_merge_bboxes_mode
+                    layout_det_res = next(
+                        self.layout_det_model(
+                            doc_preprocessor_image,
+                            threshold=layout_threshold,
+                            layout_nms=layout_nms,
+                            layout_unclip_ratio=layout_unclip_ratio,
+                            layout_merge_bboxes_mode=layout_merge_bboxes_mode,
+                        )
                     )
-                )
 
                 for box_info in layout_det_res["boxes"]:
                     if box_info["label"].lower() in ["seal"]:
@@ -257,7 +262,8 @@ class SealRecognitionPipeline(BasePipeline):
                         seal_region_id += 1
 
             single_img_res = {
-                "input_path": input_path,
+                "input_path": batch_data.input_paths[0],
+                "page_index": batch_data.page_indexes[0],
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "layout_det_res": layout_det_res,
                 "layout_det_res": layout_det_res,
                 "seal_res_list": seal_res_list,
                 "seal_res_list": seal_res_list,

+ 3 - 8
paddlex/inference/pipelines_new/table_recognition/pipeline.py

@@ -300,13 +300,7 @@ class TableRecognitionPipeline(BasePipeline):
             yield {"error": "the input params for model settings are invalid!"}
             yield {"error": "the input params for model settings are invalid!"}
 
 
         for img_id, batch_data in enumerate(self.batch_sampler(input)):
         for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            if not isinstance(batch_data[0], str):
-                # TODO: add support input_pth for ndarray and pdf
-                input_path = f"{img_id}"
-            else:
-                input_path = batch_data[0]
-
-            image_array = self.img_reader(batch_data)[0]
+            image_array = self.img_reader(batch_data.instances)[0]
 
             if model_settings["use_doc_preprocessor"]:
                 doc_preprocessor_res = next(
@@ -368,7 +362,8 @@ class TableRecognitionPipeline(BasePipeline):
                         table_region_id += 1
 
             single_img_res = {
-                "input_path": input_path,
+                "input_path": batch_data.input_paths[0],
+                "page_index": batch_data.page_indexes[0],
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "layout_det_res": layout_det_res,
                 "layout_det_res": layout_det_res,
                 "overall_ocr_res": overall_ocr_res,
                 "overall_ocr_res": overall_ocr_res,

+ 3 - 8
paddlex/inference/pipelines_new/table_recognition/pipeline_v2.py

@@ -368,13 +368,7 @@ class TableRecognitionPipelineV2(BasePipeline):
             yield {"error": "the input params for model settings are invalid!"}
             yield {"error": "the input params for model settings are invalid!"}
 
 
         for img_id, batch_data in enumerate(self.batch_sampler(input)):
         for img_id, batch_data in enumerate(self.batch_sampler(input)):
-            if not isinstance(batch_data[0], str):
-                # TODO: add support input_pth for ndarray and pdf
-                input_path = f"{img_id}"
-            else:
-                input_path = batch_data[0]
-
-            image_array = self.img_reader(batch_data)[0]
+            image_array = self.img_reader(batch_data.instances)[0]
 
             if model_settings["use_doc_preprocessor"]:
                 doc_preprocessor_res = next(
@@ -436,7 +430,8 @@ class TableRecognitionPipelineV2(BasePipeline):
                         table_region_id += 1
 
             single_img_res = {
-                "input_path": input_path,
+                "input_path": batch_data.input_paths[0],
+                "page_index": batch_data.page_indexes[0],
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "doc_preprocessor_res": doc_preprocessor_res,
                 "layout_det_res": layout_det_res,
                 "layout_det_res": layout_det_res,
                 "overall_ocr_res": overall_ocr_res,
                 "overall_ocr_res": overall_ocr_res,

+ 15 - 2
paddlex/inference/pipelines_new/table_recognition/result.py

@@ -30,6 +30,15 @@ class SingleTableRecognitionResult(BaseCVResult, HtmlMixin, XlsxMixin):
         HtmlMixin.__init__(self)
         XlsxMixin.__init__(self)
 
+    def _get_input_fn(self):
+        fn = super()._get_input_fn()
+        if (page_idx := self["page_index"]) is not None:
+            fp = Path(fn)
+            stem, suffix = fp.stem, fp.suffix
+            return f"{stem}_{page_idx}{suffix}"
+        else:
+            return fn
+
     def _to_html(self) -> Dict[str, str]:
         """Converts the prediction to its corresponding HTML representation.
 
@@ -101,7 +110,9 @@ class TableRecognitionResult(BaseCVResult, HtmlMixin, XlsxMixin):
         res_img_dict.update(**self["overall_ocr_res"].img)
 
         if len(self["table_res_list"]) > 0:
-            table_cell_img = Image.fromarray(copy.deepcopy(self["doc_preprocessor_res"]["output_img"]))
+            table_cell_img = Image.fromarray(
+                copy.deepcopy(self["doc_preprocessor_res"]["output_img"])
+            )
             table_draw = ImageDraw.Draw(table_cell_img)
             rectangle_color = (255, 0, 0)
             for sno in range(len(self["table_res_list"])):
@@ -109,7 +120,9 @@ class TableRecognitionResult(BaseCVResult, HtmlMixin, XlsxMixin):
                 cell_box_list = table_res["cell_box_list"]
                 for box in cell_box_list:
                     x1, y1, x2, y2 = [int(pos) for pos in box]
-                    table_draw.rectangle([x1, y1, x2, y2], outline=rectangle_color, width=2)
+                    table_draw.rectangle(
+                        [x1, y1, x2, y2], outline=rectangle_color, width=2
+                    )
             res_img_dict["table_cell_img"] = table_cell_img
             res_img_dict["table_cell_img"] = table_cell_img
         return res_img_dict
         return res_img_dict