浏览代码

[Feat] New pipeline serving (#2848)

* Add high-stability serving docs

* Add notice

* Add return_img_urls option

* Add new app code and update docs

* Reduce redundant code

* Update docs

* Refactor

* Fix and update

* Support video classification

* Support inference params

* Add image_classification

* Separate schemas from apps

* Separate infra and basic_serving

* Update

* Support multi-label image classification

* Fix code style and update OCR inference params

* Add serialization/deserialization methods to IndexData

* Revert "Add high-stability serving docs"

This reverts commit 066f0f72a5a78a903c7ee4636b31a17112bed4ad.

* main operation->primary operation

* Revert doc changes

* Update OCR fastapi app

* Update OCR

* update seal rec

* Update OCR

* Update ppchatocr

* Update OCR schemas

* Update OCR

* Revert to old entry

* Move constants to top

* Remove inferenceParams

* Update threshold parameters

* Update object detection schema
Lin Manhui 10 月之前
父节点
当前提交
1d580e073a
共有 84 个文件被更改,包括 5108 次插入 和 52 次删除
  1. 2 2
      paddlex/inference/__init__.py
  2. 12 2
      paddlex/inference/pipelines/serving/_pipeline_apps/_common/cv.py
  3. 24 10
      paddlex/inference/pipelines/serving/_pipeline_apps/_common/ocr.py
  4. 1 1
      paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py
  5. 1 0
      paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py
  6. 2 2
      paddlex/inference/pipelines/serving/_pipeline_apps/multi_label_image_classification.py
  7. 2 2
      paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py
  8. 2 2
      paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py
  9. 2 1
      paddlex/inference/pipelines/serving/app.py
  10. 33 18
      paddlex/inference/pipelines_new/__init__.py
  11. 0 2
      paddlex/inference/pipelines_new/base.py
  12. 1 1
      paddlex/inference/pipelines_new/components/__init__.py
  13. 10 0
      paddlex/inference/pipelines_new/components/faisser.py
  14. 2 5
      paddlex/inference/pipelines_new/pp_shitu_v2/pipeline.py
  15. 13 0
      paddlex/inference/serving/__init__.py
  16. 18 0
      paddlex/inference/serving/basic_serving/__init__.py
  17. 196 0
      paddlex/inference/serving/basic_serving/_app.py
  18. 36 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py
  19. 13 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py
  20. 98 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py
  21. 36 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py
  22. 90 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py
  23. 64 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py
  24. 89 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py
  25. 225 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py
  26. 96 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py
  27. 66 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py
  28. 70 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py
  29. 81 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py
  30. 107 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py
  31. 89 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py
  32. 74 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py
  33. 99 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py
  34. 81 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py
  35. 199 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py
  36. 220 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py
  37. 106 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py
  38. 64 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py
  39. 69 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py
  40. 107 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py
  41. 56 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py
  42. 55 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py
  43. 56 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py
  44. 81 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py
  45. 73 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py
  46. 89 0
      paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py
  47. 36 0
      paddlex/inference/serving/basic_serving/_server.py
  48. 13 0
      paddlex/inference/serving/infra/__init__.py
  49. 36 0
      paddlex/inference/serving/infra/config.py
  50. 72 0
      paddlex/inference/serving/infra/models.py
  51. 175 0
      paddlex/inference/serving/infra/storage.py
  52. 259 0
      paddlex/inference/serving/infra/utils.py
  53. 13 0
      paddlex/inference/serving/schemas/__init__.py
  54. 39 0
      paddlex/inference/serving/schemas/anomaly_detection.py
  55. 53 0
      paddlex/inference/serving/schemas/doc_preprocessor.py
  56. 124 0
      paddlex/inference/serving/schemas/face_recognition.py
  57. 54 0
      paddlex/inference/serving/schemas/formula_recognition.py
  58. 45 0
      paddlex/inference/serving/schemas/image_classification.py
  59. 47 0
      paddlex/inference/serving/schemas/image_multilabel_classification.py
  60. 59 0
      paddlex/inference/serving/schemas/instance_segmentation.py
  61. 79 0
      paddlex/inference/serving/schemas/layout_parsing.py
  62. 57 0
      paddlex/inference/serving/schemas/multilingual_speech_recognition.py
  63. 52 0
      paddlex/inference/serving/schemas/object_detection.py
  64. 61 0
      paddlex/inference/serving/schemas/ocr.py
  65. 61 0
      paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py
  66. 128 0
      paddlex/inference/serving/schemas/pp_chatocrv3_doc.py
  67. 124 0
      paddlex/inference/serving/schemas/pp_shituv2.py
  68. 67 0
      paddlex/inference/serving/schemas/seal_recognition.py
  69. 45 0
      paddlex/inference/serving/schemas/semantic_segmentation.py
  70. 13 0
      paddlex/inference/serving/schemas/shared/__init__.py
  71. 23 0
      paddlex/inference/serving/schemas/shared/classification.py
  72. 23 0
      paddlex/inference/serving/schemas/shared/image_segmentation.py
  73. 22 0
      paddlex/inference/serving/schemas/shared/object_detection.py
  74. 25 0
      paddlex/inference/serving/schemas/shared/ocr.py
  75. 52 0
      paddlex/inference/serving/schemas/small_object_detection.py
  76. 68 0
      paddlex/inference/serving/schemas/table_recognition.py
  77. 36 0
      paddlex/inference/serving/schemas/ts_anomaly_detection.py
  78. 37 0
      paddlex/inference/serving/schemas/ts_classification.py
  79. 36 0
      paddlex/inference/serving/schemas/ts_forecast.py
  80. 61 0
      paddlex/inference/serving/schemas/vehicle_attribute_recognition.py
  81. 44 0
      paddlex/inference/serving/schemas/video_classification.py
  82. 56 0
      paddlex/inference/serving/schemas/video_detection.py
  83. 2 4
      paddlex/paddlex_cli.py
  84. 1 0
      paddlex/serving_requirements.txt

+ 2 - 2
paddlex/inference/__init__.py

@@ -17,9 +17,9 @@ from ..utils.flags import USE_NEW_INFERENCE, NEW_PREDICTOR
 
 if USE_NEW_INFERENCE:
     logging.warning("=" * 20 + " Using pipelines_new " + "=" * 20)
-    from .pipelines_new import create_pipeline
+    from .pipelines_new import create_pipeline, load_pipeline_config
 else:
-    from .pipelines import create_pipeline
+    from .pipelines import create_pipeline, load_pipeline_config
 if NEW_PREDICTOR:
     logging.warning("=" * 20 + " Using models_new " + "=" * 20)
     from .models_new import create_predictor

+ 12 - 2
paddlex/inference/pipelines/serving/_pipeline_apps/_common/cv.py

@@ -27,10 +27,19 @@ def postprocess_image(
     image: ArrayLike,
     log_id: str,
     filename: str,
-    file_storage: Optional[Storage],
     *,
+    file_storage: Optional[Storage] = None,
+    return_url: bool = False,
     max_img_size: Optional[Tuple[int, int]] = None,
 ) -> str:
+    if return_url:
+        if not file_storage:
+            raise ValueError(
+                "`file_storage` must not be None when URLs need to be returned."
+            )
+        if not isinstance(file_storage, SupportsGetURL):
+            raise TypeError("The provided storage does not support getting URLs.")
+
     key = f"{log_id}/{filename}"
     ext = os.path.splitext(filename)[1]
     image = np.asarray(image)
@@ -44,6 +53,7 @@ def postprocess_image(
     img_bytes = serving_utils.image_array_to_bytes(image, ext=ext)
     if file_storage is not None:
         file_storage.set(key, img_bytes)
-        if isinstance(file_storage, SupportsGetURL):
+        if return_url:
+            assert isinstance(file_storage, SupportsGetURL)
             return file_storage.get_url(key)
     return serving_utils.base64_encode(img_bytes)

+ 24 - 10
paddlex/inference/pipelines/serving/_pipeline_apps/_common/ocr.py

@@ -24,8 +24,8 @@ from typing_extensions import Annotated, TypeAlias, assert_never
 from ......utils import logging
 from ... import utils as serving_utils
 from .cv import postprocess_image
-from ...models import DataInfo, ImageInfo, PDFInfo
-from ...storage import create_storage
+from ...models import DataInfo
+from ...storage import create_storage, SupportsGetURL
 from ...app import AppContext
 
 DEFAULT_MAX_NUM_INPUT_IMGS: Final[int] = 10
@@ -49,6 +49,17 @@ def update_app_context(app_context: AppContext) -> None:
     app_context.extra["file_storage"] = None
     if "file_storage" in extra_cfg:
         app_context.extra["file_storage"] = create_storage(extra_cfg["file_storage"])
+    app_context.extra["return_img_urls"] = extra_cfg.get("return_img_urls", False)
+    if app_context.extra["return_img_urls"]:
+        file_storage = app_context.extra["file_storage"]
+        if not file_storage:
+            raise ValueError(
+                "The file storage must be properly configured when URLs need to be returned."
+            )
+        if not isinstance(file_storage, SupportsGetURL):
+            raise TypeError(
+                f"`{type(file_storage).__name__}` does not support getting URLs."
+            )
     app_context.extra["max_num_input_imgs"] = extra_cfg.get(
         "max_num_input_imgs", DEFAULT_MAX_NUM_INPUT_IMGS
     )
@@ -111,12 +122,13 @@ async def postprocess_images(
     index: str,
     app_context: AppContext,
     input_image: Optional[ArrayLike] = None,
-    ocr_image: Optional[ArrayLike] = None,
     layout_image: Optional[ArrayLike] = None,
+    ocr_image: Optional[ArrayLike] = None,
 ) -> List[str]:
-    if input_image is None and ocr_image is None and layout_image is None:
+    if input_image is None and layout_image is None and ocr_image is None:
         raise ValueError("At least one of the images must be provided.")
     file_storage = app_context.extra["file_storage"]
+    return_img_urls = app_context.extra["return_img_urls"]
     max_img_size = app_context.extra["max_output_img_size"]
     futures: List[Awaitable] = []
     if input_image is not None:
@@ -126,25 +138,27 @@ async def postprocess_images(
             log_id=log_id,
             filename=f"input_image_{index}.jpg",
             file_storage=file_storage,
+            return_url=return_img_urls,
             max_img_size=max_img_size,
         )
         futures.append(future)
-    if ocr_image is not None:
+    if layout_image is not None:
         future = serving_utils.call_async(
             postprocess_image,
-            ocr_image,
+            layout_image,
             log_id=log_id,
-            filename=f"ocr_image_{index}.jpg",
+            filename=f"layout_image_{index}.jpg",
             file_storage=file_storage,
+            return_url=return_img_urls,
             max_img_size=max_img_size,
         )
         futures.append(future)
-    if layout_image is not None:
+    if ocr_image is not None:
         future = serving_utils.call_async(
             postprocess_image,
-            layout_image,
+            ocr_image,
             log_id=log_id,
-            filename=f"layout_image_{index}.jpg",
+            filename=f"ocr_image_{index}.jpg",
             file_storage=file_storage,
             max_img_size=max_img_size,
         )

+ 1 - 1
paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py

@@ -108,8 +108,8 @@ def create_pipeline_app(
                     index=i,
                     app_context=ctx,
                     input_image=img,
-                    ocr_image=ocr_img,
                     layout_image=layout_img,
+                    ocr_image=ocr_img,
                 )
                 if ocr_img is not None:
                     input_img, layout_img, ocr_img = output_imgs

+ 1 - 0
paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py

@@ -109,6 +109,7 @@ def create_pipeline_app(
                             log_id=log_id,
                             filename=f"image_{i}_{j}.jpg",
                             file_storage=ctx.extra["file_storage"],
+                            return_url=ctx.extra["return_img_urls"],
                             max_img_size=ctx.extra["max_output_img_size"],
                         )
                         text = subitem[label]["image_text"]

+ 2 - 2
paddlex/inference/pipelines/serving/_pipeline_apps/multi_label_image_classification.py

@@ -18,7 +18,7 @@ from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 
 from .....utils import logging
-from ...single_model_pipeline import ImageClassification
+from ...single_model_pipeline import MultiLableImageClas
 from .. import utils as serving_utils
 from ..app import AppConfig, create_app
 from ..models import NoResultResponse, ResultResponse
@@ -45,7 +45,7 @@ class InferResult(BaseModel):
 
 
 def create_pipeline_app(
-    pipeline: ImageClassification, app_config: AppConfig
+    pipeline: MultiLableImageClas, app_config: AppConfig
 ) -> FastAPI:
     app, ctx = create_app(
         pipeline=pipeline, app_config=app_config, app_aiohttp_session=True

+ 2 - 2
paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py

@@ -200,13 +200,13 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
                     Table(bbox=r["layout_bbox"], html=r["html"])
                     for r in item["table_result"]
                 ]
-                input_img, ocr_img, layout_img = await ocr_common.postprocess_images(
+                input_img, layout_img, ocr_img = await ocr_common.postprocess_images(
                     log_id=log_id,
                     index=i,
                     app_context=ctx,
                     input_image=img,
-                    ocr_image=item["ocr_result"].img,
                     layout_image=item["layout_result"].img,
+                    ocr_image=item["ocr_result"].img,
                 )
                 vision_result = VisionResult(
                     texts=texts,

+ 2 - 2
paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py

@@ -89,13 +89,13 @@ def create_pipeline_app(pipeline: SealOCRPipeline, app_config: AppConfig) -> Fas
                     item["ocr_result"]["rec_score"],
                 ):
                     texts.append(Text(poly=poly, text=text, score=score))
-                input_img, ocr_img, layout_img = await ocr_common.postprocess_images(
+                input_img, layout_img, ocr_img = await ocr_common.postprocess_images(
                     log_id=log_id,
                     index=i,
                     app_context=ctx,
                     input_image=img,
-                    ocr_image=item["ocr_result"].img,
                     layout_image=item["layout_result"].img,
+                    ocr_image=item["ocr_result"].img,
                 )
                 seal_rec_results.append(
                     SealRecResult(

+ 2 - 1
paddlex/inference/pipelines/serving/app.py

@@ -20,6 +20,7 @@ from typing import (
     AsyncGenerator,
     Callable,
     Dict,
+    Final,
     Generic,
     List,
     Mapping,
@@ -35,7 +36,7 @@ from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from starlette.exceptions import HTTPException
-from typing_extensions import Final, ParamSpec
+from typing_extensions import ParamSpec
 
 from ..base import BasePipeline
 from .models import NoResultResponse

+ 33 - 18
paddlex/inference/pipelines_new/__init__.py

@@ -17,6 +17,7 @@ from typing import Any, Dict, Optional
 from .base import BasePipeline
 from ..utils.pp_option import PaddlePredictorOption
 from .components import BaseChat, BaseRetriever, BaseGeneratePrompt
+from ...utils import logging
 from ...utils.config import parse_config
 from .ocr import OCRPipeline
 from .doc_preprocessor import DocPreprocessorPipeline
@@ -98,37 +99,51 @@ def load_pipeline_config(pipeline_name: str) -> Dict[str, Any]:
 
 
 def create_pipeline(
-    pipeline: str,
-    config: Dict = None,
-    device: str = None,
-    pp_option: PaddlePredictorOption = None,
+    pipeline_name: Optional[str] = None,
+    config: Optional[Dict[str, Any]] = None,
+    device: Optional[str] = None,
+    pp_option: Optional[PaddlePredictorOption] = None,
     use_hpip: bool = False,
-    *args,
-    **kwargs,
+    *args: Any,
+    **kwargs: Any,
 ) -> BasePipeline:
     """
     Create a pipeline instance based on the provided parameters.
-    If the input parameter config is not provided,
-    it is obtained from the default config corresponding to the pipeline name.
+
+    If the input parameter config is not provided, it is obtained from the
+    default config corresponding to the pipeline name.
 
     Args:
-        pipeline (str): The name of the pipeline to create.
-        config (Dict, optional): The path to the pipeline configuration file. Defaults to None.
-        device (str, optional): The device to run the pipeline on. Defaults to None.
-        pp_option (PaddlePredictorOption, optional): The options for the PaddlePredictor. Defaults to None.
-        use_hpip (bool, optional): Whether to use high-performance inference (hpip) for prediction. Defaults to False.
+        pipeline_name (Optional[str], optional): The name of the pipeline to
+            create, or the path to the config file. Defaults to None.
+        config (Optional[Dict[str, Any]], optional): The pipeline configuration.
+            Defaults to None.
+        device (Optional[str], optional): The device to run the pipeline on.
+            Defaults to None.
+        pp_option (Optional[PaddlePredictorOption], optional): The options for
+            the PaddlePredictor. Defaults to None.
+        use_hpip (bool, optional): Whether to use high-performance inference
+            plugin (HPIP) for prediction. Defaults to False.
         *args: Additional positional arguments.
         **kwargs: Additional keyword arguments.
 
     Returns:
         BasePipeline: The created pipeline instance.
     """
-
+    if pipeline_name is None and config is None:
+        raise ValueError(
+            "Both `pipeline_name` and `config` cannot be None at the same time."
+        )
     if config is None:
-        config = load_pipeline_config(pipeline)
-        pipeline_name = config["pipeline_name"]
-    else:
-        pipeline_name = pipeline
+        config = load_pipeline_config(pipeline_name)
+    if pipeline_name is not None and config["pipeline_name"] != pipeline_name:
+        logging.warning(
+            "The pipeline name in the config (%r) is different from the specified pipeline name (%r). %r will be used.",
+            config["pipeline_name"],
+            pipeline_name,
+            config["pipeline_name"],
+        )
+    pipeline_name = config["pipeline_name"]
 
     pipeline = BasePipeline.get(pipeline_name)(
         config=config,

+ 0 - 2
paddlex/inference/pipelines_new/base.py

@@ -109,9 +109,7 @@ class BasePipeline(ABC, metaclass=AutoRegisterABCMetaClass):
 
         from . import create_pipeline
 
-        pipeline_name = config["pipeline_name"]
         pipeline = create_pipeline(
-            pipeline_name,
             config=config,
             device=self.device,
             pp_option=self.pp_option,

+ 1 - 1
paddlex/inference/pipelines_new/components/__init__.py

@@ -20,4 +20,4 @@ from .utils.mixin import HtmlMixin, XlsxMixin
 from .chat_server.base import BaseChat
 from .retriever.base import BaseRetriever
 from .prompt_engeering.base import BaseGeneratePrompt
-from .faisser import FaissBuilder, FaissIndexer
+from .faisser import FaissBuilder, FaissIndexer, IndexData

+ 10 - 0
paddlex/inference/pipelines_new/components/faisser.py

@@ -63,6 +63,16 @@ class IndexData:
             "id_map": self._convert_int(self.id_map),
         }
 
+    @classmethod
+    def from_bytes(cls, bytes):
+        tup = pickle.loads(bytes)
+        index = faiss.deserialize_index(tup[0])
+        return cls(index, tup[1])
+
+    def to_bytes(self):
+        tup = (faiss.serialize_index(self._index), self.index_info)
+        return pickle.dumps(tup)
+
     def _convert_int(self, id_map):
         return {int(k): str(v) for k, v in id_map.items()}
 

+ 2 - 5
paddlex/inference/pipelines_new/pp_shitu_v2/pipeline.py

@@ -14,14 +14,10 @@
 
 from typing import Any, Dict, Optional
 
-import pickle
-from pathlib import Path
-import numpy as np
-
 from ...utils.pp_option import PaddlePredictorOption
 from ...common.reader import ReadImage
 from ...common.batch_sampler import ImageBatchSampler
-from ..components import CropByBoxes, FaissIndexer, FaissBuilder
+from ..components import CropByBoxes, FaissIndexer, FaissBuilder, IndexData
 from ..base import BasePipeline
 from .result import ShiTuResult
 
@@ -60,6 +56,7 @@ class ShiTuV2Pipeline(BasePipeline):
     def predict(self, input, index=None, **kwargs):
         indexer = FaissIndexer(index) if index is not None else self.indexer
         assert indexer
+        kwargs = {k: v for k, v in kwargs.items() if v is not None}
         topk = kwargs.get("topk", self._topk)
         rec_threshold = kwargs.get("rec_threshold", self._rec_threshold)
         hamming_radius = kwargs.get("hamming_radius", self._hamming_radius)

+ 13 - 0
paddlex/inference/serving/__init__.py

@@ -0,0 +1,13 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 18 - 0
paddlex/inference/serving/basic_serving/__init__.py

@@ -0,0 +1,18 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ._pipeline_apps import create_pipeline_app
+from ._server import run_server
+
+__all__ = ["create_pipeline_app", "run_server"]

+ 196 - 0
paddlex/inference/serving/basic_serving/_app.py

@@ -0,0 +1,196 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import contextlib
+import json
+from typing import (
+    Any,
+    AsyncGenerator,
+    Callable,
+    Dict,
+    Generic,
+    List,
+    Optional,
+    Tuple,
+    TypeVar,
+)
+
+import aiohttp
+import fastapi
+from fastapi.encoders import jsonable_encoder
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from starlette.exceptions import HTTPException
+from typing_extensions import ParamSpec
+
+from ....utils import logging
+from ...pipelines_new import BasePipeline
+from ..infra.config import AppConfig
+from ..infra.models import NoResultResponse
+from ..infra.utils import call_async, generate_log_id
+
+_PipelineT = TypeVar("_PipelineT", bound=BasePipeline)
+_P = ParamSpec("_P")
+_R = TypeVar("_R")
+
+
+# XXX: Since typing info (e.g., the pipeline class) cannot be easily obtained
+# without abstraction leaks, generic classes do not offer additional benefits
+# for type hinting. However, I would stick with the current design, as it does
+# not introduce runtime overhead at the moment and may prove useful in the
+# future.
+class PipelineWrapper(Generic[_PipelineT]):
+    def __init__(self, pipeline: _PipelineT) -> None:
+        super().__init__()
+        self._pipeline = pipeline
+        self._lock = asyncio.Lock()
+
+    @property
+    def pipeline(self) -> _PipelineT:
+        return self._pipeline
+
+    async def infer(self, *args: Any, **kwargs: Any) -> List[Any]:
+        def _infer() -> List[Any]:
+            output = list(self._pipeline(*args, **kwargs))
+            if (
+                len(output) == 1
+                and isinstance(output[0], dict)
+                and output[0].keys() == {"error"}
+            ):
+                raise fastapi.HTTPException(status_code=500, detail=output[0]["error"])
+            return output
+
+        return await self.call(_infer)
+
+    async def call(
+        self, func: Callable[_P, _R], *args: _P.args, **kwargs: _P.kwargs
+    ) -> _R:
+        async with self._lock:
+            return await call_async(func, *args, **kwargs)
+
+
+class AppContext(Generic[_PipelineT]):
+    def __init__(self, *, config: AppConfig) -> None:
+        super().__init__()
+        self._config = config
+        self.extra: Dict[str, Any] = {}
+        self._pipeline: Optional[PipelineWrapper[_PipelineT]] = None
+        self._aiohttp_session: Optional[aiohttp.ClientSession] = None
+
+    @property
+    def config(self) -> AppConfig:
+        return self._config
+
+    @property
+    def pipeline(self) -> PipelineWrapper[_PipelineT]:
+        if not self._pipeline:
+            raise AttributeError("`pipeline` has not been set.")
+        return self._pipeline
+
+    @pipeline.setter
+    def pipeline(self, val: PipelineWrapper[_PipelineT]) -> None:
+        self._pipeline = val
+
+    @property
+    def aiohttp_session(self) -> aiohttp.ClientSession:
+        if not self._aiohttp_session:
+            raise AttributeError("`aiohttp_session` has not been set.")
+        return self._aiohttp_session
+
+    @aiohttp_session.setter
+    def aiohttp_session(self, val: aiohttp.ClientSession) -> None:
+        self._aiohttp_session = val
+
+
+def create_app(
+    *, pipeline: _PipelineT, app_config: AppConfig, app_aiohttp_session: bool = True
+) -> Tuple[fastapi.FastAPI, AppContext[_PipelineT]]:
+    @contextlib.asynccontextmanager
+    async def _app_lifespan(app: fastapi.FastAPI) -> AsyncGenerator[None, None]:
+        ctx.pipeline = PipelineWrapper[_PipelineT](pipeline)
+        if app_aiohttp_session:
+            async with aiohttp.ClientSession(
+                cookie_jar=aiohttp.DummyCookieJar()
+            ) as aiohttp_session:
+                ctx.aiohttp_session = aiohttp_session
+                yield
+        else:
+            yield
+
+    # Should we control API versions?
+    app = fastapi.FastAPI(lifespan=_app_lifespan)
+    ctx = AppContext[_PipelineT](config=app_config)
+    app.state.context = ctx
+
+    @app.get("/health", operation_id="checkHealth")
+    async def _check_health() -> NoResultResponse:
+        return NoResultResponse(
+            logId=generate_log_id(), errorCode=0, errorMsg="Healthy"
+        )
+
+    @app.exception_handler(RequestValidationError)
+    async def _validation_exception_handler(
+        request: fastapi.Request, exc: RequestValidationError
+    ) -> JSONResponse:
+        json_compatible_data = jsonable_encoder(
+            NoResultResponse(
+                logId=generate_log_id(),
+                errorCode=422,
+                errorMsg=json.dumps(exc.errors()),
+            )
+        )
+        return JSONResponse(content=json_compatible_data, status_code=422)
+
+    @app.exception_handler(HTTPException)
+    async def _http_exception_handler(
+        request: fastapi.Request, exc: HTTPException
+    ) -> JSONResponse:
+        json_compatible_data = jsonable_encoder(
+            NoResultResponse(
+                logId=generate_log_id(), errorCode=exc.status_code, errorMsg=exc.detail
+            )
+        )
+        return JSONResponse(content=json_compatible_data, status_code=exc.status_code)
+
+    @app.exception_handler(Exception)
+    async def _unexpected_exception_handler(
+        request: fastapi.Request, exc: Exception
+    ) -> JSONResponse:
+        # XXX: The default server will duplicate the error message. Is it
+        # necessary to log the exception info here?
+        logging.exception("Unhandled exception")
+        json_compatible_data = jsonable_encoder(
+            NoResultResponse(
+                logId=generate_log_id(),
+                errorCode=500,
+                errorMsg="Internal server error",
+            )
+        )
+        return JSONResponse(content=json_compatible_data, status_code=500)
+
+    return app, ctx
+
+
+# TODO: Precise type hints
+def primary_operation(
+    app: fastapi.FastAPI, path: str, operation_id: str, **kwargs: Any
+) -> Callable:
+    return app.post(
+        path,
+        operation_id=operation_id,
+        responses={422: {"model": NoResultResponse}, 500: {"model": NoResultResponse}},
+        response_model_exclude_none=True,
+        **kwargs,
+    )

+ 36 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py

@@ -0,0 +1,36 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib
+from typing import Any, Dict
+
+from fastapi import FastAPI
+
+from ...infra.config import create_app_config
+
+
+def _pipeline_name_to_mod_name(pipeline_name: str) -> str:
+    """Map a pipeline name to a module name: lowercase, hyphens to underscores."""
+    lowered = pipeline_name.lower()
+    return lowered.replace("-", "_")
+
+
+# XXX: A dynamic approach is used here for writing fewer lines of code, at the
+# cost of sacrificing some benefits of type hints.
+def create_pipeline_app(pipeline: Any, pipeline_config: Dict[str, Any]) -> FastAPI:
+    """Create the serving app for the pipeline named in `pipeline_config`.
+
+    The value of `pipeline_config["pipeline_name"]` is converted to a module
+    name, that submodule of this package is imported, and its own
+    `create_pipeline_app` is called with the pipeline and the app config
+    built from `pipeline_config`.
+    """
+    mod_name = _pipeline_name_to_mod_name(pipeline_config["pipeline_name"])
+    app_module = importlib.import_module(f".{mod_name}", package=__package__)
+    app_config = create_app_config(pipeline_config)
+    return app_module.create_pipeline_app(pipeline, app_config)

+ 13 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py

@@ -0,0 +1,13 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 98 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py

@@ -0,0 +1,98 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Dict, Optional, Tuple, Union
+
+import cv2
+import numpy as np
+from numpy.typing import ArrayLike
+from PIL.Image import Image
+
+from ....infra import utils as serving_utils
+from ....infra.storage import Storage, SupportsGetURL
+
+
+def prune_result(result: dict) -> dict:
+    """Return `result` rebuilt without any "input_path" keys.
+
+    Dicts and lists are rebuilt recursively; every other value (including
+    tuples) is returned as is. The input object is not modified.
+    """
+    dropped_keys = ("input_path",)
+
+    def _strip(node):
+        if isinstance(node, dict):
+            return {
+                key: _strip(value)
+                for key, value in node.items()
+                if key not in dropped_keys
+            }
+        if isinstance(node, list):
+            return [_strip(child) for child in node]
+        return node
+
+    return _strip(result)
+
+
+def postprocess_image(
+    image: ArrayLike,
+    log_id: str,
+    filename: str,
+    *,
+    file_storage: Optional[Storage] = None,
+    return_url: bool = False,
+    max_img_size: Optional[Tuple[int, int]] = None,
+) -> str:
+    """Encode an output image and return it as Base64 text or as a URL.
+
+    Args:
+        image: Image data; converted with `np.asarray`.
+        log_id: Request log ID, used as the prefix of the storage key.
+        filename: File name. It forms the rest of the storage key, and its
+            extension is passed to the image encoder.
+        file_storage: Optional storage that receives the encoded bytes.
+        return_url: Whether to return a URL obtained from `file_storage`
+            instead of Base64 text.
+        max_img_size: Optional `(max_height, max_width)` bound, matching the
+            order used by the bounds check. An image exceeding it in either
+            dimension is downscaled, keeping its aspect ratio, so that it
+            fits inside the bound.
+
+    Returns:
+        The URL of the stored image if `return_url` is true, otherwise the
+        Base64-encoded image bytes.
+
+    Raises:
+        ValueError: If `return_url` is true but no storage is provided.
+        TypeError: If `return_url` is true but the storage cannot give URLs.
+    """
+    if return_url:
+        if not file_storage:
+            raise ValueError(
+                "`file_storage` must not be None when URLs need to be returned."
+            )
+        if not isinstance(file_storage, SupportsGetURL):
+            raise TypeError("The provided storage does not support getting URLs.")
+
+    key = f"{log_id}/{filename}"
+    ext = os.path.splitext(filename)[1]
+    image = np.asarray(image)
+    h, w = image.shape[0:2]
+    if max_img_size is not None:
+        max_h, max_w = max_img_size[0], max_img_size[1]
+        if h > max_h or w > max_w:
+            # The tighter of the two ratios makes both dimensions fit. The
+            # previous code mixed up the height/width entries here, which
+            # gave wrong results for non-square bounds.
+            factor = min(max_h / h, max_w / w)
+            # `cv2.resize` takes the target size as (width, height) and
+            # rejects zero-sized targets, hence the clamp to 1.
+            new_w = max(int(factor * w), 1)
+            new_h = max(int(factor * h), 1)
+            image = cv2.resize(image, (new_w, new_h))
+    img_bytes = serving_utils.image_array_to_bytes(image, ext=ext)
+    if file_storage is not None:
+        file_storage.set(key, img_bytes)
+        if return_url:
+            # Already validated above; kept for static type narrowing.
+            assert isinstance(file_storage, SupportsGetURL)
+            return file_storage.get_url(key)
+    return serving_utils.base64_encode(img_bytes)
+
+
+def postprocess_images(
+    images: Dict[str, Union[Image, ArrayLike]],
+    log_id: str,
+    filename_template: str = "{key}.jpg",
+    file_storage: Optional[Storage] = None,
+    return_urls: bool = False,
+    max_img_size: Optional[Tuple[int, int]] = None,
+) -> Dict[str, str]:
+    """Run `postprocess_image` on every image of a name-to-image mapping.
+
+    PIL images are converted to arrays first. The file name of each image is
+    `filename_template` formatted with `key` set to the image's name.
+
+    Returns:
+        A dict with the same keys as `images`, holding the string returned by
+        `postprocess_image` for each image.
+    """
+    return {
+        name: postprocess_image(
+            np.array(picture) if isinstance(picture, Image) else picture,
+            log_id=log_id,
+            filename=filename_template.format(key=name),
+            file_storage=file_storage,
+            return_url=return_urls,
+            max_img_size=max_img_size,
+        )
+        for name, picture in images.items()
+    }

+ 36 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py

@@ -0,0 +1,36 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import uuid
+from typing import Final
+
+from ....infra.storage import create_storage
+from ..._app import AppContext
+
+DEFAULT_INDEX_DIR: Final[str] = ".index"
+
+
+def update_app_context(app_context: AppContext) -> None:
+    """Create the index storage and put it in `app_context.extra`.
+
+    The storage is built from the "index_storage" entry of the app config's
+    extra settings when present; otherwise a file-system storage rooted at
+    `DEFAULT_INDEX_DIR` is used.
+    """
+    extra_cfg = app_context.config.extra
+    if extra_cfg and "index_storage" in extra_cfg:
+        storage_cfg = extra_cfg["index_storage"]
+    else:
+        storage_cfg = {"type": "file_system", "directory": DEFAULT_INDEX_DIR}
+    app_context.extra["index_storage"] = create_storage(storage_cfg)
+
+
+def generate_index_key() -> str:
+    """Return a new random (version 4) UUID string to use as an index key."""
+    index_uuid = uuid.uuid4()
+    return str(index_uuid)

+ 90 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py

@@ -0,0 +1,90 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Tuple, Union
+
+import numpy as np
+from fastapi import HTTPException
+from typing_extensions import Literal
+
+from ....infra import utils as serving_utils
+from ....infra.models import ImageInfo, PDFInfo
+from ....infra.storage import SupportsGetURL, create_storage
+from ....schemas.shared.ocr import BaseInferRequest
+from ..._app import AppContext
+
+DEFAULT_MAX_NUM_INPUT_IMGS: Final[int] = 10
+DEFAULT_MAX_OUTPUT_IMG_SIZE: Final[Tuple[int, int]] = (2000, 2000)
+
+
+def update_app_context(app_context: AppContext) -> None:
+    """Populate `app_context.extra` with the settings shared by OCR-like apps.
+
+    Sets "file_storage" (a storage object or `None`), "return_img_urls",
+    "max_num_input_imgs" and "max_output_img_size" from the app config's
+    extra settings, falling back to the module defaults.
+
+    Raises:
+        ValueError: If URLs are requested but no file storage is configured.
+        TypeError: If URLs are requested but the storage cannot provide them.
+    """
+    cfg = app_context.config.extra or {}
+    ctx_extra = app_context.extra
+
+    ctx_extra["file_storage"] = None
+    if "file_storage" in cfg:
+        ctx_extra["file_storage"] = create_storage(cfg["file_storage"])
+
+    return_img_urls = cfg.get("return_img_urls", False)
+    ctx_extra["return_img_urls"] = return_img_urls
+    if return_img_urls:
+        # Fail at app creation rather than on the first request.
+        storage = ctx_extra["file_storage"]
+        if not storage:
+            raise ValueError(
+                "The file storage must be properly configured when URLs need to be returned."
+            )
+        if not isinstance(storage, SupportsGetURL):
+            raise TypeError(
+                f"`{type(storage).__name__}` does not support getting URLs."
+            )
+
+    limits = (
+        ("max_num_input_imgs", DEFAULT_MAX_NUM_INPUT_IMGS),
+        ("max_output_img_size", DEFAULT_MAX_OUTPUT_IMG_SIZE),
+    )
+    for option, default in limits:
+        ctx_extra[option] = cfg.get(option, default)
+
+
+def get_file_type(request: BaseInferRequest) -> Literal["PDF", "IMAGE"]:
+    """Determine whether the request's file is a PDF or an image.
+
+    An explicit `fileType` wins: 0 means "PDF", any other value "IMAGE".
+    Without it, the type is inferred from `request.file`, which is only
+    possible when that value is a URL.
+
+    Raises:
+        HTTPException: With status 422 if the type cannot be determined or
+            the inferred type is neither "PDF" nor "IMAGE".
+    """
+    if request.fileType is not None:
+        return "PDF" if request.fileType == 0 else "IMAGE"
+
+    if not serving_utils.is_url(request.file):
+        raise HTTPException(status_code=422, detail="File type cannot be determined")
+
+    inferred_type = serving_utils.infer_file_type(request.file)
+    if inferred_type not in ("PDF", "IMAGE"):
+        raise HTTPException(status_code=422, detail="Unsupported file type")
+    return inferred_type
+
+
+async def get_images(
+    request: BaseInferRequest, app_context: AppContext
+) -> Tuple[List[np.ndarray], Union[ImageInfo, PDFInfo]]:
+    """Fetch the request's file and convert it to a list of images.
+
+    The number of images is capped by the "max_num_input_imgs" entry of
+    `app_context.extra`.
+
+    Returns:
+        The images and the accompanying data info produced by
+        `serving_utils.file_to_images`.
+    """
+    # XXX: Should we return 422?
+    kind = get_file_type(request)
+
+    raw_bytes = await serving_utils.get_raw_bytes_async(
+        request.file, app_context.aiohttp_session
+    )
+    max_num_imgs = app_context.extra["max_num_input_imgs"]
+    imgs, info = await serving_utils.call_async(
+        serving_utils.file_to_images, raw_bytes, kind, max_num_imgs=max_num_imgs
+    )
+    return imgs, info

+ 64 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py

@@ -0,0 +1,64 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.anomaly_detection import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Build the FastAPI app that serves the anomaly detection pipeline.
+
+    The app exposes a single POST operation, "infer", at `INFER_ENDPOINT`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(app, INFER_ENDPOINT, "infer")
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        runner = ctx.pipeline
+        session = ctx.aiohttp_session
+
+        image_bytes = await serving_utils.get_raw_bytes_async(request.image, session)
+        image = serving_utils.image_bytes_to_array(image_bytes)
+
+        outputs = await runner.infer(image)
+        result = outputs[0]
+
+        # Report the dimensions of the nested prediction list, then flatten
+        # it row by row.
+        rows = result["pred"][0].tolist()
+        size = [len(rows), len(rows[0])]
+        label_map = []
+        for row in rows:
+            label_map.extend(row)
+
+        output_image_base64 = None
+        if ctx.config.visualize:
+            rgb_image = result.img["res"].convert("RGB")
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(rgb_image)
+            )
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(
+                labelMap=label_map, size=size, image=output_image_base64
+            ),
+        )
+
+    return app

+ 89 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py

@@ -0,0 +1,89 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.doc_preprocessor import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+from ._common import common
+from ._common import ocr as ocr_common
+
+
+# Build the FastAPI app that serves the document preprocessor pipeline. The
+# app exposes a single POST operation, "infer", at `INFER_ENDPOINT`.
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    # Fill `ctx.extra` with the settings read below ("file_storage",
+    # "return_img_urls", "max_output_img_size", ...).
+    ocr_common.update_app_context(ctx)
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+
+        # One log ID per request; it is also passed on when post-processing
+        # the output images.
+        log_id = serving_utils.generate_log_id()
+
+        images, data_info = await ocr_common.get_images(request, ctx)
+
+        result = await pipeline.infer(
+            images,
+            use_doc_orientation_classify=request.useDocOrientationClassify,
+            use_doc_unwarping=request.useDocUnwarping,
+        )
+
+        doc_pp_results: List[Dict[str, Any]] = []
+        # Pair each input image with its result item; `i` keeps the output
+        # file names of different images apart.
+        for i, (img, item) in enumerate(zip(images, result)):
+            pruned_res = common.prune_result(item.json["res"])
+            if ctx.config.visualize:
+                output_imgs = item.img
+                imgs = {
+                    "input_img": img,
+                    "doc_preprocessing_img": output_imgs["preprocessed_img"],
+                }
+                # The doubled braces keep a literal "{key}" placeholder in
+                # the template (the f-string only substitutes `i`).
+                # `imgs` is rebound to the post-processed output.
+                imgs = await serving_utils.call_async(
+                    common.postprocess_images,
+                    imgs,
+                    log_id,
+                    filename_template=f"{{key}}_{i}.jpg",
+                    file_storage=ctx.extra["file_storage"],
+                    return_urls=ctx.extra["return_img_urls"],
+                    max_img_size=ctx.extra["max_output_img_size"],
+                )
+            else:
+                # No visualization: the `.get` calls below all yield None.
+                imgs = {}
+            doc_pp_results.append(
+                dict(
+                    prunedResult=pruned_res,
+                    docPreprocessingImage=imgs.get("doc_preprocessing_img"),
+                    inputImage=imgs.get("input_img"),
+                )
+            )
+
+        return ResultResponse[InferResult](
+            logId=log_id,
+            result=InferResult(
+                docPreprocessingResults=doc_pp_results,
+                dataInfo=data_info,
+            ),
+        )
+
+    return app

+ 225 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py

@@ -0,0 +1,225 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+from operator import attrgetter
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ....pipelines_new.components import IndexData
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas import face_recognition as schema
+from .._app import create_app, primary_operation
+from ._common import image_recognition as ir_common
+
+# XXX: Currently the implementations of the face recognition and PP-ShiTuV2
+# pipeline apps overlap significantly. We should aim to facilitate code reuse,
+# but is it acceptable to assume a strong similarity between these two
+# pipelines?
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Build the FastAPI app that serves the face recognition pipeline.
+
+    Four POST operations are registered: "buildIndex", "addImagesToIndex",
+    "removeImagesFromIndex" and "infer". Serialized index data is kept in the
+    storage that `ir_common.update_app_context` puts in
+    `ctx.extra["index_storage"]`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    ir_common.update_app_context(ctx)
+
+    async def _fetch_labeled_images(image_label_pairs: Any) -> Any:
+        # Fetch all images concurrently and decode them; returns
+        # `(images, labels)` in the order of `image_label_pairs`.
+        file_bytes_list = await asyncio.gather(
+            *(
+                serving_utils.get_raw_bytes_async(img, ctx.aiohttp_session)
+                for img in map(attrgetter("image"), image_label_pairs)
+            )
+        )
+        images = [serving_utils.image_bytes_to_array(item) for item in file_bytes_list]
+        labels = [pair.label for pair in image_label_pairs]
+        return images, labels
+
+    async def _load_index(index_key: Any) -> Any:
+        # Read and deserialize the index stored under `index_key`; returns
+        # `None` when no key is given.
+        if index_key is None:
+            return None
+        index_data_bytes = await serving_utils.call_async(
+            ctx.extra["index_storage"].get, index_key
+        )
+        return IndexData.from_bytes(index_data_bytes)
+
+    async def _save_index(index_key: Any, index_data: Any) -> None:
+        # Serialize `index_data` and store it under `index_key`.
+        index_data_bytes = index_data.to_bytes()
+        await serving_utils.call_async(
+            ctx.extra["index_storage"].set, index_key, index_data_bytes
+        )
+
+    @primary_operation(
+        app,
+        schema.BUILD_INDEX_ENDPOINT,
+        "buildIndex",
+    )
+    async def _build_index(
+        request: schema.BuildIndexRequest,
+    ) -> ResultResponse[schema.BuildIndexResult]:
+        pipeline = ctx.pipeline
+
+        images, labels = await _fetch_labeled_images(request.imageLabelPairs)
+
+        # TODO: Support specifying `index_type` and `metric_type` in the
+        # request
+        index_data = await pipeline.call(
+            pipeline.pipeline.build_index,
+            images,
+            labels,
+            index_type="Flat",
+            metric_type="IP",
+        )
+
+        # A fresh key is generated for every index that is built.
+        index_key = ir_common.generate_index_key()
+        await _save_index(index_key, index_data)
+
+        return ResultResponse[schema.BuildIndexResult](
+            logId=serving_utils.generate_log_id(),
+            result=schema.BuildIndexResult(indexKey=index_key, idMap=index_data.id_map),
+        )
+
+    @primary_operation(
+        app,
+        schema.ADD_IMAGES_TO_INDEX_ENDPOINT,
+        "addImagesToIndex",
+    )
+    async def _add_images_to_index(
+        request: schema.AddImagesToIndexRequest,
+    ) -> ResultResponse[schema.AddImagesToIndexResult]:
+        pipeline = ctx.pipeline
+
+        images, labels = await _fetch_labeled_images(request.imageLabelPairs)
+
+        index_data = await _load_index(request.indexKey)
+
+        index_data = await pipeline.call(
+            pipeline.pipeline.append_index, images, labels, index_data
+        )
+
+        # Without a key there is no stored entry to overwrite. The previous
+        # code raised `UnboundLocalError` here because the storage was only
+        # looked up when a key was given.
+        # NOTE(review): whether the request schema permits a null `indexKey`
+        # is not visible from this module -- confirm.
+        if request.indexKey is not None:
+            await _save_index(request.indexKey, index_data)
+
+        return ResultResponse[schema.AddImagesToIndexResult](
+            logId=serving_utils.generate_log_id(),
+            result=schema.AddImagesToIndexResult(idMap=index_data.id_map),
+        )
+
+    @primary_operation(
+        app,
+        schema.REMOVE_IMAGES_FROM_INDEX_ENDPOINT,
+        "removeImagesFromIndex",
+    )
+    async def _remove_images_from_index(
+        request: schema.RemoveImagesFromIndexRequest,
+    ) -> ResultResponse[schema.RemoveImagesFromIndexResult]:
+        pipeline = ctx.pipeline
+
+        index_data = await _load_index(request.indexKey)
+
+        index_data = await pipeline.call(
+            pipeline.pipeline.remove_index, request.ids, index_data
+        )
+
+        # Same guard as in `_add_images_to_index`: persist only when there is
+        # a key to store the updated index under.
+        if request.indexKey is not None:
+            await _save_index(request.indexKey, index_data)
+
+        return ResultResponse[schema.RemoveImagesFromIndexResult](
+            logId=serving_utils.generate_log_id(),
+            result=schema.RemoveImagesFromIndexResult(idMap=index_data.id_map),
+        )
+
+    @primary_operation(
+        app,
+        schema.INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(
+        request: schema.InferRequest,
+    ) -> ResultResponse[schema.InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        image_bytes = await serving_utils.get_raw_bytes_async(
+            request.image, aiohttp_session
+        )
+        image = serving_utils.image_bytes_to_array(image_bytes)
+
+        # `None` is passed as `index` when the request names no stored index.
+        index_data = await _load_index(request.indexKey)
+
+        result = list(
+            await pipeline.call(
+                pipeline.pipeline.predict,
+                image,
+                index=index_data,
+                det_threshold=request.detThreshold,
+                rec_threshold=request.recThreshold,
+                hamming_radius=request.hammingRadius,
+                topk=request.topk,
+            )
+        )[0]
+
+        objs: List[Dict[str, Any]] = []
+        for obj in result["boxes"]:
+            # A box whose "rec_scores" is None gets an empty `recResults`.
+            rec_results: List[Dict[str, Any]] = []
+            if obj["rec_scores"] is not None:
+                rec_results = [
+                    dict(label=label, score=score)
+                    for label, score in zip(obj["labels"], obj["rec_scores"])
+                ]
+            objs.append(
+                dict(
+                    bbox=obj["coordinate"],
+                    recResults=rec_results,
+                    score=obj["det_score"],
+                )
+            )
+        if ctx.config.visualize:
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img["res"])
+            )
+        else:
+            output_image_base64 = None
+
+        return ResultResponse[schema.InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=schema.InferResult(faces=objs, image=output_image_base64),
+        )
+
+    return app

+ 96 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py

@@ -0,0 +1,96 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.formula_recognition import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+from ._common import common
+from ._common import ocr as ocr_common
+
+
+# Build the FastAPI app that serves the formula recognition pipeline. The
+# app exposes a single POST operation, "infer", at `INFER_ENDPOINT`.
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    # Fill `ctx.extra` with the settings read below ("file_storage",
+    # "return_img_urls", "max_output_img_size", ...).
+    ocr_common.update_app_context(ctx)
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+
+        # One log ID per request; it is also passed on when post-processing
+        # the output images.
+        log_id = serving_utils.generate_log_id()
+
+        images, data_info = await ocr_common.get_images(request, ctx)
+
+        result = await pipeline.infer(
+            images,
+            use_layout_detection=request.useLayoutDetection,
+            use_doc_orientation_classify=request.useDocOrientationClassify,
+            use_doc_unwarping=request.useDocUnwarping,
+        )
+
+        formula_rec_results: List[Dict[str, Any]] = []
+        # Pair each input image with its result item; `i` keeps the output
+        # file names of different images apart.
+        for i, (img, item) in enumerate(zip(images, result)):
+            pruned_res = common.prune_result(item.json["res"])
+            if ctx.config.visualize:
+                output_imgs = item.img
+                imgs = {
+                    "input_img": img,
+                    "formula_rec_img": output_imgs["formula_res_img"],
+                }
+                # These two outputs are included only when the pipeline
+                # produced them.
+                if "layout_det_res" in output_imgs:
+                    imgs["layout_det_img"] = output_imgs["layout_det_res"]
+                if "preprocessed_img" in output_imgs:
+                    imgs["doc_preprocessing_img"] = output_imgs["preprocessed_img"]
+                # The doubled braces keep a literal "{key}" placeholder in
+                # the template (the f-string only substitutes `i`).
+                # `imgs` is rebound to the post-processed output.
+                imgs = await serving_utils.call_async(
+                    common.postprocess_images,
+                    imgs,
+                    log_id,
+                    filename_template=f"{{key}}_{i}.jpg",
+                    file_storage=ctx.extra["file_storage"],
+                    return_urls=ctx.extra["return_img_urls"],
+                    max_img_size=ctx.extra["max_output_img_size"],
+                )
+            else:
+                # No visualization: the `.get` calls below all yield None.
+                imgs = {}
+            formula_rec_results.append(
+                dict(
+                    prunedResult=pruned_res,
+                    formulaRecImage=imgs.get("formula_rec_img"),
+                    layoutDetImage=imgs.get("layout_det_img"),
+                    docPreprocessingImage=imgs.get("doc_preprocessing_img"),
+                    inputImage=imgs.get("input_img"),
+                )
+            )
+
+        return ResultResponse[InferResult](
+            logId=log_id,
+            result=InferResult(
+                formulaRecResults=formula_rec_results,
+                dataInfo=data_info,
+            ),
+        )
+
+    return app

+ 66 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py

@@ -0,0 +1,66 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.image_classification import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Build the FastAPI app that serves the image classification pipeline.
+
+    The app exposes a single POST operation, "infer", at `INFER_ENDPOINT`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(app, INFER_ENDPOINT, "infer")
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        runner = ctx.pipeline
+        session = ctx.aiohttp_session
+
+        image_bytes = await serving_utils.get_raw_bytes_async(request.image, session)
+        image = serving_utils.image_bytes_to_array(image_bytes)
+
+        outputs = await runner.infer(image, topk=request.topk)
+        result = outputs[0]
+
+        # Fall back to the stringified class IDs when the result carries no
+        # label names.
+        cat_names = (
+            result["label_names"]
+            if "label_names" in result
+            else [str(id_) for id_ in result["class_ids"]]
+        )
+        categories: List[Dict[str, Any]] = [
+            dict(id=id_, name=name, score=score)
+            for id_, name, score in zip(
+                result["class_ids"], cat_names, result["scores"]
+            )
+        ]
+
+        output_image_base64 = None
+        if ctx.config.visualize:
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img["res"])
+            )
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(categories=categories, image=output_image_base64),
+        )
+
+    return app

+ 70 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py

@@ -0,0 +1,70 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.image_multilabel_classification import (
+    INFER_ENDPOINT,
+    InferRequest,
+    InferResult,
+)
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Create the serving app for the multi-label image classification pipeline.
+
+    Args:
+        pipeline: Pipeline object passed on to `create_app`; the handler
+            awaits `ctx.pipeline.infer`.
+        app_config: Serving configuration passed on to `create_app`.
+
+    Returns:
+        The FastAPI app with the "infer" primary operation registered at
+        `INFER_ENDPOINT`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        runner, session = ctx.pipeline, ctx.aiohttp_session
+
+        # Load the input image and convert it to an array for the pipeline.
+        raw_bytes = await serving_utils.get_raw_bytes_async(request.image, session)
+        img_array = serving_utils.image_bytes_to_array(raw_bytes)
+
+        # The request threshold is forwarded as-is; only the first result is used.
+        outputs = await runner.infer(img_array, threshold=request.threshold)
+        result = outputs[0]
+
+        # Without label names, the stringified class IDs serve as names.
+        names = (
+            result["label_names"]
+            if "label_names" in result
+            else [str(class_id) for class_id in result["class_ids"]]
+        )
+        categories: List[Dict[str, Any]] = [
+            {"id": class_id, "name": name, "score": score}
+            for class_id, name, score in zip(
+                result["class_ids"], names, result["scores"]
+            )
+        ]
+
+        # The visualization is attached only when enabled in the config.
+        output_image_base64 = None
+        if ctx.config.visualize:
+            vis_bytes = serving_utils.image_to_bytes(result.img["res"])
+            output_image_base64 = serving_utils.base64_encode(vis_bytes)
+
+        log_id = serving_utils.generate_log_id()
+        payload = InferResult(categories=categories, image=output_image_base64)
+        return ResultResponse[InferResult](logId=log_id, result=payload)
+
+    return app

+ 81 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py

@@ -0,0 +1,81 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+import numpy as np
+import pycocotools.mask as mask_util
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.instance_segmentation import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def _rle(mask: np.ndarray) -> str:
+    """Run-length encode a mask with pycocotools and return the counts as text.
+
+    Args:
+        mask: Mask array; a trailing axis is appended before encoding.
+
+    Returns:
+        The "counts" field of the first encoded entry, decoded as UTF-8.
+    """
+    # `mask_util.encode` is fed a Fortran-ordered uint8 array.
+    fortran_mask = np.asarray(mask[..., None], order="F", dtype="uint8")
+    encoded = mask_util.encode(fortran_mask)
+    first_entry = encoded[0]
+    return first_entry["counts"].decode("utf-8")
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Create the serving app for the instance segmentation pipeline.
+
+    Args:
+        pipeline: Pipeline object passed on to `create_app`; the handler
+            awaits `ctx.pipeline.infer`.
+        app_config: Serving configuration passed on to `create_app`.
+
+    Returns:
+        The FastAPI app with the "infer" primary operation registered at
+        `INFER_ENDPOINT`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline,
+        app_config=app_config,
+        app_aiohttp_session=True,
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        runner, session = ctx.pipeline, ctx.aiohttp_session
+
+        # Load the input image and convert it to an array for the pipeline.
+        raw_bytes = await serving_utils.get_raw_bytes_async(request.image, session)
+        img_array = serving_utils.image_bytes_to_array(raw_bytes)
+
+        outputs = await runner.infer(img_array, threshold=request.threshold)
+        result = outputs[0]
+
+        # Boxes and masks are paired positionally; each mask is returned as
+        # its RLE string together with the mask's shape.
+        instances: List[Dict[str, Any]] = []
+        for box, seg_mask in zip(result["boxes"], result["masks"]):
+            mask_info = {"rleResult": _rle(seg_mask), "size": seg_mask.shape}
+            instance = {
+                "bbox": box["coordinate"],
+                "categoryId": box["cls_id"],
+                "categoryName": box["label"],
+                "score": box["score"],
+                "mask": mask_info,
+            }
+            instances.append(instance)
+
+        # The visualization is attached only when enabled in the config.
+        output_image_base64 = None
+        if ctx.config.visualize:
+            vis_bytes = serving_utils.image_to_bytes(result.img["res"])
+            output_image_base64 = serving_utils.base64_encode(vis_bytes)
+
+        log_id = serving_utils.generate_log_id()
+        payload = InferResult(instances=instances, image=output_image_base64)
+        return ResultResponse[InferResult](logId=log_id, result=payload)
+
+    return app

+ 107 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py

@@ -0,0 +1,107 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI, HTTPException
+
+from .....utils import logging
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.layout_parsing import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+from ._common import image as image_common
+from ._common import ocr as ocr_common
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Create the serving app for the layout parsing pipeline.
+
+    Args:
+        pipeline: Pipeline object passed on to `create_app`; the handler
+            awaits `ctx.pipeline.infer`.
+        app_config: Serving configuration passed on to `create_app`.
+
+    Returns:
+        The FastAPI app with the "infer" primary operation registered at
+        `INFER_ENDPOINT`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    ocr_common.update_app_context(ctx)
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(
+        request: InferRequest,
+    ) -> ResultResponse[InferResult]:
+        runner = ctx.pipeline
+        log_id = serving_utils.generate_log_id()
+
+        images, data_info = await ocr_common.get_images(request, ctx)
+
+        pages = await runner.infer(
+            images,
+            use_doc_image_ori_cls_model=request.useImgOrientationCls,
+            use_doc_image_unwarp_model=request.useImgUnwarping,
+            use_seal_text_det_model=request.useSealTextDet,
+        )
+
+        layout_parsing_results: List[Dict[str, Any]] = []
+        for page_idx, page in enumerate(pages):
+            elements: List[Dict[str, Any]] = []
+            entries = page["layout_parsing_result"]["parsing_result"]
+            for elem_idx, entry in enumerate(entries):
+                # Besides the three fixed keys, each entry must carry exactly
+                # one more key; that key is used as the element label.
+                content_keys = entry.keys() - {"input_path", "layout_bbox", "layout"}
+                if len(content_keys) != 1:
+                    logging.error("Unexpected result: %s", entry)
+                    raise HTTPException(
+                        status_code=500,
+                        detail="Internal server error",
+                    )
+                (label,) = content_keys
+
+                # Picture-like elements nest their text under "image_text"
+                # and may carry an image; other elements hold the text directly.
+                is_picture = label in ("image", "figure", "img", "fig")
+                if is_picture:
+                    text = entry[label]["image_text"]
+                else:
+                    text = entry[label]
+
+                image = None
+                if is_picture and ctx.config.visualize:
+                    image = await serving_utils.call_async(
+                        image_common.postprocess_image,
+                        entry[label]["img"],
+                        log_id=log_id,
+                        filename=f"image_{page_idx}_{elem_idx}.jpg",
+                        file_storage=ctx.extra["file_storage"],
+                        return_url=ctx.extra["return_img_urls"],
+                        max_img_size=ctx.extra["max_output_img_size"],
+                    )
+
+                elements.append(
+                    {
+                        "bbox": entry["layout_bbox"],
+                        "label": label,
+                        "text": text,
+                        "layoutType": entry["layout"],
+                        "image": image,
+                    }
+                )
+            layout_parsing_results.append({"layoutElements": elements})
+
+        payload = InferResult(
+            layoutParsingResults=layout_parsing_results,
+            dataInfo=data_info,
+        )
+        return ResultResponse[InferResult](logId=log_id, result=payload)
+
+    return app

+ 89 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py

@@ -0,0 +1,89 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Any, Dict, List
+
+from fastapi import FastAPI, HTTPException
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.multilingual_speech_recognition import (
+    INFER_ENDPOINT,
+    InferRequest,
+    InferResult,
+)
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Create the serving app for the multilingual speech recognition pipeline.
+
+    Args:
+        pipeline: Pipeline object passed on to `create_app`; the handler
+            awaits `ctx.pipeline.infer`.
+        app_config: Serving configuration passed on to `create_app`.
+
+    Returns:
+        The FastAPI app with the "infer" primary operation registered at
+        `INFER_ENDPOINT`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        runner, session = ctx.pipeline, ctx.aiohttp_session
+
+        raw_bytes = await serving_utils.get_raw_bytes_async(request.audio, session)
+        suffix = serving_utils.infer_file_ext(request.audio)
+        if suffix is None:
+            raise HTTPException(
+                status_code=422, detail="File extension cannot be inferred"
+            )
+        # The pipeline is given a file path, so the audio goes to a temp file.
+        audio_path = await serving_utils.call_async(
+            serving_utils.write_to_temp_file,
+            raw_bytes,
+            suffix=suffix,
+        )
+
+        try:
+            outputs = await runner.infer(audio_path)
+            result = outputs[0]
+        finally:
+            # Remove the temporary file whether or not inference succeeded.
+            await serving_utils.call_async(os.unlink, audio_path)
+
+        # Map the pipeline's snake_case segment fields to the response names.
+        recognition = result["result"]
+        segments: List[Dict[str, Any]] = [
+            {
+                "id": seg["id"],
+                "seek": seg["seek"],
+                "start": seg["start"],
+                "end": seg["end"],
+                "text": seg["text"],
+                "tokens": seg["tokens"],
+                "temperature": seg["temperature"],
+                "avgLogProb": seg["avg_logprob"],
+                "compressionRatio": seg["compression_ratio"],
+                "noSpeechProb": seg["no_speech_prob"],
+            }
+            for seg in recognition["segments"]
+        ]
+
+        log_id = serving_utils.generate_log_id()
+        payload = InferResult(
+            text=recognition["text"],
+            segments=segments,
+            language=recognition["language"],
+        )
+        return ResultResponse[InferResult](logId=log_id, result=payload)
+
+    return app

+ 74 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py

@@ -0,0 +1,74 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.object_detection import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Create the serving app for the object detection pipeline.
+
+    Args:
+        pipeline: Pipeline object passed on to `create_app`; the handler
+            awaits `ctx.pipeline.infer`.
+        app_config: Serving configuration passed on to `create_app`.
+
+    Returns:
+        The FastAPI app with the "infer" primary operation registered at
+        `INFER_ENDPOINT`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        runner, session = ctx.pipeline, ctx.aiohttp_session
+
+        # Load the input image and convert it to an array for the pipeline.
+        raw_bytes = await serving_utils.get_raw_bytes_async(request.image, session)
+        img_array = serving_utils.image_bytes_to_array(raw_bytes)
+
+        # Only the first element of the returned results is used.
+        outputs = await runner.infer(img_array, threshold=request.threshold)
+        result = outputs[0]
+
+        # Translate each detected box into the response field names.
+        objects: List[Dict[str, Any]] = [
+            {
+                "bbox": box["coordinate"],
+                "categoryId": box["cls_id"],
+                "categoryName": box["label"],
+                "score": box["score"],
+            }
+            for box in result["boxes"]
+        ]
+
+        # The visualization is attached only when enabled in the config.
+        output_image_base64 = None
+        if ctx.config.visualize:
+            vis_bytes = serving_utils.image_to_bytes(result.img["res"])
+            output_image_base64 = serving_utils.base64_encode(vis_bytes)
+
+        log_id = serving_utils.generate_log_id()
+        payload = InferResult(detectedObjects=objects, image=output_image_base64)
+        return ResultResponse[InferResult](logId=log_id, result=payload)
+
+    return app

+ 99 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py

@@ -0,0 +1,99 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.ocr import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+from ._common import common
+from ._common import ocr as ocr_common
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Create the serving app for the OCR pipeline.
+
+    Args:
+        pipeline: Pipeline object passed on to `create_app`; the handler
+            awaits `ctx.pipeline.infer`.
+        app_config: Serving configuration passed on to `create_app`.
+
+    Returns:
+        The FastAPI app with the "infer" primary operation registered at
+        `INFER_ENDPOINT`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    ocr_common.update_app_context(ctx)
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        runner = ctx.pipeline
+        log_id = serving_utils.generate_log_id()
+
+        images, data_info = await ocr_common.get_images(request, ctx)
+
+        # Request options are forwarded one-to-one as pipeline keyword arguments.
+        outputs = await runner.infer(
+            images,
+            use_doc_orientation_classify=request.useDocOrientationClassify,
+            use_doc_unwarping=request.useDocUnwarping,
+            use_textline_orientation=request.useTextlineOrientation,
+            text_det_limit_side_len=request.textDetLimitSideLen,
+            text_det_limit_type=request.textDetLimitType,
+            text_det_thresh=request.textDetThresh,
+            text_det_box_thresh=request.textDetBoxThresh,
+            text_det_unclip_ratio=request.textDetUnclipRatio,
+            text_rec_score_thresh=request.textRecScoreThresh,
+        )
+
+        ocr_results: List[Dict[str, Any]] = []
+        for idx, (input_img, item) in enumerate(zip(images, outputs)):
+            pruned = common.prune_result(item.json["res"])
+
+            # With visualization off, no images are produced and every image
+            # field in the response entry is None.
+            encoded = {}
+            if ctx.config.visualize:
+                rendered = item.img
+                pending = {
+                    "input_img": input_img,
+                    "ocr_img": rendered["ocr_res_img"],
+                }
+                # The preprocessing image is included only when present.
+                if "preprocessed_img" in rendered:
+                    pending["doc_preprocessing_img"] = rendered["preprocessed_img"]
+                encoded = await serving_utils.call_async(
+                    common.postprocess_images,
+                    pending,
+                    log_id,
+                    filename_template=f"{{key}}_{idx}.jpg",
+                    file_storage=ctx.extra["file_storage"],
+                    return_urls=ctx.extra["return_img_urls"],
+                    max_img_size=ctx.extra["max_output_img_size"],
+                )
+
+            ocr_results.append(
+                {
+                    "prunedResult": pruned,
+                    "ocrImage": encoded.get("ocr_img"),
+                    "docPreprocessingImage": encoded.get("doc_preprocessing_img"),
+                    "inputImage": encoded.get("input_img"),
+                }
+            )
+
+        payload = InferResult(
+            ocrResults=ocr_results,
+            dataInfo=data_info,
+        )
+        return ResultResponse[InferResult](logId=log_id, result=payload)
+
+    return app

+ 81 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py

@@ -0,0 +1,81 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.pedestrian_attribute_recognition import (
+    INFER_ENDPOINT,
+    InferRequest,
+    InferResult,
+)
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Create the serving app for the pedestrian attribute recognition pipeline.
+
+    Args:
+        pipeline: Pipeline object passed on to `create_app`; the handler
+            awaits `ctx.pipeline.infer`.
+        app_config: Serving configuration passed on to `create_app`.
+
+    Returns:
+        The FastAPI app with the "infer" primary operation registered at
+        `INFER_ENDPOINT`.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        runner, session = ctx.pipeline, ctx.aiohttp_session
+
+        # Load the input image and convert it to an array for the pipeline.
+        raw_bytes = await serving_utils.get_raw_bytes_async(request.image, session)
+        img_array = serving_utils.image_bytes_to_array(raw_bytes)
+
+        # Detection and classification thresholds come straight from the request.
+        outputs = await runner.infer(
+            img_array,
+            det_threshold=request.detThreshold,
+            cls_threshold=request.clsThreshold,
+        )
+        result = outputs[0]
+
+        # Each box pairs its labels with the classification scores positionally.
+        pedestrians: List[Dict[str, Any]] = []
+        for box in result["boxes"]:
+            attributes = [
+                {"label": label, "score": cls_score}
+                for label, cls_score in zip(box["labels"], box["cls_scores"])
+            ]
+            pedestrians.append(
+                {
+                    "bbox": box["coordinate"],
+                    "attributes": attributes,
+                    "score": box["det_score"],
+                }
+            )
+
+        # The visualization is attached only when enabled in the config.
+        output_image_base64 = None
+        if ctx.config.visualize:
+            vis_bytes = serving_utils.image_to_bytes(result.img["res"])
+            output_image_base64 = serving_utils.base64_encode(vis_bytes)
+
+        log_id = serving_utils.generate_log_id()
+        payload = InferResult(pedestrians=pedestrians, image=output_image_base64)
+        return ResultResponse[InferResult](logId=log_id, result=payload)
+
+    return app

+ 199 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py

@@ -0,0 +1,199 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas import pp_chatocrv3_doc as schema
+from .._app import create_app, primary_operation
+from ._common import ocr as ocr_common
+
+
+# XXX: Since the pipeline class does not provide serialization and
+# deserialization methods, these are implemented here based on the save-to-path
+# and load-from-path methods.
+def _serialize_vector_info(pipeline: Any, vector_info: dict) -> str:
+    """Return the text that `pipeline.save_vector` writes for `vector_info`.
+
+    Args:
+        pipeline: Object providing `save_vector(vector_info, path)`.
+        vector_info: Vector info to serialize.
+
+    Returns:
+        The content of the saved file, read as UTF-8 text.
+    """
+    # Reserve a closed temporary ".json" file; only its path is needed.
+    placeholder = tempfile.NamedTemporaryFile(suffix=".json", delete=False)
+    placeholder.close()
+    tmp_path = placeholder.name
+    try:
+        pipeline.save_vector(vector_info, tmp_path)
+        with open(tmp_path, "r", encoding="utf-8") as saved:
+            content = saved.read()
+        return content
+    finally:
+        # The file was created with delete=False, so it is removed by hand.
+        os.unlink(tmp_path)
+
+
+def _deserialize_vector_info(pipeline: Any, vector_info: str) -> dict:
+    """Load vector info from its serialized text form.
+
+    The text is written to a temporary ".json" file whose path is passed to
+    `pipeline.load_vector`. The file is removed afterwards, including when
+    writing or loading fails.
+
+    Args:
+        pipeline: Object providing `load_vector(path)`.
+        vector_info: Serialized vector info text.
+
+    Returns:
+        Whatever `pipeline.load_vector` returns for the temporary file.
+    """
+    tmp = tempfile.NamedTemporaryFile(
+        "w", encoding="utf-8", suffix=".json", delete=False
+    )
+    path = tmp.name
+    try:
+        # Writing happens inside the guarded region so that a failed write
+        # does not leave the delete=False file behind. The file is closed
+        # before the pipeline reads it back by path.
+        with tmp:
+            tmp.write(vector_info)
+        return pipeline.load_vector(path)
+    finally:
+        os.unlink(path)
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    """Create the serving app for the PP-ChatOCRv3-doc pipeline.
+
+    Three primary operations are registered: "analyzeImages",
+    "buildVectorStore" and "chat".
+
+    Args:
+        pipeline: Pipeline object passed on to `create_app`; the handlers
+            use `ctx.pipeline` and its inner `pipeline` attribute.
+        app_config: Serving configuration passed on to `create_app`.
+
+    Returns:
+        The FastAPI app with the three operations registered.
+    """
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    ocr_common.update_app_context(ctx)
+
+    @primary_operation(
+        app,
+        schema.ANALYZE_IMAGES_ENDPOINT,
+        "analyzeImages",
+    )
+    async def _analyze_images(
+        request: schema.AnalyzeImagesRequest,
+    ) -> ResultResponse[schema.AnalyzeImagesResult]:
+        wrapper = ctx.pipeline
+        log_id = serving_utils.generate_log_id()
+
+        images, data_info = await ocr_common.get_images(request, ctx)
+
+        result = await wrapper.call(
+            wrapper.pipeline.visual_predict,
+            images,
+            use_doc_orientation_classify=request.useDocOrientationClassify,
+            use_doc_unwarping=request.useDocUnwarping,
+            use_general_ocr=request.useGeneralOcr,
+            use_seal_recognition=request.useSealRecognition,
+            use_table_recognition=request.useTableRecognition,
+        )
+
+        # Input images and layout parsing results are paired positionally.
+        visual_results: List[Dict[str, Any]] = []
+        paired = zip(images, result["layout_parsing_result"])
+        for idx, (img, item) in enumerate(paired):
+            ocr_res = item["ocr_result"]
+            texts: List[dict] = [
+                {"poly": poly, "text": text, "score": score}
+                for poly, text, score in zip(
+                    ocr_res["dt_polys"],
+                    ocr_res["rec_text"],
+                    ocr_res["rec_score"],
+                )
+            ]
+            tables = [
+                {"bbox": table["layout_bbox"], "html": table["html"]}
+                for table in item["table_result"]
+            ]
+
+            # Images are post-processed only when visualization is enabled.
+            input_img = layout_img = ocr_img = None
+            if ctx.config.visualize:
+                input_img, layout_img, ocr_img = await ocr_common.postprocess_images(
+                    log_id=log_id,
+                    index=idx,
+                    app_context=ctx,
+                    input_image=img,
+                    layout_image=item["layout_result"].img,
+                    ocr_image=ocr_res.img,
+                )
+
+            visual_results.append(
+                {
+                    "texts": texts,
+                    "tables": tables,
+                    "inputImage": input_img,
+                    "layoutImage": layout_img,
+                    "ocrImage": ocr_img,
+                }
+            )
+
+        payload = schema.AnalyzeImagesResult(
+            visualResults=visual_results,
+            visualInfo=result["visual_info"],
+            dataInfo=data_info,
+        )
+        return ResultResponse[schema.AnalyzeImagesResult](
+            logId=log_id,
+            result=payload,
+        )
+
+    @primary_operation(
+        app,
+        schema.BUILD_VECTOR_STORE_ENDPOINT,
+        "buildVectorStore",
+    )
+    async def _build_vector_store(
+        request: schema.BuildVectorStoreRequest,
+    ) -> ResultResponse[schema.BuildVectorStoreResult]:
+        wrapper = ctx.pipeline
+
+        built = await serving_utils.call_async(
+            wrapper.pipeline.build_vector,
+            request.visualInfo,
+            min_characters=request.minCharacters,
+            llm_request_interval=request.llmRequestInterval,
+        )
+
+        # The built vector info is converted to text for the response.
+        serialized = await serving_utils.call_async(
+            _serialize_vector_info, wrapper.pipeline, built
+        )
+
+        log_id = serving_utils.generate_log_id()
+        payload = schema.BuildVectorStoreResult(vectorInfo=serialized)
+        return ResultResponse[schema.BuildVectorStoreResult](
+            logId=log_id,
+            result=payload,
+        )
+
+    @primary_operation(
+        app,
+        schema.CHAT_ENDPOINT,
+        "chat",
+    )
+    async def _chat(
+        request: schema.ChatRequest,
+    ) -> ResultResponse[schema.ChatResult]:
+        wrapper = ctx.pipeline
+
+        # Vector info is optional; a falsy value is passed on as None.
+        vector_info = None
+        if request.vectorInfo:
+            vector_info = await serving_utils.call_async(
+                _deserialize_vector_info,
+                wrapper.pipeline,
+                request.vectorInfo,
+            )
+
+        result = await serving_utils.call_async(
+            wrapper.pipeline.chat,
+            request.keyList,
+            request.visualInfo,
+            use_vector_retrieval=request.useVectorRetrieval,
+            vector_info=vector_info,
+            min_characters=request.minCharacters,
+            text_task_description=request.textTaskDescription,
+            text_output_format=request.textOutputFormat,
+            text_rules_str=request.textRulesStr,
+            text_few_shot_demo_text_content=request.textFewShotDemoTextContent,
+            text_few_shot_demo_key_value_list=request.textFewShotDemoKeyValueList,
+            table_task_description=request.tableTaskDescription,
+            table_output_format=request.tableOutputFormat,
+            table_rules_str=request.tableRulesStr,
+            table_few_shot_demo_text_content=request.tableFewShotDemoTextContent,
+            table_few_shot_demo_key_value_list=request.tableFewShotDemoKeyValueList,
+        )
+
+        log_id = serving_utils.generate_log_id()
+        payload = schema.ChatResult(
+            chatResult=result["chat_res"],
+        )
+        return ResultResponse[schema.ChatResult](
+            logId=log_id,
+            result=payload,
+        )
+
+    return app

+ 220 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py

@@ -0,0 +1,220 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+from operator import attrgetter
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ....pipelines_new.components import IndexData
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas import pp_shituv2 as schema
+from .._app import create_app, primary_operation
+from ._common import image_recognition as ir_common
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    ir_common.update_app_context(ctx)
+
+    @primary_operation(
+        app,
+        schema.BUILD_INDEX_ENDPOINT,
+        "buildIndex",
+    )
+    async def _build_index(
+        request: schema.BuildIndexRequest,
+    ) -> ResultResponse[schema.BuildIndexResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes_list = await asyncio.gather(
+            *(
+                serving_utils.get_raw_bytes_async(img, aiohttp_session)
+                for img in map(attrgetter("image"), request.imageLabelPairs)
+            )
+        )
+        images = [serving_utils.image_bytes_to_array(item) for item in file_bytes_list]
+        labels = [pair.label for pair in request.imageLabelPairs]
+
+        # TODO: Support specifying `index_type` and `metric_type` in the
+        # request
+        index_data = await pipeline.call(
+            pipeline.pipeline.build_index,
+            images,
+            labels,
+            index_type="Flat",
+            metric_type="IP",
+        )
+
+        index_storage = ctx.extra["index_storage"]
+        index_key = ir_common.generate_index_key()
+        index_data_bytes = index_data.to_bytes()
+        await serving_utils.call_async(index_storage.set, index_key, index_data_bytes)
+
+        return ResultResponse[schema.BuildIndexResult](
+            logId=serving_utils.generate_log_id(),
+            result=schema.BuildIndexResult(indexKey=index_key, idMap=index_data.id_map),
+        )
+
+    @primary_operation(
+        app,
+        schema.ADD_IMAGES_TO_INDEX_ENDPOINT,
+        "addImagesToIndex",
+    )
+    async def _add_images_to_index(
+        request: schema.AddImagesToIndexRequest,
+    ) -> ResultResponse[schema.AddImagesToIndexResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes_list = await asyncio.gather(
+            *(
+                serving_utils.get_raw_bytes_async(img, aiohttp_session)
+                for img in map(attrgetter("image"), request.imageLabelPairs)
+            )
+        )
+        images = [serving_utils.image_bytes_to_array(item) for item in file_bytes_list]
+        labels = [pair.label for pair in request.imageLabelPairs]
+
+        if request.indexKey is not None:
+            index_storage = ctx.extra["index_storage"]
+            index_data_bytes = await serving_utils.call_async(
+                index_storage.get, request.indexKey
+            )
+            index_data = IndexData.from_bytes(index_data_bytes)
+        else:
+            index_data = None
+
+        index_data = await pipeline.call(
+            pipeline.pipeline.append_index, images, labels, index_data
+        )
+
+        index_data_bytes = index_data.to_bytes()
+        await serving_utils.call_async(
+            index_storage.set, request.indexKey, index_data_bytes
+        )
+
+        return ResultResponse[schema.AddImagesToIndexResult](
+            logId=serving_utils.generate_log_id(),
+            result=schema.AddImagesToIndexResult(idMap=index_data.id_map),
+        )
+
+    @primary_operation(
+        app,
+        schema.REMOVE_IMAGES_FROM_INDEX_ENDPOINT,
+        "removeImagesFromIndex",
+    )
+    async def _remove_images_from_index(
+        request: schema.RemoveImagesFromIndexRequest,
+    ) -> ResultResponse[schema.RemoveImagesFromIndexResult]:
+        pipeline = ctx.pipeline
+
+        if request.indexKey is not None:
+            index_storage = ctx.extra["index_storage"]
+            index_data_bytes = await serving_utils.call_async(
+                index_storage.get, request.indexKey
+            )
+            index_data = IndexData.from_bytes(index_data_bytes)
+        else:
+            index_data = None
+
+        index_data = await pipeline.call(
+            pipeline.pipeline.remove_index, request.ids, index_data
+        )
+
+        index_data_bytes = index_data.to_bytes()
+        await serving_utils.call_async(
+            index_storage.set, request.indexKey, index_data_bytes
+        )
+
+        return ResultResponse[schema.RemoveImagesFromIndexResult](
+            logId=serving_utils.generate_log_id(),
+            result=schema.RemoveImagesFromIndexResult(idMap=index_data.id_map),
+        )
+
+    @primary_operation(
+        app,
+        schema.INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(
+        request: schema.InferRequest,
+    ) -> ResultResponse[schema.InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        image_bytes = await serving_utils.get_raw_bytes_async(
+            request.image, aiohttp_session
+        )
+        image = serving_utils.image_bytes_to_array(image_bytes)
+
+        if request.indexKey is not None:
+            index_storage = ctx.extra["index_storage"]
+            index_data_bytes = await serving_utils.call_async(
+                index_storage.get, request.indexKey
+            )
+            index_data = IndexData.from_bytes(index_data_bytes)
+        else:
+            index_data = None
+
+        result = list(
+            await pipeline.call(
+                pipeline.pipeline.predict,
+                image,
+                index=index_data,
+                det_threshold=request.detThreshold,
+                rec_threshold=request.recThreshold,
+                hamming_radius=request.hammingRadius,
+                topk=request.topk,
+            )
+        )[0]
+
+        objs: List[Dict[str, Any]] = []
+        for obj in result["boxes"]:
+            rec_results: List[Dict[str, Any]] = []
+            if obj["rec_scores"] is not None:
+                for label, score in zip(obj["labels"], obj["rec_scores"]):
+                    rec_results.append(
+                        dict(
+                            label=label,
+                            score=score,
+                        )
+                    )
+            objs.append(
+                dict(
+                    bbox=obj["coordinate"],
+                    recResults=rec_results,
+                    score=obj["det_score"],
+                )
+            )
+        if ctx.config.visualize:
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img["res"])
+            )
+        else:
+            output_image_base64 = None
+
+        return ResultResponse[schema.InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=schema.InferResult(detectedObjects=objs, image=output_image_base64),
+        )
+
+    return app

+ 106 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py

@@ -0,0 +1,106 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.seal_recognition import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+from ._common import common
+from ._common import ocr as ocr_common
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    ocr_common.update_app_context(ctx)
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+
+        log_id = serving_utils.generate_log_id()
+
+        images, data_info = await ocr_common.get_images(request, ctx)
+
+        result = await pipeline.infer(
+            images,
+            use_doc_orientation_classify=request.useDocOrientationClassify,
+            use_doc_unwarping=request.useDocUnwarping,
+            use_layout_detection=request.useLayoutDetection,
+            layout_threshold=request.layoutThreshold,
+            layout_nms=request.layoutNms,
+            layout_unclip_ratio=request.layoutUnclipRatio,
+            layout_merge_bboxes_mode=request.layoutMergeBboxesMode,
+            seal_det_limit_side_len=request.sealDetLimitSideLen,
+            seal_det_limit_type=request.sealDetLimitType,
+            seal_det_thresh=request.sealDetThresh,
+            seal_det_box_thresh=request.sealDetBoxThresh,
+            seal_det_unclip_ratio=request.sealDetUnclipRatio,
+            seal_rec_score_thresh=request.sealRecScoreThresh,
+        )
+
+        seal_rec_results: List[Dict[str, Any]] = []
+        for i, (img, item) in enumerate(zip(images, result)):
+            pruned_res = common.prune_result(item.json["res"])
+            if ctx.config.visualize:
+                output_imgs = item.img
+                imgs = {
+                    "input_img": img,
+                    "seal_rec_img": output_imgs["seal_res_region1"],
+                }
+                if "layout_det_res" in output_imgs:
+                    imgs["layout_det_img"] = output_imgs["layout_det_res"]
+                if "preprocessed_img" in output_imgs:
+                    imgs["doc_preprocessing_img"] = output_imgs["preprocessed_img"]
+                imgs = await serving_utils.call_async(
+                    common.postprocess_images,
+                    imgs,
+                    log_id,
+                    filename_template=f"{{key}}_{i}.jpg",
+                    file_storage=ctx.extra["file_storage"],
+                    return_urls=ctx.extra["return_img_urls"],
+                    max_img_size=ctx.extra["max_output_img_size"],
+                )
+            else:
+                imgs = {}
+            seal_rec_results.append(
+                dict(
+                    prunedResult=pruned_res,
+                    sealRecImage=imgs.get("seal_rec_img"),
+                    layoutDetImage=imgs.get("layout_det_img"),
+                    docPreprocessingImage=imgs.get("doc_preprocessing_img"),
+                    inputImage=imgs.get("input_img"),
+                )
+            )
+
+        return ResultResponse[InferResult](
+            logId=log_id,
+            result=InferResult(
+                sealRecResults=seal_rec_results,
+                dataInfo=data_info,
+            ),
+        )
+
+    return app

+ 64 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py

@@ -0,0 +1,64 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.semantic_segmentation import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.image, aiohttp_session
+        )
+        image = serving_utils.image_bytes_to_array(file_bytes)
+
+        result = (await pipeline.infer(image, target_size=request.targetSize))[0]
+
+        pred = result["pred"][0].tolist()
+        size = [len(pred), len(pred[0])]
+        label_map = [item for sublist in pred for item in sublist]
+        if ctx.config.visualize:
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img["res"].convert("RGB"))
+            )
+        else:
+            output_image_base64 = None
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(
+                labelMap=label_map, size=size, image=output_image_base64
+            ),
+        )
+
+    return app

+ 69 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py

@@ -0,0 +1,69 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.small_object_detection import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.image, aiohttp_session
+        )
+        image = serving_utils.image_bytes_to_array(file_bytes)
+
+        result = (await pipeline.infer(image, threshold=request.threshold))[0]
+
+        objects: List[Dict[str, Any]] = []
+        for obj in result["boxes"]:
+            objects.append(
+                dict(
+                    bbox=obj["coordinate"],
+                    categoryId=obj["cls_id"],
+                    categoryName=obj["label"],
+                    score=obj["score"],
+                )
+            )
+        if ctx.config.visualize:
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img["res"])
+            )
+        else:
+            output_image_base64 = None
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(detectedObjects=objects, image=output_image_base64),
+        )
+
+    return app

+ 107 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py

@@ -0,0 +1,107 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.table_recognition import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+from ._common import common
+from ._common import ocr as ocr_common
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    ocr_common.update_app_context(ctx)
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+
+        log_id = serving_utils.generate_log_id()
+
+        images, data_info = await ocr_common.get_images(request, ctx)
+
+        result = await pipeline.infer(
+            images,
+            use_doc_orientation_classify=request.useDocOrientationClassify,
+            use_doc_unwarping=request.useDocUnwarping,
+            use_layout_detection=request.useLayoutDetection,
+            use_ocr_model=request.useOcrModel,
+            layout_threshold=request.layoutThreshold,
+            layout_nms=request.layoutNms,
+            layout_unclip_ratio=request.layoutUnclipRatio,
+            layout_merge_bboxes_mode=request.layoutMergeBboxesMode,
+            text_det_limit_side_len=request.textDetLimitSideLen,
+            text_det_limit_type=request.textDetLimitType,
+            text_det_thresh=request.textDetThresh,
+            text_det_box_thresh=request.textDetBoxThresh,
+            text_det_unclip_ratio=request.textDetUnclipRatio,
+            text_rec_score_thresh=request.textRecScoreThresh,
+        )
+
+        table_rec_results: List[Dict[str, Any]] = []
+        for i, (img, item) in enumerate(zip(images, result)):
+            pruned_res = common.prune_result(item.json["res"])
+            if ctx.config.visualize:
+                output_imgs = item.img
+                imgs = {
+                    "input_img": img,
+                    "ocr_img": output_imgs["ocr_res_img"],
+                }
+                if "layout_det_res" in output_imgs:
+                    imgs["layout_det_img"] = output_imgs["layout_det_res"]
+                if "preprocessed_img" in output_imgs:
+                    imgs["doc_preprocessing_img"] = output_imgs["preprocessed_img"]
+                imgs = await serving_utils.call_async(
+                    common.postprocess_images,
+                    imgs,
+                    log_id,
+                    filename_template=f"{{key}}_{i}.jpg",
+                    file_storage=ctx.extra["file_storage"],
+                    return_urls=ctx.extra["return_img_urls"],
+                    max_img_size=ctx.extra["max_output_img_size"],
+                )
+            else:
+                imgs = {}
+            table_rec_results.append(
+                dict(
+                    prunedResult=pruned_res,
+                    ocrImage=imgs.get("ocr_img"),
+                    layoutDetImage=imgs.get("layout_det_img"),
+                    docPreprocessingImage=imgs.get("doc_preprocessing_img"),
+                    inputImage=imgs.get("input_img"),
+                )
+            )
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(
+                tableRecResults=table_rec_results,
+                dataInfo=data_info,
+            ),
+        )
+
+    return app

+ 56 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py

@@ -0,0 +1,56 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.ts_anomaly_detection import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.csv, aiohttp_session
+        )
+        df = serving_utils.csv_bytes_to_data_frame(file_bytes)
+
+        result = (await pipeline.infer(df))[0]
+
+        output_csv = serving_utils.base64_encode(
+            serving_utils.data_frame_to_bytes(result["anomaly"])
+        )
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(csv=output_csv),
+        )
+
+    return app

+ 55 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py

@@ -0,0 +1,55 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.ts_classification import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.csv, aiohttp_session
+        )
+        df = serving_utils.csv_bytes_to_data_frame(file_bytes)
+
+        result = (await pipeline.infer(df))[0]
+
+        label = str(result["classification"].at[0, "classid"])
+        score = float(result["classification"].at[0, "score"])
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(label=label, score=score),
+        )
+
+    return app

+ 56 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py

@@ -0,0 +1,56 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.ts_forecast import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.csv, aiohttp_session
+        )
+        df = serving_utils.csv_bytes_to_data_frame(file_bytes)
+
+        result = (await pipeline.infer(df))[0]
+
+        output_csv = serving_utils.base64_encode(
+            serving_utils.data_frame_to_bytes(result["forecast"])
+        )
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(csv=output_csv),
+        )
+
+    return app

+ 81 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py

@@ -0,0 +1,81 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.vehicle_attribute_recognition import (
+    INFER_ENDPOINT,
+    InferRequest,
+    InferResult,
+)
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.image, aiohttp_session
+        )
+        image = serving_utils.image_bytes_to_array(file_bytes)
+
+        result = (
+            await pipeline.infer(
+                image,
+                det_threshold=request.detThreshold,
+                cls_threshold=request.clsThreshold,
+            )
+        )[0]
+
+        objs: List[Dict[str, Any]] = []
+        for obj in result["boxes"]:
+            objs.append(
+                dict(
+                    bbox=obj["coordinate"],
+                    attributes=[
+                        dict(label=l, score=s)
+                        for l, s in zip(obj["labels"], obj["cls_scores"])
+                    ],
+                    score=obj["det_score"],
+                )
+            )
+        if ctx.config.visualize:
+            output_image_base64 = serving_utils.base64_encode(
+                serving_utils.image_to_bytes(result.img["res"])
+            )
+        else:
+            output_image_base64 = None
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(vehicles=objs, image=output_image_base64),
+        )
+
+    return app

+ 73 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py

@@ -0,0 +1,73 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Any, Dict, List
+
+from fastapi import FastAPI, HTTPException
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.video_classification import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.video, aiohttp_session
+        )
+        ext = serving_utils.infer_file_ext(request.video)
+        if ext is None:
+            raise HTTPException(
+                status_code=422, detail="File extension cannot be inferred"
+            )
+        video_path = await serving_utils.call_async(
+            serving_utils.write_to_temp_file,
+            file_bytes,
+            suffix=ext,
+        )
+
+        try:
+            result = (await pipeline.infer(video_path, topk=request.topk))[0]
+        finally:
+            await serving_utils.call_async(os.unlink, video_path)
+
+        if "label_names" in result:
+            cat_names = result["label_names"]
+        else:
+            cat_names = [str(id_) for id_ in result["class_ids"]]
+        categories: List[Dict[str, Any]] = []
+        for id_, name, score in zip(result["class_ids"], cat_names, result["scores"]):
+            categories.append(dict(id=id_, name=name, score=score))
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(categories=categories),
+        )
+
+    return app

+ 89 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py

@@ -0,0 +1,89 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Any, Dict, List
+
+from fastapi import FastAPI, HTTPException
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.video_detection import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.video, aiohttp_session
+        )
+        ext = serving_utils.infer_file_ext(request.video)
+        if ext is None:
+            raise HTTPException(
+                status_code=422, detail="File extension cannot be inferred"
+            )
+        video_path = await serving_utils.call_async(
+            serving_utils.write_to_temp_file,
+            file_bytes,
+            suffix=ext,
+        )
+
+        try:
+            result = (
+                await pipeline.infer(
+                    video_path,
+                    nms_thresh=request.nmsThresh,
+                    score_thresh=request.scoreThresh,
+                )
+            )[0]
+        finally:
+            await serving_utils.call_async(os.unlink, video_path)
+
+        frames: List[Dict[str, Any]] = []
+        for i, item in enumerate(result["result"]):
+            objs: List[Dict[str, Any]] = []
+            for obj in item:
+                objs.append(
+                    dict(
+                        bbox=obj[0],
+                        categoryName=obj[2],
+                        score=obj[1],
+                    )
+                )
+            frames.append(
+                dict(
+                    index=i,
+                    detectedObjects=objs,
+                )
+            )
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(frames=frames),
+        )
+
+    return app

+ 36 - 0
paddlex/inference/serving/basic_serving/_server.py

@@ -0,0 +1,36 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+import uvicorn
+from fastapi import FastAPI
+
+
+def run_server(app: FastAPI, *, host: str, port: int, debug: bool) -> None:
+    # XXX: Currently, `debug` is not used.
+    # HACK: Fix duplicate logs
+    uvicorn_version = tuple(int(x) for x in uvicorn.__version__.split("."))
+    if uvicorn_version < (0, 19, 0):
+        logging.getLogger("uvicorn").propagate = False
+
+    # HACK
+    # https://github.com/encode/starlette/issues/864
+    class _EndpointFilter(logging.Filter):
+        def filter(self, record: logging.LogRecord) -> bool:
+            return record.getMessage().find("/health") == -1
+
+    logging.getLogger("uvicorn.access").addFilter(_EndpointFilter())
+
+    uvicorn.run(app, host=host, port=port, log_level="info")

+ 13 - 0
paddlex/inference/serving/infra/__init__.py

@@ -0,0 +1,13 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 36 - 0
paddlex/inference/serving/infra/config.py

@@ -0,0 +1,36 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, Final, Mapping, Optional
+
+from pydantic import BaseModel
+
+__all__ = [
+    "SERVING_CONFIG_KEY",
+    "AppConfig",
+    "create_app_config",
+]
+
+SERVING_CONFIG_KEY: Final[str] = "Serving"
+
+
+class AppConfig(BaseModel):
+    """Configuration of a serving application."""
+
+    # Defaults to True. NOTE(review): presumably toggles returning
+    # visualization images in responses -- confirm against the apps.
+    visualize: bool = True
+    # Optional free-form extra settings; None when not provided.
+    extra: Optional[Dict[str, Any]] = None
+
+
+def create_app_config(pipeline_config: Mapping[str, Any], **kwargs: Any) -> AppConfig:
+    app_config = pipeline_config.get(SERVING_CONFIG_KEY, {})
+    app_config.update(kwargs)
+    return AppConfig.model_validate(app_config)

+ 72 - 0
paddlex/inference/serving/infra/models.py

@@ -0,0 +1,72 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Generic, List, Tuple, Type, TypeVar, Union
+
+from pydantic import BaseModel, Discriminator
+from typing_extensions import Annotated, Literal, TypeAlias
+
+__all__ = [
+    "NoResultResponse",
+    "ResultT",
+    "ResultResponse",
+    "Response",
+    "ImageInfo",
+    "PDFPageInfo",
+    "PDFInfo",
+    "DataInfo",
+    "PrimaryOperations",
+]
+
+
+class NoResultResponse(BaseModel):
+    """Response envelope that carries no result payload, only a log id and an
+    error code/message pair."""
+
+    logId: str
+    errorCode: int
+    errorMsg: str
+
+
+ResultT = TypeVar("ResultT", bound=BaseModel)
+
+
+class ResultResponse(BaseModel, Generic[ResultT]):
+    """Response envelope carrying a result payload of type `ResultT`.
+
+    `errorCode` and `errorMsg` are pinned to `0` and `"Success"` by their
+    `Literal` types.
+    """
+
+    logId: str
+    result: ResultT
+    errorCode: Literal[0] = 0
+    errorMsg: Literal["Success"] = "Success"
+
+
+Response: TypeAlias = Union[ResultResponse, NoResultResponse]
+
+
+class ImageInfo(BaseModel):
+    """Dimensions of an image input; the `"image"` member of `DataInfo`."""
+
+    width: int
+    height: int
+    # Discriminator value used by the `DataInfo` union.
+    type: Literal["image"] = "image"
+
+
+class PDFPageInfo(BaseModel):
+    """Dimensions of a single PDF page."""
+
+    width: int
+    height: int
+
+
+class PDFInfo(BaseModel):
+    """Page count and per-page dimensions of a PDF input; the `"pdf"` member
+    of `DataInfo`."""
+
+    numPages: int
+    pages: List[PDFPageInfo]
+    # Discriminator value used by the `DataInfo` union.
+    type: Literal["pdf"] = "pdf"
+
+
+DataInfo: TypeAlias = Annotated[Union[ImageInfo, PDFInfo], Discriminator("type")]
+
+# Should we use generics?
+PrimaryOperations: TypeAlias = Dict[str, Tuple[str, BaseModel, BaseModel]]

+ 175 - 0
paddlex/inference/serving/infra/storage.py

@@ -0,0 +1,175 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+from os import PathLike
+from pathlib import Path
+from typing import Any, Dict, Optional, Protocol, Union, runtime_checkable
+
+from baidubce.auth.bce_credentials import BceCredentials
+from baidubce.bce_client_configuration import BceClientConfiguration
+from baidubce.services.bos.bos_client import BosClient
+from pydantic import BaseModel, Discriminator, SecretStr, TypeAdapter
+from typing_extensions import Annotated, Literal, assert_never
+
+__all__ = [
+    "InMemoryStorageConfig",
+    "FileSystemStorageConfig",
+    "BOSConfig",
+    "FileStorageConfig",
+    "SupportsGetURL",
+    "Storage",
+    "InMemoryStorage",
+    "FileSystemStorage",
+    "BOS",
+    "create_storage",
+]
+
+
+class InMemoryStorageConfig(BaseModel):
+    """Configuration selecting the in-memory storage backend."""
+
+    # Discriminator value used by `FileStorageConfig`.
+    type: Literal["memory"] = "memory"
+
+
+class FileSystemStorageConfig(BaseModel):
+    """Configuration selecting the file-system storage backend."""
+
+    # Base directory under which `FileSystemStorage` places its files.
+    directory: Union[str, PathLike]
+
+    # Discriminator value used by `FileStorageConfig`.
+    type: Literal["file_system"] = "file_system"
+
+
+class BOSConfig(BaseModel):
+    """Configuration selecting the BOS (Baidu Object Storage) backend."""
+
+    endpoint: str
+    # Credentials passed to `BceCredentials`; kept as `SecretStr`.
+    ak: SecretStr
+    sk: SecretStr
+    bucket_name: str
+    # When set, prepended to every key as "<key_prefix>/<key>".
+    key_prefix: Optional[str] = None
+    # Forwarded unchanged to `BceClientConfiguration`.
+    connection_timeout_in_mills: Optional[int] = None
+
+    # Discriminator value used by `FileStorageConfig`.
+    type: Literal["bos"] = "bos"
+
+
+FileStorageConfig = Annotated[
+    Union[InMemoryStorageConfig, FileSystemStorageConfig, BOSConfig],
+    Discriminator("type"),
+]
+
+
+@runtime_checkable
+class SupportsGetURL(Protocol):
+    """Runtime-checkable protocol for objects that can produce a URL for a key."""
+
+    def get_url(self, key: str) -> str: ...
+
+
+class Storage(metaclass=abc.ABCMeta):
+    """Abstract interface of a key-to-bytes store."""
+
+    @abc.abstractmethod
+    def get(self, key: str) -> bytes:
+        """Return the bytes stored under `key`."""
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def set(self, key: str, value: bytes) -> None:
+        """Store `value` under `key`."""
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def delete(self, key: str) -> None:
+        """Remove the entry stored under `key`."""
+        raise NotImplementedError
+
+
+class InMemoryStorage(Storage):
+    def __init__(self, config: InMemoryStorageConfig) -> None:
+        super().__init__()
+        self._data: Dict[str, bytes] = {}
+
+    def get(self, key: str) -> bytes:
+        return self._data[key]
+
+    def set(self, key: str, value: bytes) -> None:
+        self._data[key] = value
+
+    def delete(self, key: str) -> None:
+        del self._data[key]
+
+
+class FileSystemStorage(Storage):
+    def __init__(self, config: FileSystemStorageConfig) -> None:
+        super().__init__()
+        self._directory = Path(config.directory)
+        self._directory.mkdir(exist_ok=True)
+
+    def get(self, key: str) -> bytes:
+        with open(self._get_file_path(key), "rb") as f:
+            contents = f.read()
+        return contents
+
+    def set(self, key: str, value: bytes) -> None:
+        path = self._get_file_path(key)
+        path.parent.mkdir(exist_ok=True)
+        with open(path, "wb") as f:
+            f.write(value)
+
+    def delete(self, key: str) -> None:
+        file_path = self._get_file_path(key)
+        file_path.unlink(missing_ok=True)
+
+    def _get_file_path(self, key: str) -> Path:
+        return self._directory / key
+
+
+class BOS(Storage):
+    def __init__(self, config: BOSConfig) -> None:
+        super().__init__()
+        bos_cfg = BceClientConfiguration(
+            credentials=BceCredentials(
+                config.ak.get_secret_value(), config.sk.get_secret_value()
+            ),
+            endpoint=config.endpoint,
+            connection_timeout_in_mills=config.connection_timeout_in_mills,
+        )
+        self._client = BosClient(bos_cfg)
+        self._bucket_name = config.bucket_name
+        self._key_prefix = config.key_prefix
+
+    def get(self, key: str) -> bytes:
+        key = self._get_full_key(key)
+        return self._client.get_object_as_string(bucket_name=self._bucket_name, key=key)
+
+    def set(self, key: str, value: bytes) -> None:
+        key = self._get_full_key(key)
+        self._client.put_object_from_string(
+            bucket=self._bucket_name, key=key, data=value
+        )
+
+    def delete(self, key: str) -> None:
+        key = self._get_full_key(key)
+        self._client.delete_object(bucket_name=self._bucket_name, key=key)
+
+    def get_url(self, key: str) -> str:
+        key = self._get_full_key(key)
+        return self._client.generate_pre_signed_url(
+            self._bucket_name, key, expiration_in_seconds=-1
+        ).decode("ascii")
+
+    def _get_full_key(self, key: str) -> str:
+        if self._key_prefix:
+            return f"{self._key_prefix}/{key}"
+        return key
+
+
+def create_storage(dic: Dict[str, Any], /) -> Storage:
+    config = TypeAdapter(FileStorageConfig).validate_python(dic)
+    if config.type == "memory":
+        return InMemoryStorage(config)
+    elif config.type == "file_system":
+        return FileSystemStorage(config)
+    elif config.type == "bos":
+        return BOS(config)
+    else:
+        assert_never(config)

+ 259 - 0
paddlex/inference/serving/infra/utils.py

@@ -0,0 +1,259 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import base64
+import io
+import mimetypes
+import tempfile
+import uuid
+from functools import partial
+from typing import Awaitable, Callable, List, Optional, Tuple, TypeVar, Union, overload
+from urllib.parse import urlparse
+
+import aiohttp
+import cv2
+import filetype
+import fitz
+import numpy as np
+import pandas as pd
+import requests
+import yarl
+from PIL import Image
+from typing_extensions import Literal, ParamSpec, TypeAlias, assert_never
+
+from .models import ImageInfo, PDFInfo, PDFPageInfo
+
+__all__ = [
+    "FileType",
+    "generate_log_id",
+    "is_url",
+    "infer_file_type",
+    "infer_file_ext",
+    "image_bytes_to_array",
+    "image_bytes_to_image",
+    "image_to_bytes",
+    "image_array_to_bytes",
+    "csv_bytes_to_data_frame",
+    "data_frame_to_bytes",
+    "base64_encode",
+    "read_pdf",
+    "file_to_images",
+    "get_image_info",
+    "write_to_temp_file",
+    "get_raw_bytes",
+    "get_raw_bytes_async",
+    "call_async",
+]
+
+FileType: TypeAlias = Literal["IMAGE", "PDF", "VIDEO", "AUDIO"]
+
+_P = ParamSpec("_P")
+_R = TypeVar("_R")
+
+
+def generate_log_id() -> str:
+    return str(uuid.uuid4())
+
+
+# TODO:
+# 1. Use Pydantic to validate the URL and Base64-encoded string types for both
+#    input and output data instead of handling this manually.
+# 2. Define a `File` type for global use; this will be part of the contract.
+# 3. Consider using two separate fields instead of a union of URL and Base64,
+#    even though they are both strings. Backward compatibility should be
+#    maintained.
+def is_url(s: str) -> bool:
+    if not (s.startswith("http://") or s.startswith("https://")):
+        # Quick rejection
+        return False
+    result = urlparse(s)
+    return all([result.scheme, result.netloc]) and result.scheme in ("http", "https")
+
+
+def infer_file_type(url: str) -> Optional[FileType]:
+    url_parts = urlparse(url)
+    filename = url_parts.path.split("/")[-1]
+
+    file_type = mimetypes.guess_type(filename)[0]
+
+    if file_type is None:
+        return None
+
+    if file_type.startswith("image/"):
+        return "IMAGE"
+    elif file_type == "application/pdf":
+        return "PDF"
+    elif file_type.startswith("video/"):
+        return "VIDEO"
+    elif file_type.startswith("audio/"):
+        return "AUDIO"
+    else:
+        return None
+
+
+def infer_file_ext(file: str) -> Optional[str]:
+    if is_url(file):
+        url_parts = urlparse(file)
+        filename = url_parts.path.split("/")[-1]
+        mime_type = mimetypes.guess_type(filename)[0]
+        if mime_type is None:
+            return None
+        return mimetypes.guess_extension(mime_type)
+    else:
+        bytes_ = base64.b64decode(file)
+        return filetype.guess_extension(bytes_)
+
+
+def image_bytes_to_array(data: bytes) -> np.ndarray:
+    return cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_COLOR)
+
+
+def image_bytes_to_image(data: bytes) -> Image.Image:
+    return Image.open(io.BytesIO(data))
+
+
+def image_to_bytes(image: Image.Image, format: str = "JPEG") -> bytes:
+    with io.BytesIO() as f:
+        image.save(f, format=format)
+        img_bytes = f.getvalue()
+    return img_bytes
+
+
+def image_array_to_bytes(image: np.ndarray, ext: str = ".jpg") -> bytes:
+    image = cv2.imencode(ext, image)[1]
+    return image.tobytes()
+
+
+def csv_bytes_to_data_frame(data: bytes) -> pd.DataFrame:
+    with io.StringIO(data.decode("utf-8")) as f:
+        df = pd.read_csv(f)
+    return df
+
+
+def data_frame_to_bytes(df: pd.DataFrame) -> bytes:
+    return df.to_csv().encode("utf-8")
+
+
+def base64_encode(data: bytes) -> str:
+    return base64.b64encode(data).decode("ascii")
+
+
+def read_pdf(
+    bytes_: bytes, max_num_imgs: Optional[int] = None
+) -> Tuple[List[np.ndarray], PDFInfo]:
+    images: List[np.ndarray] = []
+    page_info_list: List[PDFPageInfo] = []
+    with fitz.open("pdf", bytes_) as doc:
+        for page in doc:
+            if max_num_imgs is not None and len(images) >= max_num_imgs:
+                break
+            # TODO: Do not always use zoom=2.0
+            zoom = 2.0
+            deg = 0
+            mat = fitz.Matrix(zoom, zoom).prerotate(deg)
+            pixmap = page.get_pixmap(matrix=mat, alpha=False)
+            image = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(
+                pixmap.h, pixmap.w, pixmap.n
+            )
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+            images.append(image)
+            page_info = PDFPageInfo(
+                width=pixmap.w,
+                height=pixmap.h,
+            )
+            page_info_list.append(page_info)
+    pdf_info = PDFInfo(
+        numPages=len(page_info_list),
+        pages=page_info_list,
+    )
+    return images, pdf_info
+
+
+@overload
+def file_to_images(
+    file_bytes: bytes,
+    file_type: Literal["IMAGE"],
+    *,
+    max_num_imgs: Optional[int] = ...,
+) -> Tuple[List[np.ndarray], ImageInfo]: ...
+
+
+@overload
+def file_to_images(
+    file_bytes: bytes,
+    file_type: Literal["PDF"],
+    *,
+    max_num_imgs: Optional[int] = ...,
+) -> Tuple[List[np.ndarray], PDFInfo]: ...
+
+
+@overload
+def file_to_images(
+    file_bytes: bytes,
+    file_type: Literal["IMAGE", "PDF"],
+    *,
+    max_num_imgs: Optional[int] = ...,
+) -> Union[Tuple[List[np.ndarray], ImageInfo], Tuple[List[np.ndarray], PDFInfo]]: ...
+
+
+def file_to_images(
+    file_bytes: bytes,
+    file_type: Literal["IMAGE", "PDF"],
+    *,
+    max_num_imgs: Optional[int] = None,
+) -> Union[Tuple[List[np.ndarray], ImageInfo], Tuple[List[np.ndarray], PDFInfo]]:
+    if file_type == "IMAGE":
+        images = [image_bytes_to_array(file_bytes)]
+        data_info = get_image_info(images[0])
+    elif file_type == "PDF":
+        images, data_info = read_pdf(file_bytes, max_num_imgs=max_num_imgs)
+    else:
+        assert_never(file_type)
+    return images, data_info
+
+
+def get_image_info(image: np.ndarray) -> ImageInfo:
+    return ImageInfo(width=image.shape[1], height=image.shape[0])
+
+
+def write_to_temp_file(file_bytes: bytes, suffix: str) -> str:
+    with tempfile.NamedTemporaryFile("wb", suffix=suffix, delete=False) as f:
+        f.write(file_bytes)
+        return f.name
+
+
+def get_raw_bytes(file: str) -> bytes:
+    if is_url(file):
+        resp = requests.get(file, timeout=5)
+        resp.raise_for_status()
+        return resp.content
+    else:
+        return base64.b64decode(file)
+
+
+async def get_raw_bytes_async(file: str, session: aiohttp.ClientSession) -> bytes:
+    if is_url(file):
+        async with session.get(yarl.URL(file, encoded=True)) as resp:
+            return await resp.read()
+    else:
+        return base64.b64decode(file)
+
+
+def call_async(
+    func: Callable[_P, _R], /, *args: _P.args, **kwargs: _P.kwargs
+) -> Awaitable[_R]:
+    return asyncio.get_running_loop().run_in_executor(
+        None, partial(func, *args, **kwargs)
+    )

+ 13 - 0
paddlex/inference/serving/schemas/__init__.py

@@ -0,0 +1,13 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 39 - 0
paddlex/inference/serving/schemas/anomaly_detection.py

@@ -0,0 +1,39 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+from .shared import image_segmentation
+
+__all__ = ["INFER_ENDPOINT", "InferRequest", "InferResult", "PRIMARY_OPERATIONS"]
+
+INFER_ENDPOINT: Final[str] = "/image-anomaly-detection"
+
+
+class InferRequest(BaseModel):
+    """Request model of the "infer" operation at `/image-anomaly-detection`."""
+
+    # NOTE(review): presumably a URL or a Base64-encoded image -- confirm
+    # against the app that consumes this schema.
+    image: str
+
+
+class InferResult(BaseModel):
+    """Result model of the "infer" operation at `/image-anomaly-detection`."""
+
+    labelMap: List[int]
+    size: image_segmentation.Size
+    # Optional; None when no image is returned.
+    image: Optional[str] = None
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 53 - 0
paddlex/inference/serving/schemas/doc_preprocessor.py

@@ -0,0 +1,53 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional
+
+from pydantic import BaseModel
+
+from ..infra.models import DataInfo, PrimaryOperations
+from .shared import ocr
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "DocPreprocessingResult",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+INFER_ENDPOINT: Final[str] = "/document-preprocessing"
+
+
+class InferRequest(ocr.BaseInferRequest):
+    """Request model of the "infer" operation at `/document-preprocessing`.
+
+    Extends `ocr.BaseInferRequest` with two optional switches; None means the
+    client did not specify a value.
+    """
+
+    # Should it be "Classification" instead of "Classify"? Keep the names
+    # consistent with the parameters of the wrapped function though.
+    useDocOrientationClassify: Optional[bool] = None
+    useDocUnwarping: Optional[bool] = None
+
+
+class DocPreprocessingResult(BaseModel):
+    """One entry of `InferResult.docPreprocessingResults`."""
+
+    prunedResult: dict
+    # Optional images; None when not returned.
+    docPreprocessingImage: Optional[str] = None
+    inputImage: Optional[str] = None
+
+
+class InferResult(BaseModel):
+    """Result model of the "infer" operation at `/document-preprocessing`."""
+
+    docPreprocessingResults: List[DocPreprocessingResult]
+    # Metadata of the input: an `ImageInfo` or a `PDFInfo`.
+    dataInfo: DataInfo
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 124 - 0
paddlex/inference/serving/schemas/face_recognition.py

@@ -0,0 +1,124 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Final, List, Optional
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+from .shared import object_detection
+
+__all__ = [
+    "ImageLabelPair",
+    "BUILD_INDEX_ENDPOINT",
+    "BuildIndexRequest",
+    "BuildIndexResult",
+    "ADD_IMAGES_TO_INDEX_ENDPOINT",
+    "AddImagesToIndexRequest",
+    "AddImagesToIndexResult",
+    "REMOVE_IMAGES_FROM_INDEX_ENDPOINT",
+    "RemoveImagesFromIndexRequest",
+    "RemoveImagesFromIndexResult",
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "RecResult",
+    "Face",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+
+class ImageLabelPair(BaseModel):
+    """An image together with its label, as used by the index operations."""
+
+    image: str
+    label: str
+
+
+BUILD_INDEX_ENDPOINT: Final[str] = "/face-recognition-index-build"
+
+
+class BuildIndexRequest(BaseModel):
+    """Request model of the "buildIndex" operation at
+    `/face-recognition-index-build`."""
+
+    imageLabelPairs: List[ImageLabelPair]
+
+
+class BuildIndexResult(BaseModel):
+    """Result model of the "buildIndex" operation."""
+
+    indexKey: str
+    # Maps integer ids to strings (presumably labels -- TODO confirm).
+    idMap: Dict[int, str]
+
+
+ADD_IMAGES_TO_INDEX_ENDPOINT: Final[str] = "/face-recognition-index-add"
+
+
+class AddImagesToIndexRequest(BaseModel):
+    """Request model of the "addImagesToIndex" operation at
+    `/face-recognition-index-add`."""
+
+    imageLabelPairs: List[ImageLabelPair]
+    # Optional; None when the client does not name an index.
+    indexKey: Optional[str] = None
+
+
+class AddImagesToIndexResult(BaseModel):
+    """Result model of the "addImagesToIndex" operation."""
+
+    idMap: Dict[int, str]
+
+
+REMOVE_IMAGES_FROM_INDEX_ENDPOINT: Final[str] = "/face-recognition-index-remove"
+
+
+class RemoveImagesFromIndexRequest(BaseModel):
+    """Request model of the "removeImagesFromIndex" operation at
+    `/face-recognition-index-remove`."""
+
+    ids: List[int]
+    # Optional; None when the client does not name an index.
+    indexKey: Optional[str] = None
+
+
+class RemoveImagesFromIndexResult(BaseModel):
+    """Result model of the "removeImagesFromIndex" operation."""
+
+    idMap: Dict[int, str]
+
+
+INFER_ENDPOINT: Final[str] = "/face-recognition-infer"
+
+
+class InferRequest(BaseModel):
+    """Request model of the "infer" operation at `/face-recognition-infer`.
+
+    Every field except `image` is optional; None means the client did not
+    specify a value.
+    """
+
+    image: str
+    indexKey: Optional[str] = None
+    detThreshold: Optional[float] = None
+    recThreshold: Optional[float] = None
+    hammingRadius: Optional[float] = None
+    topk: Optional[int] = None
+
+
+class RecResult(BaseModel):
+    """A recognition candidate: a label with its score."""
+
+    label: str
+    score: float
+
+
+class Face(BaseModel):
+    """One entry of `InferResult.faces`: a bounding box, a score and the
+    recognition candidates for that face."""
+
+    bbox: object_detection.BoundingBox
+    recResults: List[RecResult]
+    score: float
+
+
+class InferResult(BaseModel):
+    """Result model of the "infer" operation at `/face-recognition-infer`."""
+
+    faces: List[Face]
+    # Optional; None when no image is returned.
+    image: Optional[str] = None
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "buildIndex": (BUILD_INDEX_ENDPOINT, BuildIndexRequest, BuildIndexResult),
+    "addImagesToIndex": (
+        ADD_IMAGES_TO_INDEX_ENDPOINT,
+        AddImagesToIndexRequest,
+        AddImagesToIndexResult,
+    ),
+    "removeImagesFromIndex": (
+        REMOVE_IMAGES_FROM_INDEX_ENDPOINT,
+        RemoveImagesFromIndexRequest,
+        RemoveImagesFromIndexResult,
+    ),
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 54 - 0
paddlex/inference/serving/schemas/formula_recognition.py

@@ -0,0 +1,54 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional
+
+from pydantic import BaseModel
+
+from ..infra.models import DataInfo, PrimaryOperations
+from .shared import ocr
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "FormulaRecResult",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+INFER_ENDPOINT: Final[str] = "/formula-recognition"
+
+
+class InferRequest(ocr.BaseInferRequest):
+    """Request model of the "infer" operation at `/formula-recognition`.
+
+    Extends `ocr.BaseInferRequest` with three optional switches; None means
+    the client did not specify a value.
+    """
+
+    useLayoutDetection: Optional[bool] = None
+    useDocOrientationClassify: Optional[bool] = None
+    useDocUnwarping: Optional[bool] = None
+
+
+class FormulaRecResult(BaseModel):
+    """One entry of `InferResult.formulaRecResults`."""
+
+    prunedResult: dict
+    # Optional images; None when not returned.
+    formulaRecImage: Optional[str] = None
+    layoutDetImage: Optional[str] = None
+    docPreprocessingImage: Optional[str] = None
+    inputImage: Optional[str] = None
+
+
+class InferResult(BaseModel):
+    """Result model of the "infer" operation at `/formula-recognition`."""
+
+    formulaRecResults: List[FormulaRecResult]
+    # Metadata of the input: an `ImageInfo` or a `PDFInfo`.
+    dataInfo: DataInfo
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 45 - 0
paddlex/inference/serving/schemas/image_classification.py

@@ -0,0 +1,45 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated
+
+from ..infra.models import PrimaryOperations
+from .shared import classification
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+INFER_ENDPOINT: Final[str] = "/image-classification"
+
+
+class InferRequest(BaseModel):
+    """Request model of the "infer" operation at `/image-classification`."""
+
+    image: str
+    # Optional; when given it must be greater than 0.
+    topk: Optional[Annotated[int, Field(gt=0)]] = None
+
+
+class InferResult(BaseModel):
+    """Result model of the "infer" operation at `/image-classification`."""
+
+    categories: List[classification.Category]
+    # Optional; None when no image is returned.
+    image: Optional[str] = None
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 47 - 0
paddlex/inference/serving/schemas/image_multilabel_classification.py

@@ -0,0 +1,47 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Final, List, Optional, Union
+
+from pydantic import BaseModel
+from typing_extensions import Literal
+
+from ..infra.models import PrimaryOperations
+from .shared import classification
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/multilabel-image-classification"
+
+
+# Request model of the "infer" operation.
+class InferRequest(BaseModel):
+    # Required input image given as a string (presumably a URL or Base64
+    # payload; TODO confirm against the app).
+    image: str
+    # Optional. Accepts a single float, a dict keyed by the literal "default"
+    # or by ints (presumably class IDs -- confirm), or a list of floats.
+    threshold: Optional[
+        Union[float, Dict[Union[Literal["default"], int], float], List[float]]
+    ] = None
+
+
+# Result model of the "infer" operation: a list of classification.Category
+# entries plus an optional image string that defaults to None.
+class InferResult(BaseModel):
+    categories: List[classification.Category]
+    image: Optional[str] = None
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 59 - 0
paddlex/inference/serving/schemas/instance_segmentation.py

@@ -0,0 +1,59 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+from .shared import image_segmentation, object_detection
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "Mask",
+    "Instance",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/instance-segmentation"
+
+
+# Request model of the "infer" operation.
+class InferRequest(BaseModel):
+    # Required input image given as a string (presumably a URL or Base64
+    # payload; TODO confirm against the app).
+    image: str
+    # Optional float threshold; defaults to None.
+    threshold: Optional[float] = None
+
+
+# Mask attached to an Instance: a string-encoded result (field name suggests
+# run-length encoding -- confirm the exact format) and a two-int size.
+class Mask(BaseModel):
+    rleResult: str
+    size: image_segmentation.Size
+
+
+# One segmented instance; InferResult.instances is a list of these. All
+# fields are required.
+class Instance(BaseModel):
+    bbox: object_detection.BoundingBox
+    categoryId: int
+    categoryName: str
+    score: float
+    mask: Mask
+
+
+# Result model of the "infer" operation: the list of Instance entries plus an
+# optional image string that defaults to None.
+class InferResult(BaseModel):
+    instances: List[Instance]
+    image: Optional[str] = None
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 79 - 0
paddlex/inference/serving/schemas/layout_parsing.py

@@ -0,0 +1,79 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional, Union
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, Literal
+
+from ..infra.models import DataInfo, PrimaryOperations
+from .shared import ocr
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "LayoutParsingResult",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/layout-parsing"
+
+
+# Request model of the "infer" operation. Extends ocr.BaseInferRequest with
+# optional switches and tuning parameters; every field declared here defaults
+# to None (presumably meaning "use the pipeline's own setting" -- confirm).
+class InferRequest(ocr.BaseInferRequest):
+    useDocOrientationClassify: Optional[bool] = None
+    useDocUnwarping: Optional[bool] = None
+    useGeneralOcr: Optional[bool] = None
+    useSealRecognition: Optional[bool] = None
+    useTableRecognition: Optional[bool] = None
+    useFormulaRecognition: Optional[bool] = None
+    layoutThreshold: Optional[float] = None
+    layoutNms: Optional[bool] = None
+    # Either a single float or a list of exactly two floats.
+    layoutUnclipRatio: Optional[
+        Union[float, Annotated[List[float], Field(min_length=2, max_length=2)]]
+    ] = None
+    layoutMergeBboxesMode: Optional[Literal["union", "large", "small"]] = None
+    textDetLimitSideLen: Optional[int] = None
+    textDetLimitType: Optional[Literal["min", "max"]] = None
+    textDetThresh: Optional[float] = None
+    textDetBoxThresh: Optional[float] = None
+    textDetUnclipRatio: Optional[float] = None
+    textRecScoreThresh: Optional[float] = None
+    sealDetLimitSideLen: Optional[int] = None
+    sealDetLimitType: Optional[Literal["min", "max"]] = None
+    sealDetThresh: Optional[float] = None
+    sealDetBoxThresh: Optional[float] = None
+    sealDetUnclipRatio: Optional[float] = None
+    sealRecScoreThresh: Optional[float] = None
+
+
+# Per-item layout parsing output; InferResult carries a list of these.
+# Only prunedResult is required. The *Image fields are optional strings
+# (presumably encoded images or image URLs -- TODO confirm against the app).
+class LayoutParsingResult(BaseModel):
+    prunedResult: dict
+    ocrImage: Optional[str] = None
+    sealRecImage: Optional[str] = None
+    tableRecImage: Optional[str] = None
+    formulaRecImage: Optional[str] = None
+    layoutDetImage: Optional[str] = None
+    docPreprocessingImage: Optional[str] = None
+    inputImage: Optional[str] = None
+
+
+# Result model of the "infer" operation: a list of LayoutParsingResult items
+# plus a DataInfo value (defined in ..infra.models).
+class InferResult(BaseModel):
+    layoutParsingResults: List[LayoutParsingResult]
+    dataInfo: DataInfo
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 57 - 0
paddlex/inference/serving/schemas/multilingual_speech_recognition.py

@@ -0,0 +1,57 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "Segment",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/multilingual-speech-recognition"
+
+
+# Request model of the "infer" operation.
+class InferRequest(BaseModel):
+    # Required audio input given as a string (presumably a URL or Base64
+    # payload; TODO confirm against the app).
+    audio: str
+
+
+# One transcription segment; InferResult.segments is a list of these. All
+# fields are required. start/end are floats (presumably seconds -- confirm).
+class Segment(BaseModel):
+    id: int
+    seek: int
+    start: float
+    end: float
+    text: str
+    tokens: List[int]
+    temperature: float
+    avgLogProb: float
+    compressionRatio: float
+    noSpeechProb: float
+
+
+# Result model of the "infer" operation: the text, the list of Segment
+# entries, and a language string. All fields are required.
+class InferResult(BaseModel):
+    text: str
+    segments: List[Segment]
+    language: str
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 52 - 0
paddlex/inference/serving/schemas/object_detection.py

@@ -0,0 +1,52 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Final, List, Optional, Union
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+from .shared import object_detection
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "DetectedObject",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/object-detection"
+
+
+# Request model of the "infer" operation.
+class InferRequest(BaseModel):
+    # Required input image given as a string (presumably a URL or Base64
+    # payload; TODO confirm against the app).
+    image: str
+    # Optional. Either a single float or a dict mapping ints to floats
+    # (presumably class ID -> threshold; confirm).
+    threshold: Optional[Union[float, Dict[int, float]]] = None
+
+
+# One detection; InferResult.detectedObjects is a list of these. All fields
+# are required.
+class DetectedObject(BaseModel):
+    bbox: object_detection.BoundingBox
+    categoryId: int
+    categoryName: str
+    score: float
+
+
+# Result model of the "infer" operation: the list of DetectedObject entries
+# plus an optional image string that defaults to None.
+class InferResult(BaseModel):
+    detectedObjects: List[DetectedObject]
+    image: Optional[str] = None
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 61 - 0
paddlex/inference/serving/schemas/ocr.py

@@ -0,0 +1,61 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional
+
+from pydantic import BaseModel
+from typing_extensions import Literal
+
+from ..infra.models import DataInfo, PrimaryOperations
+from .shared import ocr
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "OCRResult",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+INFER_ENDPOINT: Final[str] = "/ocr"
+
+
+class InferRequest(ocr.BaseInferRequest):
+    useDocOrientationClassify: Optional[bool] = None
+    useDocUnwarping: Optional[bool] = None
+    useTextlineOrientation: Optional[bool] = False
+    textDetLimitSideLen: Optional[int] = None
+    textDetLimitType: Optional[Literal["min", "max"]] = None
+    # Better to use "threshold"? Be consistent with the pipeline API though.
+    textDetThresh: Optional[float] = None
+    textDetBoxThresh: Optional[float] = None
+    textDetUnclipRatio: Optional[float] = None
+    textRecScoreThresh: Optional[float] = None
+
+
+# Per-item OCR output; InferResult carries a list of these. Only prunedResult
+# is required. The *Image fields are optional strings (presumably encoded
+# images or image URLs -- TODO confirm against the app).
+class OCRResult(BaseModel):
+    prunedResult: dict
+    ocrImage: Optional[str] = None
+    docPreprocessingImage: Optional[str] = None
+    inputImage: Optional[str] = None
+
+
+# Result model of the "infer" operation: a list of OCRResult items plus a
+# DataInfo value (defined in ..infra.models).
+class InferResult(BaseModel):
+    ocrResults: List[OCRResult]
+    dataInfo: DataInfo
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 61 - 0
paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py

@@ -0,0 +1,61 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Final, List, Optional, Union
+
+from pydantic import BaseModel
+from typing_extensions import Literal
+
+from ..infra.models import PrimaryOperations
+from .shared import object_detection
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "Attribute",
+    "Pedestrian",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/pedestrian-attribute-recognition"
+
+
+# Request model of the "infer" operation.
+class InferRequest(BaseModel):
+    # Required input image given as a string (presumably a URL or Base64
+    # payload; TODO confirm against the app).
+    image: str
+    # Optional float threshold (name suggests the detection stage -- confirm).
+    detThreshold: Optional[float] = None
+    # Optional. Accepts a single float, a dict keyed by the literal "default"
+    # or by ints, or a list of floats.
+    clsThreshold: Optional[
+        Union[float, Dict[Union[Literal["default"], int], float], List[float]]
+    ] = None
+
+
+# A label/score pair; Pedestrian.attributes is a list of these.
+class Attribute(BaseModel):
+    label: str
+    score: float
+
+
+# One pedestrian entry; InferResult.pedestrians is a list of these. Holds a
+# bounding box, the list of Attribute entries, and a float score.
+class Pedestrian(BaseModel):
+    bbox: object_detection.BoundingBox
+    attributes: List[Attribute]
+    score: float
+
+
+# Result model of the "infer" operation: the list of Pedestrian entries plus
+# an optional image string that defaults to None.
+class InferResult(BaseModel):
+    pedestrians: List[Pedestrian]
+    image: Optional[str] = None
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 128 - 0
paddlex/inference/serving/schemas/pp_chatocrv3_doc.py

@@ -0,0 +1,128 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional
+
+from pydantic import BaseModel
+from typing_extensions import Literal
+
+from ..infra.models import DataInfo, PrimaryOperations
+from .shared import ocr
+
+__all__ = [
+    "ANALYZE_IMAGES_ENDPOINT",
+    "AnalyzeImagesRequest",
+    "VisualResult",
+    "AnalyzeImagesResult",
+    "BUILD_VECTOR_STORE_ENDPOINT",
+    "BuildVectorStoreRequest",
+    "BuildVectorStoreResult",
+    "CHAT_ENDPOINT",
+    "ChatRequest",
+    "ChatResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "analyzeImages" operation in
+# PRIMARY_OPERATIONS.
+ANALYZE_IMAGES_ENDPOINT: Final[str] = "/chatocr-visual"
+
+
+# Request model of the "analyzeImages" operation. Extends
+# ocr.BaseInferRequest with optional switches and tuning parameters; every
+# field declared here defaults to None.
+class AnalyzeImagesRequest(ocr.BaseInferRequest):
+    useDocOrientationClassify: Optional[bool] = None
+    useDocUnwarping: Optional[bool] = None
+    useGeneralOcr: Optional[bool] = None
+    useSealRecognition: Optional[bool] = None
+    useTableRecognition: Optional[bool] = None
+    textDetLimitSideLen: Optional[int] = None
+    textDetLimitType: Optional[Literal["min", "max"]] = None
+    textDetThresh: Optional[float] = None
+    textDetBoxThresh: Optional[float] = None
+    textDetUnclipRatio: Optional[float] = None
+    textRecScoreThresh: Optional[float] = None
+    sealDetLimitSideLen: Optional[int] = None
+    sealDetLimitType: Optional[Literal["min", "max"]] = None
+    sealDetThresh: Optional[float] = None
+    sealDetBoxThresh: Optional[float] = None
+    sealDetUnclipRatio: Optional[float] = None
+    sealRecScoreThresh: Optional[float] = None
+
+
+# Per-item visual analysis output; AnalyzeImagesResult carries a list of
+# these. Only prunedResult is required. The *Image fields are optional
+# strings (presumably encoded images or image URLs -- TODO confirm).
+class VisualResult(BaseModel):
+    prunedResult: dict
+    ocrImage: Optional[str] = None
+    layoutDetImage: Optional[str] = None
+    docPreprocessingImage: Optional[str] = None
+    inputImage: Optional[str] = None
+
+
+# Result model of the "analyzeImages" operation. visualInfo is a dict with
+# the same field name and type that BuildVectorStoreRequest and ChatRequest
+# accept (presumably passed through by the client -- confirm).
+class AnalyzeImagesResult(BaseModel):
+    visualResults: List[VisualResult]
+    visualInfo: dict
+    dataInfo: DataInfo
+
+
+# Endpoint path registered for the "buildVectorStore" operation in
+# PRIMARY_OPERATIONS.
+BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
+
+
+# Request model of the "buildVectorStore" operation. visualInfo is required;
+# the remaining fields are optional and default to None.
+class BuildVectorStoreRequest(BaseModel):
+    visualInfo: dict
+    minCharacters: Optional[int] = None
+    llmRequestInterval: Optional[float] = None
+
+
+# Result model of the "buildVectorStore" operation: a single required dict.
+class BuildVectorStoreResult(BaseModel):
+    vectorInfo: dict
+
+
+CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
+
+
+class ChatRequest(BaseModel):
+    keyList: List[str]
+    visualInfo: dict
+    useVectorRetrieval: Optional[bool] = None
+    vectorInfo: Optional[str] = None
+    minCharacters: Optional[int] = None
+    textTaskDescription: Optional[str] = None
+    textOutputFormat: Optional[str] = None
+    # Is the "Str" in the name unnecessary? Keep the names consistent with the
+    # parameters of the wrapped function though.
+    textRulesStr: Optional[str] = None
+    # Should this be just "text" instead of "text content", given that there is
+    # no container?
+    textFewShotDemoTextContent: Optional[str] = None
+    textFewShotDemoKeyValueList: Optional[str] = None
+    tableTaskDescription: Optional[str] = None
+    tableOutputFormat: Optional[str] = None
+    tableRulesStr: Optional[str] = None
+    tableFewShotDemoTextContent: Optional[str] = None
+    tableFewShotDemoKeyValueList: Optional[str] = None
+
+
+# Result model of the "chat" operation: a single required dict.
+class ChatResult(BaseModel):
+    chatResult: dict
+
+
+# Operation name -> (endpoint path, request model, result model), one entry
+# per operation defined in this module.
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "analyzeImages": (
+        ANALYZE_IMAGES_ENDPOINT,
+        AnalyzeImagesRequest,
+        AnalyzeImagesResult,
+    ),
+    "buildVectorStore": (
+        BUILD_VECTOR_STORE_ENDPOINT,
+        BuildVectorStoreRequest,
+        BuildVectorStoreResult,
+    ),
+    "chat": (CHAT_ENDPOINT, ChatRequest, ChatResult),
+}

+ 124 - 0
paddlex/inference/serving/schemas/pp_shituv2.py

@@ -0,0 +1,124 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Final, List, Optional
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+from .shared import object_detection
+
+__all__ = [
+    "ImageLabelPair",
+    "BUILD_INDEX_ENDPOINT",
+    "BuildIndexRequest",
+    "BuildIndexResult",
+    "ADD_IMAGES_TO_INDEX_ENDPOINT",
+    "AddImagesToIndexRequest",
+    "AddImagesToIndexResult",
+    "REMOVE_IMAGES_FROM_INDEX_ENDPOINT",
+    "RemoveImagesFromIndexRequest",
+    "RemoveImagesFromIndexResult",
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "RecResult",
+    "DetectedObject",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+
+# An image string paired with its label string; used as the list element in
+# BuildIndexRequest and AddImagesToIndexRequest.
+class ImageLabelPair(BaseModel):
+    image: str
+    label: str
+
+
+# Endpoint path registered for the "buildIndex" operation in
+# PRIMARY_OPERATIONS.
+BUILD_INDEX_ENDPOINT: Final[str] = "/shitu-index-build"
+
+
+# Request model of the "buildIndex" operation: the required list of
+# image/label pairs.
+class BuildIndexRequest(BaseModel):
+    imageLabelPairs: List[ImageLabelPair]
+
+
+# Result model of the "buildIndex" operation. indexKey is a string with the
+# same field name the other index requests accept; idMap maps ints to strings
+# (presumably image ID -> label; confirm).
+class BuildIndexResult(BaseModel):
+    indexKey: str
+    idMap: Dict[int, str]
+
+
+# Endpoint path registered for the "addImagesToIndex" operation in
+# PRIMARY_OPERATIONS.
+ADD_IMAGES_TO_INDEX_ENDPOINT: Final[str] = "/shitu-index-add"
+
+
+# Request model of the "addImagesToIndex" operation. imageLabelPairs is
+# required; indexKey is optional and defaults to None.
+class AddImagesToIndexRequest(BaseModel):
+    imageLabelPairs: List[ImageLabelPair]
+    indexKey: Optional[str] = None
+
+
+# Result model of the "addImagesToIndex" operation: a required int -> str map.
+class AddImagesToIndexResult(BaseModel):
+    idMap: Dict[int, str]
+
+
+# Endpoint path registered for the "removeImagesFromIndex" operation in
+# PRIMARY_OPERATIONS.
+REMOVE_IMAGES_FROM_INDEX_ENDPOINT: Final[str] = "/shitu-index-remove"
+
+
+# Request model of the "removeImagesFromIndex" operation. ids is a required
+# list of ints; indexKey is optional and defaults to None.
+class RemoveImagesFromIndexRequest(BaseModel):
+    ids: List[int]
+    indexKey: Optional[str] = None
+
+
+# Result model of the "removeImagesFromIndex" operation: a required
+# int -> str map.
+class RemoveImagesFromIndexResult(BaseModel):
+    idMap: Dict[int, str]
+
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/shitu-infer"
+
+
+# Request model of the "infer" operation. Only image is required; every other
+# field is optional and defaults to None.
+class InferRequest(BaseModel):
+    # Input image given as a string (presumably a URL or Base64 payload;
+    # TODO confirm against the app).
+    image: str
+    indexKey: Optional[str] = None
+    detThreshold: Optional[float] = None
+    recThreshold: Optional[float] = None
+    hammingRadius: Optional[float] = None
+    topk: Optional[int] = None
+
+
+# A label/score pair; DetectedObject.recResults is a list of these.
+class RecResult(BaseModel):
+    label: str
+    score: float
+
+
+# One detection; InferResult.detectedObjects is a list of these. Holds a
+# bounding box, the list of RecResult entries, and a float score.
+class DetectedObject(BaseModel):
+    bbox: object_detection.BoundingBox
+    recResults: List[RecResult]
+    score: float
+
+
+# Result model of the "infer" operation: the list of DetectedObject entries
+# plus an optional image string that defaults to None.
+class InferResult(BaseModel):
+    detectedObjects: List[DetectedObject]
+    image: Optional[str] = None
+
+
+# Operation name -> (endpoint path, request model, result model), one entry
+# per operation defined in this module.
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "buildIndex": (BUILD_INDEX_ENDPOINT, BuildIndexRequest, BuildIndexResult),
+    "addImagesToIndex": (
+        ADD_IMAGES_TO_INDEX_ENDPOINT,
+        AddImagesToIndexRequest,
+        AddImagesToIndexResult,
+    ),
+    "removeImagesFromIndex": (
+        REMOVE_IMAGES_FROM_INDEX_ENDPOINT,
+        RemoveImagesFromIndexRequest,
+        RemoveImagesFromIndexResult,
+    ),
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 67 - 0
paddlex/inference/serving/schemas/seal_recognition.py

@@ -0,0 +1,67 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional, Union
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, Literal
+
+from ..infra.models import DataInfo, PrimaryOperations
+from .shared import ocr
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "SealRecResult",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/seal-recognition"
+
+
+# Request model of the "infer" operation. Extends ocr.BaseInferRequest with
+# optional switches and tuning parameters; every field declared here defaults
+# to None.
+class InferRequest(ocr.BaseInferRequest):
+    useDocOrientationClassify: Optional[bool] = None
+    useDocUnwarping: Optional[bool] = None
+    useLayoutDetection: Optional[bool] = None
+    layoutThreshold: Optional[float] = None
+    layoutNms: Optional[bool] = None
+    # Either a single float or a list of exactly two floats.
+    layoutUnclipRatio: Optional[
+        Union[float, Annotated[List[float], Field(min_length=2, max_length=2)]]
+    ] = None
+    layoutMergeBboxesMode: Optional[Literal["union", "large", "small"]] = None
+    sealDetLimitSideLen: Optional[int] = None
+    sealDetLimitType: Optional[Literal["min", "max"]] = None
+    sealDetThresh: Optional[float] = None
+    sealDetBoxThresh: Optional[float] = None
+    sealDetUnclipRatio: Optional[float] = None
+    sealRecScoreThresh: Optional[float] = None
+
+
+# Per-item seal recognition output; InferResult carries a list of these.
+# Only prunedResult is required. The *Image fields are optional strings
+# (presumably encoded images or image URLs -- TODO confirm against the app).
+class SealRecResult(BaseModel):
+    prunedResult: dict
+    sealRecImage: Optional[str] = None
+    layoutDetImage: Optional[str] = None
+    docPreprocessingImage: Optional[str] = None
+    inputImage: Optional[str] = None
+
+
+# Result model of the "infer" operation: a list of SealRecResult items plus a
+# DataInfo value (defined in ..infra.models).
+class InferResult(BaseModel):
+    sealRecResults: List[SealRecResult]
+    dataInfo: DataInfo
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 45 - 0
paddlex/inference/serving/schemas/semantic_segmentation.py

@@ -0,0 +1,45 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional, Union
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+from .shared import image_segmentation
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/semantic-segmentation"
+
+
+# Request model of the "infer" operation.
+class InferRequest(BaseModel):
+    # Required input image given as a string (presumably a URL or Base64
+    # payload; TODO confirm against the app).
+    image: str
+    # Optional. Either a single int or an image_segmentation.Size value.
+    targetSize: Optional[Union[int, image_segmentation.Size]] = None
+
+
+# Result model of the "infer" operation. labelMap is a flat list of ints
+# (presumably the per-pixel labels laid out according to size -- confirm);
+# image is an optional string that defaults to None.
+class InferResult(BaseModel):
+    labelMap: List[int]
+    size: image_segmentation.Size
+    image: Optional[str] = None
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 13 - 0
paddlex/inference/serving/schemas/shared/__init__.py

@@ -0,0 +1,13 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 23 - 0
paddlex/inference/serving/schemas/shared/classification.py

@@ -0,0 +1,23 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pydantic import BaseModel
+
+__all__ = ["Category"]
+
+
+# Shared classification entry made of an int id, a name, and a float score.
+# All fields are required.
+class Category(BaseModel):
+    id: int
+    name: str
+    score: float

+ 23 - 0
paddlex/inference/serving/schemas/shared/image_segmentation.py

@@ -0,0 +1,23 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+from pydantic import Field
+from typing_extensions import Annotated, TypeAlias
+
+__all__ = ["Size"]
+
+
+# A list of exactly two ints. The element order (e.g. height/width) is not
+# specified here -- TODO confirm against the code that produces it.
+Size: TypeAlias = Annotated[List[int], Field(min_length=2, max_length=2)]

+ 22 - 0
paddlex/inference/serving/schemas/shared/object_detection.py

@@ -0,0 +1,22 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+from pydantic import Field
+from typing_extensions import Annotated, TypeAlias
+
+__all__ = ["BoundingBox"]
+
+# A list of exactly four floats. The coordinate convention is not specified
+# here -- TODO confirm against the code that produces it.
+BoundingBox: TypeAlias = Annotated[List[float], Field(min_length=4, max_length=4)]

+ 25 - 0
paddlex/inference/serving/schemas/shared/ocr.py

@@ -0,0 +1,25 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+
+from pydantic import BaseModel
+from typing_extensions import Literal, TypeAlias
+
+# Only the integer literals 0 and 1 are accepted. What each value denotes is
+# not visible here (presumably two input file kinds -- TODO confirm).
+FileType: TypeAlias = Literal[0, 1]
+
+
+# Common base for file-based request models: a required file string plus an
+# optional FileType that defaults to None (handling of None is up to the
+# consumer -- not visible here).
+class BaseInferRequest(BaseModel):
+    file: str
+    fileType: Optional[FileType] = None

+ 52 - 0
paddlex/inference/serving/schemas/small_object_detection.py

@@ -0,0 +1,52 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Final, List, Optional, Union
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+from .shared import object_detection
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "DetectedObject",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/small-object-detection"
+
+
+# Request model of the "infer" operation.
+class InferRequest(BaseModel):
+    # Required input image given as a string (presumably a URL or Base64
+    # payload; TODO confirm against the app).
+    image: str
+    # Optional. Either a single float or a dict mapping ints to floats
+    # (presumably class ID -> threshold; confirm).
+    threshold: Optional[Union[float, Dict[int, float]]] = None
+
+
+# One detection; InferResult.detectedObjects is a list of these. All fields
+# are required.
+class DetectedObject(BaseModel):
+    bbox: object_detection.BoundingBox
+    categoryId: int
+    categoryName: str
+    score: float
+
+
+# Result model of the "infer" operation: the list of DetectedObject entries
+# plus an optional image string that defaults to None.
+class InferResult(BaseModel):
+    detectedObjects: List[DetectedObject]
+    image: Optional[str] = None
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 68 - 0
paddlex/inference/serving/schemas/table_recognition.py

@@ -0,0 +1,68 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional, Union
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, Literal
+
+from ..infra.models import DataInfo, PrimaryOperations
+from .shared import ocr
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "TableRecResult",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+# Endpoint path registered for the "infer" operation in PRIMARY_OPERATIONS.
+INFER_ENDPOINT: Final[str] = "/table-recognition"
+
+
+# Request model of the "infer" operation. Extends ocr.BaseInferRequest with
+# optional switches and tuning parameters; every field declared here defaults
+# to None.
+class InferRequest(ocr.BaseInferRequest):
+    useDocOrientationClassify: Optional[bool] = None
+    useDocUnwarping: Optional[bool] = None
+    useLayoutDetection: Optional[bool] = None
+    useOcrModel: Optional[bool] = None
+    layoutThreshold: Optional[float] = None
+    layoutNms: Optional[bool] = None
+    # Either a single float or a list of exactly two floats.
+    layoutUnclipRatio: Optional[
+        Union[float, Annotated[List[float], Field(min_length=2, max_length=2)]]
+    ] = None
+    layoutMergeBboxesMode: Optional[Literal["union", "large", "small"]] = None
+    textDetLimitSideLen: Optional[int] = None
+    textDetLimitType: Optional[Literal["min", "max"]] = None
+    textDetThresh: Optional[float] = None
+    textDetBoxThresh: Optional[float] = None
+    textDetUnclipRatio: Optional[float] = None
+    textRecScoreThresh: Optional[float] = None
+
+
+# Per-item table recognition output; InferResult carries a list of these.
+# Only prunedResult is required. The *Image fields are optional strings
+# (presumably encoded images or image URLs -- TODO confirm against the app).
+class TableRecResult(BaseModel):
+    prunedResult: dict
+    tableRecImage: Optional[str] = None
+    layoutDetImage: Optional[str] = None
+    docPreprocessingImage: Optional[str] = None
+    inputImage: Optional[str] = None
+
+
+# Result model of the "infer" operation: a list of TableRecResult items plus
+# a DataInfo value (defined in ..infra.models).
+class InferResult(BaseModel):
+    tableRecResults: List[TableRecResult]
+    dataInfo: DataInfo
+
+
+# Operation name -> (endpoint path, request model, result model).
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 36 - 0
paddlex/inference/serving/schemas/ts_anomaly_detection.py

@@ -0,0 +1,36 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+
+__all__ = ["INFER_ENDPOINT", "InferRequest", "InferResult", "PRIMARY_OPERATIONS"]  # names exported by this module
+
+INFER_ENDPOINT: Final[str] = "/time-series-anomaly-detection"  # path string used for the "infer" entry of PRIMARY_OPERATIONS
+
+
+class InferRequest(BaseModel):  # request model for the "infer" operation
+    csv: str  # required string; presumably CSV data or a reference to it — TODO confirm
+
+
+class InferResult(BaseModel):  # result model for the "infer" operation
+    csv: str  # required string; presumably the result in CSV form — TODO confirm
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {  # operation name -> (endpoint, request model, result model)
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 37 - 0
paddlex/inference/serving/schemas/ts_classification.py

@@ -0,0 +1,37 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+
+__all__ = ["INFER_ENDPOINT", "InferRequest", "InferResult", "PRIMARY_OPERATIONS"]  # names exported by this module
+
+INFER_ENDPOINT: Final[str] = "/time-series-classification"  # path string used for the "infer" entry of PRIMARY_OPERATIONS
+
+
+class InferRequest(BaseModel):  # request model for the "infer" operation
+    csv: str  # required string; presumably CSV data or a reference to it — TODO confirm
+
+
+class InferResult(BaseModel):  # result model for the "infer" operation
+    label: str  # required string
+    score: float  # required float; value range is not constrained by this schema
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {  # operation name -> (endpoint, request model, result model)
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 36 - 0
paddlex/inference/serving/schemas/ts_forecast.py

@@ -0,0 +1,36 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+
+__all__ = ["INFER_ENDPOINT", "InferRequest", "InferResult", "PRIMARY_OPERATIONS"]  # names exported by this module
+
+INFER_ENDPOINT: Final[str] = "/time-series-forecasting"  # path string used for the "infer" entry of PRIMARY_OPERATIONS
+
+
+class InferRequest(BaseModel):  # request model for the "infer" operation
+    csv: str  # required string; presumably CSV data or a reference to it — TODO confirm
+
+
+class InferResult(BaseModel):  # result model for the "infer" operation
+    csv: str  # required string; presumably the result in CSV form — TODO confirm
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {  # operation name -> (endpoint, request model, result model)
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 61 - 0
paddlex/inference/serving/schemas/vehicle_attribute_recognition.py

@@ -0,0 +1,61 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Final, List, Optional, Union
+
+from pydantic import BaseModel
+from typing_extensions import Literal
+
+from ..infra.models import PrimaryOperations
+from .shared import object_detection
+
+__all__ = [  # names exported by this schema module
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "Attribute",
+    "Vehicle",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+INFER_ENDPOINT: Final[str] = "/vehicle-attribute-recognition"  # path string used for the "infer" entry of PRIMARY_OPERATIONS
+
+
+class InferRequest(BaseModel):  # request model for the "infer" operation
+    image: str  # required string; presumably an encoded image or URL — TODO confirm
+    detThreshold: Optional[float] = None  # optional float, defaults to None
+    clsThreshold: Optional[
+        Union[float, Dict[Union[Literal["default"], int], float], List[float]]
+    ] = None  # a float, a dict keyed by "default" or an int with float values, or a list of floats
+
+
+class Attribute(BaseModel):  # element type of Vehicle.attributes
+    label: str  # required string
+    score: float  # required float; value range is not constrained by this schema
+
+
+class Vehicle(BaseModel):  # element type of InferResult.vehicles
+    bbox: object_detection.BoundingBox  # type comes from the shared object_detection schema module
+    attributes: List[Attribute]  # required list of Attribute items
+    score: float  # required float; value range is not constrained by this schema
+
+
+class InferResult(BaseModel):  # result model for the "infer" operation
+    vehicles: List[Vehicle]  # required list of Vehicle items
+    image: Optional[str] = None  # optional string, defaults to None; presumably a result image — TODO confirm
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {  # operation name -> (endpoint, request model, result model)
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 44 - 0
paddlex/inference/serving/schemas/video_classification.py

@@ -0,0 +1,44 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated
+
+from ..infra.models import PrimaryOperations
+from .shared import classification
+
+__all__ = [  # names exported by this schema module
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+INFER_ENDPOINT: Final[str] = "/video-classification"  # path string used for the "infer" entry of PRIMARY_OPERATIONS
+
+
+class InferRequest(BaseModel):  # request model for the "infer" operation
+    video: str  # required string; presumably an encoded video or URL — TODO confirm
+    topk: Optional[Annotated[int, Field(gt=0)]] = None  # optional; when given must be an int greater than 0
+
+
+class InferResult(BaseModel):  # result model for the "infer" operation
+    categories: List[classification.Category]  # item type comes from the shared classification schema module
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {  # operation name -> (endpoint, request model, result model)
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 56 - 0
paddlex/inference/serving/schemas/video_detection.py

@@ -0,0 +1,56 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List, Optional
+
+from pydantic import BaseModel
+
+from ..infra.models import PrimaryOperations
+from .shared import object_detection
+
+__all__ = [  # NOTE(review): the Frame model defined in this module is not listed here — confirm whether intentional
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "DetectedObject",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+INFER_ENDPOINT: Final[str] = "/video-detection"  # path string used for the "infer" entry of PRIMARY_OPERATIONS
+
+
+class InferRequest(BaseModel):  # request model for the "infer" operation
+    video: str  # required string; presumably an encoded video or URL — TODO confirm
+    nmsThresh: Optional[float] = None  # optional float, defaults to None
+    scoreThresh: Optional[float] = None  # optional float, defaults to None
+
+
+class DetectedObject(BaseModel):  # element type of Frame.detectedObjects
+    bbox: object_detection.BoundingBox  # type comes from the shared object_detection schema module
+    categoryName: str  # required string
+    score: float  # required float; value range is not constrained by this schema
+
+
+class Frame(BaseModel):  # element type of InferResult.frames
+    index: int  # required int; presumably the frame's position in the video — TODO confirm base (0 or 1)
+    detectedObjects: List[DetectedObject]  # required list of DetectedObject items
+
+
+class InferResult(BaseModel):  # result model for the "infer" operation
+    frames: List[Frame]  # required list of Frame items
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {  # operation name -> (endpoint, request model, result model)
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 2 - 4
paddlex/paddlex_cli.py

@@ -264,12 +264,10 @@ def pipeline_predict(
 
 
 def serve(pipeline, *, device, use_hpip, host, port):
-    from .inference.pipelines.serving import create_pipeline_app, run_server
+    from .inference.serving.basic_serving import create_pipeline_app, run_server
 
     pipeline_config = load_pipeline_config(pipeline)
-    pipeline = create_pipeline_from_config(
-        pipeline_config, device=device, use_hpip=use_hpip
-    )
+    pipeline = create_pipeline(config=pipeline_config, device=device, use_hpip=use_hpip)
     app = create_pipeline_app(pipeline, pipeline_config)
     run_server(app, host=host, port=port, debug=False)
 

+ 1 - 0
paddlex/serving_requirements.txt

@@ -1,6 +1,7 @@
 aiohttp>=3.9
 bce-python-sdk>=0.9
 fastapi>=0.110
+filetype>=1.2
 pydantic>=2
 starlette>=0.36
 typing_extensions>=4.11