
[Docs] Update deployment documentation (#2435)

* Update

* Refine docs

* Add version control

* basic inference->quick inference

* Remove deprecated docs

* Remove deprecated docs

* Remove tutorial list in serving docs
Lin Manhui 1 year ago
commit 3744b71bdc

+ 62 - 36
docs/pipeline_deploy/high_performance_inference.en.md

@@ -8,11 +8,11 @@ In real-world production environments, many applications have stringent standard
 
 ## 1. Installation and Usage of High-Performance Inference Plugins
 
-Before using the high-performance inference plugins, ensure you have completed the installation of PaddleX according to the [PaddleX Local Installation Tutorial](../installation/installation.en.md), and have successfully run the basic inference of the pipeline using either the PaddleX pipeline command line instructions or the Python script instructions.
+Before using the high-performance inference plugins, ensure you have completed the installation of PaddleX according to the [PaddleX Local Installation Tutorial](../installation/installation.en.md), and have successfully run the quick inference of the pipeline using either the PaddleX pipeline command line instructions or the Python script instructions.
 
 ### 1.1 Installing High-Performance Inference Plugins
 
-Find the corresponding installation command based on your processor architecture, operating system, device type, and Python version in the table below and execute it in your deployment environment:
+Find the corresponding installation command based on your processor architecture, operating system, device type, and Python version in the table below and execute it in your deployment environment. Replace `{paddlex version number}` with the actual paddlex version number, e.g., the latest stable release `3.0.0b2`. If you need the version corresponding to the development branch, replace `{paddlex version number}` with `0.0.0.dev0`.
 
 <table>
   <tr>
@@ -29,33 +29,33 @@ Find the corresponding installation command based on your processor architecture
   </tr>
   <tr>
     <td>3.8</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device cpu --py 38</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device cpu --py 38</td>
   </tr>
   <tr>
     <td>3.9</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device cpu --py 39</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device cpu --py 39</td>
   </tr>
   <tr>
     <td>3.10</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310</td>
   </tr>
   <tr>
     <td rowspan="3">GPU&nbsp;(CUDA&nbsp;11.8&nbsp;+&nbsp;cuDNN&nbsp;8.6)</td>
     <td>3.8</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 38</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 38</td>
   </tr>
   <tr>
     <td>3.9</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 39</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 39</td>
   </tr>
   <tr>
     <td>3.10</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 310</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 310</td>
   </tr>
 </table>
 
-* When the device type is GPU, please use the installation instructions corresponding to the CUDA and cuDNN versions that match your environment. Otherwise, you will not be able to use the high-performance inference plugin properly.
 * For Linux systems, execute the installation instructions using Bash.
+* When using NVIDIA GPUs, please use the installation instructions corresponding to the CUDA and cuDNN versions that match your environment. Otherwise, you will not be able to use the high-performance inference plugin properly.
 * When the device type is CPU, the installed high-performance inference plugin only supports inference using the CPU; for other device types, the installed high-performance inference plugin supports inference using the CPU or other devices.
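For example, taking the Linux x86_64 / CPU / Python 3.10 row from the table above and substituting the stable release mentioned earlier, the command would look like the following (a filled-in sketch; use the version that matches your PaddleX installation):

```bash
curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/3.0.0b2/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310
```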
 
 ### 1.2 Obtaining Serial Numbers and Activation
@@ -77,37 +77,37 @@ Please note: Each serial number can only be bound to a unique device fingerprint
 
 ### 1.3 Enabling High-Performance Inference Plugins
 
-Before enabling high-performance plugins, please ensure that the `LD_LIBRARY_PATH` of the current environment does not specify the TensorRT directory, as the plugins already integrate TensorRT to avoid conflicts caused by different TensorRT versions that may prevent the plugins from functioning properly.
+For Linux systems, if using the high-performance inference plugin in a Docker container, please mount the host machine's `/dev/disk/by-uuid` and `${HOME}/.baidu/paddlex/licenses` directories to the container.
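A minimal sketch of such a container setup follows; the image name, the `--gpus` flag, and the container-side license path are illustrative assumptions rather than part of the official instructions:

```bash
# Mount the host's disk-UUID directory and the PaddleX license directory into the container.
docker run -it --gpus all \
    -v /dev/disk/by-uuid:/dev/disk/by-uuid \
    -v "${HOME}/.baidu/paddlex/licenses":/root/.baidu/paddlex/licenses \
    {paddlex image name} /bin/bash
```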
 
 For PaddleX CLI, specify `--use_hpip` and set the serial number to enable the high-performance inference plugin. If you wish to activate the license online, specify `--update_license` when using the serial number for the first time. Taking the general image classification pipeline as an example:
 
-```diff
+```bash
 paddlex \
     --pipeline image_classification \
     --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg \
     --device gpu:0 \
-+   --use_hpip \
-+   --serial_number {serial_number}
+    --use_hpip \
+    --serial_number {serial_number}
 
-# If you wish to activate the license online
+# If you wish to perform online activation
 paddlex \
     --pipeline image_classification \
     --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg \
     --device gpu:0 \
-+   --use_hpip \
-+   --serial_number {serial_number} \
-+   --update_license
+    --use_hpip \
+    --serial_number {serial_number} \
+    --update_license
 ```
 
 For PaddleX Python API, enabling the high-performance inference plugin is similar. Still taking the general image classification pipeline as an example:
 
-```diff
+```python
 from paddlex import create_pipeline
 
 pipeline = create_pipeline(
     pipeline="image_classification",
-+   use_hpip=True,
-+   serial_number="{serial_number}",
+    use_hpip=True,
+    hpi_params={"serial_number": "{serial_number}"},
 )
 
 output = pipeline.predict("https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg")
@@ -117,35 +117,61 @@ The inference results obtained with the high-performance inference plugin enable
 
 ### 1.4 Modifying High-Performance Inference Configurations
 
-PaddleX provides default high-performance inference configurations for each model and stores them in the model's configuration file. Due to the diversity of actual deployment environments, using the default configurations may not achieve ideal performance in specific environments or may even result in inference failures. For situations where the default configurations cannot meet requirements, you can try changing the model's inference backend as follows:
+PaddleX combines model information and runtime environment information to provide default high-performance inference configurations for each model. These default configurations are carefully prepared so that they work in several common scenarios and deliver relatively optimal performance, so users typically do not need to concern themselves with their specific details. However, due to the diversity of actual deployment environments and requirements, the default configuration may not yield ideal performance in certain scenarios and could even result in inference failures. In cases where the default configuration does not meet the requirements, users can manually adjust it by modifying the `Hpi` field in the `inference.yml` file within the model directory (adding the field if it does not exist). The following are two common situations:
 
-1. Locate the `inference.yml` file in the model directory and find the `Hpi` field.
+- Switching inference backends:
 
-2. Modify the value of `selected_backends`. Specifically, `selected_backends` may be set as follows:
+    When the default inference backend is not available, the inference backend needs to be switched manually. Users should modify the `selected_backends` field (if it does not exist, it needs to be added).
 
     ```yaml
-    selected_backends:
+    Hpi:
+      ...
+      selected_backends:
         cpu: paddle_infer
         gpu: onnx_runtime
+      ...
     ```
 
-    Each entry is formatted as `{device_type}: {inference_backend_name}`. The default selects the backend with the shortest inference time in the official test environment. `supported_backends` lists the inference backends supported by the model in the official test environment for reference.
+    Each entry should follow the format `{device type}: {inference backend name}`.
 
     The currently available inference backends are:
 
-    * `paddle_infer`: The standard Paddle Inference engine. Supports CPU and GPU.
-    * `paddle_tensorrt`: [Paddle-TensorRT](https://www.paddlepaddle.org.cn/lite/v2.10/optimize/paddle_trt.html), a high-performance deep learning inference library produced by Paddle, which integrates TensorRT in the form of subgraphs for further optimization and acceleration. Supports GPU only.
-    * `openvino`: [OpenVINO](https://github.com/openvinotoolkit/openvino), a deep learning inference tool provided by Intel, optimized for model inference performance on various Intel hardware. Supports CPU only.
-    * `onnx_runtime`: [ONNX Runtime](https://onnxruntime.ai/), a cross-platform, high-performance inference engine. Supports CPU and GPU.
-    * `tensorrt`: [TensorRT](https://developer.nvidia.com/tensorrt), a high-performance deep learning inference library provided by NVIDIA, optimized for NVIDIA GPUs to improve speed. Supports GPU only.
+    * `paddle_infer`: The Paddle Inference engine. Supports CPU and GPU. Compared with PaddleX quick inference, the high-performance inference plugin can additionally integrate TensorRT subgraphs to improve GPU inference performance.
+    * `openvino`: [OpenVINO](https://github.com/openvinotoolkit/openvino), a deep learning inference tool provided by Intel, optimized for model inference performance on various Intel hardware. Supports CPU only. The high-performance inference plugin automatically converts the model to the ONNX format and uses this engine for inference.
+    * `onnx_runtime`: [ONNX Runtime](https://onnxruntime.ai/), a cross-platform, high-performance inference engine. Supports CPU and GPU. The high-performance inference plugin automatically converts the model to the ONNX format and uses this engine for inference.
+    * `tensorrt`: [TensorRT](https://developer.nvidia.com/tensorrt), a high-performance deep learning inference library provided by NVIDIA, optimized for NVIDIA GPUs to improve speed. Supports GPU only. The high-performance inference plugin automatically converts the model to the ONNX format and uses this engine for inference.
 
-    Here are some key details of the current official test environment:
+- Modifying dynamic shape configurations for Paddle Inference or TensorRT:
 
-    * CPU: Intel Xeon Gold 5117
-    * GPU: NVIDIA Tesla T4
-    * CUDA Version: 11.8
-    * cuDNN Version: 8.6
-    * Docker:registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82
+    Dynamic shape is TensorRT's ability to defer specifying some or all tensor dimensions until runtime. If the default dynamic shape configuration does not meet requirements (e.g., the model may require input shapes beyond the default range), users need to modify the `trt_dynamic_shapes` or `dynamic_shapes` field in the inference backend configuration:
+
+    ```yaml
+    Hpi:
+      ...
+      backend_configs:
+        # Configuration for the Paddle Inference backend
+        paddle_infer:
+          ...
+          trt_dynamic_shapes:
+            x:
+              - [1, 3, 300, 300]
+              - [4, 3, 300, 300]
+              - [32, 3, 1200, 1200]
+          ...
+        # Configuration for the TensorRT backend
+        tensorrt:
+          ...
+          dynamic_shapes:
+            x:
+              - [1, 3, 300, 300]
+              - [4, 3, 300, 300]
+              - [32, 3, 1200, 1200]
+          ...
+    ```
+
+    In `trt_dynamic_shapes` or `dynamic_shapes`, a dynamic shape must be specified for each input tensor, in the format `{input tensor name}: [{minimum shape}, {optimal shape}, {maximum shape}]`. For an introduction to minimum, optimal, and maximum shapes and further details, please refer to the official TensorRT documentation.
+
+    After completing the modifications, please delete the cache files in the model directory (`shape_range_info.pbtxt` and files starting with `trt_serialized`).
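For example, assuming the model directory is `./my_model` (a placeholder path), the caches could be cleared with:

```bash
# Remove cached shape/serialization files so they are regenerated with the new configuration.
rm -f ./my_model/shape_range_info.pbtxt ./my_model/trt_serialized*
```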
 
 ## 2. Pipelines and Models Supporting High-Performance Inference Plugins
 

+ 64 - 36
docs/pipeline_deploy/high_performance_inference.md

@@ -8,11 +8,11 @@ comments: true
 
 ## 1.高性能推理插件的安装与使用
 
-使用高性能推理插件前,请确保您已经按照[PaddleX本地安装教程](../installation/installation.md) 完成了PaddleX的安装,且按照PaddleX产线命令行使用说明或PaddleX产线Python脚本使用说明跑通了产线的基本推理。
+使用高性能推理插件前,请确保您已经按照[PaddleX本地安装教程](../installation/installation.md) 完成了PaddleX的安装,且按照PaddleX产线命令行使用说明或PaddleX产线Python脚本使用说明跑通了产线的快速推理。
 
 ### 1.1 安装高性能推理插件
 
-在下表中根据处理器架构、操作系统、设备类型、Python 版本等信息,找到对应的安装指令并在部署环境中执行
+在下表中根据处理器架构、操作系统、设备类型、Python 版本等信息,找到对应的安装指令并在部署环境中执行。请将 `{paddlex 版本号}` 替换为实际的 paddlex 版本号,例如当前最新的稳定版本 `3.0.0b2`。如果需要使用开发分支对应的版本,请将 `{paddlex 版本号}` 替换为 `0.0.0.dev0`。
 
 <table>
   <tr>
@@ -29,33 +29,33 @@ comments: true
   </tr>
   <tr>
     <td>3.8</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device cpu --py 38</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device cpu --py 38</td>
   </tr>
   <tr>
     <td>3.9</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device cpu --py 39</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device cpu --py 39</td>
   </tr>
   <tr>
     <td>3.10</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310</td>
   </tr>
   <tr>
     <td rowspan="3">GPU&nbsp;(CUDA&nbsp;11.8&nbsp;+&nbsp;cuDNN&nbsp;8.6)</td>
     <td>3.8</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 38</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 38</td>
   </tr>
   <tr>
     <td>3.9</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 39</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 39</td>
   </tr>
   <tr>
     <td>3.10</td>
-    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 310</td>
+    <td>curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 310</td>
   </tr>
 </table>
 
-* 当设备类型为 GPU 时,请使用与环境匹配的 CUDA 和 cuDNN 版本对应的安装指令,否则,将无法正常使用高性能推理插件。
 * 对于 Linux 系统,使用 Bash 执行安装指令。
+* 当使用 NVIDIA GPU 时,请使用与环境匹配的 CUDA 和 cuDNN 版本对应的安装指令,否则,将无法正常使用高性能推理插件。
 * 当设备类型为 CPU 时,安装的高性能推理插件仅支持使用 CPU 进行推理;对于其他设备类型,安装的高性能推理插件则支持使用 CPU 或其他设备进行推理。
 
 ### 1.2 获取序列号与激活
@@ -76,37 +76,37 @@ comments: true
 
 ### 1.3 启用高性能推理插件
 
-在启用高性能插件前,请确保当前环境的 `LD_LIBRARY_PATH` 没有指定 TensorRT 的共享库目录,因为插件中已经集成了 TensorRT,避免 TensorRT 版本冲突导致插件无法正常使用
+对于 Linux 系统,如果在 Docker 容器中使用高性能推理插件,请为容器挂载宿主机的 `/dev/disk/by-uuid` 与 `${HOME}/.baidu/paddlex/licenses` 目录
 
 对于 PaddleX CLI,指定 `--use_hpip`,并设置序列号,即可启用高性能推理插件。如果希望进行联网激活,在第一次使用序列号时,需指定 `--update_license`,以通用图像分类产线为例:
 
-```diff
+```bash
 paddlex \
     --pipeline image_classification \
     --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg \
     --device gpu:0 \
-+   --use_hpip \
-+   --serial_number {序列号}
+    --use_hpip \
+    --serial_number {序列号}
 
 # 如果希望进行联网激活
 paddlex \
     --pipeline image_classification \
     --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg \
     --device gpu:0 \
-+   --use_hpip \
-+   --serial_number {序列号}
-+   --update_license
+    --use_hpip \
+    --serial_number {序列号} \
+    --update_license
 ```
 
 对于 PaddleX Python API,启用高性能推理插件的方法类似。仍以通用图像分类产线为例:
 
-```diff
+```python
 from paddlex import create_pipeline
 
 pipeline = create_pipeline(
     pipeline="image_classification",
-+   use_hpip=True,
-+   serial_number="{序列号}",
+    use_hpip=True,
+    hpi_params={"serial_number": "{序列号}"},
 )
 
 output = pipeline.predict("https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg")
@@ -116,33 +116,61 @@ output = pipeline.predict("https://paddle-model-ecology.bj.bcebos.com/paddlex/im
 
 ### 1.4 修改高性能推理配置
 
-PaddleX 为每个模型提供默认的高性能推理配置,并将其存储在模型的配置文件中。由于实际部署环境的多样性,使用默认配置可能无法在特定环境中获取理想的性能,甚至可能出现推理失败的情况。对于默认配置无法满足要求的情形,可以通过如下方式,尝试更换模型的推理后端
+PaddleX 结合模型信息与运行环境信息为每个模型提供默认的高性能推理配置。这些默认配置经过精心准备,以便在数个常见场景中可用,且能够取得较优的性能。因此,用户通常并不需要关心这些配置的具体细节。然而,由于实际部署环境与需求的多样性,使用默认配置可能无法在特定场景获取理想的性能,甚至可能出现推理失败的情况。对于默认配置无法满足要求的情形,用户可以通过修改模型目录中 `inference.yml` 文件中 `Hpi` 字段(如果该字段不存在,需要新增)的方式,手动调整配置。以下列举两种常见的情形:
 
-1. 找到模型目录中的 `inference.yml` 文件,定位到其中的 `Hpi` 字段;
-2. 修改 `selected_backends` 的值。具体而言,`selected_backends` 可能被设置如下:
+- 更换推理后端:
 
-    ```
-    selected_backends:
+    当默认的推理后端不可用时,需要手动更换推理后端。用户需要修改 `selected_backends` 字段(如果不存在,需要新增)。
+
+    ```yaml
+    Hpi:
+      ...
+      selected_backends:
         cpu: paddle_infer
         gpu: onnx_runtime
+      ...
     ```
 
-    其中每一项均按照 `{设备类型}: {推理后端名称}` 的格式填写,默认选用在官方测试环境中推理耗时最短的后端。`supported_backends` 中记录了官方测试环境中模型支持的推理后端,可供参考。
+    其中每一项均按照 `{设备类型}: {推理后端名称}` 的格式填写。
+
     目前所有可选的推理后端如下:
 
-    * `paddle_infer`:标准的 Paddle Inference 推理引擎。支持 CPU 和 GPU。
-    * `paddle_tensorrt`:[Paddle-TensorRT](https://www.paddlepaddle.org.cn/lite/v2.10/optimize/paddle_trt.html),Paddle 官方出品的高性能深度学习推理库,采用子图的形式对 TensorRT 进行了集成,以实现进一步优化加速。仅支持 GPU。
-    * `openvino`:[OpenVINO](https://github.com/openvinotoolkit/openvino),Intel 提供的深度学习推理工具,优化了多种 Intel 硬件上的模型推理性能。仅支持 CPU。
-    * `onnx_runtime`:[ONNX Runtime](https://onnxruntime.ai/),跨平台、高性能的推理引擎。支持 CPU 和 GPU。
-    * `tensorrt`:[TensorRT](https://developer.nvidia.com/tensorrt),NVIDIA 提供的高性能深度学习推理库,针对 NVIDIA GPU 进行优化以提升速度。仅支持 GPU。
+    * `paddle_infer`:Paddle Inference 推理引擎。支持 CPU 和 GPU。相比 PaddleX 快速推理,高性能推理插件支持以集成 TensorRT 子图的方式提升模型的 GPU 推理性能。
+    * `openvino`:[OpenVINO](https://github.com/openvinotoolkit/openvino),Intel 提供的深度学习推理工具,优化了多种 Intel 硬件上的模型推理性能。仅支持 CPU。高性能推理插件自动将模型转换为 ONNX 格式后用该引擎推理。
+    * `onnx_runtime`:[ONNX Runtime](https://onnxruntime.ai/),跨平台、高性能的推理引擎。支持 CPU 和 GPU。高性能推理插件自动将模型转换为 ONNX 格式后用该引擎推理。
+    * `tensorrt`:[TensorRT](https://developer.nvidia.com/tensorrt),NVIDIA 提供的高性能深度学习推理库,针对 NVIDIA GPU 进行优化以提升速度。仅支持 GPU。高性能推理插件自动将模型转换为 ONNX 格式后用该引擎推理。
+
+- 修改 Paddle Inference 或 TensorRT 的动态形状配置:
+
+    动态形状是 TensorRT 延迟指定部分或全部张量维度直到运行时的能力。当默认的动态形状配置无法满足需求时(例如,模型可能需要范围外的输入形状),用户需要修改推理后端配置中的 `trt_dynamic_shapes` 或 `dynamic_shapes` 字段:
+
+    ```yaml
+    Hpi:
+      ...
+      backend_configs:
+        # Paddle Inference 后端配置
+        paddle_infer:
+          ...
+          trt_dynamic_shapes:
+            x:
+              - [1, 3, 300, 300]
+              - [4, 3, 300, 300]
+              - [32, 3, 1200, 1200]
+          ...
+        # TensorRT 后端配置
+        tensorrt:
+          ...
+          dynamic_shapes:
+            x:
+              - [1, 3, 300, 300]
+              - [4, 3, 300, 300]
+              - [32, 3, 1200, 1200]
+          ...
+    ```
 
-    以下是目前的官方测试环境的部分关键信息:
+    在 `trt_dynamic_shapes` 或 `dynamic_shapes` 中,需要为每一个输入张量指定动态形状,格式为:`{输入张量名称}: [{最小形状}, {最优形状}, {最大形状}]`。有关最小形状、最优形状以及最大形状的相关介绍及更多细节,请参考 TensorRT 官方文档。
 
-    * CPU:Intel Xeon Gold 5117
-    * GPU:NVIDIA Tesla T4
-    * CUDA版本:11.8
-    * cuDNN版本:8.6
-    * Docker 镜像:registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82
+    在完成修改后,请删除模型目录中的缓存文件(`shape_range_info.pbtxt` 与 `trt_serialized` 开头的文件)。
 
 ## 2、支持使用高性能推理插件的产线与模型
 

+ 2 - 76
docs/pipeline_deploy/service_deploy.en.md

@@ -46,7 +46,7 @@ INFO:     Application startup complete.
 INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
 ```
 
-`--pipeline` can be specified as an official pipeline name or the path to a local pipeline configuration file. PaddleX uses this to build the pipeline and deploy it as a service. To adjust configurations (such as model path, batch_size, deployment device), please refer to the <b>"Model Application"</b> section in the [General Image Classification Pipeline Tutorial](../pipeline_usage/tutorials/cv_pipelines/image_classification.en.md) (for other pipelines, refer to the corresponding tutorials in the <b>"1.3 Calling the Service"</b> table).
+`--pipeline` can be specified as an official pipeline name or the path to a local pipeline configuration file. PaddleX uses this to build the pipeline and deploy it as a service. To adjust configurations (such as model path, batch_size, or deployment device), please refer to the <b>"Model Application"</b> section in the [General Image Classification Pipeline Tutorial](../pipeline_usage/tutorials/cv_pipelines/image_classification.en.md).
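For example, both invocations below are valid; the configuration-file path is a hypothetical placeholder:

```bash
# Serve an official pipeline by name
paddlex --serve --pipeline image_classification

# Serve a pipeline described by a local configuration file
paddlex --serve --pipeline ./image_classification.yaml
```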
 
 Command-line options related to serving deployment are as follows:
 
@@ -92,82 +92,8 @@ Command-line options related to serving deployment are as follows:
 
 ### 1.3 Call the Service
 
-Please refer to the <b>"Development Integration/Deployment"</b> section in the usage tutorials for each pipeline.
+Please refer to the <b>"Development Integration/Deployment"</b> section in the usage tutorials for each pipeline. You can find the pipeline tutorials [here](../pipeline_usage/pipeline_develop_guide.en.md).
 
-<table>
-<thead>
-<tr>
-<th>Model Pipelines</th>
-<th>Usage Tutorials</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>General Image Classification Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/image_classification.en.md">Tutorial for Using the General Image Classification Pipeline</a></td>
-</tr>
-<tr>
-<td>General Object Detection Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/object_detection.en.md">Tutorial for Using the General Object Detection Pipeline</a></td>
-</tr>
-<tr>
-<td>General Semantic Segmentation Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.en.md">Tutorial for Using the General Semantic Segmentation Pipeline</a></td>
-</tr>
-<tr>
-<td>General Instance Segmentation Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/instance_segmentation.en.md">Tutorial for Using the General Instance Segmentation Pipeline</a></td>
-</tr>
-<tr>
-<td>General Image Multi-Label Classification Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.en.md">Tutorial for Using the General Image Multi-Label Classification Pipeline</a></td>
-</tr>
-<tr>
-<td>Small Object Detection Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md">Tutorial for Using the Small Object Detection Pipeline</a></td>
-</tr>
-<tr>
-<td>Image Anomaly Detection Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.en.md">Tutorial for Using the Image Anomaly Detection Pipeline</a></td>
-</tr>
-<tr>
-<td>General OCR Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/OCR.en.md">Tutorial for Using the General OCR Pipeline</a></td>
-</tr>
-<tr>
-<td>General Table Recognition Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/table_recognition.en.md">Tutorial for Using the General Table Recognition Pipeline</a></td>
-</tr>
-<tr>
-<td>General Layout Parsing Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/layout_parsing.en.md">Tutorial for Using the Layout Parsing Recognition Pipeline</a></td>
-</tr>
-<tr>
-<td>Formula Recognition Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/formula_recognition.en.md">Tutorial for Using the Formula Recognition Pipeline</a></td>
-</tr>
-<tr>
-<td>Seal Text Recognition Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/seal_recognition.en.md">Tutorial for Using the Seal Text Recognition Pipeline</a></td>
-</tr>
-<tr>
-<td>Time Series Forecasting Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.en.md">Tutorial for Using the Time Series Forecasting Pipeline</a></td>
-</tr>
-<tr>
-<td>Time Series Anomaly Detection Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.en.md">Tutorial for Using the Time Series Anomaly Detection Pipeline</a></td>
-</tr>
-<tr>
-<td>Time Series Classification Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/time_series_pipelines/time_series_classification.en.md">Tutorial for Using the Time Series Classification Pipeline</a></td>
-</tr>
-<tr>
-<td>Document Scene Information Extraction v3 Pipeline</td>
-<td><a href="../pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md">Tutorial for Using the Document Scene Information Extraction v3 Pipeline</a></td>
-</tr>
-</tbody>
-</table>
 ## 2. Deploy Services for Production
 
 When deploying services into production environments, the stability, efficiency, and security of the services are of paramount importance. Below are some recommendations for deploying services into production.

+ 4 - 77
docs/pipeline_deploy/service_deploy.md

@@ -37,7 +37,7 @@ paddlex --serve --pipeline {产线名称或产线配置文件路径} [{其他命
 paddlex --serve --pipeline image_classification
 ```
 
-服务启动成功后,可以看到类似下展示的信息:
+服务启动成功后,可以看到类似下展示的信息:
 
 ```
 INFO:     Started server process [63108]
@@ -46,7 +46,8 @@ INFO:     Application startup complete.
 INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
 ```
 
---pipeline可指定为官方产线名称或本地产线配置文件路径。PaddleX 以此构建产线并部署为服务。如需调整配置(如模型路径、batch_size、部署设备等),请参考[通用图像分类产线使用教程](../pipeline_usage/tutorials/cv_pipelines/image_classification.md)中的 <b>“模型应用”</b> 部分(对于其他产线,可参考 <b>“1.3 调用服务”</b> 表格中的对应教程)。
+`--pipeline` 可指定为官方产线名称或本地产线配置文件路径。PaddleX 以此构建产线并部署为服务。如需调整配置(如模型路径、batch_size、部署设备等),请参考[通用图像分类产线使用教程](../pipeline_usage/tutorials/cv_pipelines/image_classification.md)中的 <b>“模型应用”</b> 部分。
+
 与服务化部署相关的命令行选项如下:
 
 <table>
@@ -91,82 +92,8 @@ INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
 
 ### 1.3 调用服务
 
-请参考各产线使用教程中的 <b>“开发集成/部署”</b> 部分。
+请参考各产线使用教程中的 <b>“开发集成/部署”</b> 部分。在[此处](../pipeline_usage/pipeline_develop_guide.md)可以找到各产线的使用教程。
 
-<table>
-<thead>
-<tr>
-<th>模型产线</th>
-<th>使用教程</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>通用图像分类产线</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/image_classification.md">通用图像分类产线使用教程</a></td>
-</tr>
-<tr>
-<td>通用目标检测产线</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/object_detection.md">通用目标检测产线使用教程</a></td>
-</tr>
-<tr>
-<td>通用语义分割产线</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.md">通用语义分割产线使用教程</a></td>
-</tr>
-<tr>
-<td>通用实例分割产线</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/instance_segmentation.md">通用实例分割产线使用教程</a></td>
-</tr>
-<tr>
-<td>通用图像多标签分类产线</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification.md">通用图像多标签分类产线使用教程</a></td>
-</tr>
-<tr>
-<td>小目标检测产线</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/small_object_detection.md">小目标检测产线使用教程</a></td>
-</tr>
-<tr>
-<td>图像异常检测产线</td>
-<td><a href="../pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md">图像异常检测产线使用教程</a></td>
-</tr>
-<tr>
-<td>通用OCR产线</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/OCR.md">通用OCR产线使用教程</a></td>
-</tr>
-<tr>
-<td>通用表格识别产线</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/table_recognition.md">通用表格识别产线使用教程</a></td>
-</tr>
-<tr>
-<td>通用版面解析产线</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md">通用版面解析产线使用教程</a></td>
-</tr>
-<tr>
-<td>公式识别产线</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md">公式识别产线使用教程</a></td>
-</tr>
-<tr>
-<td>印章文本识别产线</td>
-<td><a href="../pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md">印章文本识别产线使用教程</a></td>
-</tr>
-<tr>
-<td>时序预测产线</td>
-<td><a href="../pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting.md">时序预测产线使用教程</a></td>
-</tr>
-<tr>
-<td>时序异常检测产线</td>
-<td><a href="../pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection.md">时序异常检测产线使用教程</a></td>
-</tr>
-<tr>
-<td>时序分类产线</td>
-<td><a href="../pipeline_usage/tutorials/time_series_pipelines/time_series_classification.md">时序分类产线使用教程</a></td>
-</tr>
-<tr>
-<td>文档场景信息抽取v3产线</td>
-<td><a href="../pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md">文档场景信息抽取v3产线使用教程</a></td>
-</tr>
-</tbody>
-</table>
 ## 2、将服务用于生产
 
 将服务应用于生产环境中时,服务的稳定性、高效性与安全性尤为重要。以下是一些针对将服务用于生产的建议。

+ 7 - 7
docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md

@@ -348,7 +348,7 @@ for res in output:
 <pre><code class="language-python">import base64
 import requests
 
-API_URL = &quot;http://localhost:8080/anomaly-detection&quot; # 服务URL
+API_URL = &quot;http://localhost:8080/image-anomaly-detection&quot; # 服务URL
 image_path = &quot;./demo.jpg&quot;
 output_image_path = &quot;./out.jpg&quot;
 
@@ -405,7 +405,7 @@ int main() {
     std::string body = jsonObj.dump();
 
     // 调用API
-    auto response = client.Post(&quot;/anomaly-detection&quot;, headers, body, &quot;application/json&quot;);
+    auto response = client.Post(&quot;/image-anomaly-detection&quot;, headers, body, &quot;application/json&quot;);
     // 处理接口返回数据
     if (response &amp;&amp; response-&gt;status == 200) {
         nlohmann::json jsonResponse = nlohmann::json::parse(response-&gt;body);
@@ -446,7 +446,7 @@ import java.util.Base64;
 
 public class Main {
     public static void main(String[] args) throws IOException {
-        String API_URL = &quot;http://localhost:8080/anomaly-detection&quot;; // 服务URL
+        String API_URL = &quot;http://localhost:8080/image-anomaly-detection&quot;; // 服务URL
         String imagePath = &quot;./demo.jpg&quot;; // 本地图像
         String outputImagePath = &quot;./out.jpg&quot;; // 输出图像
 
@@ -505,7 +505,7 @@ import (
 )
 
 func main() {
-    API_URL := &quot;http://localhost:8080/anomaly-detection&quot;
+    API_URL := &quot;http://localhost:8080/image-anomaly-detection&quot;
     imagePath := &quot;./demo.jpg&quot;
     outputImagePath := &quot;./out.jpg&quot;
 
@@ -585,7 +585,7 @@ using Newtonsoft.Json.Linq;
 
 class Program
 {
-    static readonly string API_URL = &quot;http://localhost:8080/anomaly-detection&quot;;
+    static readonly string API_URL = &quot;http://localhost:8080/image-anomaly-detection&quot;;
     static readonly string imagePath = &quot;./demo.jpg&quot;;
     static readonly string outputImagePath = &quot;./out.jpg&quot;;
 
@@ -623,7 +623,7 @@ class Program
 <pre><code class="language-js">const axios = require('axios');
 const fs = require('fs');
 
-const API_URL = 'http://localhost:8080/anomaly-detection'
+const API_URL = 'http://localhost:8080/image-anomaly-detection'
 const imagePath = './demo.jpg'
 const outputImagePath = &quot;./out.jpg&quot;;
 
@@ -663,7 +663,7 @@ axios.request(config)
 
 <pre><code class="language-php">&lt;?php
 
-$API_URL = &quot;http://localhost:8080/anomaly-detection&quot;; // 服务URL
+$API_URL = &quot;http://localhost:8080/image-anomaly-detection&quot;; // 服务URL
 $image_path = &quot;./demo.jpg&quot;;
 $output_image_path = &quot;./out.jpg&quot;;
 

+ 42 - 27
docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md

@@ -670,7 +670,7 @@ Below are the API references and multi-language service invocation examples:
 </table>
 <p>Operations provided by the service are as follows:</p>
 <ul>
-<li><b><code>analyzeImage</code></b></li>
+<li><b><code>analyzeImages</code></b></li>
 </ul>
 <p>Analyze images using computer vision models to obtain OCR, table recognition results, and extract key information from the images.</p>
 <p><code>POST /chatocr-vision</code></p>
@@ -875,7 +875,7 @@ Below are the API references and multi-language service invocation examples:
 <tr>
 <td><code>visionInfo</code></td>
 <td><code>object</code></td>
-<td>Key information from the image. Provided by the <code>analyzeImage</code> operation.</td>
+<td>Key information from the image. Provided by the <code>analyzeImages</code> operation.</td>
 <td>Yes</td>
 </tr>
 <tr>
@@ -904,11 +904,16 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Currently, <code>llmParams</code> can take the following form:</p>
+<p>Currently, <code>llmParams</code> can take one of the following forms:</p>
 <pre><code class="language-json">{
 &quot;apiType&quot;: &quot;qianfan&quot;,
-&quot;apiKey&quot;: &quot;{qianfan API key}&quot;,
-&quot;secretKey&quot;: &quot;{qianfan secret key}&quot;
+&quot;apiKey&quot;: &quot;{Qianfan Platform API key}&quot;,
+&quot;secretKey&quot;: &quot;{Qianfan Platform secret key}&quot;
+}
+</code></pre>
+<pre><code class="language-json">{
+&quot;apiType&quot;: &quot;aistudio&quot;,
+&quot;accessToken&quot;: &quot;{AI Studio access token}&quot;
 }
 </code></pre>
 <ul>
@@ -974,13 +979,18 @@ Below are the API references and multi-language service invocation examples:
 </tr>
 </tbody>
 </table>
-<p>Currently, <code>llmParams</code> can take the following form:</p>
+<p>Currently, <code>llmParams</code> can take one of the following forms:</p>
 <pre><code class="language-json">{
 &quot;apiType&quot;: &quot;qianfan&quot;,
 &quot;apiKey&quot;: &quot;{Qianfan Platform API key}&quot;,
 &quot;secretKey&quot;: &quot;{Qianfan Platform secret key}&quot;
 }
 </code></pre>
+<pre><code class="language-json">{
+&quot;apiType&quot;: &quot;aistudio&quot;,
+&quot;accessToken&quot;: &quot;{AI Studio access token}&quot;
+}
+</code></pre>
 <ul>
 <li>When the request is processed successfully, the <code>result</code> in the response body has the following properties:</li>
 </ul>
@@ -1027,43 +1037,37 @@ Below are the API references and multi-language service invocation examples:
 <tr>
 <td><code>visionInfo</code></td>
 <td><code>object</code></td>
-<td>Key information from images. Provided by the <code>analyzeImage</code> operation.</td>
+<td>Key information from images. Provided by the <code>analyzeImages</code> operation.</td>
 <td>Yes</td>
 </tr>
 <tr>
-<td><code>taskDescription</code></td>
+<td><code>vectorStore</code></td>
 <td><code>string</code></td>
-<td>Task prompt.</td>
+<td>Serialized result of the vector database. Provided by the <code>buildVectorStore</code> operation.</td>
 <td>No</td>
 </tr>
 <tr>
-<td><code>rules</code></td>
+<td><code>retrievalResult</code></td>
 <td><code>string</code></td>
-<td>Custom extraction rules, e.g., for output formatting.</td>
+<td>Results of knowledge retrieval. Provided by the <code>retrieveKnowledge</code> operation.</td>
 <td>No</td>
 </tr>
 <tr>
-<td><code>fewShot</code></td>
+<td><code>taskDescription</code></td>
 <td><code>string</code></td>
-<td>Example prompts.</td>
+<td>Task prompt.</td>
 <td>No</td>
 </tr>
 <tr>
-<td><code>vectorStore</code></td>
+<td><code>rules</code></td>
 <td><code>string</code></td>
-<td>Serialized result of the vector database. Provided by the <code>buildVectorStore</code> operation.</td>
+<td>Custom extraction rules, e.g., for output formatting.</td>
 <td>No</td>
 </tr>
 <tr>
-<td><code>retrievalResult</code></td>
+<td><code>fewShot</code></td>
 <td><code>string</code></td>
-<td>Results of knowledge retrieval. Provided by the <code>retrieveKnowledge</code> operation.</td>
-<td>No</td>
-</tr>
-<tr>
-<td><code>returnPrompts</code></td>
-<td><code>boolean</code></td>
-<td>Whether to return the prompts used. Enabled by default.</td>
+<td>Example prompts.</td>
 <td>No</td>
 </tr>
 <tr>
@@ -1078,15 +1082,26 @@ Below are the API references and multi-language service invocation examples:
 <td>API parameters for the large language model.</td>
 <td>No</td>
 </tr>
+<tr>
+<td><code>returnPrompts</code></td>
+<td><code>boolean</code></td>
+<td>Whether to return the prompts used. Disabled by default.</td>
+<td>No</td>
+</tr>
 </tbody>
 </table>
-<p>Currently, <code>llmParams</code> can take the following form:</p>
+<p>Currently, <code>llmParams</code> can take one of the following forms:</p>
 <pre><code class="language-json">{
 &quot;apiType&quot;: &quot;qianfan&quot;,
 &quot;apiKey&quot;: &quot;{Qianfan Platform API key}&quot;,
 &quot;secretKey&quot;: &quot;{Qianfan Platform secret key}&quot;
 }
 </code></pre>
+<pre><code class="language-json">{
+&quot;apiType&quot;: &quot;aistudio&quot;,
+&quot;accessToken&quot;: &quot;{AI Studio access token}&quot;
+}
+</code></pre>
 <ul>
 <li>On successful request processing, the <code>result</code> in the response body has the following properties:</li>
 </ul>
@@ -1231,14 +1246,14 @@ result_retrieval = resp_retrieval.json()[&quot;result&quot;]
 payload = {
     &quot;keys&quot;: keys,
     &quot;visionInfo&quot;: result_vision[&quot;visionInfo&quot;],
+    &quot;vectorStore&quot;: result_vector[&quot;vectorStore&quot;],
+    &quot;retrievalResult&quot;: result_retrieval[&quot;retrievalResult&quot;],
     &quot;taskDescription&quot;: &quot;&quot;,
     &quot;rules&quot;: &quot;&quot;,
     &quot;fewShot&quot;: &quot;&quot;,
-    &quot;vectorStore&quot;: result_vector[&quot;vectorStore&quot;],
-    &quot;retrievalResult&quot;: result_retrieval[&quot;retrievalResult&quot;],
-    &quot;returnPrompts&quot;: True,
     &quot;llmName&quot;: LLM_NAME,
     &quot;llmParams&quot;: LLM_PARAMS,
+    &quot;returnPrompts&quot;: True,
 }
 resp_chat = requests.post(url=f&quot;{API_BASE_URL}/chatocr-chat&quot;, json=payload)
 if resp_chat.status_code != 200:

+ 40 - 25
docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md

@@ -639,7 +639,7 @@ chat_result.print()
 </table>
 <p>服务提供的操作如下:</p>
 <ul>
-<li><b><code>analyzeImage</code></b></li>
+<li><b><code>analyzeImages</code></b></li>
 </ul>
 <p>使用计算机视觉模型对图像进行分析,获得OCR、表格识别结果等,并提取图像中的关键信息。</p>
 <p><code>POST /chatocr-vision</code></p>
@@ -844,7 +844,7 @@ chat_result.print()
 <tr>
 <td><code>visionInfo</code></td>
 <td><code>object</code></td>
-<td>图像中的关键信息。由<code>analyzeImage</code>操作提供。</td>
+<td>图像中的关键信息。由<code>analyzeImages</code>操作提供。</td>
 <td>是</td>
 </tr>
 <tr>
@@ -873,13 +873,18 @@ chat_result.print()
 </tr>
 </tbody>
 </table>
-<p>当前,<code>llmParams</code> 可以采用如下形式:</p>
+<p>当前,<code>llmParams</code> 可以采用如下形式之一:</p>
 <pre><code class="language-json">{
 &quot;apiType&quot;: &quot;qianfan&quot;,
 &quot;apiKey&quot;: &quot;{千帆平台API key}&quot;,
 &quot;secretKey&quot;: &quot;{千帆平台secret key}&quot;
 }
 </code></pre>
+<pre><code class="language-json">{
+&quot;apiType&quot;: &quot;aistudio&quot;,
+&quot;accessToken&quot;: &quot;{星河社区access token}&quot;
+}
+</code></pre>
 <ul>
 <li>请求处理成功时,响应体的<code>result</code>具有如下属性:</li>
 </ul>
@@ -943,13 +948,18 @@ chat_result.print()
 </tr>
 </tbody>
 </table>
-<p>当前,<code>llmParams</code> 可以采用如下形式:</p>
+<p>当前,<code>llmParams</code> 可以采用如下形式之一:</p>
 <pre><code class="language-json">{
 &quot;apiType&quot;: &quot;qianfan&quot;,
 &quot;apiKey&quot;: &quot;{千帆平台API key}&quot;,
 &quot;secretKey&quot;: &quot;{千帆平台secret key}&quot;
 }
 </code></pre>
+<pre><code class="language-json">{
+&quot;apiType&quot;: &quot;aistudio&quot;,
+&quot;accessToken&quot;: &quot;{星河社区access token}&quot;
+}
+</code></pre>
 <ul>
 <li>请求处理成功时,响应体的<code>result</code>具有如下属性:</li>
 </ul>
@@ -996,43 +1006,37 @@ chat_result.print()
 <tr>
 <td><code>visionInfo</code></td>
 <td><code>object</code></td>
-<td>图像中的关键信息。由<code>analyzeImage</code>操作提供。</td>
+<td>图像中的关键信息。由<code>analyzeImages</code>操作提供。</td>
 <td>是</td>
 </tr>
 <tr>
-<td><code>taskDescription</code></td>
+<td><code>vectorStore</code></td>
 <td><code>string</code></td>
-<td>提示词任务。</td>
+<td>向量数据库序列化结果。由<code>buildVectorStore</code>操作提供。</td>
 <td>否</td>
 </tr>
 <tr>
-<td><code>rules</code></td>
+<td><code>retrievalResult</code></td>
 <td><code>string</code></td>
-<td>提示词规则。用于自定义信息抽取规则,例如规范输出格式。</td>
+<td>知识检索结果。由<code>retrieveKnowledge</code>操作提供。</td>
 <td>否</td>
 </tr>
 <tr>
-<td><code>fewShot</code></td>
+<td><code>taskDescription</code></td>
 <td><code>string</code></td>
-<td>提示词示例。</td>
+<td>提示词任务。</td>
 <td>否</td>
 </tr>
 <tr>
-<td><code>vectorStore</code></td>
+<td><code>rules</code></td>
 <td><code>string</code></td>
-<td>向量数据库序列化结果。由<code>buildVectorStore</code>操作提供。</td>
+<td>提示词规则。用于自定义信息抽取规则,例如规范输出格式。</td>
 <td>否</td>
 </tr>
 <tr>
-<td><code>retrievalResult</code></td>
+<td><code>fewShot</code></td>
 <td><code>string</code></td>
-<td>知识检索结果。由<code>retrieveKnowledge</code>操作提供。</td>
-<td>否</td>
-</tr>
-<tr>
-<td><code>returnPrompts</code></td>
-<td><code>boolean</code></td>
-<td>是否返回使用的提示词。默认启用。</td>
+<td>提示词示例。</td>
 <td>否</td>
 </tr>
 <tr>
@@ -1047,15 +1051,26 @@ chat_result.print()
 <td>大语言模型API参数。</td>
 <td>否</td>
 </tr>
+<tr>
+<td><code>returnPrompts</code></td>
+<td><code>boolean</code></td>
+<td>是否返回使用的提示词。默认禁用。</td>
+<td>否</td>
+</tr>
 </tbody>
 </table>
-<p>当前,<code>llmParams</code> 可以采用如下形式:</p>
+<p>当前,<code>llmParams</code> 可以采用如下形式之一:</p>
 <pre><code class="language-json">{
 &quot;apiType&quot;: &quot;qianfan&quot;,
 &quot;apiKey&quot;: &quot;{千帆平台API key}&quot;,
 &quot;secretKey&quot;: &quot;{千帆平台secret key}&quot;
 }
 </code></pre>
+<pre><code class="language-json">{
+&quot;apiType&quot;: &quot;aistudio&quot;,
+&quot;accessToken&quot;: &quot;{星河社区access token}&quot;
+}
+</code></pre>
 <ul>
 <li>请求处理成功时,响应体的<code>result</code>具有如下属性:</li>
 </ul>
@@ -1201,14 +1216,14 @@ result_retrieval = resp_retrieval.json()[&quot;result&quot;]
 payload = {
     &quot;keys&quot;: keys,
     &quot;visionInfo&quot;: result_vision[&quot;visionInfo&quot;],
+    &quot;vectorStore&quot;: result_vector[&quot;vectorStore&quot;],
+    &quot;retrievalResult&quot;: result_retrieval[&quot;retrievalResult&quot;],
     &quot;taskDescription&quot;: &quot;&quot;,
     &quot;rules&quot;: &quot;&quot;,
     &quot;fewShot&quot;: &quot;&quot;,
-    &quot;vectorStore&quot;: result_vector[&quot;vectorStore&quot;],
-    &quot;retrievalResult&quot;: result_retrieval[&quot;retrievalResult&quot;],
-    &quot;returnPrompts&quot;: True,
     &quot;llmName&quot;: LLM_NAME,
     &quot;llmParams&quot;: LLM_PARAMS,
+    &quot;returnPrompts&quot;: True,
 }
 resp_chat = requests.post(url=f&quot;{API_BASE_URL}/chatocr-chat&quot;, json=payload)
 if resp_chat.status_code != 200:

+ 2 - 0
docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md

@@ -744,6 +744,8 @@ import requests
 
 API_URL = &quot;http://localhost:8080/layout-parsing&quot; # 服务URL
 
+image_path = "./demo.jpg"
+
 # 对本地图像进行Base64编码
 with open(image_path, &quot;rb&quot;) as file:
     image_bytes = file.read()

+ 26 - 28
paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py

@@ -45,7 +45,7 @@ class InferenceParams(BaseModel):
     maxLongSide: Optional[Annotated[int, Field(gt=0)]] = None
 
 
-class AnalyzeImageRequest(BaseModel):
+class AnalyzeImagesRequest(BaseModel):
     file: str
     fileType: Optional[FileType] = None
     useImgOrientationCls: bool = True
@@ -78,22 +78,22 @@ class VisionResult(BaseModel):
     layoutImage: str
 
 
-class AnalyzeImageResult(BaseModel):
+class AnalyzeImagesResult(BaseModel):
     visionResults: List[VisionResult]
     visionInfo: dict
 
 
-class AIStudioParams(BaseModel):
-    accessToken: str
-    apiType: Literal["aistudio"] = "aistudio"
-
-
 class QianfanParams(BaseModel):
     apiKey: str
     secretKey: str
     apiType: Literal["qianfan"] = "qianfan"
 
 
+class AIStudioParams(BaseModel):
+    accessToken: str
+    apiType: Literal["aistudio"] = "aistudio"
+
+
 LLMName: TypeAlias = Literal[
     "ernie-3.5",
     "ernie-3.5-8k",
@@ -105,7 +105,7 @@ LLMName: TypeAlias = Literal[
     "ernie-tiny-8k",
     "ernie-char-8k",
 ]
-LLMParams: TypeAlias = Union[AIStudioParams, QianfanParams]
+LLMParams: TypeAlias = Union[QianfanParams, AIStudioParams]
 
 
 class BuildVectorStoreRequest(BaseModel):
@@ -134,14 +134,14 @@ class RetrieveKnowledgeResult(BaseModel):
 class ChatRequest(BaseModel):
     keys: List[str]
     visionInfo: dict
+    vectorStore: Optional[str] = None
+    retrievalResult: Optional[str] = None
     taskDescription: Optional[str] = None
     rules: Optional[str] = None
     fewShot: Optional[str] = None
-    vectorStore: Optional[str] = None
-    retrievalResult: Optional[str] = None
-    returnPrompts: bool = True
     llmName: Optional[LLMName] = None
     llmParams: Optional[Annotated[LLMParams, Field(discriminator="apiType")]] = None
+    returnPrompts: bool = False
 
 
 class Prompts(BaseModel):
@@ -196,14 +196,14 @@ def _infer_file_type(url: str) -> FileType:
 
 
 def _llm_params_to_dict(llm_params: LLMParams) -> dict:
-    if llm_params.apiType == "aistudio":
-        return {"api_type": "aistudio", "access_token": llm_params.accessToken}
-    elif llm_params.apiType == "qianfan":
+    if llm_params.apiType == "qianfan":
         return {
             "api_type": "qianfan",
             "ak": llm_params.apiKey,
             "sk": llm_params.secretKey,
         }
+    if llm_params.apiType == "aistudio":
+        return {"api_type": "aistudio", "access_token": llm_params.accessToken}
     else:
         assert_never(llm_params.apiType)
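As a quick, illustrative check of the two parameter models and the helper defined above (the key and token values are dummy placeholders):

```python
# Exercises QianfanParams, AIStudioParams, and _llm_params_to_dict from this module.
qianfan = QianfanParams(apiKey="my-ak", secretKey="my-sk")
aistudio = AIStudioParams(accessToken="my-token")

assert _llm_params_to_dict(qianfan) == {"api_type": "qianfan", "ak": "my-ak", "sk": "my-sk"}
assert _llm_params_to_dict(aistudio) == {"api_type": "aistudio", "access_token": "my-token"}
```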
 
@@ -265,12 +265,12 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
 
     @app.post(
         "/chatocr-vision",
-        operation_id="analyzeImage",
+        operation_id="analyzeImages",
         responses={422: {"model": Response}},
     )
-    async def _analyze_image(
-        request: AnalyzeImageRequest,
-    ) -> ResultResponse[AnalyzeImageResult]:
+    async def _analyze_images(
+        request: AnalyzeImagesRequest,
+    ) -> ResultResponse[AnalyzeImagesResult]:
         pipeline = ctx.pipeline
         aiohttp_session = ctx.aiohttp_session
 
@@ -371,7 +371,7 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
                 logId=serving_utils.generate_log_id(),
                 errorCode=0,
                 errorMsg="Success",
-                result=AnalyzeImageResult(
+                result=AnalyzeImagesResult(
                     visionResults=vision_results,
                     visionInfo=result[1],
                 ),
@@ -395,8 +395,6 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
             kwargs = {"visual_info": results.VisualInfoResult(request.visionInfo)}
             if request.minChars is not None:
                 kwargs["min_characters"] = request.minChars
-            else:
-                kwargs["min_characters"] = 0
             if request.llmRequestInterval is not None:
                 kwargs["llm_request_interval"] = request.llmRequestInterval
             if request.llmName is not None:
@@ -470,23 +468,23 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
                 "key_list": request.keys,
                 "visual_info": results.VisualInfoResult(request.visionInfo),
             }
-            if request.taskDescription is not None:
-                kwargs["user_task_description"] = request.taskDescription
-            if request.rules is not None:
-                kwargs["rules"] = request.rules
-            if request.fewShot is not None:
-                kwargs["few_shot"] = request.fewShot
             if request.vectorStore is not None:
                 kwargs["vector"] = results.VectorResult({"vector": request.vectorStore})
             if request.retrievalResult is not None:
                 kwargs["retrieval_result"] = results.RetrievalResult(
                     {"retrieval": request.retrievalResult}
                 )
-            kwargs["save_prompt"] = request.returnPrompts
+            if request.taskDescription is not None:
+                kwargs["user_task_description"] = request.taskDescription
+            if request.rules is not None:
+                kwargs["rules"] = request.rules
+            if request.fewShot is not None:
+                kwargs["few_shot"] = request.fewShot
             if request.llmName is not None:
                 kwargs["llm_name"] = request.llmName
             if request.llmParams is not None:
                 kwargs["llm_params"] = _llm_params_to_dict(request.llmParams)
+            kwargs["save_prompt"] = request.returnPrompts
 
             result = await serving_utils.call_async(pipeline.pipeline.chat, **kwargs)