Browse source

Fix and update service schemas and apps (#2980)

* Update schemas and apps

* Make type vars public

* Add 3d_bev_detection

* Add layout_parsing_v2 and fix style

* Fix 3d_bev_det

* Update ppchatocrv4

* Fix paddle2onnx dep list bug and update paddle2onnx version

* Update ppchatocr apps

* Update HPS docs

* Add double quotes
Lin Manhui, 9 months ago
parent commit ab99094014
31 changed files, with 631 additions and 155 deletions
  1. docs/pipeline_deploy/serving.en.md (+69, -25)
  2. docs/pipeline_deploy/serving.md (+70, -26)
  3. paddlex/inference/serving/basic_serving/_app.py (+15, -17)
  4. paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py (+6, -1)
  5. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py (+2, -4)
  6. paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py (+10, -16)
  7. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py (+1, -0)
  8. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing_v2.py (+126, -0)
  9. paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py (+76, -0)
  10. paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py (+1, -1)
  11. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py (+9, -3)
  12. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py (+39, -3)
  13. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py (+10, -16)
  14. paddlex/inference/serving/infra/utils.py (+4, -4)
  15. paddlex/inference/serving/schemas/face_recognition.py (+2, -2)
  16. paddlex/inference/serving/schemas/human_keypoint_detection.py (+2, -1)
  17. paddlex/inference/serving/schemas/layout_parsing.py (+5, -4)
  18. paddlex/inference/serving/schemas/layout_parsing_v2.py (+82, -0)
  19. paddlex/inference/serving/schemas/m_3d_bev_detection.py (+49, -0)
  20. paddlex/inference/serving/schemas/ocr.py (+2, -3)
  21. paddlex/inference/serving/schemas/open_vocabulary_detection.py (+1, -1)
  22. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py (+6, -4)
  23. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py (+29, -10)
  24. paddlex/inference/serving/schemas/pp_shituv2.py (+2, -2)
  25. paddlex/inference/serving/schemas/seal_recognition.py (+3, -3)
  26. paddlex/inference/serving/schemas/shared/image_segmentation.py (+1, -1)
  27. paddlex/inference/serving/schemas/table_recognition.py (+3, -3)
  28. paddlex/inference/serving/schemas/table_recognition_v2.py (+3, -3)
  29. paddlex/inference/utils/official_models.py (+1, -1)
  30. paddlex/paddle2onnx_requirements.txt (+1, -1)
  31. setup.py (+1, -0)

+ 69 - 25
docs/pipeline_deploy/serving.en.md

@@ -113,7 +113,7 @@ The "Development Integration/Deployment" section in each pipeline’s tutorial p
 Find the high-stability serving SDK corresponding to the pipeline in the table below and download it:
 
 <details>
-<summary> 👉Click to view</summary>
+<summary>👉 Click to view</summary>
 <table>
 <thead>
 <tr>
@@ -123,84 +123,128 @@ Find the high-stability serving SDK corresponding to the pipeline in the table b
 </thead>
 <tbody>
 <tr>
-<td>PP-ChatOCRv3-doc</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_PP-ChatOCRv3-doc_sdk.tar.gz">paddlex_hps_PP-ChatOCRv3-doc_sdk.tar.gz</a></td>
+<td>PP-ChatOCR-doc v3</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_PP-ChatOCRv3-doc_sdk.tar.gz">paddlex_hps_PP-ChatOCRv3-doc_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>General image classification</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_image_classification.tar.gz">paddlex_hps_image_classification.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_image_classification.tar.gz">paddlex_hps_image_classification.tar.gz</a></td>
 </tr>
 <tr>
 <td>General object detection</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_object_detection_sdk.tar.gz">paddlex_hps_object_detection_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_object_detection_sdk.tar.gz">paddlex_hps_object_detection_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>General instance segmentation</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_instance_segmentation_sdk.tar.gz">paddlex_hps_instance_segmentation_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_instance_segmentation_sdk.tar.gz">paddlex_hps_instance_segmentation_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>General semantic segmentation</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_semantic_segmentation_sdk.tar.gz">paddlex_hps_semantic_segmentation_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_semantic_segmentation_sdk.tar.gz">paddlex_hps_semantic_segmentation_sdk.tar.gz</a></td>
 </tr>
 <tr>
-<td>General image multi-label classification</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_image_multi_label_classification_sdk.tar.gz">paddlex_hps_image_multi_label_classification_sdk.tar.gz</a></td>
+<td>Image multi-label classification</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_image_multilabel_classification_sdk.tar.gz">paddlex_hps_image_multilabel_classification_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>General image recognition</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_general_image_recognition_sdk.tar.gz">paddlex_hps_general_image_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_PP-ShiTuV2_sdk.tar.gz">paddlex_hps_PP-ShiTuV2_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>Pedestrian attribute recognition</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_pedestrian_attribute_recognition_sdk.tar.gz">paddlex_hps_pedestrian_attribute_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_pedestrian_attribute_recognition_sdk.tar.gz">paddlex_hps_pedestrian_attribute_recognition_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>Vehicle attribute recognition</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_vehicle_attribute_recognition_sdk.tar.gz">paddlex_hps_vehicle_attribute_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_vehicle_attribute_recognition_sdk.tar.gz">paddlex_hps_vehicle_attribute_recognition_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>Face recognition</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_face_recognition_sdk.tar.gz">paddlex_hps_face_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_face_recognition_sdk.tar.gz">paddlex_hps_face_recognition_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>Small object detection</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_small_object_detection_sdk.tar.gz">paddlex_hps_small_object_detection_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_small_object_detection_sdk.tar.gz">paddlex_hps_small_object_detection_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>Image anomaly detection</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_image_anomaly_detection_sdk.tar.gz">paddlex_hps_image_anomaly_detection_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_anomaly_detection_sdk.tar.gz">paddlex_hps_anomaly_detection_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>Human keypoint detection</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_human_keypoint_detection_sdk.tar.gz">paddlex_hps_human_keypoint_detection_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>Open vocabulary detection</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_open_vocabulary_detection_sdk.tar.gz">paddlex_hps_open_vocabulary_detection_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>Open vocabulary segmentation</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_open_vocabulary_segmentation_sdk.tar.gz">paddlex_hps_open_vocabulary_segmentation_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>Rotated object detection</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_rotated_object_detection_sdk.tar.gz">paddlex_hps_rotated_object_detection_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>3D multi-modal fusion detection</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_3d_bev_detection_sdk.tar.gz">paddlex_hps_3d_bev_detection_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>General OCR</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_OCR_sdk.tar.gz">paddlex_hps_OCR_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_OCR_sdk.tar.gz">paddlex_hps_OCR_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>General table recognition</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_table_recognition_sdk.tar.gz">paddlex_hps_table_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_table_recognition_sdk.tar.gz">paddlex_hps_table_recognition_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>General table recognition v2</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_table_recognition_v2_sdk.tar.gz">paddlex_hps_table_recognition_v2_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>General layout parsing</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_layout_parsing_sdk.tar.gz">paddlex_hps_layout_parsing_sdk.tar.gz</a></td>
 </tr>
 <tr>
-<td>Layout parsing</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_layout_parsing_sdk.tar.gz">paddlex_hps_layout_parsing_sdk.tar.gz</a></td>
+<td>General layout parsing v2</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_layout_parsing_v2_sdk.tar.gz">paddlex_hps_layout_parsing_v2_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>Formula recognition</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_formula_recognition_sdk.tar.gz">paddlex_hps_formula_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_formula_recognition_sdk.tar.gz">paddlex_hps_formula_recognition_sdk.tar.gz</a></td>
 </tr>
 <tr>
-<td>Seal recognition</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_seal_recognition_sdk.tar.gz">paddlex_hps_seal_recognition_sdk.tar.gz</a></td>
+<td>Seal text recognition</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_seal_recognition_sdk.tar.gz">paddlex_hps_seal_recognition_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>Document image preprocessing</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_doc_preprocessor_sdk.tar.gz">paddlex_hps_doc_preprocessor_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>Time series forecasting</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_time_series_forecasting_sdk.tar.gz">paddlex_hps_time_series_forecasting_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_ts_forecast_sdk.tar.gz">paddlex_hps_ts_forecast_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>Time series anomaly detection</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_time_series_anomaly_detection_sdk.tar.gz">paddlex_hps_time_series_anomaly_detection_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_ts_anomaly_detection_sdk.tar.gz">paddlex_hps_ts_anomaly_detection_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>Time series classification</td>
-<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_time_series_classification_sdk.tar.gz">paddlex_hps_time_series_classification_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_ts_classification_sdk.tar.gz">paddlex_hps_ts_classification_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>Multilingual speech recognition</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_multilingual_speech_recognition_sdk.tar.gz">paddlex_hps_multilingual_speech_recognition_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>General video classification</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_video_classification_sdk.tar.gz">paddlex_hps_video_classification_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>General video detection</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_video_detection_sdk.tar.gz">paddlex_hps_video_detection_sdk.tar.gz</a></td>
 </tr>
 </tbody>
 </table>

+ 70 - 26
docs/pipeline_deploy/serving.md

@@ -8,7 +8,7 @@ comments: true
 
 PaddleX 产线服务化部署示意图:
 
-<img src="https://raw.githubusercontent.com/cuicheng01/PaddleX_doc_images/main/images/pipeline_deploy/serving.png"  width="300" />
+<img src="https://raw.githubusercontent.com/cuicheng01/PaddleX_doc_images/main/images/pipeline_deploy/serving.png" width="300"/>
 
 针对用户的不同需求,PaddleX 提供多种产线服务化部署方案:
 
@@ -113,7 +113,7 @@ paddlex --serve --pipeline image_classification --use_hpip
 在下表中找到产线对应的高稳定性服务化部署 SDK 并下载:
 
 <details>
-<summary> 👉点击查看</summary>
+<summary>👉 点击查看</summary>
 <table>
 <thead>
 <tr>
@@ -123,84 +123,128 @@ paddlex --serve --pipeline image_classification --use_hpip
 </thead>
 <tbody>
 <tr>
-<td>文档场景信息抽取v3</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_PP-ChatOCRv3-doc_sdk.tar.gz>paddlex_hps_PP-ChatOCRv3-doc_sdk.tar.gz</a></td>
+<td>文档场景信息抽取 v3</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_PP-ChatOCRv3-doc_sdk.tar.gz">paddlex_hps_PP-ChatOCRv3-doc_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>通用图像分类</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_image_classification.tar.gz>paddlex_hps_image_classification.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_image_classification.tar.gz">paddlex_hps_image_classification.tar.gz</a></td>
 </tr>
 <tr>
 <td>通用目标检测</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_object_detection_sdk.tar.gz>paddlex_hps_object_detection_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_object_detection_sdk.tar.gz">paddlex_hps_object_detection_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>通用实例分割</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_instance_segmentation_sdk.tar.gz>paddlex_hps_instance_segmentation_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_instance_segmentation_sdk.tar.gz">paddlex_hps_instance_segmentation_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>通用语义分割</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_semantic_segmentation_sdk.tar.gz>paddlex_hps_semantic_segmentation_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_semantic_segmentation_sdk.tar.gz">paddlex_hps_semantic_segmentation_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>通用图像多标签分类</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_image_multi_label_classification_sdk.tar.gz>paddlex_hps_image_multi_label_classification_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_image_multilabel_classification_sdk.tar.gz">paddlex_hps_image_multilabel_classification_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>通用图像识别</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_general_image_recognition_sdk.tar.gz>paddlex_hps_general_image_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_PP-ShiTuV2_sdk.tar.gz">paddlex_hps_PP-ShiTuV2_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>行人属性识别</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_pedestrian_attribute_recognition_sdk.tar.gz>paddlex_hps_pedestrian_attribute_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_pedestrian_attribute_recognition_sdk.tar.gz">paddlex_hps_pedestrian_attribute_recognition_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>车辆属性识别</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_vehicle_attribute_recognition_sdk.tar.gz>paddlex_hps_vehicle_attribute_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_vehicle_attribute_recognition_sdk.tar.gz">paddlex_hps_vehicle_attribute_recognition_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>人脸识别</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_face_recognition_sdk.tar.gz>paddlex_hps_face_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_face_recognition_sdk.tar.gz">paddlex_hps_face_recognition_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>小目标检测</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_small_object_detection_sdk.tar.gz>paddlex_hps_small_object_detection_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_small_object_detection_sdk.tar.gz">paddlex_hps_small_object_detection_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>图像异常检测</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_image_anomaly_detection_sdk.tar.gz>paddlex_hps_image_anomaly_detection_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_anomaly_detection_sdk.tar.gz">paddlex_hps_anomaly_detection_sdk.tar.gz</a></td>
 </tr>
 <tr>
-<td>通用OCR</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_OCR_sdk.tar.gz>paddlex_hps_OCR_sdk.tar.gz</a></td>
+<td>人体关键点检测</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_human_keypoint_detection_sdk.tar.gz">paddlex_hps_human_keypoint_detection_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>开放词汇检测</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_open_vocabulary_detection_sdk.tar.gz">paddlex_hps_open_vocabulary_detection_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>开放词汇分割</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_open_vocabulary_segmentation_sdk.tar.gz">paddlex_hps_open_vocabulary_segmentation_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>旋转目标检测</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_rotated_object_detection_sdk.tar.gz">paddlex_hps_rotated_object_detection_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>3D 多模态融合检测</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_3d_bev_detection_sdk.tar.gz">paddlex_hps_3d_bev_detection_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>通用 OCR</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_OCR_sdk.tar.gz">paddlex_hps_OCR_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>通用表格识别</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_table_recognition_sdk.tar.gz>paddlex_hps_table_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_table_recognition_sdk.tar.gz">paddlex_hps_table_recognition_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>通用表格识别 v2</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_table_recognition_v2_sdk.tar.gz">paddlex_hps_table_recognition_v2_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>通用版面解析</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_layout_parsing_sdk.tar.gz>paddlex_hps_layout_parsing_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_layout_parsing_sdk.tar.gz">paddlex_hps_layout_parsing_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>通用版面解析 v2</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_layout_parsing_v2_sdk.tar.gz">paddlex_hps_layout_parsing_v2_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>公式识别</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_formula_recognition_sdk.tar.gz>paddlex_hps_formula_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_formula_recognition_sdk.tar.gz">paddlex_hps_formula_recognition_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>印章文本识别</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_seal_recognition_sdk.tar.gz>paddlex_hps_seal_recognition_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_seal_recognition_sdk.tar.gz">paddlex_hps_seal_recognition_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>文档图像预处理</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_doc_preprocessor_sdk.tar.gz">paddlex_hps_doc_preprocessor_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>时序预测</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_time_series_forecasting_sdk.tar.gz>paddlex_hps_time_series_forecasting_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_ts_forecast_sdk.tar.gz">paddlex_hps_ts_forecast_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>时序异常检测</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_time_series_anomaly_detection_sdk.tar.gz>paddlex_hps_time_series_anomaly_detection_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_ts_anomaly_detection_sdk.tar.gz">paddlex_hps_ts_anomaly_detection_sdk.tar.gz</a></td>
 </tr>
 <tr>
 <td>时序分类</td>
-<td><a href=https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0b2/paddlex_hps_time_series_classification_sdk.tar.gz>paddlex_hps_time_series_classification_sdk.tar.gz</a></td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_ts_classification_sdk.tar.gz">paddlex_hps_ts_classification_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>多语种语音识别</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_multilingual_speech_recognition_sdk.tar.gz">paddlex_hps_multilingual_speech_recognition_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>通用视频分类</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_video_classification_sdk.tar.gz">paddlex_hps_video_classification_sdk.tar.gz</a></td>
+</tr>
+<tr>
+<td>通用视频检测</td>
+<td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hps/public/sdks/v3.0.0rc0/paddlex_hps_video_detection_sdk.tar.gz">paddlex_hps_video_detection_sdk.tar.gz</a></td>
 </tr>
 </tbody>
 </table>
@@ -269,13 +313,13 @@ PaddleX 高稳定性服务化部署方案基于 NVIDIA Triton Inference Server 
 - 支持使用 NVIDIA GPU 部署的镜像(机器上需要安装有支持 CUDA 11.8 的 NVIDIA 驱动):
 
     ```bash
-    docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:paddlex3.0.0b2-gpu
+    docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:paddlex3.0.0rc0-gpu
     ```
 
 - CPU-only 镜像:
 
     ```bash
-    docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:paddlex3.0.0b2-cpu
+    docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlex/hps:paddlex3.0.0rc0-cpu
     ```
 
 准备好镜像后,执行如下命令运行服务器:

+ 15 - 17
paddlex/inference/serving/basic_serving/_app.py

@@ -42,9 +42,9 @@ from ..infra.config import AppConfig
 from ..infra.models import NoResultResponse
 from ..infra.utils import call_async, generate_log_id
 
-_PipelineT = TypeVar("_PipelineT", bound=BasePipeline)
-_P = ParamSpec("_P")
-_R = TypeVar("_R")
+PipelineT = TypeVar("PipelineT", bound=BasePipeline)
+P = ParamSpec("P")
+R = TypeVar("R")
 
 
 class _Error(TypedDict):
@@ -64,14 +64,14 @@ def _is_error(obj: object) -> TypeGuard[_Error]:
 # for type hinting. However, I would stick with the current design, as it does
 # not introduce runtime overhead at the moment and may prove useful in the
 # future.
-class PipelineWrapper(Generic[_PipelineT]):
-    def __init__(self, pipeline: _PipelineT) -> None:
+class PipelineWrapper(Generic[PipelineT]):
+    def __init__(self, pipeline: PipelineT) -> None:
         super().__init__()
         self._pipeline = pipeline
         self._lock = asyncio.Lock()
 
     @property
-    def pipeline(self) -> _PipelineT:
+    def pipeline(self) -> PipelineT:
         return self._pipeline
 
     async def infer(self, *args: Any, **kwargs: Any) -> List[Any]:
@@ -89,19 +89,17 @@ class PipelineWrapper(Generic[_PipelineT]):
 
         return await self.call(_infer)
 
-    async def call(
-        self, func: Callable[_P, _R], *args: _P.args, **kwargs: _P.kwargs
-    ) -> _R:
+    async def call(self, func: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
         async with self._lock:
             return await call_async(func, *args, **kwargs)
 
 
-class AppContext(Generic[_PipelineT]):
+class AppContext(Generic[PipelineT]):
     def __init__(self, *, config: AppConfig) -> None:
         super().__init__()
         self._config = config
         self.extra: Dict[str, Any] = {}
-        self._pipeline: Optional[PipelineWrapper[_PipelineT]] = None
+        self._pipeline: Optional[PipelineWrapper[PipelineT]] = None
         self._aiohttp_session: Optional[aiohttp.ClientSession] = None
 
     @property
@@ -109,13 +107,13 @@ class AppContext(Generic[_PipelineT]):
         return self._config
 
     @property
-    def pipeline(self) -> PipelineWrapper[_PipelineT]:
+    def pipeline(self) -> PipelineWrapper[PipelineT]:
         if not self._pipeline:
             raise AttributeError("`pipeline` has not been set.")
         return self._pipeline
 
     @pipeline.setter
-    def pipeline(self, val: PipelineWrapper[_PipelineT]) -> None:
+    def pipeline(self, val: PipelineWrapper[PipelineT]) -> None:
         self._pipeline = val
 
     @property
@@ -130,11 +128,11 @@ class AppContext(Generic[_PipelineT]):
 
 
 def create_app(
-    *, pipeline: _PipelineT, app_config: AppConfig, app_aiohttp_session: bool = True
-) -> Tuple[fastapi.FastAPI, AppContext[_PipelineT]]:
+    *, pipeline: PipelineT, app_config: AppConfig, app_aiohttp_session: bool = True
+) -> Tuple[fastapi.FastAPI, AppContext[PipelineT]]:
     @contextlib.asynccontextmanager
     async def _app_lifespan(app: fastapi.FastAPI) -> AsyncGenerator[None, None]:
-        ctx.pipeline = PipelineWrapper[_PipelineT](pipeline)
+        ctx.pipeline = PipelineWrapper[PipelineT](pipeline)
         if app_aiohttp_session:
             async with aiohttp.ClientSession(
                 cookie_jar=aiohttp.DummyCookieJar()
@@ -146,7 +144,7 @@ def create_app(
 
     # Should we control API versions?
     app = fastapi.FastAPI(lifespan=_app_lifespan)
-    ctx = AppContext[_PipelineT](config=app_config)
+    ctx = AppContext[PipelineT](config=app_config)
     app.state.context = ctx
 
     @app.get("/health", operation_id="checkHealth")

+ 6 - 1
paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py

@@ -21,7 +21,12 @@ from ...infra.config import create_app_config
 
 
 def _pipeline_name_to_mod_name(pipeline_name: str) -> str:
-    return pipeline_name.lower().replace("-", "_")
+    if not pipeline_name:
+        raise ValueError("Empty pipeline name")
+    mod_name = pipeline_name.lower().replace("-", "_")
+    if mod_name[0].isdigit():
+        return "m_" + mod_name
+    return mod_name
 
 
 # XXX: A dynamic approach is used here for writing fewer lines of code, at the
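Context for the new `m_` prefix: Python module names cannot begin with a digit, so the `3d_bev_detection` pipeline cannot map directly onto a module name. A quick illustration of the mapping rule:

```python
# Illustrative mapping, mirroring the rule added above.
def to_mod_name(name: str) -> str:
    mod = name.lower().replace("-", "_")
    return "m_" + mod if mod[0].isdigit() else mod

print(to_mod_name("PP-ShiTuV2"))        # pp_shituv2
print(to_mod_name("3d_bev_detection"))  # m_3d_bev_detection
```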

+ 2 - 4
paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py

@@ -17,7 +17,6 @@ from typing import Dict, Optional, Tuple, Union
 
 import cv2
 import numpy as np
-from numpy.typing import ArrayLike
 from PIL.Image import Image
 
 from ....infra import utils as serving_utils
@@ -41,7 +40,7 @@ def prune_result(result: dict) -> dict:
 
 
 def postprocess_image(
-    image: ArrayLike,
+    image: np.ndarray,
     log_id: str,
     filename: str,
     *,
@@ -59,7 +58,6 @@ def postprocess_image(
 
     key = f"{log_id}/{filename}"
     ext = os.path.splitext(filename)[1]
-    image = np.asarray(image)
     h, w = image.shape[0:2]
     if max_img_size is not None:
         if w > max_img_size[1] or h > max_img_size[0]:
@@ -78,7 +76,7 @@ def postprocess_image(
 
 
 def postprocess_images(
-    images: Dict[str, Union[Image, ArrayLike]],
+    images: Dict[str, Union[Image, np.ndarray]],
     log_id: str,
     filename_template: str = "{key}.jpg",
     file_storage: Optional[Storage] = None,
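With `ArrayLike` replaced by `np.ndarray` and the internal `np.asarray` call removed, callers are now responsible for converting PIL images (or other array-likes) before calling. A hedged usage sketch; the file name and keyword values are placeholders:

```python
import numpy as np
from PIL import Image

pil_img = Image.open("page.png")  # hypothetical input
arr = np.asarray(pil_img)         # convert before calling postprocess_image
# postprocess_image(arr, log_id, "page.jpg", max_img_size=(1080, 1920))
```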

+ 10 - 16
paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py

@@ -99,14 +99,11 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
         images = [serving_utils.image_bytes_to_array(item) for item in file_bytes_list]
         labels = [pair.label for pair in request.imageLabelPairs]
 
-        if request.indexKey is not None:
-            index_storage = ctx.extra["index_storage"]
-            index_data_bytes = await serving_utils.call_async(
-                index_storage.get, request.indexKey
-            )
-            index_data = IndexData.from_bytes(index_data_bytes)
-        else:
-            index_data = None
+        index_storage = ctx.extra["index_storage"]
+        index_data_bytes = await serving_utils.call_async(
+            index_storage.get, request.indexKey
+        )
+        index_data = IndexData.from_bytes(index_data_bytes)
 
         index_data = await pipeline.call(
             pipeline.pipeline.append_index, images, labels, index_data
@@ -132,14 +129,11 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
     ) -> ResultResponse[schema.RemoveImagesFromIndexResult]:
         pipeline = ctx.pipeline
 
-        if request.indexKey is not None:
-            index_storage = ctx.extra["index_storage"]
-            index_data_bytes = await serving_utils.call_async(
-                index_storage.get, request.indexKey
-            )
-            index_data = IndexData.from_bytes(index_data_bytes)
-        else:
-            index_data = None
+        index_storage = ctx.extra["index_storage"]
+        index_data_bytes = await serving_utils.call_async(
+            index_storage.get, request.indexKey
+        )
+        index_data = IndexData.from_bytes(index_data_bytes)
 
         index_data = await pipeline.call(
             pipeline.pipeline.remove_index, request.ids, index_data

+ 1 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py

@@ -50,6 +50,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
             images,
             use_doc_orientation_classify=request.useDocOrientationClassify,
             use_doc_unwarping=request.useDocUnwarping,
+            use_textline_orientation=request.useTextlineOrientation,
             use_general_ocr=request.useGeneralOcr,
             use_seal_recognition=request.useSealRecognition,
             use_table_recognition=request.useTableRecognition,

+ 126 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing_v2.py

@@ -0,0 +1,126 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.layout_parsing_v2 import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+from ._common import common
+from ._common import ocr as ocr_common
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    ocr_common.update_app_context(ctx)
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(
+        request: InferRequest,
+    ) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+
+        log_id = serving_utils.generate_log_id()
+
+        images, data_info = await ocr_common.get_images(request, ctx)
+
+        result = await pipeline.infer(
+            images,
+            use_doc_orientation_classify=request.useDocOrientationClassify,
+            use_doc_unwarping=request.useDocUnwarping,
+            use_textline_orientation=request.useTextlineOrientation,
+            use_general_ocr=request.useGeneralOcr,
+            use_seal_recognition=request.useSealRecognition,
+            use_table_recognition=request.useTableRecognition,
+            use_formula_recognition=request.useFormulaRecognition,
+            text_det_limit_side_len=request.textDetLimitSideLen,
+            text_det_limit_type=request.textDetLimitType,
+            text_det_thresh=request.textDetThresh,
+            text_det_box_thresh=request.textDetBoxThresh,
+            text_det_unclip_ratio=request.textDetUnclipRatio,
+            text_rec_score_thresh=request.textRecScoreThresh,
+            seal_det_limit_side_len=request.sealDetLimitSideLen,
+            seal_det_limit_type=request.sealDetLimitType,
+            seal_det_thresh=request.sealDetThresh,
+            seal_det_box_thresh=request.sealDetBoxThresh,
+            seal_det_unclip_ratio=request.sealDetUnclipRatio,
+            seal_rec_score_thresh=request.sealRecScoreThresh,
+            layout_nms=request.layoutNms,
+            layout_unclip_ratio=request.layoutUnclipRatio,
+            layout_merge_bboxes_mode=request.layoutMergeBboxesMode,
+        )
+
+        layout_parsing_results: List[Dict[str, Any]] = []
+        for i, (img, item) in enumerate(zip(images, result)):
+            pruned_res = common.prune_result(item.json["res"])
+            md_data = item.markdown
+            md_text = md_data["markdown_texts"]
+            md_imgs = await serving_utils.call_async(
+                common.postprocess_images,
+                md_data["markdown_images"],
+                log_id,
+                filename_template=f"markdown_{i}/{{key}}",
+                file_storage=ctx.extra["file_storage"],
+                return_urls=ctx.extra["return_img_urls"],
+                max_img_size=ctx.extra["max_output_img_size"],
+            )
+            if ctx.config.visualize:
+                imgs = {
+                    "input_img": img,
+                    **item.img,
+                }
+                imgs = await serving_utils.call_async(
+                    common.postprocess_images,
+                    imgs,
+                    log_id,
+                    filename_template=f"{{key}}_{i}.jpg",
+                    file_storage=ctx.extra["file_storage"],
+                    return_urls=ctx.extra["return_img_urls"],
+                    max_img_size=ctx.extra["max_output_img_size"],
+                )
+            else:
+                imgs = {}
+            layout_parsing_results.append(
+                dict(
+                    prunedResult=pruned_res,
+                    markdown=dict(text=md_text, images=md_imgs),
+                    outputImages=(
+                        {k: v for k, v in imgs.items() if k != "input_img"}
+                        if imgs
+                        else None
+                    ),
+                    inputImage=imgs.get("input_img"),
+                )
+            )
+
+        return ResultResponse[InferResult](
+            logId=log_id,
+            result=InferResult(
+                layoutParsingResults=layout_parsing_results,
+                dataInfo=data_info,
+            ),
+        )
+
+    return app
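A hedged client sketch for the new `layout_parsing_v2` app. The endpoint path (`/layout-parsing`) and the result fields come from the schema added later in this PR; the input field name (`file`), host, and port are assumptions:

```python
import base64
import requests

with open("doc.pdf", "rb") as f:  # hypothetical input document
    payload = {
        "file": base64.b64encode(f.read()).decode("ascii"),  # field name assumed
        "useTableRecognition": True,
        "useFormulaRecognition": False,
    }

resp = requests.post("http://localhost:8080/layout-parsing", json=payload)
for page in resp.json()["result"]["layoutParsingResults"]:
    print(page["markdown"]["text"][:200])  # per-page Markdown text
```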

+ 76 - 0
paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py

@@ -0,0 +1,76 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Any, Dict, List
+
+from fastapi import FastAPI
+
+from ...infra import utils as serving_utils
+from ...infra.config import AppConfig
+from ...infra.models import ResultResponse
+from ...schemas.m_3d_bev_detection import INFER_ENDPOINT, InferRequest, InferResult
+from .._app import create_app, primary_operation
+
+
+def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @primary_operation(
+        app,
+        INFER_ENDPOINT,
+        "infer",
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.tar, aiohttp_session
+        )
+        tar_path = await serving_utils.call_async(
+            serving_utils.write_to_temp_file,
+            file_bytes,
+            suffix=".tar",
+        )
+
+        try:
+            result = (
+                await pipeline.infer(
+                    tar_path,
+                )
+            )[0]
+        finally:
+            await serving_utils.call_async(os.unlink, tar_path)
+
+        objects: List[Dict[str, Any]] = []
+        for box, label, score in zip(
+            result["boxes_3d"], result["labels_3d"], result["scores_3d"]
+        ):
+            objects.append(
+                dict(
+                    bbox=box,
+                    categoryId=label,
+                    score=score,
+                )
+            )
+
+        return ResultResponse[InferResult](
+            logId=serving_utils.generate_log_id(),
+            result=InferResult(detectedObjects=objects),
+        )
+
+    return app
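A hedged client sketch for the new 3D BEV detection app. The endpoint (`/bev-3d-object-detection`) and the `tar` request field come from the schema added later in this PR; whether `tar` accepts base64 in addition to URLs follows from `get_raw_bytes_async` and is an assumption, as are host and port:

```python
import base64
import requests

with open("scene.tar", "rb") as f:  # hypothetical input archive
    tar_b64 = base64.b64encode(f.read()).decode("ascii")

resp = requests.post(
    "http://localhost:8080/bev-3d-object-detection",
    json={"tar": tar_b64},
)
for obj in resp.json()["result"]["detectedObjects"]:
    print(obj["categoryId"], obj["score"], obj["bbox"])  # bbox has 9 floats
```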

+ 1 - 1
paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, List
+from typing import Any
 
 import numpy as np
 import pycocotools.mask as mask_util

+ 9 - 3
paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py

@@ -120,11 +120,14 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
     ) -> ResultResponse[schema.BuildVectorStoreResult]:
         pipeline = ctx.pipeline
 
-        kwargs: Dict[str, Any] = {"flag_save_bytes_vector": True}
+        kwargs: Dict[str, Any] = {
+            "flag_save_bytes_vector": True,
+            "retriever_config": request.retrieverConfig,
+        }
         if request.minCharacters is not None:
             kwargs["min_characters"] = request.minCharacters
-        if request.llmRequestInterval is not None:
-            kwargs["llm_request_interval"] = request.llmRequestInterval
+        if request.blockSize is not None:
+            kwargs["block_size"] = request.blockSize
 
         vector_info = await serving_utils.call_async(
             pipeline.pipeline.build_vector,
@@ -159,6 +162,8 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
             table_rules_str=request.tableRulesStr,
             table_few_shot_demo_text_content=request.tableFewShotDemoTextContent,
             table_few_shot_demo_key_value_list=request.tableFewShotDemoKeyValueList,
+            chat_bot_config=request.chatBotConfig,
+            retriever_config=request.retrieverConfig,
         )
         if request.useVectorRetrieval is not None:
             kwargs["use_vector_retrieval"] = request.useVectorRetrieval
@@ -169,6 +174,7 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
             pipeline.pipeline.chat,
             request.keyList,
             request.visualInfo,
+            **kwargs,
         )
 
         return ResultResponse[schema.ChatResult](
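Worth noting in the hunk above: `kwargs` was previously assembled but never forwarded, so options such as `use_vector_retrieval` and `min_characters` were silently dropped; the added `**kwargs` in the `chat` call fixes that.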

+ 39 - 3
paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py

@@ -120,11 +120,14 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
     ) -> ResultResponse[schema.BuildVectorStoreResult]:
         pipeline = ctx.pipeline
 
-        kwargs: Dict[str, Any] = {"flag_save_bytes_vector": True}
+        kwargs: Dict[str, Any] = {
+            "flag_save_bytes_vector": True,
+            "retriever_config": request.retrieverConfig,
+        }
         if request.minCharacters is not None:
             kwargs["min_characters"] = request.minCharacters
-        if request.llmRequestInterval is not None:
-            kwargs["llm_request_interval"] = request.llmRequestInterval
+        if request.blockSize is not None:
+            kwargs["block_size"] = request.blockSize
 
         vector_info = await serving_utils.call_async(
             pipeline.pipeline.build_vector,
@@ -139,6 +142,34 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
 
     @primary_operation(
         app,
+        schema.INVOKE_MLLM_ENDPOINT,
+        "invokeMllm",
+    )
+    async def _invoke_mllm(
+        request: schema.InvokeMLLMRequest,
+    ) -> ResultResponse[schema.InvokeMLLMResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        file_bytes = await serving_utils.get_raw_bytes_async(
+            request.image, aiohttp_session
+        )
+        image = serving_utils.image_bytes_to_array(file_bytes)
+
+        mllm_predict_info = await serving_utils.call_async(
+            pipeline.pipeline.mllm_pred,
+            image,
+            request.keyList,
+            mllm_chat_bot_config=request.mllmChatBotConfig,
+        )
+
+        return ResultResponse[schema.InvokeMLLMResult](
+            logId=serving_utils.generate_log_id(),
+            result=schema.InvokeMLLMResult(mllmPredictInfo=mllm_predict_info),
+        )
+
+    @primary_operation(
+        app,
         schema.CHAT_ENDPOINT,
         "chat",
     )
@@ -159,16 +190,21 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
             table_rules_str=request.tableRulesStr,
             table_few_shot_demo_text_content=request.tableFewShotDemoTextContent,
             table_few_shot_demo_key_value_list=request.tableFewShotDemoKeyValueList,
+            chat_bot_config=request.chatBotConfig,
+            retriever_config=request.retrieverConfig,
         )
         if request.useVectorRetrieval is not None:
             kwargs["use_vector_retrieval"] = request.useVectorRetrieval
         if request.minCharacters is not None:
             kwargs["min_characters"] = request.minCharacters
+        if request.mllmIntegrationStrategy is not None:
+            kwargs["mllm_integration_strategy"] = request.mllmIntegrationStrategy
 
         result = await serving_utils.call_async(
             pipeline.pipeline.chat,
             request.keyList,
             request.visualInfo,
+            **kwargs,
         )
 
         return ResultResponse[schema.ChatResult](
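A hedged sketch of calling the new `invokeMllm` operation. The request fields (`image`, `keyList`) and the `mllmPredictInfo` result field come from the handler above; the endpoint path is defined by `INVOKE_MLLM_ENDPOINT` in a schema module not shown in this diff, so the path below is a placeholder:

```python
import requests

resp = requests.post(
    "http://localhost:8080/chatocr-mllm",  # placeholder path
    json={
        "image": "https://example.com/contract_page.jpg",  # URL or base64
        "keyList": ["party_a", "party_b"],
    },
)
mllm_info = resp.json()["result"]["mllmPredictInfo"]
```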

+ 10 - 16
paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py

@@ -94,14 +94,11 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
         images = [serving_utils.image_bytes_to_array(item) for item in file_bytes_list]
         labels = [pair.label for pair in request.imageLabelPairs]
 
-        if request.indexKey is not None:
-            index_storage = ctx.extra["index_storage"]
-            index_data_bytes = await serving_utils.call_async(
-                index_storage.get, request.indexKey
-            )
-            index_data = IndexData.from_bytes(index_data_bytes)
-        else:
-            index_data = None
+        index_storage = ctx.extra["index_storage"]
+        index_data_bytes = await serving_utils.call_async(
+            index_storage.get, request.indexKey
+        )
+        index_data = IndexData.from_bytes(index_data_bytes)
 
         index_data = await pipeline.call(
             pipeline.pipeline.append_index, images, labels, index_data
@@ -127,14 +124,11 @@ def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
     ) -> ResultResponse[schema.RemoveImagesFromIndexResult]:
         pipeline = ctx.pipeline
 
-        if request.indexKey is not None:
-            index_storage = ctx.extra["index_storage"]
-            index_data_bytes = await serving_utils.call_async(
-                index_storage.get, request.indexKey
-            )
-            index_data = IndexData.from_bytes(index_data_bytes)
-        else:
-            index_data = None
+        index_storage = ctx.extra["index_storage"]
+        index_data_bytes = await serving_utils.call_async(
+            index_storage.get, request.indexKey
+        )
+        index_data = IndexData.from_bytes(index_data_bytes)
 
         index_data = await pipeline.call(
             pipeline.pipeline.remove_index, request.ids, index_data

+ 4 - 4
paddlex/inference/serving/infra/utils.py

@@ -59,8 +59,8 @@ __all__ = [
 
 FileType: TypeAlias = Literal["IMAGE", "PDF", "VIDEO", "AUDIO"]
 
-_P = ParamSpec("_P")
-_R = TypeVar("_R")
+P = ParamSpec("P")
+R = TypeVar("R")
 
 
 def generate_log_id() -> str:
@@ -252,8 +252,8 @@ async def get_raw_bytes_async(file: str, session: aiohttp.ClientSession) -> byte
 
 
 def call_async(
-    func: Callable[_P, _R], /, *args: _P.args, **kwargs: _P.kwargs
-) -> Awaitable[_R]:
+    func: Callable[P, R], /, *args: P.args, **kwargs: P.kwargs
+) -> Awaitable[R]:
     return asyncio.get_running_loop().run_in_executor(
         None, partial(func, *args, **kwargs)
     )
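The `ParamSpec` here is what lets `call_async` forward `*args`/`**kwargs` to a thread-pool executor while preserving the wrapped function's exact signature for type checkers. A self-contained sketch of the same pattern, independent of PaddleX:

```python
import asyncio
from functools import partial
from typing import Awaitable, Callable

from typing_extensions import ParamSpec, TypeVar

P = ParamSpec("P")
R = TypeVar("R")

def call_async(func: Callable[P, R], /, *args: P.args, **kwargs: P.kwargs) -> Awaitable[R]:
    # Run the blocking call in the default executor without losing typing.
    return asyncio.get_running_loop().run_in_executor(None, partial(func, *args, **kwargs))

async def main() -> None:
    n = await call_async(len, "hello")  # call_async(len, 123) fails type checking
    print(n)  # 5

asyncio.run(main())
```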

+ 2 - 2
paddlex/inference/serving/schemas/face_recognition.py

@@ -61,7 +61,7 @@ ADD_IMAGES_TO_INDEX_ENDPOINT: Final[str] = "/face-recognition-index-add"
 
 class AddImagesToIndexRequest(BaseModel):
     imageLabelPairs: List[ImageLabelPair]
-    indexKey: Optional[str] = None
+    indexKey: str
 
 
 class AddImagesToIndexResult(BaseModel):
@@ -73,7 +73,7 @@ REMOVE_IMAGES_FROM_INDEX_ENDPOINT: Final[str] = "/face-recognition-index-remove"
 
 class RemoveImagesFromIndexRequest(BaseModel):
     ids: List[int]
-    indexKey: Optional[str] = None
+    indexKey: str
 
 
 class RemoveImagesFromIndexResult(BaseModel):
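Since `indexKey` is now required on both index-mutation requests (matching the app change above that unconditionally loads the index), clients must always supply it. A hedged request sketch; the `image` field name inside `ImageLabelPair`, host, and port are assumptions:

```python
import base64
import requests

with open("face.jpg", "rb") as f:  # hypothetical image
    img_b64 = base64.b64encode(f.read()).decode("ascii")

resp = requests.post(
    "http://localhost:8080/face-recognition-index-add",
    json={
        "imageLabelPairs": [{"image": img_b64, "label": "alice"}],  # "image" assumed
        "indexKey": "my-index-001",  # now mandatory
    },
)
print(resp.json())
```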

+ 2 - 1
paddlex/inference/serving/schemas/human_keypoint_detection.py

@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Final, List, Optional, TypeAlias, Annotated
+from typing import Final, List, Optional
 
 from pydantic import BaseModel, Field
+from typing_extensions import Annotated, TypeAlias
 
 from ..infra.models import PrimaryOperations
 from .shared import object_detection

+ 5 - 4
paddlex/inference/serving/schemas/layout_parsing.py

@@ -15,7 +15,7 @@
 from typing import Dict, Final, List, Optional, Union
 
 from pydantic import BaseModel, Field
-from typing_extensions import Annotated, Literal
+from typing_extensions import Annotated
 
 from ..infra.models import DataInfo, PrimaryOperations
 from .shared import ocr
@@ -34,18 +34,19 @@ INFER_ENDPOINT: Final[str] = "/layout-parsing"
 class InferRequest(ocr.BaseInferRequest):
     useDocOrientationClassify: Optional[bool] = None
     useDocUnwarping: Optional[bool] = None
+    useTextlineOrientation: Optional[bool] = None
     useGeneralOcr: Optional[bool] = None
     useSealRecognition: Optional[bool] = None
     useTableRecognition: Optional[bool] = None
     useFormulaRecognition: Optional[bool] = None
     textDetLimitSideLen: Optional[int] = None
-    textDetLimitType: Optional[Literal["min", "max"]] = None
+    textDetLimitType: Optional[str] = None
     textDetThresh: Optional[float] = None
     textDetBoxThresh: Optional[float] = None
     textDetUnclipRatio: Optional[float] = None
     textRecScoreThresh: Optional[float] = None
     sealDetLimitSideLen: Optional[int] = None
-    sealDetLimitType: Optional[Literal["min", "max"]] = None
+    sealDetLimitType: Optional[str] = None
     sealDetThresh: Optional[float] = None
     sealDetBoxThresh: Optional[float] = None
     sealDetUnclipRatio: Optional[float] = None
@@ -55,7 +56,7 @@ class InferRequest(ocr.BaseInferRequest):
     layoutUnclipRatio: Optional[
         Union[float, Annotated[List[float], Field(min_length=2, max_length=2)]]
     ] = None
-    layoutMergeBboxesMode: Optional[Literal["union", "large", "small"]] = None
+    layoutMergeBboxesMode: Optional[str] = None
 
 
 class LayoutParsingResult(BaseModel):

+ 82 - 0
paddlex/inference/serving/schemas/layout_parsing_v2.py

@@ -0,0 +1,82 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Final, List, Optional, Union
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated
+
+from ..infra.models import DataInfo, PrimaryOperations
+from .shared import ocr
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "MarkdownData",
+    "LayoutParsingResult",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+INFER_ENDPOINT: Final[str] = "/layout-parsing"
+
+
+class InferRequest(ocr.BaseInferRequest):
+    useDocOrientationClassify: Optional[bool] = None
+    useDocUnwarping: Optional[bool] = None
+    useTextlineOrientation: Optional[bool] = None
+    useGeneralOcr: Optional[bool] = None
+    useSealRecognition: Optional[bool] = None
+    useTableRecognition: Optional[bool] = None
+    useFormulaRecognition: Optional[bool] = None
+    textDetLimitSideLen: Optional[int] = None
+    textDetLimitType: Optional[str] = None
+    textDetThresh: Optional[float] = None
+    textDetBoxThresh: Optional[float] = None
+    textDetUnclipRatio: Optional[float] = None
+    textRecScoreThresh: Optional[float] = None
+    sealDetLimitSideLen: Optional[int] = None
+    sealDetLimitType: Optional[str] = None
+    sealDetThresh: Optional[float] = None
+    sealDetBoxThresh: Optional[float] = None
+    sealDetUnclipRatio: Optional[float] = None
+    sealRecScoreThresh: Optional[float] = None
+    layoutThreshold: Optional[float] = None
+    layoutNms: Optional[bool] = None
+    layoutUnclipRatio: Optional[
+        Union[float, Annotated[List[float], Field(min_length=2, max_length=2)]]
+    ] = None
+    layoutMergeBboxesMode: Optional[str] = None
+
+
+class MarkdownData(BaseModel):
+    text: str
+    images: Dict[str, str]
+
+
+class LayoutParsingResult(BaseModel):
+    prunedResult: dict
+    markdown: MarkdownData
+    outputImages: Optional[Dict[str, str]] = None
+    inputImage: Optional[str] = None
+
+
+class InferResult(BaseModel):
+    layoutParsingResults: List[LayoutParsingResult]
+    dataInfo: DataInfo
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}

+ 49 - 0
paddlex/inference/serving/schemas/m_3d_bev_detection.py

@@ -0,0 +1,49 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Final, List
+
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated
+
+from ..infra.models import PrimaryOperations
+
+__all__ = [
+    "INFER_ENDPOINT",
+    "InferRequest",
+    "DetectedObject",
+    "InferResult",
+    "PRIMARY_OPERATIONS",
+]
+
+INFER_ENDPOINT: Final[str] = "/bev-3d-object-detection"
+
+
+class InferRequest(BaseModel):
+    tar: str
+
+
+class DetectedObject(BaseModel):
+    bbox: Annotated[List[float], Field(min_length=9, max_length=9)]
+    categoryId: int
+    score: float
+
+
+class InferResult(BaseModel):
+    detectedObjects: List[DetectedObject]
+
+
+PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
+    "infer": (INFER_ENDPOINT, InferRequest, InferResult),
+}
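
A matching sketch for the new 3D BEV detection endpoint. The tar payload is assumed to be a Base64-encoded archive of point-cloud inputs (a URL may also be accepted), and the exact bbox layout is an assumption:

```python
import base64

import requests

API_URL = "http://localhost:8080/bev-3d-object-detection"  # assumed address

with open("sample_inputs.tar", "rb") as f:  # placeholder archive
    tar_b64 = base64.b64encode(f.read()).decode("ascii")

resp = requests.post(API_URL, json={"tar": tar_b64})
resp.raise_for_status()

for obj in resp.json()["result"]["detectedObjects"]:
    # bbox is a fixed 9-element vector, typically center, size, yaw, and
    # velocity components; the exact layout depends on the pipeline.
    print(obj["categoryId"], round(obj["score"], 3), obj["bbox"])
```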

+ 2 - 3
paddlex/inference/serving/schemas/ocr.py

@@ -15,7 +15,6 @@
 from typing import Final, List, Optional
 
 from pydantic import BaseModel
-from typing_extensions import Literal
 
 from ..infra.models import DataInfo, PrimaryOperations
 from .shared import ocr
@@ -34,9 +33,9 @@ INFER_ENDPOINT: Final[str] = "/ocr"
 class InferRequest(ocr.BaseInferRequest):
     useDocOrientationClassify: Optional[bool] = None
     useDocUnwarping: Optional[bool] = None
-    useTextlineOrientation: Optional[bool] = False
+    useTextlineOrientation: Optional[bool] = None
     textDetLimitSideLen: Optional[int] = None
-    textDetLimitType: Optional[Literal["min", "max"]] = None
+    textDetLimitType: Optional[str] = None
     # Better to use "threshold"? Be consistent with the pipeline API though.
     textDetThresh: Optional[float] = None
     textDetBoxThresh: Optional[float] = None
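
Moving the `useTextlineOrientation` default from `False` to `None` means an omitted field now defers to the pipeline configuration instead of forcing the module off, and the relaxed `textDetLimitType` is validated downstream rather than by Pydantic. A request relying on both, with placeholders for the input:

```python
import requests

resp = requests.post(
    "http://localhost:8080/ocr",  # assumed service address
    json={
        "file": "<base64-or-url>",  # placeholder
        "fileType": 1,
        # useTextlineOrientation omitted: the pipeline config now decides.
        "textDetLimitType": "min",  # accepted as a plain string
    },
)
resp.raise_for_status()
print(resp.json()["result"])
```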

+ 1 - 1
paddlex/inference/serving/schemas/open_vocabulary_detection.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Final, List, Optional, Union
+from typing import Dict, Final, List, Optional
 
 from pydantic import BaseModel
 

+ 6 - 4
paddlex/inference/serving/schemas/pp_chatocrv3_doc.py

@@ -15,7 +15,6 @@
 from typing import Dict, Final, List, Optional
 
 from pydantic import BaseModel
-from typing_extensions import Literal
 
 from ..infra.models import DataInfo, PrimaryOperations
 from .shared import ocr
@@ -44,13 +43,13 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
     useSealRecognition: Optional[bool] = None
     useTableRecognition: Optional[bool] = None
     textDetLimitSideLen: Optional[int] = None
-    textDetLimitType: Optional[Literal["min", "max"]] = None
+    textDetLimitType: Optional[str] = None
     textDetThresh: Optional[float] = None
     textDetBoxThresh: Optional[float] = None
     textDetUnclipRatio: Optional[float] = None
     textRecScoreThresh: Optional[float] = None
     sealDetLimitSideLen: Optional[int] = None
-    sealDetLimitType: Optional[Literal["min", "max"]] = None
+    sealDetLimitType: Optional[str] = None
     sealDetThresh: Optional[float] = None
     sealDetBoxThresh: Optional[float] = None
     sealDetUnclipRatio: Optional[float] = None
@@ -77,7 +76,8 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
 class BuildVectorStoreRequest(BaseModel):
     visualInfo: List[dict]
     minCharacters: Optional[int] = None
-    llmRequestInterval: Optional[float] = None
+    blockSize: Optional[int] = None
+    retrieverConfig: Optional[dict] = None
 
 
 class BuildVectorStoreResult(BaseModel):
@@ -107,6 +107,8 @@ class ChatRequest(BaseModel):
     tableRulesStr: Optional[str] = None
     tableFewShotDemoTextContent: Optional[str] = None
     tableFewShotDemoKeyValueList: Optional[str] = None
+    chatBotConfig: Optional[dict] = None
+    retrieverConfig: Optional[dict] = None
 
 
 class ChatResult(BaseModel):

+ 29 - 10
paddlex/inference/serving/schemas/pp_chatocrv4_doc.py

@@ -15,7 +15,6 @@
 from typing import Dict, Final, List, Optional
 
 from pydantic import BaseModel
-from typing_extensions import Literal
 
 from ..infra.models import DataInfo, PrimaryOperations
 from .shared import ocr
@@ -28,6 +27,9 @@ __all__ = [
     "BUILD_VECTOR_STORE_ENDPOINT",
     "BuildVectorStoreRequest",
     "BuildVectorStoreResult",
+    "INVOKE_MLLM_ENDPOINT",
+    "InvokeMLLMRequest",
+    "InvokeMLLMResult",
     "CHAT_ENDPOINT",
     "ChatRequest",
     "ChatResult",
@@ -44,13 +46,13 @@ class AnalyzeImagesRequest(ocr.BaseInferRequest):
     useSealRecognition: Optional[bool] = None
     useTableRecognition: Optional[bool] = None
     textDetLimitSideLen: Optional[int] = None
-    textDetLimitType: Optional[Literal["min", "max"]] = None
+    textDetLimitType: Optional[str] = None
     textDetThresh: Optional[float] = None
     textDetBoxThresh: Optional[float] = None
     textDetUnclipRatio: Optional[float] = None
     textRecScoreThresh: Optional[float] = None
     sealDetLimitSideLen: Optional[int] = None
-    sealDetLimitType: Optional[Literal["min", "max"]] = None
+    sealDetLimitType: Optional[str] = None
     sealDetThresh: Optional[float] = None
     sealDetBoxThresh: Optional[float] = None
     sealDetUnclipRatio: Optional[float] = None
@@ -65,8 +67,6 @@ class LayoutParsingResult(BaseModel):
 
 class AnalyzeImagesResult(BaseModel):
     layoutParsingResults: List[LayoutParsingResult]
-    # `visualInfo` is made a separate field to facilitate its use in subsequent
-    # steps.
     visualInfo: List[dict]
     dataInfo: DataInfo
 
@@ -77,13 +77,27 @@ BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
 class BuildVectorStoreRequest(BaseModel):
     visualInfo: List[dict]
     minCharacters: Optional[int] = None
-    llmRequestInterval: Optional[float] = None
+    blockSize: Optional[int] = None
+    retrieverConfig: Optional[dict] = None
 
 
 class BuildVectorStoreResult(BaseModel):
     vectorInfo: dict
 
 
+INVOKE_MLLM_ENDPOINT: Final[str] = "/chatocr-mllm"
+
+
+class InvokeMLLMRequest(BaseModel):
+    image: str
+    keyList: List[str]
+    mllmChatBotConfig: Optional[dict] = None
+
+
+class InvokeMLLMResult(BaseModel):
+    mllmPredictInfo: dict
+
+
 CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
 
 
@@ -95,11 +109,7 @@ class ChatRequest(BaseModel):
     minCharacters: Optional[int] = None
     textTaskDescription: Optional[str] = None
     textOutputFormat: Optional[str] = None
-    # Is the "Str" in the name unnecessary? Keep the names consistent with the
-    # parameters of the wrapped function though.
     textRulesStr: Optional[str] = None
-    # Should this be just "text" instead of "text content", given that there is
-    # no container?
     textFewShotDemoTextContent: Optional[str] = None
     textFewShotDemoKeyValueList: Optional[str] = None
     tableTaskDescription: Optional[str] = None
@@ -107,6 +117,10 @@ class ChatRequest(BaseModel):
     tableRulesStr: Optional[str] = None
     tableFewShotDemoTextContent: Optional[str] = None
     tableFewShotDemoKeyValueList: Optional[str] = None
+    mllmPredictInfo: Optional[dict] = None
+    mllmIntegrationStrategy: Optional[str] = None
+    chatBotConfig: Optional[dict] = None
+    retrieverConfig: Optional[dict] = None
 
 
 class ChatResult(BaseModel):
@@ -124,5 +138,10 @@ PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
         BuildVectorStoreRequest,
         BuildVectorStoreResult,
     ),
+    "invokeMllm": (
+        INVOKE_MLLM_ENDPOINT,
+        InvokeMLLMRequest,
+        InvokeMLLMResult,
+    ),
     "chat": (CHAT_ENDPOINT, ChatRequest, ChatResult),
 }
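
With `invokeMllm` registered, the PP-ChatOCRv4 flow becomes a four-step exchange. A hedged end-to-end sketch; the `/chatocr-visual` path and the `keyList`/`visualInfo`/`vectorInfo` chat fields sit outside this hunk and are assumptions taken from the surrounding schema:

```python
import requests

BASE = "http://localhost:8080"  # assumed service address
key_list = ["invoice number"]

# 1. Visual analysis (endpoint path assumed; schema not part of this hunk).
visual = requests.post(
    f"{BASE}/chatocr-visual",
    json={"file": "<base64-or-url>", "fileType": 1},  # placeholders
).json()["result"]

# 2. Build the vector store; blockSize/retrieverConfig replace llmRequestInterval.
vector = requests.post(
    f"{BASE}/chatocr-vector",
    json={"visualInfo": visual["visualInfo"], "blockSize": 512},
).json()["result"]

# 3. New in this revision: query a multimodal LLM about a single image.
mllm = requests.post(
    f"{BASE}/chatocr-mllm",
    json={"image": "<base64-or-url>", "keyList": key_list},
).json()["result"]

# 4. Chat, fusing the MLLM prediction into the final key-value extraction.
chat = requests.post(
    f"{BASE}/chatocr-chat",
    json={
        "keyList": key_list,
        "visualInfo": visual["visualInfo"],
        "vectorInfo": vector["vectorInfo"],
        "mllmPredictInfo": mllm["mllmPredictInfo"],
    },
).json()["result"]
print(chat)
```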

+ 2 - 2
paddlex/inference/serving/schemas/pp_shituv2.py

@@ -61,7 +61,7 @@ ADD_IMAGES_TO_INDEX_ENDPOINT: Final[str] = "/shitu-index-add"
 
 class AddImagesToIndexRequest(BaseModel):
     imageLabelPairs: List[ImageLabelPair]
-    indexKey: Optional[str] = None
+    indexKey: str
 
 
 class AddImagesToIndexResult(BaseModel):
@@ -73,7 +73,7 @@ REMOVE_IMAGES_FROM_INDEX_ENDPOINT: Final[str] = "/shitu-index-remove"
 
 class RemoveImagesFromIndexRequest(BaseModel):
     ids: List[int]
-    indexKey: Optional[str] = None
+    indexKey: str
 
 
 class RemoveImagesFromIndexResult(BaseModel):
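
Making `indexKey` required is a breaking change for clients that previously omitted it. A minimal sketch of both mutation calls, assuming an index key obtained from the earlier index-build operation and the `ImageLabelPair` fields defined above this hunk:

```python
import requests

BASE = "http://localhost:8080"  # assumed service address
index_key = "<key-returned-by-the-index-build-operation>"  # placeholder

# indexKey is now mandatory for both index-mutation operations.
requests.post(
    f"{BASE}/shitu-index-add",
    json={
        "imageLabelPairs": [{"image": "<base64-or-url>", "label": "cat"}],
        "indexKey": index_key,
    },
).raise_for_status()

requests.post(
    f"{BASE}/shitu-index-remove",
    json={"ids": [0, 1], "indexKey": index_key},
).raise_for_status()
```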

+ 3 - 3
paddlex/inference/serving/schemas/seal_recognition.py

@@ -15,7 +15,7 @@
 from typing import Dict, Final, List, Optional, Union
 
 from pydantic import BaseModel, Field
-from typing_extensions import Annotated, Literal
+from typing_extensions import Annotated
 
 from ..infra.models import DataInfo, PrimaryOperations
 from .shared import ocr
@@ -40,9 +40,9 @@ class InferRequest(ocr.BaseInferRequest):
     layoutUnclipRatio: Optional[
         Union[float, Annotated[List[float], Field(min_length=2, max_length=2)]]
     ] = None
-    layoutMergeBboxesMode: Optional[Literal["union", "large", "small"]] = None
+    layoutMergeBboxesMode: Optional[str] = None
     sealDetLimitSideLen: Optional[int] = None
-    sealDetLimitType: Optional[Literal["min", "max"]] = None
+    sealDetLimitType: Optional[str] = None
     sealDetThresh: Optional[float] = None
     sealDetBoxThresh: Optional[float] = None
     sealDetUnclipRatio: Optional[float] = None

+ 1 - 1
paddlex/inference/serving/schemas/shared/image_segmentation.py

@@ -14,7 +14,7 @@
 
 from typing import List
 
-from pydantic import Field, BaseModel
+from pydantic import BaseModel, Field
 from typing_extensions import Annotated, TypeAlias
 
 __all__ = ["Size", "Mask"]

+ 3 - 3
paddlex/inference/serving/schemas/table_recognition.py

@@ -15,7 +15,7 @@
 from typing import Dict, Final, List, Optional, Union
 
 from pydantic import BaseModel, Field
-from typing_extensions import Annotated, Literal
+from typing_extensions import Annotated
 
 from ..infra.models import DataInfo, PrimaryOperations
 from .shared import ocr
@@ -41,9 +41,9 @@ class InferRequest(ocr.BaseInferRequest):
     layoutUnclipRatio: Optional[
         Union[float, Annotated[List[float], Field(min_length=2, max_length=2)]]
     ] = None
-    layoutMergeBboxesMode: Optional[Literal["union", "large", "small"]] = None
+    layoutMergeBboxesMode: Optional[str] = None
     textDetLimitSideLen: Optional[int] = None
-    textDetLimitType: Optional[Literal["min", "max"]] = None
+    textDetLimitType: Optional[str] = None
     textDetThresh: Optional[float] = None
     textDetBoxThresh: Optional[float] = None
     textDetUnclipRatio: Optional[float] = None

+ 3 - 3
paddlex/inference/serving/schemas/table_recognition_v2.py

@@ -15,7 +15,7 @@
 from typing import Dict, Final, List, Optional, Union
 
 from pydantic import BaseModel, Field
-from typing_extensions import Annotated, Literal
+from typing_extensions import Annotated
 
 from ..infra.models import DataInfo, PrimaryOperations
 from .shared import ocr
@@ -41,9 +41,9 @@ class InferRequest(ocr.BaseInferRequest):
     layoutUnclipRatio: Optional[
         Union[float, Annotated[List[float], Field(min_length=2, max_length=2)]]
     ] = None
-    layoutMergeBboxesMode: Optional[Literal["union", "large", "small"]] = None
+    layoutMergeBboxesMode: Optional[str] = None
     textDetLimitSideLen: Optional[int] = None
-    textDetLimitType: Optional[Literal["min", "max"]] = None
+    textDetLimitType: Optional[str] = None
     textDetThresh: Optional[float] = None
     textDetBoxThresh: Optional[float] = None
     textDetUnclipRatio: Optional[float] = None

+ 1 - 1
paddlex/inference/utils/official_models.py

@@ -343,7 +343,7 @@ class OfficialModelsDict(dict):
         url = super().__getitem__(key)
         save_dir = Path(CACHE_DIR) / "official_models"
         logging.info(
-            f"Using official model ({key}), the model files will be be automatically downloaded and saved in {save_dir}."
+            f"Using official model ({key}), the model files will be automatically downloaded and saved in {save_dir}."
         )
         download_and_extract(url, save_dir, f"{key}", overwrite=False)
         return save_dir / f"{key}"

+ 1 - 1
paddlex/paddle2onnx_requirements.txt

@@ -1 +1 @@
-paddle2onnx>=1.3
+paddle2onnx>=2

+ 1 - 0
setup.py

@@ -81,6 +81,7 @@ def packages_and_package_data():
     pkg_data.append("utils/fonts/PingFang-SC-Regular.ttf")
     pkg_data.append("repo_manager/requirements.txt")
     pkg_data.append("serving_requirements.txt")
+    pkg_data.append("paddle2onnx_requirements.txt")
     return pkgs, {"paddlex": pkg_data}