[Feat] Add seal recognition, formula recognition, and layout parsing serving apps (#2164)

* Fix duplicate logs when using uvicorn 0.16.0 (see the logging sketch after this list)

* Fix

* Enhance

* Update anomaly detection endpoint name

* Add seal recognition, formula recognition, and small object detection apps

* Update ppchatocrv3 interfaces

* Add pipeline info tracing mechanism

* Fix bugs and polish docs

* Fix and refine documents

* Fix

* Add seal recognition serving docs

* formulae->formulas

* Add layout_parsing app and fix ppchatocrv3 bugs
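
The duplicate-log fix in the first bullet has no hunk shown in this view. As a hedged illustration only, and not necessarily the change made in this commit: duplicate lines under uvicorn typically appear when an application logger both owns a handler and still propagates records up to the root logger that uvicorn configures, so the usual remedy is to attach exactly one handler and disable propagation.

```python
# Hedged sketch of the common duplicate-log fix under uvicorn. The logger name
# "paddlex" is an assumption for illustration; the pattern is what matters.
import logging

logger = logging.getLogger("paddlex")
if not logger.handlers:  # attach exactly one handler
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    logger.addHandler(handler)
logger.propagate = False  # keep records from also reaching uvicorn's root handlers
```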
Lin Manhui 1 year ago
parent
commit
43f224cf18
36 changed files with 2070 additions and 527 deletions
1. docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection_en.md (+9, -9)
2. docs/pipeline_usage/tutorials/cv_pipelines/image_classification_en.md (+1, -1)
3. docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification_en.md (+1, -1)
4. docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.md (+4, -4)
5. docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation_en.md (+5, -5)
6. docs/pipeline_usage/tutorials/cv_pipelines/object_detection.md (+12, -12)
7. docs/pipeline_usage/tutorials/cv_pipelines/object_detection_en.md (+13, -13)
8. docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation_en.md (+1, -1)
9. docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md (+9, -9)
10. docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection_en.md (+13, -13)
11. docs/pipeline_usage/tutorials/information_extration_pipelines/document_scene_information_extraction.md (+104, -104)
12. docs/pipeline_usage/tutorials/information_extration_pipelines/document_scene_information_extraction_en.md (+103, -107)
13. docs/pipeline_usage/tutorials/ocr_pipelines/OCR.md (+12, -12)
14. docs/pipeline_usage/tutorials/ocr_pipelines/OCR_en.md (+12, -12)
15. docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md (+39, -62)
16. docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition_en.md (+42, -65)
17. docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md (+60, -9)
18. docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md (+460, -0)
19. docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition_en.md (+456, -31)
20. docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.md (+11, -11)
21. docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition_en.md (+12, -12)
22. docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection_en.md (+1, -1)
23. docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification_en.md (+1, -1)
24. docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting_en.md (+1, -1)
25. paddlex/inference/pipelines/base.py (+29, -2)
26. paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py (+28, -1)
27. paddlex/inference/pipelines/serving/_pipeline_apps/anomaly_detection.py (+1, -1)
28. paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py (+107, -0)
29. paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py (+275, -0)
30. paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py (+22, -20)
31. paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py (+112, -0) (see the client sketch after this list)
32. paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py (+88, -0)
33. paddlex/inference/pipelines/serving/app.py (+6, -5)
34. paddlex/inference/pipelines/serving/server.py (+4, -0)
35. paddlex/inference/pipelines/serving/utils.py (+1, -1)
36. paddlex/inference/results/seal_rec.py (+15, -1)
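
The full doc diffs for the new seal recognition and layout parsing serving apps are not reproduced below. Assuming they follow the same serving convention as the other apps in this commit (a POST endpoint that accepts a Base64-encoded file and returns a JSON body with a `result` object), a client call might look like the following sketch; the `/seal-recognition` path and the `image` request key are assumptions inferred from that convention, not values confirmed by the hunks shown here.

```python
# Hedged sketch of a call to the new seal recognition serving app. The endpoint
# path and request key are assumptions based on this commit's serving convention.
import base64
import pprint

import requests

API_URL = "http://localhost:8080/seal-recognition"  # assumed endpoint path

with open("./demo.jpg", "rb") as f:
    payload = {"image": base64.b64encode(f.read()).decode("ascii")}

response = requests.post(API_URL, json=payload)
response.raise_for_status()
pprint.pp(response.json()["result"])  # inspect the result keys before relying on them
```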

+ 9 - 9
docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection_en.md

@@ -171,7 +171,7 @@ Operations provided by the service:
 
     Performs anomaly detection on images.
 
-    `POST /anomaly-detection`
+    `POST /image-anomaly-detection`
 
     - Request body properties:
 
@@ -208,7 +208,7 @@ Operations provided by the service:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>
@@ -217,7 +217,7 @@ Operations provided by the service:
 import base64
 import requests
 
-API_URL = "http://localhost:8080/anomaly-detection"
+API_URL = "http://localhost:8080/image-anomaly-detection"
 image_path = "./demo.jpg"
 output_image_path = "./out.jpg"
 
@@ -272,7 +272,7 @@ int main() {
     jsonObj["image"] = encodedImage;
     std::string body = jsonObj.dump();
 
-    auto response = client.Post("/anomaly-detection", headers, body, "application/json");
+    auto response = client.Post("/image-anomaly-detection", headers, body, "application/json");
     if (response && response->status == 200) {
         nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
         auto result = jsonResponse["result"];
@@ -315,7 +315,7 @@ import java.util.Base64;
 
 public class Main {
     public static void main(String[] args) throws IOException {
-        String API_URL = "http://localhost:8080/anomaly-detection";
+        String API_URL = "http://localhost:8080/image-anomaly-detection";
         String imagePath = "./demo.jpg";
         String outputImagePath = "./out.jpg";
 
@@ -374,7 +374,7 @@ import (
 )
 
 func main() {
-    API_URL := "http://localhost:8080/anomaly-detection"
+    API_URL := "http://localhost:8080/image-anomaly-detection"
     imagePath := "./demo.jpg"
     outputImagePath := "./out.jpg"
 
@@ -454,7 +454,7 @@ using Newtonsoft.Json.Linq;
 
 class Program
 {
-    static readonly string API_URL = "http://localhost:8080/anomaly-detection";
+    static readonly string API_URL = "http://localhost:8080/image-anomaly-detection";
     static readonly string imagePath = "./demo.jpg";
     static readonly string outputImagePath = "./out.jpg";
 
@@ -492,7 +492,7 @@ class Program
 const axios = require('axios');
 const fs = require('fs');
 
-const API_URL = 'http://localhost:8080/anomaly-detection'
+const API_URL = 'http://localhost:8080/image-anomaly-detection'
 const imagePath = './demo.jpg'
 const outputImagePath = "./out.jpg";
 
@@ -532,7 +532,7 @@ axios.request(config)
 ```php
 <?php
 
-$API_URL = "http://localhost:8080/anomaly-detection";
+$API_URL = "http://localhost:8080/image-anomaly-detection";
 $image_path = "./demo.jpg";
 $output_image_path = "./out.jpg";
 

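All of the client examples above were updated for the endpoint rename. As a quick smoke test of the new path, here is a minimal Python sketch equivalent to the updated examples, with an explicit status check added; everything else follows the hunks above.

```python
# Minimal client for the renamed endpoint; raise_for_status() is added here so
# failures surface instead of silently assuming HTTP 200.
import base64

import requests

API_URL = "http://localhost:8080/image-anomaly-detection"

with open("./demo.jpg", "rb") as f:
    image_data = base64.b64encode(f.read()).decode("ascii")

response = requests.post(API_URL, json={"image": image_data})
response.raise_for_status()  # fail loudly on non-2xx responses

result = response.json()["result"]
with open("./out.jpg", "wb") as f:
    f.write(base64.b64decode(result["image"]))
```
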
+ 1 - 1
docs/pipeline_usage/tutorials/cv_pipelines/image_classification_en.md

@@ -820,7 +820,7 @@ Operations provided by the service are as follows:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>

+ 1 - 1
docs/pipeline_usage/tutorials/cv_pipelines/image_multi_label_classification_en.md

@@ -226,7 +226,7 @@ Operations provided by the service are as follows:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>

+ 4 - 4
docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.md

@@ -346,8 +346,8 @@ int main() {
 
         auto instances = result["instances"];
         std::cout << "\nInstances:" << std::endl;
-        for (const auto& category : instances) {
-            std::cout << category << std::endl;
+        for (const auto& inst : instances) {
+            std::cout << inst << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -504,8 +504,8 @@ func main() {
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
     fmt.Println("\nInstances:")
-    for _, category := range respData.Result.Instances {
-        fmt.Println(category)
+    for _, inst := range respData.Result.Instances {
+        fmt.Println(inst)
     }
 }
 ```

+ 5 - 5
docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation_en.md

@@ -262,7 +262,7 @@ Operations provided by the service:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>
@@ -347,8 +347,8 @@ int main() {
 
         auto instances = result["instances"];
         std::cout << "\nInstances:" << std::endl;
-        for (const auto& category : instances) {
-            std::cout << category << std::endl;
+        for (const auto& inst : instances) {
+            std::cout << inst << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -499,8 +499,8 @@ func main() {
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
     fmt.Println("\nInstances:")
-    for _, category := range respData.Result.Instances {
-        fmt.Println(category)
+    for _, inst := range respData.Result.Instances {
+        fmt.Println(inst)
     }
 }
 ```

+ 12 - 12
docs/pipeline_usage/tutorials/cv_pipelines/object_detection.md

@@ -573,7 +573,7 @@ result = response.json()["result"]
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nDetectedobjects:")
+print("\nDetected objects:")
 print(result["detectedObjects"])
 ```
 
@@ -634,9 +634,9 @@ int main() {
         }
 
         auto detectedObjects = result["detectedObjects"];
-        std::cout << "\nDetectedobjects:" << std::endl;
-        for (const auto& category : detectedObjects) {
-            std::cout << category << std::endl;
+        std::cout << "\nDetected objects:" << std::endl;
+        for (const auto& obj : detectedObjects) {
+            std::cout << obj << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -701,7 +701,7 @@ public class Main {
                     fos.write(imageBytes);
                 }
                 System.out.println("Output image saved at " + outputImagePath);
-                System.out.println("\nDetectedobjects: " + detectedObjects.toString());
+                System.out.println("\nDetected objects: " + detectedObjects.toString());
             } else {
                 System.err.println("Request failed with code: " + response.code());
             }
@@ -771,7 +771,7 @@ func main() {
     type Response struct {
         Result struct {
             Image      string   `json:"image"`
-            Detectedobjects []map[string]interface{} `json:"detectedObjects"`
+            DetectedObjects []map[string]interface{} `json:"detectedObjects"`
         } `json:"result"`
     }
     var respData Response
@@ -792,9 +792,9 @@ func main() {
         return
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
-    fmt.Println("\nDetectedobjects:")
-    for _, category := range respData.Result.Detectedobjects {
-        fmt.Println(category)
+    fmt.Println("\nDetected objects:")
+    for _, obj := range respData.Result.DetectedObjects {
+        fmt.Println(obj)
     }
 }
 ```
@@ -843,7 +843,7 @@ class Program
 
         File.WriteAllBytes(outputImagePath, outputImageBytes);
         Console.WriteLine($"Output image saved at {outputImagePath}");
-        Console.WriteLine("\nDetectedobjects:");
+        Console.WriteLine("\nDetected objects:");
         Console.WriteLine(jsonResponse["result"]["detectedObjects"].ToString());
     }
 }
@@ -887,7 +887,7 @@ axios.request(config)
       if (err) throw err;
       console.log(`Output image saved at ${outputImagePath}`);
     });
-    console.log("\nDetectedobjects:");
+    console.log("\nDetected objects:");
     console.log(result["detectedObjects"]);
 })
 .catch((error) => {
@@ -923,7 +923,7 @@ curl_close($ch);
 $result = json_decode($response, true)["result"];
 file_put_contents($output_image_path, base64_decode($result["image"]));
 echo "Output image saved at " . $output_image_path . "\n";
-echo "\nDetectedobjects:\n";
+echo "\nDetected objects:\n";
 print_r($result["detectedObjects"]);
 
 ?>

+ 13 - 13
docs/pipeline_usage/tutorials/cv_pipelines/object_detection_en.md

@@ -545,7 +545,7 @@ Operations provided by the service are as follows:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>
@@ -571,7 +571,7 @@ result = response.json()["result"]
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nDetectedobjects:")
+print("\nDetected objects:")
 print(result["detectedObjects"])
 ```
 
@@ -629,9 +629,9 @@ int main() {
         }
 
         auto detectedObjects = result["detectedObjects"];
-        std::cout << "\nDetectedobjects:" << std::endl;
-        for (const auto& category : detectedObjects) {
-            std::cout << category << std::endl;
+        std::cout << "\nDetected objects:" << std::endl;
+        for (const auto& obj : detectedObjects) {
+            std::cout << obj << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -693,7 +693,7 @@ public class Main {
                     fos.write(imageBytes);
                 }
                 System.out.println("Output image saved at " + outputImagePath);
-                System.out.println("\nDetectedobjects: " + detectedObjects.toString());
+                System.out.println("\nDetected objects: " + detectedObjects.toString());
             } else {
                 System.err.println("Request failed with code: " + response.code());
             }
@@ -760,7 +760,7 @@ func main() {
     type Response struct {
         Result struct {
             Image      string   `json:"image"`
-            Detectedobjects []map[string]interface{} `json:"detectedObjects"`
+            DetectedObjects []map[string]interface{} `json:"detectedObjects"`
         } `json:"result"`
     }
     var respData Response
@@ -781,9 +781,9 @@ func main() {
         return
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
-    fmt.Println("\nDetectedobjects:")
-    for _, category := range respData.Result.Detectedobjects {
-        fmt.Println(category)
+    fmt.Println("\nDetected objects:")
+    for _, obj := range respData.Result.DetectedObjects {
+        fmt.Println(obj)
     }
 }
 ```
@@ -829,7 +829,7 @@ class Program
 
         File.WriteAllBytes(outputImagePath, outputImageBytes);
         Console.WriteLine($"Output image saved at {outputImagePath}");
-        Console.WriteLine("\nDetectedobjects:");
+        Console.WriteLine("\nDetected objects:");
         Console.WriteLine(jsonResponse["result"]["detectedObjects"].ToString());
     }
 }
@@ -870,7 +870,7 @@ axios.request(config)
       if (err) throw err;
       console.log(`Output image saved at ${outputImagePath}`);
     });
-    console.log("\nDetectedobjects:");
+    console.log("\nDetected objects:");
     console.log(result["detectedObjects"]);
 })
 .catch((error) => {
@@ -903,7 +903,7 @@ curl_close($ch);
 $result = json_decode($response, true)["result"];
 file_put_contents($output_image_path, base64_decode($result["image"]));
 echo "Output image saved at " . $output_image_path . "\n";
-echo "\nDetectedobjects:\n";
+echo "\nDetected objects:\n";
 print_r($result["detectedObjects"]);
 
 ?>

+ 1 - 1
docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation_en.md

@@ -243,7 +243,7 @@ Operations provided by the service are as follows:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>

+ 9 - 9
docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md

@@ -262,7 +262,7 @@ result = response.json()["result"]
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nDetectedobjects:")
+print("\nDetected objects:")
 print(result["detectedObjects"])
 ```
 
@@ -323,7 +323,7 @@ int main() {
         }
 
         auto detectedObjects = result["detectedObjects"];
-        std::cout << "\nDetectedobjects:" << std::endl;
+        std::cout << "\nDetected objects:" << std::endl;
         for (const auto& category : detectedObjects) {
             std::cout << category << std::endl;
         }
@@ -390,7 +390,7 @@ public class Main {
                     fos.write(imageBytes);
                 }
                 System.out.println("Output image saved at " + outputImagePath);
-                System.out.println("\nDetectedobjects: " + detectedObjects.toString());
+                System.out.println("\nDetected objects: " + detectedObjects.toString());
             } else {
                 System.err.println("Request failed with code: " + response.code());
             }
@@ -460,7 +460,7 @@ func main() {
     type Response struct {
         Result struct {
             Image      string   `json:"image"`
-            Detectedobjects []map[string]interface{} `json:"detectedObjects"`
+            DetectedObjects []map[string]interface{} `json:"detectedObjects"`
         } `json:"result"`
     }
     var respData Response
@@ -481,8 +481,8 @@ func main() {
         return
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
-    fmt.Println("\nDetectedobjects:")
-    for _, category := range respData.Result.Detectedobjects {
+    fmt.Println("\nDetected objects:")
+    for _, category := range respData.Result.DetectedObjects {
         fmt.Println(category)
     }
 }
@@ -532,7 +532,7 @@ class Program
 
         File.WriteAllBytes(outputImagePath, outputImageBytes);
         Console.WriteLine($"Output image saved at {outputImagePath}");
-        Console.WriteLine("\nDetectedobjects:");
+        Console.WriteLine("\nDetected objects:");
         Console.WriteLine(jsonResponse["result"]["detectedObjects"].ToString());
     }
 }
@@ -576,7 +576,7 @@ axios.request(config)
       if (err) throw err;
       console.log(`Output image saved at ${outputImagePath}`);
     });
-    console.log("\nDetectedobjects:");
+    console.log("\nDetected objects:");
     console.log(result["detectedObjects"]);
 })
 .catch((error) => {
@@ -612,7 +612,7 @@ curl_close($ch);
 $result = json_decode($response, true)["result"];
 file_put_contents($output_image_path, base64_decode($result["image"]));
 echo "Output image saved at " . $output_image_path . "\n";
-echo "\nDetectedobjects:\n";
+echo "\nDetected objects:\n";
 print_r($result["detectedObjects"]);
 
 ?>

+ 13 - 13
docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection_en.md

@@ -235,7 +235,7 @@ Operations provided by the service are as follows:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>
@@ -261,7 +261,7 @@ result = response.json()["result"]
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nDetectedobjects:")
+print("\nDetected objects:")
 print(result["detectedObjects"])
 ```
 
@@ -319,9 +319,9 @@ int main() {
         }
 
         auto detectedObjects = result["detectedObjects"];
-        std::cout << "\nDetectedobjects:" << std::endl;
-        for (const auto& category : detectedObjects) {
-            std::cout << category << std::endl;
+        std::cout << "\nDetected objects:" << std::endl;
+        for (const auto& obj : detectedObjects) {
+            std::cout << obj << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -383,7 +383,7 @@ public class Main {
                     fos.write(imageBytes);
                 }
                 System.out.println("Output image saved at " + outputImagePath);
-                System.out.println("\nDetectedobjects: " + detectedObjects.toString());
+                System.out.println("\nDetected objects: " + detectedObjects.toString());
             } else {
                 System.err.println("Request failed with code: " + response.code());
             }
@@ -450,7 +450,7 @@ func main() {
     type Response struct {
         Result struct {
             Image      string   `json:"image"`
-            Detectedobjects []map[string]interface{} `json:"detectedObjects"`
+            DetectedObjects []map[string]interface{} `json:"detectedObjects"`
         } `json:"result"`
     }
     var respData Response
@@ -471,9 +471,9 @@ func main() {
         return
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
-    fmt.Println("\nDetectedobjects:")
-    for _, category := range respData.Result.Detectedobjects {
-        fmt.Println(category)
+    fmt.Println("\nDetected objects:")
+    for _, obj := range respData.Result.DetectedObjects {
+        fmt.Println(obj)
     }
 }
 ```
@@ -519,7 +519,7 @@ class Program
 
         File.WriteAllBytes(outputImagePath, outputImageBytes);
         Console.WriteLine($"Output image saved at {outputImagePath}");
-        Console.WriteLine("\nDetectedobjects:");
+        Console.WriteLine("\nDetected objects:");
         Console.WriteLine(jsonResponse["result"]["detectedObjects"].ToString());
     }
 }
@@ -560,7 +560,7 @@ axios.request(config)
       if (err) throw err;
       console.log(`Output image saved at ${outputImagePath}`);
     });
-    console.log("\nDetectedobjects:");
+    console.log("\nDetected objects:");
     console.log(result["detectedObjects"]);
 })
 .catch((error) => {
@@ -593,7 +593,7 @@ curl_close($ch);
 $result = json_decode($response, true)["result"];
 file_put_contents($output_image_path, base64_decode($result["image"]));
 echo "Output image saved at " . $output_image_path . "\n";
-echo "\nDetectedobjects:\n";
+echo "\nDetected objects:\n";
 print_r($result["detectedObjects"]);
 
 ?>

+ 104 - 104
docs/pipeline_usage/tutorials/information_extration_pipelines/document_scene_information_extraction.md

@@ -371,11 +371,11 @@ chat_result.print()
 
        | Name | Type | Description | Required |
        |-|-|-|-|
-        |`image`|`string`|The URL of an image or PDF file accessible to the service, or the Base64-encoded content of such a file. For PDF files with more than 10 pages, only the first 10 pages are used.|Yes|
+        |`file`|`string`|The URL of an image or PDF file accessible to the service, or the Base64-encoded content of such a file. For PDF files with more than 10 pages, only the first 10 pages are used.|Yes|
        |`fileType`|`integer`|File type. `0` represents PDF files, `1` represents image files. If this property is absent from the request body, the service will try to infer the file type automatically from the URL.|No|
-        |`useOricls`|`boolean`|Whether to enable document image orientation classification. Enabled by default.|No|
-        |`useCurve`|`boolean`|Whether to enable seal text detection. Enabled by default.|No|
-        |`useUvdoc`|`boolean`|Whether to enable text image correction. Enabled by default.|No|
+        |`useImgOrientationCls`|`boolean`|Whether to enable document image orientation classification. Enabled by default.|No|
+        |`useImgUnwrapping`|`boolean`|Whether to enable text image correction. Enabled by default.|No|
+        |`useSealTextDet`|`boolean`|Whether to enable seal text detection. Enabled by default.|No|
        |`inferenceParams`|`object`|Inference parameters.|No|
 
        Properties of `inferenceParams`:
@@ -446,7 +446,7 @@ chat_result.print()
 
        | Name | Type | Description |
        |-|-|-|
-        |`vectorStore`|`object`|Serialized result of the vector database, which can be used as input for other operations.|
+        |`vectorStore`|`string`|Serialized result of the vector database, which can be used as input for other operations.|
 
 - **`retrieveKnowledge`**
 
@@ -459,7 +459,7 @@ chat_result.print()
        | Name | Type | Description | Required |
        |-|-|-|-|
        |`keys`|`array`|List of keywords.|Yes|
-        |`vectorStore`|`object`|Serialized result of the vector database. Provided by the `buildVectorStore` operation.|Yes|
+        |`vectorStore`|`string`|Serialized result of the vector database. Provided by the `buildVectorStore` operation.|Yes|
        |`llmName`|`string`|Name of the large language model.|No|
        |`llmParams`|`object`|API parameters for the large language model.|No|
 
@@ -477,7 +477,7 @@ chat_result.print()
 
        | Name | Type | Description |
        |-|-|-|
-        |`retrievalResult`|`object`|Result of knowledge retrieval, which can be used as input for other operations.|
+        |`retrievalResult`|`string`|Result of knowledge retrieval, which can be used as input for other operations.|
 
 - **`chat`**
 
@@ -494,8 +494,8 @@ chat_result.print()
        |`taskDescription`|`string`|Task prompt.|No|
        |`rules`|`string`|Prompt rules, used to customize extraction rules, e.g., to standardize the output format.|No|
        |`fewShot`|`string`|Example prompts.|No|
-        |`vectorStore`|`object`|Serialized result of the vector database. Provided by the `buildVectorStore` operation.|No|
-        |`retrievalResult`|`object`|Result of knowledge retrieval. Provided by the `retrieveKnowledge` operation.|No|
+        |`vectorStore`|`string`|Serialized result of the vector database. Provided by the `buildVectorStore` operation.|No|
+        |`retrievalResult`|`string`|Result of knowledge retrieval. Provided by the `retrieveKnowledge` operation.|No|
        |`returnPrompts`|`boolean`|Whether to return the prompts used. Enabled by default.|No|
        |`llmName`|`string`|Name of the large language model.|No|
        |`llmParams`|`object`|API parameters for the large language model.|No|
@@ -514,7 +514,7 @@ chat_result.print()
 
        | Name | Type | Description |
        |-|-|-|
-        |`chatResult`|`string`|Extracted key information.|
+        |`chatResult`|`object`|Extracted key information.|
        |`prompts`|`object`|Prompts used.|

        Properties of `prompts`:
@@ -551,101 +551,101 @@ LLM_PARAMS = {
     "secretKey": SECRET_KEY,
 }
 
+file_path = "./demo.jpg"
+keys = ["电话"]
 
-if __name__ == "__main__":
-    file_path = "./demo.jpg"
-    keys = ["电话"]
-
-    with open(file_path, "rb") as file:
-        file_bytes = file.read()
-        file_data = base64.b64encode(file_bytes).decode("ascii")
-
-    payload = {
-        "file": file_data,
-        "useOricls": True,
-        "useCurve": True,
-        "useUvdoc": True,
-    }
-    resp_vision = requests.post(url=f"{API_BASE_URL}/chatocr-vision", json=payload)
-    if resp_vision.status_code != 200:
-        print(
-            f"Request to chatocr-vision failed with status code {resp_vision.status_code}."
-        )
-        pprint.pp(resp_vision.json())
-        sys.exit(1)
-    result_vision = resp_vision.json()["result"]
-
-    for i, res in enumerate(result_vision["visionResults"]):
-        print("Texts:")
-        pprint.pp(res["texts"])
-        print("Tables:")
-        pprint.pp(res["tables"])
-        ocr_img_path = f"ocr_{i}.jpg"
-        with open(ocr_img_path, "wb") as f:
-            f.write(base64.b64decode(res["ocrImage"]))
-        layout_img_path = f"layout_{i}.jpg"
-        with open(layout_img_path, "wb") as f:
-            f.write(base64.b64decode(res["layoutImage"]))
-        print(f"Output images saved at {ocr_img_path} and {layout_img_path}")
-        print("")
-
-    payload = {
-        "visionInfo": result_vision["visionInfo"],
-        "minChars": 200,
-        "llmRequestInterval": 1000,
-        "llmName": LLM_NAME,
-        "llmParams": LLM_PARAMS,
-    }
-    resp_vector = requests.post(url=f"{API_BASE_URL}/chatocr-vector", json=payload)
-    if resp_vector.status_code != 200:
-        print(
-            f"Request to chatocr-vector failed with status code {resp_vector.status_code}."
-        )
-        pprint.pp(resp_vector.json())
-        sys.exit(1)
-    result_vector = resp_vector.json()["result"]
-
-    payload = {
-        "keys": keys,
-        "vectorStore": result_vector["vectorStore"],
-        "llmName": LLM_NAME,
-        "llmParams": LLM_PARAMS,
-    }
-    resp_retrieval = requests.post(url=f"{API_BASE_URL}/chatocr-retrieval", json=payload)
-    if resp_retrieval.status_code != 200:
-        print(
-            f"Request to chatocr-retrieval failed with status code {resp_retrieval.status_code}."
-        )
-        pprint.pp(resp_retrieval.json())
-        sys.exit(1)
-    result_retrieval = resp_retrieval.json()["result"]
-
-    payload = {
-        "keys": keys,
-        "visionInfo": result_vision["visionInfo"],
-        "taskDescription": "",
-        "rules": "",
-        "fewShot": "",
-        "vectorStore": result_vector["vectorStore"],
-        "retrievalResult": result_retrieval["retrievalResult"],
-        "returnPrompts": True,
-        "llmName": LLM_NAME,
-        "llmParams": LLM_PARAMS,
-    }
-    resp_chat = requests.post(url=f"{API_BASE_URL}/chatocr-chat", json=payload)
-    if resp_chat.status_code != 200:
-        print(
-            f"Request to chatocr-chat failed with status code {resp_chat.status_code}."
-        )
-        pprint.pp(resp_chat.json())
-        sys.exit(1)
-    result_chat = resp_chat.json()["result"]
-    print("\nPrompts:")
-    pprint.pp(result_chat["prompts"])
-    print("Final result:")
-    print(len(result_chat["chatResult"]))
+with open(file_path, "rb") as file:
+    file_bytes = file.read()
+    file_data = base64.b64encode(file_bytes).decode("ascii")
+
+payload = {
+    "file": file_data,
+    "fileType": 1,
+    "useImgOrientationCls": True,
+    "useImgUnwrapping": True,
+    "useSealTextDet": True,
+}
+resp_vision = requests.post(url=f"{API_BASE_URL}/chatocr-vision", json=payload)
+if resp_vision.status_code != 200:
+    print(
+        f"Request to chatocr-vision failed with status code {resp_vision.status_code}."
+    )
+    pprint.pp(resp_vision.json())
+    sys.exit(1)
+result_vision = resp_vision.json()["result"]
+
+for i, res in enumerate(result_vision["visionResults"]):
+    print("Texts:")
+    pprint.pp(res["texts"])
+    print("Tables:")
+    pprint.pp(res["tables"])
+    ocr_img_path = f"ocr_{i}.jpg"
+    with open(ocr_img_path, "wb") as f:
+        f.write(base64.b64decode(res["ocrImage"]))
+    layout_img_path = f"layout_{i}.jpg"
+    with open(layout_img_path, "wb") as f:
+        f.write(base64.b64decode(res["layoutImage"]))
+    print(f"Output images saved at {ocr_img_path} and {layout_img_path}")
+
+payload = {
+    "visionInfo": result_vision["visionInfo"],
+    "minChars": 200,
+    "llmRequestInterval": 1000,
+    "llmName": LLM_NAME,
+    "llmParams": LLM_PARAMS,
+}
+resp_vector = requests.post(url=f"{API_BASE_URL}/chatocr-vector", json=payload)
+if resp_vector.status_code != 200:
+    print(
+        f"Request to chatocr-vector failed with status code {resp_vector.status_code}."
+    )
+    pprint.pp(resp_vector.json())
+    sys.exit(1)
+result_vector = resp_vector.json()["result"]
+
+payload = {
+    "keys": keys,
+    "vectorStore": result_vector["vectorStore"],
+    "llmName": LLM_NAME,
+    "llmParams": LLM_PARAMS,
+}
+resp_retrieval = requests.post(url=f"{API_BASE_URL}/chatocr-retrieval", json=payload)
+if resp_retrieval.status_code != 200:
+    print(
+        f"Request to chatocr-retrieval failed with status code {resp_retrieval.status_code}."
+    )
+    pprint.pp(resp_retrieval.json())
+    sys.exit(1)
+result_retrieval = resp_retrieval.json()["result"]
+
+payload = {
+    "keys": keys,
+    "visionInfo": result_vision["visionInfo"],
+    "taskDescription": "",
+    "rules": "",
+    "fewShot": "",
+    "vectorStore": result_vector["vectorStore"],
+    "retrievalResult": result_retrieval["retrievalResult"],
+    "returnPrompts": True,
+    "llmName": LLM_NAME,
+    "llmParams": LLM_PARAMS,
+}
+resp_chat = requests.post(url=f"{API_BASE_URL}/chatocr-chat", json=payload)
+if resp_chat.status_code != 200:
+    print(
+        f"Request to chatocr-chat failed with status code {resp_chat.status_code}."
+    )
+    pprint.pp(resp_chat.json())
+    sys.exit(1)
+result_chat = resp_chat.json()["result"]
+print("\nPrompts:")
+pprint.pp(result_chat["prompts"])
+print("Final result:")
+print(result_chat["chatResult"])
 ```
+
**Note**: Please fill in your API key and secret key at `API_KEY` and `SECRET_KEY`.
+
 </details>
 </details>
 <br/>
@@ -698,8 +698,8 @@ from paddlex import create_pipeline
 pipeline = create_pipeline(
     pipeline="PP-ChatOCRv3-doc",
     llm_name="ernie-3.5",
-    llm_params={"api_type": "qianfan", "ak": "", "sk": ""}, 
+    llm_params={"api_type": "qianfan", "ak": "", "sk": ""},
     device="npu:0" # gpu:0 --> npu:0
-    )    
+    )
 ```
If you want to use the general document scene information extraction pipeline on more types of hardware, please refer to the [PaddleX Multi-Device Usage Guide](../../../other_devices_support/multi_devices_use_guide.md).
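
The type corrections in the hunks above (`vectorStore` and `retrievalResult` are serialized strings, `chatResult` is an object) matter for clients: the strings should be treated as opaque tokens and passed through verbatim between operations. A small sketch of that contract, assuming only the schemas documented above:

```python
# Hedged sketch: vectorStore / retrievalResult are opaque serialized strings;
# pass them through verbatim between operations rather than parsing them.
from typing import Any, Dict, List

def build_chat_payload(keys: List[str], vision_info: Any,
                       vector_store: str, retrieval_result: str) -> Dict[str, Any]:
    assert isinstance(vector_store, str) and isinstance(retrieval_result, str)
    return {
        "keys": keys,
        "visionInfo": vision_info,
        "vectorStore": vector_store,          # opaque string from buildVectorStore
        "retrievalResult": retrieval_result,  # opaque string from retrieveKnowledge
    }
```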

+ 103 - 107
docs/pipeline_usage/tutorials/information_extration_pipelines/document_scene_information_extraction_en.md

@@ -378,11 +378,11 @@ Operations provided by the service are as follows:
 
         | Name | Type | Description | Required |
         |-|-|-|-|
-        |`image`|`string`|The URL of an accessible image file or PDF file, or the Base64 encoded content of the above file types. For PDF files with more than 10 pages, only the first 10 pages will be used. | Yes |
+        |`file`|`string`|The URL of an accessible image file or PDF file, or the Base64 encoded content of the above file types. For PDF files with more than 10 pages, only the first 10 pages will be used. | Yes |
         |`fileType`|`integer`|File type. `0` represents PDF files, `1` represents image files. If this property is not present in the request body, the service will attempt to infer the file type automatically based on the URL. | No |
-        |`useOricls`|`boolean`|Whether to enable document image orientation classification. This feature is enabled by default. | No |
-        |`useCurve`|`boolean`|Whether to enable seal text detection. This feature is enabled by default. | No |
-        |`useUvdoc`|`boolean`|Whether to enable text image correction. This feature is enabled by default. | No |
+        |`useImgOrientationCls`|`boolean`|Whether to enable document image orientation classification. This feature is enabled by default. | No |
+        |`useImgUnwrapping`|`boolean`|Whether to enable text image correction. This feature is enabled by default. | No |
+        |`useSealTextDet`|`boolean`|Whether to enable seal text detection. This feature is enabled by default. | No |
         |`inferenceParams`|`object`|Inference parameters. | No |
 
         Properties of `inferenceParams`:
@@ -453,7 +453,7 @@ Operations provided by the service are as follows:
 
         | Name | Type | Description |
         |-|-|-|
-        |`vectorStore`|`object`|Serialized result of the vector database, which can be used as input for other operations.|
+        |`vectorStore`|`string`|Serialized result of the vector database, which can be used as input for other operations.|
 
 - **`retrieveKnowledge`**
 
@@ -466,7 +466,7 @@ Operations provided by the service are as follows:
         | Name | Type | Description | Required |
         |-|-|-|-|
         |`keys`|`array`|List of keywords.|Yes|
-        |`vectorStore`|`object`|Serialized result of the vector database. Provided by the `buildVectorStore` operation.|Yes|
+        |`vectorStore`|`string`|Serialized result of the vector database. Provided by the `buildVectorStore` operation.|Yes|
         |`llmName`|`string`|Name of the large language model.|No|
         |`llmParams`|`object`|API parameters for the large language model.|No|
 
@@ -484,7 +484,7 @@ Operations provided by the service are as follows:
 
         | Name | Type | Description |
         |-|-|-|
-        |`retrievalResult`|`object`|The result of knowledge retrieval, which can be used as input for other operations.|
+        |`retrievalResult`|`string`|The result of knowledge retrieval, which can be used as input for other operations.|
 
 - **`chat`**
 
@@ -501,8 +501,8 @@ Operations provided by the service are as follows:
         |`taskDescription` | `string` | Task prompt. | No |
         |`rules` | `string` | Custom extraction rules, e.g., for output formatting. | No |
         |`fewShot` | `string` | Example prompts. | No |
-        |`vectorStore` | `object` | Serialized result of the vector database. Provided by the `buildVectorStore` operation. | No |
-        |`retrievalResult` | `object` | Results of knowledge retrieval. Provided by the `retrieveKnowledge` operation. | No |
+        |`vectorStore` | `string` | Serialized result of the vector database. Provided by the `buildVectorStore` operation. | No |
+        |`retrievalResult` | `string` | Results of knowledge retrieval. Provided by the `retrieveKnowledge` operation. | No |
         |`returnPrompts` | `boolean` | Whether to return the prompts used. Enabled by default. | No |
         |`llmName` | `string` | Name of the large language model. | No |
         |`llmParams` | `object` | API parameters for the large language model. | No |
@@ -521,7 +521,7 @@ Operations provided by the service are as follows:
 
         | Name | Type | Description |
         |-|-|-|
-        |`chatResult` | `string` | Extracted key information. |
+        |`chatResult` | `object` | Extracted key information. |
         |`prompts` | `object` | Prompts used. |
 
         Properties of `prompts`:
@@ -547,7 +547,6 @@ import sys
 
 import requests
 
-
 API_BASE_URL = "http://0.0.0.0:8080"
 API_KEY = "{Qianfan Platform API key}"
 SECRET_KEY = "{Qianfan Platform secret key}"
@@ -558,101 +557,99 @@ LLM_PARAMS = {
     "secretKey": SECRET_KEY,
 }
 
+file_path = "./demo.jpg"
+keys = ["phone number"]
+
+with open(file_path, "rb") as file:
+    file_bytes = file.read()
+    file_data = base64.b64encode(file_bytes).decode("ascii")
 
-if __name__ == "__main__":
-    file_path = "./demo.jpg"
-    keys = ["phone number"]
-
-    with open(file_path, "rb") as file:
-        file_bytes = file.read()
-        file_data = base64.b64encode(file_bytes).decode("ascii")
-
-    payload = {
-        "file": file_data,
-        "useOricls": True,
-        "useCurve": True,
-        "useUvdoc": True,
-    }
-    resp_vision = requests.post(url=f"{API_BASE_URL}/chatocr-vision", json=payload)
-    if resp_vision.status_code != 200:
-        print(
-            f"Request to chatocr-vision failed with status code {resp_vision.status_code}."
-        )
-        pprint.pp(resp_vision.json())
-        sys.exit(1)
-    result_vision = resp_vision.json()["result"]
-
-    for i, res in enumerate(result_vision["visionResults"]):
-        print("Texts:")
-        pprint.pp(res["texts"])
-        print("Tables:")
-        pprint.pp(res["tables"])
-        ocr_img_path = f"ocr_{i}.jpg"
-        with open(ocr_img_path, "wb") as f:
-            f.write(base64.b64decode(res["ocrImage"]))
-        layout_img_path = f"layout_{i}.jpg"
-        with open(layout_img_path, "wb") as f:
-            f.write(base64.b64decode(res["layoutImage"]))
-        print(f"Output images saved at {ocr_img_path} and {layout_img_path}")
-        print("")
-
-    payload = {
-        "visionInfo": result_vision["visionInfo"],
-        "minChars": 200,
-        "llmRequestInterval": 1000,
-        "llmName": LLM_NAME,
-        "llmParams": LLM_PARAMS,
-    }
-    resp_vector = requests.post(url=f"{API_BASE_URL}/chatocr-vector", json=payload)
-    if resp_vector.status_code != 200:
-        print(
-            f"Request to chatocr-vector failed with status code {resp_vector.status_code}."
-        )
-        pprint.pp(resp_vector.json())
-        sys.exit(1)
-    result_vector = resp_vector.json()["result"]
-
-    payload = {
-        "keys": keys,
-        "vectorStore": result_vector["vectorStore"],
-        "llmName": LLM_NAME,
-        "llmParams": LLM_PARAMS,
-    }
-    resp_retrieval = requests.post(url=f"{API_BASE_URL}/chatocr-retrieval", json=payload)
-    if resp_retrieval.status_code != 200:
-        print(
-            f"Request to chatocr-retrieval failed with status code {resp_retrieval.status_code}."
-        )
-        pprint.pp(resp_retrieval.json())
-        sys.exit(1)
-    result_retrieval = resp_retrieval.json()["result"]
-
-    payload = {
-        "keys": keys,
-        "visionInfo": result_vision["visionInfo"],
-        "taskDescription": "",
-        "rules": "",
-        "fewShot": "",
-        "vectorStore": result_vector["vectorStore"],
-        "retrievalResult": result_retrieval["retrievalResult"],
-        "returnPrompts": True,
-        "llmName": LLM_NAME,
-        "llmParams": LLM_PARAMS,
-    }
-    resp_chat = requests.post(url=f"{API_BASE_URL}/chatocr-chat", json=payload)
-    if resp_chat.status_code != 200:
-        print(
-            f"Request to chatocr-chat failed with status code {resp_chat.status_code}."
-        )
-        pprint.pp(resp_chat.json())
-        sys.exit(1)
-    result_chat = resp_chat.json()["result"]
-    print("\nPrompts:")
-    pprint.pp(result_chat["prompts"])
-    print("Final result:")
-    print(len(result_chat["chatResult"]))
+payload = {
+    "file": file_data,
+    "fileType": 1,
+    "useImgOrientationCls": True,
+    "useImgUnwrapping": True,
+    "useSealTextDet": True,
+}
+resp_vision = requests.post(url=f"{API_BASE_URL}/chatocr-vision", json=payload)
+if resp_vision.status_code != 200:
+    print(
+        f"Request to chatocr-vision failed with status code {resp_vision.status_code}."
+    )
+    pprint.pp(resp_vision.json())
+    sys.exit(1)
+result_vision = resp_vision.json()["result"]
+
+for i, res in enumerate(result_vision["visionResults"]):
+    print("Texts:")
+    pprint.pp(res["texts"])
+    print("Tables:")
+    pprint.pp(res["tables"])
+    ocr_img_path = f"ocr_{i}.jpg"
+    with open(ocr_img_path, "wb") as f:
+        f.write(base64.b64decode(res["ocrImage"]))
+    layout_img_path = f"layout_{i}.jpg"
+    with open(layout_img_path, "wb") as f:
+        f.write(base64.b64decode(res["layoutImage"]))
+    print(f"Output images saved at {ocr_img_path} and {layout_img_path}")
+
+payload = {
+    "visionInfo": result_vision["visionInfo"],
+    "minChars": 200,
+    "llmRequestInterval": 1000,
+    "llmName": LLM_NAME,
+    "llmParams": LLM_PARAMS,
+}
+resp_vector = requests.post(url=f"{API_BASE_URL}/chatocr-vector", json=payload)
+if resp_vector.status_code != 200:
+    print(
+        f"Request to chatocr-vector failed with status code {resp_vector.status_code}."
+    )
+    pprint.pp(resp_vector.json())
+    sys.exit(1)
+result_vector = resp_vector.json()["result"]
+
+payload = {
+    "keys": keys,
+    "vectorStore": result_vector["vectorStore"],
+    "llmName": LLM_NAME,
+    "llmParams": LLM_PARAMS,
+}
+resp_retrieval = requests.post(url=f"{API_BASE_URL}/chatocr-retrieval", json=payload)
+if resp_retrieval.status_code != 200:
+    print(
+        f"Request to chatocr-retrieval failed with status code {resp_retrieval.status_code}."
+    )
+    pprint.pp(resp_retrieval.json())
+    sys.exit(1)
+result_retrieval = resp_retrieval.json()["result"]
+
+payload = {
+    "keys": keys,
+    "visionInfo": result_vision["visionInfo"],
+    "taskDescription": "",
+    "rules": "",
+    "fewShot": "",
+    "vectorStore": result_vector["vectorStore"],
+    "retrievalResult": result_retrieval["retrievalResult"],
+    "returnPrompts": True,
+    "llmName": LLM_NAME,
+    "llmParams": LLM_PARAMS,
+}
+resp_chat = requests.post(url=f"{API_BASE_URL}/chatocr-chat", json=payload)
+if resp_chat.status_code != 200:
+    print(
+        f"Request to chatocr-chat failed with status code {resp_chat.status_code}."
+    )
+    pprint.pp(resp_chat.json())
+    sys.exit(1)
+result_chat = resp_chat.json()["result"]
+print("\nPrompts:")
+pprint.pp(result_chat["prompts"])
+print("Final result:")
+print(result_chat["chatResult"])
 ```
-  
+
 **Note**: Please fill in your API key and secret key at `API_KEY` and `SECRET_KEY`.
 
 </details>
@@ -715,10 +712,9 @@ from paddlex import create_pipeline
 pipeline = create_pipeline(
     pipeline="PP-ChatOCRv3-doc",
     llm_name="ernie-3.5",
-    llm_params={"api_type": "qianfan", "ak": "", "sk": ""}, 
+    llm_params={"api_type": "qianfan", "ak": "", "sk": ""},
     device="npu:0" # gpu:0 --> npu:0
-    ) 
+    )
 ```
 
 If you want to use the PP-ChatOCRv3-doc Pipeline on more types of hardware, please refer to the [PaddleX Multi-Device Usage Guide](../../../installation/multi_devices_use_guide_en.md).
-

+ 12 - 12
docs/pipeline_usage/tutorials/ocr_pipelines/OCR.md

@@ -21,7 +21,7 @@ OCR (Optical Character Recognition) is a technology that converts text in images
 |-|-|-|-|-|-|
 |PP-OCRv4_server_det|82.69|83.3501|2434.01|109|Server-side text detection model of PP-OCRv4, with higher accuracy, suitable for deployment on high-performance servers|
 |PP-OCRv4_mobile_det|77.79|10.6923|120.177|4.7|Mobile text detection model of PP-OCRv4, more efficient, suitable for deployment on edge devices|
-  
+
 **Text Recognition Module:**
 <table >
     <tr>
@@ -375,7 +375,7 @@ result = response.json()["result"]
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nTexts:")
+print("\nDetected texts:")
 print(result["texts"])
 ```
 
@@ -436,9 +436,9 @@ int main() {
         }
 
         auto texts = result["texts"];
-        std::cout << "\nTexts:" << std::endl;
-        for (const auto& category : texts) {
-            std::cout << category << std::endl;
+        std::cout << "\nDetected texts:" << std::endl;
+        for (const auto& text : texts) {
+            std::cout << text << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -503,7 +503,7 @@ public class Main {
                     fos.write(imageBytes);
                 }
                 System.out.println("Output image saved at " + outputImagePath);
-                System.out.println("\nTexts: " + texts.toString());
+                System.out.println("\nDetected texts: " + texts.toString());
             } else {
                 System.err.println("Request failed with code: " + response.code());
             }
@@ -594,9 +594,9 @@ func main() {
         return
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
-    fmt.Println("\nTexts:")
-    for _, category := range respData.Result.Texts {
-        fmt.Println(category)
+    fmt.Println("\nDetected texts:")
+    for _, text := range respData.Result.Texts {
+        fmt.Println(text)
     }
 }
 ```
@@ -645,7 +645,7 @@ class Program
 
         File.WriteAllBytes(outputImagePath, outputImageBytes);
         Console.WriteLine($"Output image saved at {outputImagePath}");
-        Console.WriteLine("\nTexts:");
+        Console.WriteLine("\nDetected texts:");
         Console.WriteLine(jsonResponse["result"]["texts"].ToString());
     }
 }
@@ -689,7 +689,7 @@ axios.request(config)
       if (err) throw err;
       console.log(`Output image saved at ${outputImagePath}`);
     });
-    console.log("\nTexts:");
+    console.log("\nDetected texts:");
     console.log(result["texts"]);
 })
 .catch((error) => {
@@ -725,7 +725,7 @@ curl_close($ch);
 $result = json_decode($response, true)["result"];
 file_put_contents($output_image_path, base64_decode($result["image"]));
 echo "Output image saved at " . $output_image_path . "\n";
-echo "\nTexts:\n";
+echo "\nDetected texts:\n";
 print_r($result["texts"]);
 
 ?>

+ 12 - 12
docs/pipeline_usage/tutorials/ocr_pipelines/OCR_en.md

@@ -351,7 +351,7 @@ Operations provided by the service:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>
@@ -377,7 +377,7 @@ result = response.json()["result"]
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nTexts:")
+print("\nDetected texts:")
 print(result["texts"])
 ```
 
@@ -435,9 +435,9 @@ int main() {
         }
 
         auto texts = result["texts"];
-        std::cout << "\nTexts:" << std::endl;
-        for (const auto& category : texts) {
-            std::cout << category << std::endl;
+        std::cout << "\nDetected texts:" << std::endl;
+        for (const auto& text : texts) {
+            std::cout << text << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -499,7 +499,7 @@ public class Main {
                     fos.write(imageBytes);
                 }
                 System.out.println("Output image saved at " + outputImagePath);
-                System.out.println("\nTexts: " + texts.toString());
+                System.out.println("\nDetected texts: " + texts.toString());
             } else {
                 System.err.println("Request failed with code: " + response.code());
             }
@@ -587,9 +587,9 @@ func main() {
         return
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
-    fmt.Println("\nTexts:")
-    for _, category := range respData.Result.Texts {
-        fmt.Println(category)
+    fmt.Println("\nDetected texts:")
+    for _, text := range respData.Result.Texts {
+        fmt.Println(text)
     }
 }
 ```
@@ -635,7 +635,7 @@ class Program
 
         File.WriteAllBytes(outputImagePath, outputImageBytes);
         Console.WriteLine($"Output image saved at {outputImagePath}");
-        Console.WriteLine("\nTexts:");
+        Console.WriteLine("\nDetected texts:");
         Console.WriteLine(jsonResponse["result"]["texts"].ToString());
     }
 }
@@ -676,7 +676,7 @@ axios.request(config)
       if (err) throw err;
       console.log(`Output image saved at ${outputImagePath}`);
     });
-    console.log("\nTexts:");
+    console.log("\nDetected texts:");
     console.log(result["texts"]);
 })
 .catch((error) => {
@@ -709,7 +709,7 @@ curl_close($ch);
 $result = json_decode($response, true)["result"];
 file_put_contents($output_image_path, base64_decode($result["image"]));
 echo "Output image saved at " . $output_image_path . "\n";
-echo "\nTexts:\n";
+echo "\nDetected texts:\n";
 print_r($result["texts"]);
 
 ?>

+ 39 - 62
docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition.md

@@ -208,7 +208,7 @@ for res in output:
 
    Obtain formula recognition results from an image.
 
-    `POST /formula_recognition`
+    `POST /formula-recognition`
 
    - Request body properties:
 
@@ -227,64 +227,41 @@ for res in output:
 
        | Name | Type | Description |
        |-|-|-|
-        |`texts`|`array`|Positions, contents, and scores of texts.|
+        |`formulas`|`array`|Positions and contents of formulas.|
        |`image`|`string`|Formula recognition result image with the detected formula positions annotated. The image is in JPEG format and encoded in Base64.|

-        Each element in `texts` is an `object` with the following properties:
+        Each element in `formulas` is an `object` with the following properties:

        | Name | Type | Description |
        |-|-|-|
        |`poly`|`array`|Formula position. The elements in the array are, in order, the vertex coordinates of the polygon enclosing the text.|
-        |`text`|`string`|Formula content.|
+        |`latex`|`string`|Formula content.|

        Example of `result`:
 
         ```json
         {
-          "texts": [
+          "formulas": [
             {
               "poly": [
                 [
-                  444,
-                  244
+                  444.0,
+                  244.0
                 ],
                 [
-                  705,
-                  244
+                  705.4,
+                  244.5
                 ],
                 [
-                  705,
-                  311
+                  705.8,
+                  311.3
                 ],
                 [
-                  444,
-                  311
+                  444.1,
+                  311.0
                 ]
               ],
-              "text": "北京南站",
-              "score": 0.9
-            },
-            {
-              "poly": [
-                [
-                  992,
-                  248
-                ],
-                [
-                  1263,
-                  251
-                ],
-                [
-                  1263,
-                  318
-                ],
-                [
-                  992,
-                  315
-                ]
-              ],
-              "text": "天津站",
-              "score": 0.5
+              "latex": "F({\bf x})=C(F_{1}(x_{1}),\cdot\cdot\cdot,F_{N}(x_{N})).\qquad\qquad\qquad(1)"
             }
           ],
           "image": "xxxxxx"
@@ -303,7 +280,7 @@ for res in output:
 import base64
 import requests
 
-API_URL = "http://localhost:8080/formula_recognition" # Service URL
+API_URL = "http://localhost:8080/formula-recognition" # Service URL
 image_path = "./demo.jpg"
 output_image_path = "./out.jpg"
 
@@ -323,8 +300,8 @@ result = response.json()["result"]
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nTexts:")
-print(result["texts"])
+print("\nDetected formulas:")
+print(result["formulas"])
 ```
 
 </details>
@@ -365,7 +342,7 @@ int main() {
     std::string body = jsonObj.dump();
 
    // Call the API
-    auto response = client.Post("/formula_recognition", headers, body, "application/json");
+    auto response = client.Post("/formula-recognition", headers, body, "application/json");
    // Handle the response data
     if (response && response->status == 200) {
         nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
@@ -383,10 +360,10 @@ int main() {
             std::cerr << "Unable to open file for writing: " << outPutImagePath << std::endl;
         }
 
-        auto texts = result["texts"];
-        std::cout << "\nTexts:" << std::endl;
-        for (const auto& category : texts) {
-            std::cout << category << std::endl;
+        auto formulas = result["formulas"];
+        std::cout << "\nDetected formulas:" << std::endl;
+        for (const auto& formula : formulas) {
+            std::cout << formula << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -415,7 +392,7 @@ import java.util.Base64;
 
 public class Main {
     public static void main(String[] args) throws IOException {
-        String API_URL = "http://localhost:8080/formula_recognition"; // Service URL
+        String API_URL = "http://localhost:8080/formula-recognition"; // Service URL
        String imagePath = "./demo.jpg"; // Local image
        String outputImagePath = "./out.jpg"; // Output image
 
@@ -444,14 +421,14 @@ public class Main {
                 JsonNode resultNode = objectMapper.readTree(responseBody);
                 JsonNode result = resultNode.get("result");
                 String base64Image = result.get("image").asText();
-                JsonNode texts = result.get("texts");
+                JsonNode formulas = result.get("formulas");
 
                 byte[] imageBytes = Base64.getDecoder().decode(base64Image);
                 try (FileOutputStream fos = new FileOutputStream(outputImagePath)) {
                     fos.write(imageBytes);
                 }
                 System.out.println("Output image saved at " + outputImagePath);
-                System.out.println("\nTexts: " + texts.toString());
+                System.out.println("\nDetected formulas: " + formulas.toString());
             } else {
                 System.err.println("Request failed with code: " + response.code());
             }
@@ -478,7 +455,7 @@ import (
 )
 
 func main() {
-    API_URL := "http://localhost:8080/formula_recognition"
+    API_URL := "http://localhost:8080/formula-recognition"
     imagePath := "./demo.jpg"
     outputImagePath := "./out.jpg"
 
@@ -521,7 +498,7 @@ func main() {
     type Response struct {
         Result struct {
             Image      string   `json:"image"`
-            Texts []map[string]interface{} `json:"texts"`
+            Formulas []map[string]interface{} `json:"formulas"`
         } `json:"result"`
     }
     var respData Response
@@ -542,9 +519,9 @@ func main() {
         return
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
-    fmt.Println("\nTexts:")
-    for _, category := range respData.Result.Texts {
-        fmt.Println(category)
+    fmt.Println("\nDetected formulas:")
+    for _, formula := range respData.Result.Formulas {
+        fmt.Println(formula)
     }
 }
 ```
@@ -565,7 +542,7 @@ using Newtonsoft.Json.Linq;
 
 class Program
 {
-    static readonly string API_URL = "http://localhost:8080/formula_recognition";
+    static readonly string API_URL = "http://localhost:8080/formula-recognition";
     static readonly string imagePath = "./demo.jpg";
     static readonly string outputImagePath = "./out.jpg";
 
@@ -593,8 +570,8 @@ class Program
 
         File.WriteAllBytes(outputImagePath, outputImageBytes);
         Console.WriteLine($"Output image saved at {outputImagePath}");
-        Console.WriteLine("\nTexts:");
-        Console.WriteLine(jsonResponse["result"]["texts"].ToString());
+        Console.WriteLine("\nDetected formulas:");
+        Console.WriteLine(jsonResponse["result"]["formulas"].ToString());
     }
 }
 ```
@@ -608,7 +585,7 @@ class Program
 const axios = require('axios');
 const fs = require('fs');
 
-const API_URL = 'http://localhost:8080/formula_recognition'
+const API_URL = 'http://localhost:8080/formula-recognition'
 const imagePath = './demo.jpg'
 const outputImagePath = "./out.jpg";
 
@@ -637,8 +614,8 @@ axios.request(config)
       if (err) throw err;
       console.log(`Output image saved at ${outputImagePath}`);
     });
-    console.log("\nTexts:");
-    console.log(result["texts"]);
+    console.log("\nDetected formulas:");
+    console.log(result["formulas"]);
 })
 .catch((error) => {
   console.log(error);
@@ -653,7 +630,7 @@ axios.request(config)
 ```php
 <?php
 
-$API_URL = "http://localhost:8080/formula_recognition"; // Service URL
+$API_URL = "http://localhost:8080/formula-recognition"; // Service URL
 $image_path = "./demo.jpg";
 $output_image_path = "./out.jpg";
 
@@ -673,8 +650,8 @@ curl_close($ch);
 $result = json_decode($response, true)["result"];
 file_put_contents($output_image_path, base64_decode($result["image"]));
 echo "Output image saved at " . $output_image_path . "\n";
-echo "\nTexts:\n";
-print_r($result["texts"]);
+echo "\nDetected formulas:\n";
+print_r($result["formulas"]);
 
 ?>
 ```
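
Since the response now carries raw LaTeX in `formulas[*].latex` rather than plain text, here is a short follow-on sketch, assuming only the `result` shape documented above, showing how a client might collect the returned formulas into a `.tex` fragment:

```python
# Hedged sketch: collects the LaTeX strings returned by the formula-recognition
# endpoint into a .tex fragment. Assumes only the result shape documented above
# (a "formulas" array whose items carry a "latex" string).
def formulas_to_tex(result: dict, path: str = "formulas.tex") -> None:
    lines = []
    for formula in result["formulas"]:
        lines.append("\\[")             # display-math delimiters
        lines.append(formula["latex"])
        lines.append("\\]")
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")

# Example with the documented result shape:
example = {"formulas": [{"poly": [[444.0, 244.0]], "latex": "E=mc^{2}"}]}
formulas_to_tex(example)
```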

+ 42 - 65
docs/pipeline_usage/tutorials/ocr_pipelines/formula_recognition_en.md

@@ -208,7 +208,7 @@ Operations provided by the service:
 
     Obtain formula recognition results from an image.
 
-    `POST /formula recognition`
+    `POST /formula-recognition`
 
     - Request body properties:
 
@@ -227,64 +227,41 @@ Operations provided by the service:
 
         | Name | Type | Description |
         |------|------|-------------|
-        |`texts`|`array`|Positions, contents, and scores of texts.|
-        |`image`|`string`|formula recognition result image with detected formula positions annotated. The image is in JPEG format and encoded in Base64.|
+        |`formulas`|`array`|Positions and contents of formulas.|
+        |`image`|`string`|Formula recognition result image with detected formula positions annotated. The image is in JPEG format and encoded in Base64.|
 
-        Each element in `texts` is an `object` with the following properties:
+        Each element in `formulas` is an `object` with the following properties:
 
         | Name | Type | Description |
         |------|------|-------------|
-        |`poly`|`array`|Text position. Elements in the array are the vertex coordinates of the polygon enclosing the formula.|
-        |`text`|`string`|Text content.|
+        |`poly`|`array`|Formula position. Elements in the array are the vertex coordinates of the polygon enclosing the formula.|
+        |`latex`|`string`|Formula content.|
 
         Example of `result`:
 
         ```json
         {
-          "texts": [
+          "formulas": [
             {
               "poly": [
                 [
-                  444,
-                  244
+                  444.0,
+                  244.0
                 ],
                 [
-                  705,
-                  244
+                  705.4,
+                  244.5
                 ],
                 [
-                  705,
-                  311
+                  705.8,
+                  311.3
                 ],
                 [
-                  444,
-                  311
+                  444.1,
+                  311.0
                 ]
               ],
-              "text": "Beijing South Railway Station",
-              "score": 0.9
-            },
-            {
-              "poly": [
-                [
-                  992,
-                  248
-                ],
-                [
-                  1263,
-                  251
-                ],
-                [
-                  1263,
-                  318
-                ],
-                [
-                  992,
-                  315
-                ]
-              ],
-              "text": "Tianjin Railway Station",
-              "score": 0.5
+              "latex": "F({\bf x})=C(F_{1}(x_{1}),\cdot\cdot\cdot,F_{N}(x_{N})).\qquad\qquad\qquad(1)"
             }
           ],
           "image": "xxxxxx"
@@ -294,7 +271,7 @@ Operations provided by the service:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>
@@ -303,7 +280,7 @@ Operations provided by the service:
 import base64
 import requests
 
-API_URL = "http://localhost:8080/formula recognition"
+API_URL = "http://localhost:8080/formula-recognition"
 image_path = "./demo.jpg"
 output_image_path = "./out.jpg"
 
@@ -320,8 +297,8 @@ result = response.json()["result"]
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nTexts:")
-print(result["texts"])
+print("\nDetected formulas:")
+print(result["formulas"])
 ```
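+
+To inspect the recognized formulas offline, you can continue the example above with the snippet below (a minimal sketch, not part of the service API; it only assumes the `formulas`/`latex` response schema documented earlier) and compile the output with any LaTeX toolchain:
+
+```python
+# Wrap each recognized formula in a standalone LaTeX document.
+with open("formulas.tex", "w", encoding="utf-8") as f:
+    f.write("\\documentclass{article}\n\\begin{document}\n")
+    for formula in result["formulas"]:
+        f.write("\\begin{equation*}\n" + formula["latex"] + "\n\\end{equation*}\n")
+    f.write("\\end{document}\n")
+print("LaTeX source written to formulas.tex")
+```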
 
 </details>
@@ -360,7 +337,7 @@ int main() {
     jsonObj["image"] = encodedImage;
     std::string body = jsonObj.dump();
 
-    auto response = client.Post("/formula recognition", headers, body, "application/json");
+    auto response = client.Post("/formula-recognition", headers, body, "application/json");
     if (response && response->status == 200) {
         nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
         auto result = jsonResponse["result"];
@@ -377,10 +354,10 @@ int main() {
             std::cerr << "Unable to open file for writing: " << outPutImagePath << std::endl;
         }
 
-        auto texts = result["texts"];
-        std::cout << "\nTexts:" << std::endl;
-        for (const auto& category : texts) {
-            std::cout << category << std::endl;
+        auto formulas = result["formulas"];
+        std::cout << "\nDetected formulas:" << std::endl;
+        for (const auto& formula : formulas) {
+            std::cout << formula << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -409,7 +386,7 @@ import java.util.Base64;
 
 public class Main {
     public static void main(String[] args) throws IOException {
-        String API_URL = "http://localhost:8080/formula recognition";
+        String API_URL = "http://localhost:8080/formula-recognition";
         String imagePath = "./demo.jpg";
         String outputImagePath = "./out.jpg";
 
@@ -435,14 +412,14 @@ public class Main {
                 JsonNode resultNode = objectMapper.readTree(responseBody);
                 JsonNode result = resultNode.get("result");
                 String base64Image = result.get("image").asText();
-                JsonNode texts = result.get("texts");
+                JsonNode formulas = result.get("formulas");
 
                 byte[] imageBytes = Base64.getDecoder().decode(base64Image);
                 try (FileOutputStream fos = new FileOutputStream(outputImagePath)) {
                     fos.write(imageBytes);
                 }
                 System.out.println("Output image saved at " + outputImagePath);
-                System.out.println("\nTexts: " + texts.toString());
+                System.out.println("\nDetected formulas: " + formulas.toString());
             } else {
                 System.err.println("Request failed with code: " + response.code());
             }
@@ -469,7 +446,7 @@ import (
 )
 
 func main() {
-    API_URL := "http://localhost:8080/formula recognition"
+    API_URL := "http://localhost:8080/formula-recognition"
     imagePath := "./demo.jpg"
     outputImagePath := "./out.jpg"
 
@@ -509,7 +486,7 @@ func main() {
     type Response struct {
         Result struct {
             Image      string   `json:"image"`
-            Texts []map[string]interface{} `json:"texts"`
+            Formulas []map[string]interface{} `json:"formulas"`
         } `json:"result"`
     }
     var respData Response
@@ -530,9 +507,9 @@ func main() {
         return
     }
     fmt.Printf("Image saved at %s.jpg\n", outputImagePath)
-    fmt.Println("\nTexts:")
-    for _, category := range respData.Result.Texts {
-        fmt.Println(category)
+    fmt.Println("\nDetected formulas:")
+    for _, formula := range respData.Result.Formulas {
+        fmt.Println(formula)
     }
 }
 ```
@@ -553,7 +530,7 @@ using Newtonsoft.Json.Linq;
 
 class Program
 {
-    static readonly string API_URL = "http://localhost:8080/formula recognition";
+    static readonly string API_URL = "http://localhost:8080/formula-recognition";
     static readonly string imagePath = "./demo.jpg";
     static readonly string outputImagePath = "./out.jpg";
 
@@ -578,8 +555,8 @@ class Program
 
         File.WriteAllBytes(outputImagePath, outputImageBytes);
         Console.WriteLine($"Output image saved at {outputImagePath}");
-        Console.WriteLine("\nTexts:");
-        Console.WriteLine(jsonResponse["result"]["texts"].ToString());
+        Console.WriteLine("\nDetected formulas:");
+        Console.WriteLine(jsonResponse["result"]["formulas"].ToString());
     }
 }
 ```
@@ -593,7 +570,7 @@ class Program
 const axios = require('axios');
 const fs = require('fs');
 
-const API_URL = 'http://localhost:8080/formula recognition'
+const API_URL = 'http://localhost:8080/formula-recognition'
 const imagePath = './demo.jpg'
 const outputImagePath = "./out.jpg";
 
@@ -619,8 +596,8 @@ axios.request(config)
       if (err) throw err;
       console.log(`Output image saved at ${outputImagePath}`);
     });
-    console.log("\nTexts:");
-    console.log(result["texts"]);
+    console.log("\nDetected formulas:");
+    console.log(result["formulas"]);
 })
 .catch((error) => {
   console.log(error);
@@ -635,7 +612,7 @@ axios.request(config)
 ```php
 <?php
 
-$API_URL = "http://localhost:8080/formula recognition";
+$API_URL = "http://localhost:8080/formula-recognition";
 $image_path = "./demo.jpg";
 $output_image_path = "./out.jpg";
 
@@ -652,8 +629,8 @@ curl_close($ch);
 $result = json_decode($response, true)["result"];
 file_put_contents($output_image_path, base64_decode($result["image"]));
 echo "Output image saved at " . $output_image_path . "\n";
-echo "\nTexts:\n";
-print_r($result["texts"]);
+echo "\nDetected formulas:\n";
+print_r($result["formulas"]);
 
 ?>
 ```

+ 60 - 9
docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md

@@ -354,15 +354,19 @@ for res in output:
 
 - **`infer`**
 
-    Locate and recognize the tables in the image.
+    Perform layout parsing.
 
-    `POST /table-recognition`
+    `POST /layout-parsing`
 
     - Request body properties:
 
         |Name|Type|Description|Required|
         |-|-|-|-|
-        |`image`|`string`|The URL of an image file accessible by the service, or the Base64-encoded content of the image file.|Yes|
+        |`file`|`string`|The URL of an image or PDF file accessible by the service, or the Base64-encoded content of such a file. For PDF files with more than 10 pages, only the first 10 pages are used.|Yes|
+        |`fileType`|`integer`|File type. `0` means a PDF file, and `1` means an image file. If this property is absent from the request body, the service will try to infer the file type from the URL.|No|
+        |`useImgOrientationCls`|`boolean`|Whether to enable document image orientation classification. Enabled by default.|No|
+        |`useImgUnwrapping`|`boolean`|Whether to enable text image rectification. Enabled by default.|No|
+        |`useSealTextDet`|`boolean`|Whether to enable seal text detection. Enabled by default.|No|
         |`inferenceParams`|`object`|Inference parameters.|No|
 
         Properties of `inferenceParams`:
@@ -375,19 +379,66 @@ for res in output:
 
         |Name|Type|Description|
         |-|-|-|
-        |`tables`|`array`|Positions and contents of the tables.|
-        |`layoutImage`|`string`|Layout area detection result image. The image is in JPEG format and encoded in Base64.|
-        |`ocrImage`|`string`|OCR result image. The image is in JPEG format and encoded in Base64.|
+        |`layoutParsingResults`|`array`|Layout parsing results. The array length is 1 (for image input) or the smaller of the number of document pages and 10 (for PDF input). For PDF input, each element in the array corresponds, in order, to the processing result of each page of the PDF file.|
 
-        Each element in `tables` is an `object` with the following properties:
+        Each element in `layoutParsingResults` is an `object` with the following properties:
 
         |Name|Type|Description|
         |-|-|-|
-        |`bbox`|`array`|Table position. The elements in the array are, in order, the x-coordinate of the top-left corner, the y-coordinate of the top-left corner, the x-coordinate of the bottom-right corner, and the y-coordinate of the bottom-right corner of the bounding box.|
-        |`html`|`string`|Table recognition result in HTML format.|
+        |`layoutElements`|`array`|Layout element information.|
+
+        Each element in `layoutElements` is an `object` with the following properties:
+
+        |Name|Type|Description|
+        |-|-|-|
+        |`bbox`|`array`|Layout element position. The elements in the array are, in order, the x-coordinate of the top-left corner, the y-coordinate of the top-left corner, the x-coordinate of the bottom-right corner, and the y-coordinate of the bottom-right corner of the bounding box.|
+        |`label`|`string`|Layout element label.|
+        |`text`|`string`|Text contained in the layout element.|
+        |`layoutType`|`string`|Arrangement of the layout element.|
+        |`image`|`string`|Layout element image, in JPEG format, encoded in Base64.|
 
 </details>
 
+<details>
+<summary>Multi-Language Service Invocation Examples</summary>
+
+<details>
+<summary>Python</summary>
+
+```python
+import base64
+import requests
+
+API_URL = "http://localhost:8080/layout-parsing" # Service URL
+image_path = "./demo.jpg"
+
+# Encode the local image in Base64
+with open(image_path, "rb") as file:
+    image_bytes = file.read()
+    image_data = base64.b64encode(image_bytes).decode("ascii")
+
+payload = {
+    "file": image_data, # Base64-encoded file content or file URL
+    "fileType": 1,
+    "useImgOrientationCls": True,
+    "useImgUnwrapping": True,
+    "useSealTextDet": True,
+}
+
+# Call the API
+response = requests.post(API_URL, json=payload)
+
+# Process the returned data
+assert response.status_code == 200
+result = response.json()["result"]
+print("\nDetected layout elements:")
+for res in result["layoutParsingResults"]:
+    for ele in res["layoutElements"]:
+        print("===============================")
+        print("bbox:", ele["bbox"])
+        print("label:", ele["label"])
+        print("text:", repr(ele["text"]))
+```
+
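+Each layout element may also carry a Base64-encoded JPEG crop in its `image` field (see the response schema above). A minimal continuation of the example (not part of the original sample code) that saves those crops to disk:
+
+```python
+# Save the cropped image of each layout element.
+for i, res in enumerate(result["layoutParsingResults"]):
+    for j, ele in enumerate(res["layoutElements"]):
+        with open(f"element_{i}_{j}.jpg", "wb") as f:
+            f.write(base64.b64decode(ele["image"]))
+```
+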
 </details>
 </details>
 <br/>

+ 460 - 0
docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition.md

@@ -292,7 +292,467 @@ for res in output:
 
 Below are the API references and multi-language service invocation examples:
 
+<details>
+<summary>API Reference</summary>
+
+For all operations provided by the service:
+
+- Both the response body and the request body for POST requests are JSON data (JSON objects).
+- When the request is processed successfully, the response status code is `200`, and the response body has the following properties:
+
+    |Name|Type|Description|
+    |-|-|-|
+    |`errorCode`|`integer`|Error code. Fixed as `0`.|
+    |`errorMsg`|`string`|Error message. Fixed as `"Success"`.|
+
+    The response body may also have a `result` property of type `object`, which stores the operation result information.
+
+- When the request is not processed successfully, the response body has the following properties:
+
+    |Name|Type|Description|
+    |-|-|-|
+    |`errorCode`|`integer`|Error code. Same as the response status code.|
+    |`errorMsg`|`string`|Error message.|
+
+Operations provided by the service:
+
+- **`infer`**
+
+    Obtain seal text recognition results from an image.
+
+    `POST /seal-recognition`
+
+    - Request body properties:
+
+        |Name|Type|Description|Required|
+        |-|-|-|-|
+        |`image`|`string`|The URL of an image file accessible by the service, or the Base64-encoded content of the image file.|Yes|
+        |`inferenceParams`|`object`|Inference parameters.|No|
+
+        Properties of `inferenceParams`:
+
+        |Name|Type|Description|Required|
+        |-|-|-|-|
+        |`maxLongSide`|`integer`|During inference, if the longer side of the input image for the text detection model is greater than `maxLongSide`, the image will be scaled so that its longer side equals `maxLongSide`.|No|
+
+    - When the request is processed successfully, the `result` of the response body has the following properties:
+
+        |Name|Type|Description|
+        |-|-|-|
+        |`sealImpressions`|`array`|Seal text recognition results.|
+        |`layoutImage`|`string`|Layout area detection result image. The image is in JPEG format and encoded in Base64.|
+
+        Each element in `sealImpressions` is an `object` with the following properties:
+
+        |Name|Type|Description|
+        |-|-|-|
+        |`texts`|`array`|Positions, contents, and scores of texts.|
+
+        Each element in `texts` is an `object` with the following properties:
+
+        |Name|Type|Description|
+        |-|-|-|
+        |`poly`|`array`|Text position. The elements in the array are the vertex coordinates of the polygon enclosing the text.|
+        |`text`|`string`|Text content.|
+        |`score`|`number`|Text recognition score.|
+
+</details>
+
+<details>
+<summary>Multi-Language Service Invocation Examples</summary>
+
+<details>
+<summary>Python</summary>
+
+```python
+import base64
+import requests
+
+API_URL = "http://localhost:8080/seal-recognition" # 服务URL
+image_path = "./demo.jpg"
+layout_image_path = "./layout.jpg"
+
+# 对本地图像进行Base64编码
+with open(image_path, "rb") as file:
+    image_bytes = file.read()
+    image_data = base64.b64encode(image_bytes).decode("ascii")
+
+payload = {"image": image_data}  # Base64编码的文件内容或者图像URL
+
+# 调用API
+response = requests.post(API_URL, json=payload)
+
+# 处理接口返回数据
+assert response.status_code == 200
+result = response.json()["result"]
+with open(layout_image_path, "wb") as file:
+    file.write(base64.b64decode(result["layoutImage"]))
+print(f"Output image saved at {layout_image_path}")
+print("\nDetected seal impressions:")
+print(result["sealImpressions"])
+```
+
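+To cap the input resolution of the text detection model, the request can also carry the optional `inferenceParams` documented above. A minimal variant of the payload (the value `1216` is illustrative):
+
+```python
+# Optional: limit the longer side of the detection input image.
+payload = {
+    "image": image_data,
+    "inferenceParams": {"maxLongSide": 1216},
+}
+response = requests.post(API_URL, json=payload)
+```
+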
+</details>
+
+<details>
+<summary>C++</summary>
+
+```cpp
+#include <iostream>
+#include "cpp-httplib/httplib.h" // https://github.com/Huiyicc/cpp-httplib
+#include "nlohmann/json.hpp" // https://github.com/nlohmann/json
+#include "base64.hpp" // https://github.com/tobiaslocker/base64
+
+int main() {
+    httplib::Client client("localhost:8080");
+    const std::string imagePath = "./demo.jpg";
+    const std::string layoutImagePath = "./layout.jpg";
+
+    httplib::Headers headers = {
+        {"Content-Type", "application/json"}
+    };
+
+    // Encode the local image in Base64
+    std::ifstream file(imagePath, std::ios::binary | std::ios::ate);
+    std::streamsize size = file.tellg();
+    file.seekg(0, std::ios::beg);
+
+    std::vector<char> buffer(size);
+    if (!file.read(buffer.data(), size)) {
+        std::cerr << "Error reading file." << std::endl;
+        return 1;
+    }
+    std::string bufferStr(reinterpret_cast<const char*>(buffer.data()), buffer.size());
+    std::string encodedImage = base64::to_base64(bufferStr);
+
+    nlohmann::json jsonObj;
+    jsonObj["image"] = encodedImage;
+    std::string body = jsonObj.dump();
+
+    // Call the API
+    auto response = client.Post("/seal-recognition", headers, body, "application/json");
+    // Process the returned data
+    if (response && response->status == 200) {
+        nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
+        auto result = jsonResponse["result"];
+
+        encodedImage = result["layoutImage"];
+        std::string decodedString = base64::from_base64(encodedImage);
+        std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end());
+        std::ofstream outputLayoutFile(layoutImagePath, std::ios::binary | std::ios::out);
+        if (outputLayoutFile.is_open()) {
+            outputLayoutFile.write(reinterpret_cast<char*>(decodedLayoutImage.data()), decodedLayoutImage.size());
+            outputLayoutFile.close();
+            std::cout << "Output image saved at " << layoutImagePath << std::endl;
+        } else {
+            std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl;
+        }
+
+        auto impressions = result["sealImpressions"];
+        std::cout << "\nDetected seal impressions:" << std::endl;
+        for (const auto& impression : impressions) {
+            std::cout << impression << std::endl;
+        }
+    } else {
+        std::cout << "Failed to send HTTP request." << std::endl;
+        return 1;
+    }
+
+    return 0;
+}
+```
 
+</details>
+
+<details>
+<summary>Java</summary>
+
+```java
+import okhttp3.*;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Base64;
+
+public class Main {
+    public static void main(String[] args) throws IOException {
+        String API_URL = "http://localhost:8080/seal-recognition"; // Service URL
+        String imagePath = "./demo.jpg"; // Local image
+        String layoutImagePath = "./layout.jpg";
+
+        // Encode the local image in Base64
+        File file = new File(imagePath);
+        byte[] fileContent = java.nio.file.Files.readAllBytes(file.toPath());
+        String imageData = Base64.getEncoder().encodeToString(fileContent);
+
+        ObjectMapper objectMapper = new ObjectMapper();
+        ObjectNode params = objectMapper.createObjectNode();
+        params.put("image", imageData); // Base64编码的文件内容或者图像URL
+
+        // 创建 OkHttpClient 实例
+        OkHttpClient client = new OkHttpClient();
+        MediaType JSON = MediaType.Companion.get("application/json; charset=utf-8");
+        RequestBody body = RequestBody.Companion.create(params.toString(), JSON);
+        Request request = new Request.Builder()
+                .url(API_URL)
+                .post(body)
+                .build();
+
+        // Call the API and process the returned data
+        try (Response response = client.newCall(request).execute()) {
+            if (response.isSuccessful()) {
+                String responseBody = response.body().string();
+                JsonNode resultNode = objectMapper.readTree(responseBody);
+                JsonNode result = resultNode.get("result");
+                String layoutBase64Image = result.get("layoutImage").asText();
+                JsonNode impressions = result.get("sealImpressions");
+
+                byte[] imageBytes = Base64.getDecoder().decode(layoutBase64Image);
+                try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) {
+                    fos.write(imageBytes);
+                }
+                System.out.println("Output image saved at " + layoutImagePath);
+
+                System.out.println("\nDetected seal impressions: " + impressions.toString());
+            } else {
+                System.err.println("Request failed with code: " + response.code());
+            }
+        }
+    }
+}
+```
+
+</details>
+
+<details>
+<summary>Go</summary>
+
+```go
+package main
+
+import (
+    "bytes"
+    "encoding/base64"
+    "encoding/json"
+    "fmt"
+    "io/ioutil"
+    "net/http"
+)
+
+func main() {
+    API_URL := "http://localhost:8080/seal-recognition"
+    imagePath := "./demo.jpg"
+    layoutImagePath := "./layout.jpg"
+
+    // Encode the local image in Base64
+    imageBytes, err := ioutil.ReadFile(imagePath)
+    if err != nil {
+        fmt.Println("Error reading image file:", err)
+        return
+    }
+    imageData := base64.StdEncoding.EncodeToString(imageBytes)
+
+    payload := map[string]string{"image": imageData} // Base64-encoded file content or image URL
+    payloadBytes, err := json.Marshal(payload)
+    if err != nil {
+        fmt.Println("Error marshaling payload:", err)
+        return
+    }
+
+    // Call the API
+    client := &http.Client{}
+    req, err := http.NewRequest("POST", API_URL, bytes.NewBuffer(payloadBytes))
+    if err != nil {
+        fmt.Println("Error creating request:", err)
+        return
+    }
+
+    res, err := client.Do(req)
+    if err != nil {
+        fmt.Println("Error sending request:", err)
+        return
+    }
+    defer res.Body.Close()
+
+    // Process the returned data
+    body, err := ioutil.ReadAll(res.Body)
+    if err != nil {
+        fmt.Println("Error reading response body:", err)
+        return
+    }
+    type Response struct {
+        Result struct {
+            LayoutImage      string   `json:"layoutImage"`
+            Impressions []map[string]interface{} `json:"sealImpressions"`
+        } `json:"result"`
+    }
+    var respData Response
+    err = json.Unmarshal([]byte(string(body)), &respData)
+    if err != nil {
+        fmt.Println("Error unmarshaling response body:", err)
+        return
+    }
+
+    layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage)
+    if err != nil {
+        fmt.Println("Error decoding base64 image data:", err)
+        return
+    }
+    err = ioutil.WriteFile(layoutImagePath, layoutImageData, 0644)
+    if err != nil {
+        fmt.Println("Error writing image to file:", err)
+        return
+    }
+    fmt.Printf("Image saved at %s.jpg\n", layoutImagePath)
+
+    fmt.Println("\nDetected seal impressions:")
+    for _, impression := range respData.Result.Impressions {
+        fmt.Println(impression)
+    }
+}
+```
+
+</details>
+
+<details>
+<summary>C#</summary>
+
+```csharp
+using System;
+using System.IO;
+using System.Net.Http;
+using System.Net.Http.Headers;
+using System.Text;
+using System.Threading.Tasks;
+using Newtonsoft.Json.Linq;
+
+class Program
+{
+    static readonly string API_URL = "http://localhost:8080/seal-recognition";
+    static readonly string imagePath = "./demo.jpg";
+    static readonly string layoutImagePath = "./layout.jpg";
+
+    static async Task Main(string[] args)
+    {
+        var httpClient = new HttpClient();
+
+        // Encode the local image in Base64
+        byte[] imageBytes = File.ReadAllBytes(imagePath);
+        string image_data = Convert.ToBase64String(imageBytes);
+
+        var payload = new JObject{ { "image", image_data } }; // Base64-encoded file content or image URL
+        var content = new StringContent(payload.ToString(), Encoding.UTF8, "application/json");
+
+        // Call the API
+        HttpResponseMessage response = await httpClient.PostAsync(API_URL, content);
+        response.EnsureSuccessStatusCode();
+
+        // Process the returned data
+        string responseBody = await response.Content.ReadAsStringAsync();
+        JObject jsonResponse = JObject.Parse(responseBody);
+
+        string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString();
+        byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image);
+        File.WriteAllBytes(layoutImagePath, layoutImageBytes);
+        Console.WriteLine($"Output image saved at {layoutImagePath}");
+
+        Console.WriteLine("\nDetected seal impressions:");
+        Console.WriteLine(jsonResponse["result"]["sealImpressions"].ToString());
+    }
+}
+```
+
+</details>
+
+<details>
+<summary>Node.js</summary>
+
+```js
+const axios = require('axios');
+const fs = require('fs');
+
+const API_URL = 'http://localhost:8080/seal-recognition'
+const imagePath = './demo.jpg'
+const layoutImagePath = "./layout.jpg";
+
+let config = {
+   method: 'POST',
+   maxBodyLength: Infinity,
+   url: API_URL,
+   data: JSON.stringify({
+    'image': encodeImageToBase64(imagePath)  // Base64-encoded file content or image URL
+  })
+};
+
+// Encode the local image in Base64
+function encodeImageToBase64(filePath) {
+  const bitmap = fs.readFileSync(filePath);
+  return Buffer.from(bitmap).toString('base64');
+}
+
+// Call the API
+axios.request(config)
+.then((response) => {
+    // Process the returned data
+    const result = response.data["result"];
+
+    const imageBuffer = Buffer.from(result["layoutImage"], 'base64');
+    fs.writeFile(layoutImagePath, imageBuffer, (err) => {
+      if (err) throw err;
+      console.log(`Output image saved at ${layoutImagePath}`);
+    });
+
+    console.log("\nDetected seal impressions:");
+    console.log(result["sealImpressions"]);
+})
+.catch((error) => {
+  console.log(error);
+});
+```
+
+</details>
+
+<details>
+<summary>PHP</summary>
+
+```php
+<?php
+
+$API_URL = "http://localhost:8080/seal-recognition"; // 服务URL
+$image_path = "./demo.jpg";
+$layout_image_path = "./layout.jpg";
+
+// Encode the local image in Base64
+$image_data = base64_encode(file_get_contents($image_path));
+$payload = array("image" => $image_data); // Base64-encoded file content or image URL
+
+// Call the API
+$ch = curl_init($API_URL);
+curl_setopt($ch, CURLOPT_POST, true);
+curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
+curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+$response = curl_exec($ch);
+curl_close($ch);
+
+// Process the returned data
+$result = json_decode($response, true)["result"];
+
+file_put_contents($layout_image_path, base64_decode($result["layoutImage"]));
+echo "Output image saved at " . $layout_image_path . "\n";
+
+echo "\nDetected seal impressions:\n";
+print_r($result["sealImpressions"]);
+
+?>
+```
+
+</details>
+</details>
+<br/>
 
 📱 **Edge Deployment**: Edge deployment places computation and data processing on the user's device itself, so the device can process data directly without relying on a remote server. PaddleX supports deploying models on edge devices such as Android. For the detailed edge deployment procedure, please refer to the [PaddleX Edge Deployment Guide](../../../pipeline_deploy/lite_deploy.md).
 You can choose an appropriate method to deploy the pipeline as needed, and then proceed with subsequent AI application integration.

+ 456 - 31
docs/pipeline_usage/tutorials/ocr_pipelines/seal_recognition_en.md

@@ -1,22 +1,22 @@
 [简体中文](seal_recognition.md) | English
-  
-# Tutorial for Using Seal Text Recognition Pipeline  
-  
-## 1. Introduction to the Seal Text Recognition Pipeline  
-Seal text recognition is a technology that automatically extracts and recognizes seal content from documents or images. The recognition of seal text is part of document processing and has various applications in many scenarios, such as contract comparison, inventory access approval, and invoice reimbursement approval.  
-  
-![](https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/doc_images/practical_tutorial/PP-ChatOCRv3_doc_seal/01.png)  
-  
-The **Seal Text Recognition** pipeline includes a layout area analysis module, a seal text detection module, and a text recognition module.  
-  
-**If you prioritize model accuracy, please choose a model with higher accuracy. If you prioritize inference speed, please choose a model with faster inference. If you prioritize model storage size, please choose a model with a smaller storage footprint.**  
-  
-<details>  
-   <summary> 👉 Detailed Model List </summary>  
-  
+
+# Tutorial for Using Seal Text Recognition Pipeline
+
+## 1. Introduction to the Seal Text Recognition Pipeline
+Seal text recognition is a technology that automatically extracts and recognizes seal content from documents or images. The recognition of seal text is part of document processing and has various applications in many scenarios, such as contract comparison, inventory access approval, and invoice reimbursement approval.
+
+![](https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/doc_images/practical_tutorial/PP-ChatOCRv3_doc_seal/01.png)
+
+The **Seal Text Recognition** pipeline includes a layout area analysis module, a seal text detection module, and a text recognition module.
+
+**If you prioritize model accuracy, please choose a model with higher accuracy. If you prioritize inference speed, please choose a model with faster inference. If you prioritize model storage size, please choose a model with a smaller storage footprint.**
+
+<details>
+   <summary> 👉 Detailed Model List </summary>
+
 
 **Layout Analysis Module Models:**
-  
+
 |Model Name|mAP (%)|GPU Inference Time (ms)|CPU Inference Time (ms)|Model Size (M)|
 |-|-|-|-|-|
 |PicoDet-L_layout_3cls|89.3|15.7425|159.771|22.6 M|
@@ -45,7 +45,7 @@ The **Seal Text Recognition** pipeline includes a layout area analysis module, a
 
 **Note: The evaluation set for the above accuracy indicators is a self-built Chinese dataset from PaddleOCR, covering various scenarios such as street scenes, web images, documents, and handwriting. The text recognition subset includes 11,000 images. The GPU inference time for all models above is based on an NVIDIA Tesla T4 machine with a precision type of FP32. The CPU inference speed is based on an Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz with 8 threads, and the precision type is also FP32.**
 
-</details>  
+</details>
 
 ## 2.  Quick Start
 The pre-trained pipelines provided by PaddleX let you experience their effects quickly. You can try the seal text recognition pipeline online, or use the command line or Python locally to experience its effects.
@@ -64,7 +64,7 @@ Before using the seal text recognition production line locally, please ensure th
 A single command lets you quickly experience the seal text recognition pipeline. Use the [test file](https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/seal_text_det.png) and replace `--input` with a local path to run prediction:
 
 ```
-paddlex --pipeline seal_recognition --input seal_text_det.png --device gpu:0 --save_path output 
+paddlex --pipeline seal_recognition --input seal_text_det.png --device gpu:0 --save_path output
 ```
 
 Parameter description:
@@ -87,13 +87,13 @@ paddlex --get_pipeline_config seal_recognition
 After execution, the seal text recognition pipeline configuration file will be saved in the current directory. If you want to customize the save location, you can execute the following command (assuming the custom save location is `./my_path`):
 
 ```bash
-paddlex --get_pipeline_config seal_recognition --save_path ./my_path --save_path output 
+paddlex --get_pipeline_config seal_recognition --save_path ./my_path
 ```
 
 After obtaining the pipeline configuration file, replace `--pipeline` with the configuration file's save path to apply the configuration. For example, if the configuration file is saved at `./seal_recognition.yaml`, simply execute:
 
 ```bash
-paddlex --pipeline ./seal_recognition.yaml --input seal_text_det.png --save_path output 
+paddlex --pipeline ./seal_recognition.yaml --input seal_text_det.png --save_path output
 ```
 Parameters such as `--model` and `--device` no longer need to be specified, as the values in the configuration file are used. If these parameters are specified anyway, the specified values take precedence.
 
@@ -217,7 +217,7 @@ After running, the result obtained is:
 
 ![](https://raw.githubusercontent.com/cuicheng01/PaddleX_doc_images/main/images/pipelines/seal_recognition/03.png)
 
-The visualized image not saved by default. You can customize the save path through `--save_path`, and then all results will be saved in the specified path. 
+The visualized image is not saved by default. You can customize the save path through `--save_path`; all results will then be saved to the specified path.
 
 
 ###  2.2 Python Script Integration
@@ -230,7 +230,7 @@ pipeline = create_pipeline(pipeline="seal_recognition")
 
 output = pipeline.predict("seal_text_det.png")
 for res in output:
-    res.print() 
+    res.print()
     res.save_to_img("./output/") # Save the results in img
 ```
 
@@ -292,9 +292,442 @@ In addition, PaddleX also offers three other deployment methods, detailed as fol
 
 ☁️ **Service Deployment**: Service deployment is a common form of deployment in production environments. By encapsulating inference functions as services, clients can access these services through network requests to obtain inference results. PaddleX enables users to deploy pipelines as services at low cost. For the detailed service deployment procedure, please refer to the [PaddleX Service Deployment Guide](../../../pipeline_deploy/service_deploy.md).
 
-Here are API references and examples of calling multilingual services:
+Below are the API references and multi-language service invocation examples:
+
+<details>
+<summary>API Reference</summary>
+
+For all operations provided by the service:
+
+- Both the response body and the request body for POST requests are JSON data (JSON objects).
+- When the request is processed successfully, the response status code is `200`, and the response body properties are as follows:
+
+    | Name | Type | Description |
+    |------|------|-------------|
+    |`errorCode`|`integer`|Error code. Fixed as `0`.|
+    |`errorMsg`|`string`|Error message. Fixed as `"Success"`.|
+
+    The response body may also have a `result` property of type `object`, which stores the operation result information.
+
+- When the request is not processed successfully, the response body properties are as follows:
+
+    | Name | Type | Description |
+    |------|------|-------------|
+    |`errorCode`|`integer`|Error code. Same as the response status code.|
+    |`errorMsg`|`string`|Error message.|
+
+Operations provided by the service:
+
+- **`infer`**
+
+    Obtain seal text recognition results from an image.
+
+    `POST /seal-recognition`
+
+    - Request body properties:
+
+        | Name | Type | Description | Required |
+        |------|------|-------------|----------|
+        |`image`|`string`|The URL of an image file accessible by the service or the Base64 encoded result of the image file content.|Yes|
+        |`inferenceParams`|`object`|Inference parameters.|No|
+
+        Properties of `inferenceParams`:
+
+        | Name | Type | Description | Required |
+        |------|------|-------------|----------|
+        |`maxLongSide`|`integer`|During inference, if the length of the longer side of the input image for the text detection model is greater than `maxLongSide`, the image will be scaled so that the length of the longer side equals `maxLongSide`.|No|
+
+    - When the request is processed successfully, the `result` of the response body has the following properties:
+
+        | Name | Type | Description |
+        |------|------|-------------|
+        |`sealImpressions`|`array`|Seal text recognition results.|
+        |`layoutImage`|`string`|Layout area detection result image. The image is in JPEG format and encoded using Base64.|
+
+        Each element in `sealImpressions` is an `object` with the following properties:
+
+        | Name | Type | Description |
+        |------|------|-------------|
+        |`texts`|`array`|Positions, contents, and scores of texts.|
+
+        Each element in `texts` is an `object` with the following properties:
+
+        | Name | Type | Description |
+        |------|------|-------------|
+        |`poly`|`array`|Text position. Elements in the array are the vertex coordinates of the polygon enclosing the text.|
+        |`text`|`string`|Text content.|
+        |`score`|`number`|Text recognition score.|
+
+</details>
+
+<details>
+<summary>Multi-Language Service Invocation Examples</summary>
+
+<details>
+<summary>Python</summary>
+
+```python
+import base64
+import requests
+
+API_URL = "http://localhost:8080/seal-recognition"
+image_path = "./demo.jpg"
+layout_image_path = "./layout.jpg"
+
+with open(image_path, "rb") as file:
+    image_bytes = file.read()
+    image_data = base64.b64encode(image_bytes).decode("ascii")
+
+payload = {"image": image_data}
+
+response = requests.post(API_URL, json=payload)
+
+assert response.status_code == 200
+result = response.json()["result"]
+with open(layout_image_path, "wb") as file:
+    file.write(base64.b64decode(result["layoutImage"]))
+print(f"Output image saved at {layout_image_path}")
+print("\nDetected seal impressions:")
+print(result["sealImpressions"])
+```
+
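+Because each element of `sealImpressions` holds its own `texts` array (see the response schema above), the nested loop below (a minimal sketch continuing the example) prints each recognized text together with its score:
+
+```python
+# Walk the nested structure: one entry per seal impression, each with its texts.
+for i, impression in enumerate(result["sealImpressions"]):
+    print(f"Seal impression {i}:")
+    for text in impression["texts"]:
+        print(f"  {text['text']} (score: {text['score']})")
+```
+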
+</details>
+
+<details>
+<summary>C++</summary>
+
+```cpp
+#include <iostream>
+#include "cpp-httplib/httplib.h" // https://github.com/Huiyicc/cpp-httplib
+#include "nlohmann/json.hpp" // https://github.com/nlohmann/json
+#include "base64.hpp" // https://github.com/tobiaslocker/base64
+
+int main() {
+    httplib::Client client("localhost:8080");
+    const std::string imagePath = "./demo.jpg";
+    const std::string layoutImagePath = "./layout.jpg";
+
+    httplib::Headers headers = {
+        {"Content-Type", "application/json"}
+    };
+
+    std::ifstream file(imagePath, std::ios::binary | std::ios::ate);
+    std::streamsize size = file.tellg();
+    file.seekg(0, std::ios::beg);
+
+    std::vector<char> buffer(size);
+    if (!file.read(buffer.data(), size)) {
+        std::cerr << "Error reading file." << std::endl;
+        return 1;
+    }
+    std::string bufferStr(reinterpret_cast<const char*>(buffer.data()), buffer.size());
+    std::string encodedImage = base64::to_base64(bufferStr);
+
+    nlohmann::json jsonObj;
+    jsonObj["image"] = encodedImage;
+    std::string body = jsonObj.dump();
+
+    auto response = client.Post("/seal-recognition", headers, body, "application/json");
+    if (response && response->status == 200) {
+        nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
+        auto result = jsonResponse["result"];
+
+        encodedImage = result["layoutImage"];
+        std::string decodedString = base64::from_base64(encodedImage);
+        std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end());
+        std::ofstream outputLayoutFile(layoutImagePath, std::ios::binary | std::ios::out);
+        if (outputLayoutFile.is_open()) {
+            outputLayoutFile.write(reinterpret_cast<char*>(decodedLayoutImage.data()), decodedLayoutImage.size());
+            outputLayoutFile.close();
+            std::cout << "Output image saved at " << layoutImagePath << std::endl;
+        } else {
+            std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl;
+        }
+
+        auto impressions = result["sealImpressions"];
+        std::cout << "\nDetected seal impressions:" << std::endl;
+        for (const auto& impression : impressions) {
+            std::cout << impression << std::endl;
+        }
+    } else {
+        std::cout << "Failed to send HTTP request." << std::endl;
+        return 1;
+    }
+
+    return 0;
+}
+```
+
+</details>
+
+<details>
+<summary>Java</summary>
+
+```java
+import okhttp3.*;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Base64;
+
+public class Main {
+    public static void main(String[] args) throws IOException {
+        String API_URL = "http://localhost:8080/seal-recognition";
+        String imagePath = "./demo.jpg";
+        String layoutImagePath = "./layout.jpg";
+
+        File file = new File(imagePath);
+        byte[] fileContent = java.nio.file.Files.readAllBytes(file.toPath());
+        String imageData = Base64.getEncoder().encodeToString(fileContent);
+
+        ObjectMapper objectMapper = new ObjectMapper();
+        ObjectNode params = objectMapper.createObjectNode();
+        params.put("image", imageData);
+
+        OkHttpClient client = new OkHttpClient();
+        MediaType JSON = MediaType.Companion.get("application/json; charset=utf-8");
+        RequestBody body = RequestBody.Companion.create(params.toString(), JSON);
+        Request request = new Request.Builder()
+                .url(API_URL)
+                .post(body)
+                .build();
+
+        try (Response response = client.newCall(request).execute()) {
+            if (response.isSuccessful()) {
+                String responseBody = response.body().string();
+                JsonNode resultNode = objectMapper.readTree(responseBody);
+                JsonNode result = resultNode.get("result");
+                String layoutBase64Image = result.get("layoutImage").asText();
+                JsonNode impressions = result.get("sealImpressions");
+
+                byte[] imageBytes = Base64.getDecoder().decode(layoutBase64Image);
+                try (FileOutputStream fos = new FileOutputStream(layoutImagePath)) {
+                    fos.write(imageBytes);
+                }
+                System.out.println("Output image saved at " + layoutImagePath);
+
+                System.out.println("\nDetected seal impressions: " + impressions.toString());
+            } else {
+                System.err.println("Request failed with code: " + response.code());
+            }
+        }
+    }
+}
+```
+
+</details>
+
+<details>
+<summary>Go</summary>
+
+```go
+package main
+
+import (
+    "bytes"
+    "encoding/base64"
+    "encoding/json"
+    "fmt"
+    "io/ioutil"
+    "net/http"
+)
+
+func main() {
+    API_URL := "http://localhost:8080/seal-recognition"
+    imagePath := "./demo.jpg"
+    layoutImagePath := "./layout.jpg"
+
+    imageBytes, err := ioutil.ReadFile(imagePath)
+    if err != nil {
+        fmt.Println("Error reading image file:", err)
+        return
+    }
+    imageData := base64.StdEncoding.EncodeToString(imageBytes)
+
+    payload := map[string]string{"image": imageData}
+    payloadBytes, err := json.Marshal(payload)
+    if err != nil {
+        fmt.Println("Error marshaling payload:", err)
+        return
+    }
+
+    client := &http.Client{}
+    req, err := http.NewRequest("POST", API_URL, bytes.NewBuffer(payloadBytes))
+    if err != nil {
+        fmt.Println("Error creating request:", err)
+        return
+    }
+
+    res, err := client.Do(req)
+    if err != nil {
+        fmt.Println("Error sending request:", err)
+        return
+    }
+    defer res.Body.Close()
+
+    body, err := ioutil.ReadAll(res.Body)
+    if err != nil {
+        fmt.Println("Error reading response body:", err)
+        return
+    }
+    type Response struct {
+        Result struct {
+            LayoutImage      string   `json:"layoutImage"`
+            Impressions []map[string]interface{} `json:"sealImpressions"`
+        } `json:"result"`
+    }
+    var respData Response
+    err = json.Unmarshal([]byte(string(body)), &respData)
+    if err != nil {
+        fmt.Println("Error unmarshaling response body:", err)
+        return
+    }
+
+    layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage)
+    if err != nil {
+        fmt.Println("Error decoding base64 image data:", err)
+        return
+    }
+    err = ioutil.WriteFile(layoutImagePath, layoutImageData, 0644)
+    if err != nil {
+        fmt.Println("Error writing image to file:", err)
+        return
+    }
+    fmt.Printf("Image saved at %s.jpg\n", layoutImagePath)
+
+    fmt.Println("\nDetected seal impressions:")
+    for _, impression := range respData.Result.Impressions {
+        fmt.Println(impression)
+    }
+}
+```
+
+</details>
+
+<details>
+<summary>C#</summary>
+
+```csharp
+using System;
+using System.IO;
+using System.Net.Http;
+using System.Net.Http.Headers;
+using System.Text;
+using System.Threading.Tasks;
+using Newtonsoft.Json.Linq;
+
+class Program
+{
+    static readonly string API_URL = "http://localhost:8080/seal-recognition";
+    static readonly string imagePath = "./demo.jpg";
+    static readonly string layoutImagePath = "./layout.jpg";
+
+    static async Task Main(string[] args)
+    {
+        var httpClient = new HttpClient();
+
+        byte[] imageBytes = File.ReadAllBytes(imagePath);
+        string image_data = Convert.ToBase64String(imageBytes);
+
+        var payload = new JObject{ { "image", image_data } };
+        var content = new StringContent(payload.ToString(), Encoding.UTF8, "application/json");
+
+        HttpResponseMessage response = await httpClient.PostAsync(API_URL, content);
+        response.EnsureSuccessStatusCode();
+
+        string responseBody = await response.Content.ReadAsStringAsync();
+        JObject jsonResponse = JObject.Parse(responseBody);
+
+        string layoutBase64Image = jsonResponse["result"]["layoutImage"].ToString();
+        byte[] layoutImageBytes = Convert.FromBase64String(layoutBase64Image);
+        File.WriteAllBytes(layoutImagePath, layoutImageBytes);
+        Console.WriteLine($"Output image saved at {layoutImagePath}");
+
+        Console.WriteLine("\nDetected seal impressions:");
+        Console.WriteLine(jsonResponse["result"]["sealImpressions"].ToString());
+    }
+}
+```
+
+</details>
+
+<details>
+<summary>Node.js</summary>
+
+```js
+const axios = require('axios');
+const fs = require('fs');
+
+const API_URL = 'http://localhost:8080/seal-recognition'
+const imagePath = './demo.jpg'
+const layoutImagePath = "./layout.jpg";
+
+let config = {
+   method: 'POST',
+   maxBodyLength: Infinity,
+   url: API_URL,
+   data: JSON.stringify({
+    'image': encodeImageToBase64(imagePath)
+  })
+};
+
+function encodeImageToBase64(filePath) {
+  const bitmap = fs.readFileSync(filePath);
+  return Buffer.from(bitmap).toString('base64');
+}
+
+axios.request(config)
+.then((response) => {
+    const result = response.data["result"];
+
+    const imageBuffer = Buffer.from(result["layoutImage"], 'base64');
+    fs.writeFile(layoutImagePath, imageBuffer, (err) => {
+      if (err) throw err;
+      console.log(`Output image saved at ${layoutImagePath}`);
+    });
+
+    console.log("\nDetected seal impressions:");
+    console.log(result["sealImpressions"]);
+})
+.catch((error) => {
+  console.log(error);
+});
+```
+
+</details>
+
+<details>
+<summary>PHP</summary>
+
+```php
+<?php
 
+$API_URL = "http://localhost:8080/seal-recognition";
+$image_path = "./demo.jpg";
+$layout_image_path = "./layout.jpg";
 
+$image_data = base64_encode(file_get_contents($image_path));
+$payload = array("image" => $image_data);
+
+$ch = curl_init($API_URL);
+curl_setopt($ch, CURLOPT_POST, true);
+curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
+curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
+$response = curl_exec($ch);
+curl_close($ch);
+
+$result = json_decode($response, true)["result"];
+
+file_put_contents($layout_image_path, base64_decode($result["layoutImage"]));
+echo "Output image saved at " . $layout_image_path . "\n";
+
+echo "\nDetected seal impressions:\n";
+print_r($result["sealImpressions"]);
+
+?>
+```
+
+</details>
+</details>
+<br/>
 
 ## 4.  Secondary development
 If the default model weights provided by the seal text recognition pipeline do not meet your accuracy or speed requirements, you can try fine-tuning the existing models with data from your own domain or application scenario to improve recognition performance in your scenario.
@@ -344,11 +777,3 @@ paddlex --pipeline seal_recognition --input seal_text_det.png --device npu:0 --s
 ```
 
 If you want to use the seal text recognition pipeline on a wider range of hardware, please refer to the [PaddleX Multi Hardware Usage Guide](../../../other_devices_support/installation_other_devices_en.md).
-
-
-
-
-
-
-
-

+ 11 - 11
docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition.md

@@ -431,11 +431,11 @@ int main() {
 
         encodedImage = result["layoutImage"];
         decodedString = base64::from_base64(encodedImage);
-        std::vector<unsigned char> decodedTableImage(decodedString.begin(), decodedString.end());
-        std::ofstream outputTableFile(layoutImagePath, std::ios::binary | std::ios::out);
-        if (outputTableFile.is_open()) {
-            outputTableFile.write(reinterpret_cast<char*>(decodedTableImage.data()), decodedTableImage.size());
-            outputTableFile.close();
+        std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end());
+        std::ofstream outputLayoutFile(layoutImagePath, std::ios::binary | std::ios::out);
+        if (outputLayoutFile.is_open()) {
+            outputLayoutFile.write(reinterpret_cast<char*>(decodedLayoutImage.data()), decodedLayoutImage.size());
+            outputLayoutFile.close();
             std::cout << "Output image saved at " << layoutImagePath << std::endl;
         } else {
             std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl;
@@ -443,8 +443,8 @@ int main() {
 
         auto tables = result["tables"];
         std::cout << "\nDetected tables:" << std::endl;
-        for (const auto& category : tables) {
-            std::cout << category << std::endl;
+        for (const auto& table : tables) {
+            std::cout << table << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -589,7 +589,7 @@ func main() {
     type Response struct {
         Result struct {
             OcrImage      string   `json:"ocrImage"`
-            TableImage      string   `json:"layoutImage"`
+            LayoutImage      string   `json:"layoutImage"`
             Tables []map[string]interface{} `json:"tables"`
         } `json:"result"`
     }
@@ -612,7 +612,7 @@ func main() {
     }
     fmt.Printf("Image saved at %s.jpg\n", ocrImagePath)
 
-    layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.TableImage)
+    layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage)
     if err != nil {
         fmt.Println("Error decoding base64 image data:", err)
         return
@@ -625,8 +625,8 @@ func main() {
     fmt.Printf("Image saved at %s.jpg\n", layoutImagePath)
 
     fmt.Println("\nDetected tables:")
-    for _, category := range respData.Result.Tables {
-        fmt.Println(category)
+    for _, table := range respData.Result.Tables {
+        fmt.Println(table)
     }
 }
 ```

+ 12 - 12
docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition_en.md

@@ -268,7 +268,7 @@ Operations provided by the service:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>
@@ -359,11 +359,11 @@ int main() {
 
         encodedImage = result["layoutImage"];
         decodedString = base64::from_base64(encodedImage);
-        std::vector<unsigned char> decodedTableImage(decodedString.begin(), decodedString.end());
-        std::ofstream outputTableFile(layoutImagePath, std::ios::binary | std::ios::out);
-        if (outputTableFile.is_open()) {
-            outputTableFile.write(reinterpret_cast<char*>(decodedTableImage.data()), decodedTableImage.size());
-            outputTableFile.close();
+        std::vector<unsigned char> decodedLayoutImage(decodedString.begin(), decodedString.end());
+        std::ofstream outputLayoutFile(layoutImagePath, std::ios::binary | std::ios::out);
+        if (outputLayoutFile.is_open()) {
+            outputLayoutFile.write(reinterpret_cast<char*>(decodedLayoutImage.data()), decodedLayoutImage.size());
+            outputLayoutFile.close();
             std::cout << "Output image saved at " << layoutImagePath << std::endl;
         } else {
             std::cerr << "Unable to open file for writing: " << layoutImagePath << std::endl;
@@ -371,8 +371,8 @@ int main() {
 
         auto tables = result["tables"];
         std::cout << "\nDetected tables:" << std::endl;
-        for (const auto& category : tables) {
-            std::cout << category << std::endl;
+        for (const auto& table : tables) {
+            std::cout << table << std::endl;
         }
     } else {
         std::cout << "Failed to send HTTP request." << std::endl;
@@ -511,7 +511,7 @@ func main() {
     type Response struct {
         Result struct {
             OcrImage      string   `json:"ocrImage"`
-            TableImage      string   `json:"layoutImage"`
+            LayoutImage      string   `json:"layoutImage"`
             Tables []map[string]interface{} `json:"tables"`
         } `json:"result"`
     }
@@ -534,7 +534,7 @@ func main() {
     }
     fmt.Printf("Image saved at %s.jpg\n", ocrImagePath)
 
-    layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.TableImage)
+    layoutImageData, err := base64.StdEncoding.DecodeString(respData.Result.LayoutImage)
     if err != nil {
         fmt.Println("Error decoding base64 image data:", err)
         return
@@ -547,8 +547,8 @@ func main() {
     fmt.Printf("Image saved at %s.jpg\n", layoutImagePath)
 
     fmt.Println("\nDetected tables:")
-    for _, category := range respData.Result.Tables {
-        fmt.Println(category)
+    for _, table := range respData.Result.Tables {
+        fmt.Println(table)
     }
 }
 ```

+ 1 - 1
docs/pipeline_usage/tutorials/time_series_pipelines/time_series_anomaly_detection_en.md

@@ -219,7 +219,7 @@ Operations provided by the service:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>

+ 1 - 1
docs/pipeline_usage/tutorials/time_series_pipelines/time_series_classification_en.md

@@ -212,7 +212,7 @@ Operations provided by the service:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>

+ 1 - 1
docs/pipeline_usage/tutorials/time_series_pipelines/time_series_forecasting_en.md

@@ -221,7 +221,7 @@ Operations provided by the service are as follows:
 </details>
 
 <details>
-<summary>Multilingual Service Invocation Examples</summary>
+<summary>Multi-Language Service Invocation Examples</summary>
 
 <details>
 <summary>Python</summary>

+ 29 - 2
paddlex/inference/pipelines/base.py

@@ -13,13 +13,40 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional
+from contextvars import ContextVar, copy_context
+from typing import TypedDict, Type
 
 from ...utils.subclass_register import AutoRegisterABCMetaClass
 from ..models import create_predictor
 
+pipeline_info_list_var = ContextVar("pipeline_info_list", default=None)
 
-class BasePipeline(ABC, metaclass=AutoRegisterABCMetaClass):
+
+class _PipelineInfo(TypedDict):
+    cls: Type["BasePipeline"]
+
+
+class _PipelineMetaClass(AutoRegisterABCMetaClass):
+    def __new__(mcs, name, bases, attrs):
+        def _patch_init_func(init_func):
+            def _patched___init__(self, *args, **kwargs):
+                ctx = copy_context()
+                pipeline_info_list = [
+                    *ctx.get(pipeline_info_list_var, []),
+                    _PipelineInfo(cls=type(self)),
+                ]
+                ctx.run(pipeline_info_list_var.set, pipeline_info_list)
+                ret = ctx.run(init_func, self, *args, **kwargs)
+                return ret
+
+            return _patched___init__
+
+        cls = super().__new__(mcs, name, bases, attrs)
+        cls.__init__ = _patch_init_func(cls.__init__)
+        return cls
+
+
+class BasePipeline(ABC, metaclass=_PipelineMetaClass):
     """Base Pipeline"""
 
     __is_base = True
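
The patched `__init__` above runs each pipeline constructor in a copied `contextvars` context whose `pipeline_info_list` gains one entry, so a pipeline constructed inside another pipeline's constructor can see the chain of pipeline classes above it, while the caller's context is left untouched. A standalone sketch of the same pattern (class names are illustrative, not from PaddleX):

```python
from contextvars import ContextVar, copy_context

pipeline_info_list_var = ContextVar("pipeline_info_list", default=None)

def traced(init_func):
    def patched(self, *args, **kwargs):
        ctx = copy_context()
        # Append this instance's class to the list inherited from enclosing constructors.
        infos = [*(ctx.get(pipeline_info_list_var) or []), {"cls": type(self)}]
        ctx.run(pipeline_info_list_var.set, infos)
        return ctx.run(init_func, self, *args, **kwargs)
    return patched

class Inner:
    @traced
    def __init__(self):
        # Inside a constructor, the full construction chain is visible.
        print([info["cls"].__name__ for info in pipeline_info_list_var.get()])

class Outer:
    @traced
    def __init__(self):
        self.inner = Inner()

Outer()                              # the nested constructor prints ['Outer', 'Inner']
print(pipeline_info_list_var.get())  # None: the caller's context is unchanged
```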

+ 28 - 1
paddlex/inference/pipelines/serving/_pipeline_apps/__init__.py

@@ -17,8 +17,11 @@ from typing import Any, Dict
 from fastapi import FastAPI
 
 from ...base import BasePipeline
+from ...formula_recognition import FormulaRecognitionPipeline
+from ...layout_parsing import LayoutParsingPipeline
 from ...ocr import OCRPipeline
 from ...ppchatocrv3 import PPChatOCRPipeline
+from ...seal_recognition import SealOCRPipeline
 from ...single_model_pipeline import (
     AnomalyDetection,
     ImageClassification,
@@ -34,6 +37,8 @@ from ...single_model_pipeline import (
 from ...table_recognition import TableRecPipeline
 from ..app import create_app_config
 from .anomaly_detection import create_pipeline_app as create_anomaly_detection_app
+from .formula_recognition import create_pipeline_app as create_formula_recognition_app
+from .layout_parsing import create_pipeline_app as create_layout_parsing_app
 from .image_classification import create_pipeline_app as create_image_classification_app
 from .instance_segmentation import (
     create_pipeline_app as create_instance_segmentation_app,
@@ -44,9 +49,13 @@ from .multi_label_image_classification import (
 from .object_detection import create_pipeline_app as create_object_detection_app
 from .ocr import create_pipeline_app as create_ocr_app
 from .ppchatocrv3 import create_pipeline_app as create_ppchatocrv3_app
+from .seal_recognition import create_pipeline_app as create_seal_recognition_app
 from .semantic_segmentation import (
     create_pipeline_app as create_semantic_segmentation_app,
 )
+from .small_object_detection import (
+    create_pipeline_app as create_small_object_detection_app,
+)
 from .table_recognition import create_pipeline_app as create_table_recognition_app
 from .ts_ad import create_pipeline_app as create_ts_ad_app
 from .ts_cls import create_pipeline_app as create_ts_cls_app
@@ -118,7 +127,7 @@ def create_pipeline_app(
     elif pipeline_name == "small_object_detection":
         if not isinstance(pipeline, SmallObjDet):
             raise TypeError("Expected `pipeline` to be an instance of `SmallObjDet`.")
-        return create_object_detection_app(pipeline, app_config)
+        return create_small_object_detection_app(pipeline, app_config)
     elif pipeline_name == "anomaly_detection":
         if not isinstance(pipeline, AnomalyDetection):
             raise TypeError(
@@ -131,6 +140,24 @@ def create_pipeline_app(
                 "Expected `pipeline` to be an instance of `PPChatOCRPipeline`."
             )
         return create_ppchatocrv3_app(pipeline, app_config)
+    elif pipeline_name == "seal_recognition":
+        if not isinstance(pipeline, SealOCRPipeline):
+            raise TypeError(
+                "Expected `pipeline` to be an instance of `SealOCRPipeline`."
+            )
+        return create_seal_recognition_app(pipeline, app_config)
+    elif pipeline_name == "formula_recognition":
+        if not isinstance(pipeline, FormulaRecognitionPipeline):
+            raise TypeError(
+                "Expected `pipeline` to be an instance of `FormulaRecognitionPipeline`."
+            )
+        return create_formula_recognition_app(pipeline, app_config)
+    elif pipeline_name == "layout_parsing":
+        if not isinstance(pipeline, LayoutParsingPipeline):
+            raise TypeError(
+                "Expected `pipeline` to be an instance of `LayoutParsingPipeline`."
+            )
+        return create_layout_parsing_app(pipeline, app_config)
     else:
         if BasePipeline.get(pipeline_name):
             raise ValueError(

+ 1 - 1
paddlex/inference/pipelines/serving/_pipeline_apps/anomaly_detection.py

@@ -41,7 +41,7 @@ def create_pipeline_app(pipeline: AnomalyDetection, app_config: AppConfig) -> Fa
     )
 
     @app.post(
-        "/anomaly-detection",
+        "/image-anomaly-detection",
         operation_id="infer",
         responses={422: {"model": Response}},
     )

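Clients of the image anomaly detection app now POST to `/image-anomaly-detection` instead of `/anomaly-detection`. A minimal request sketch (the port, the Base64 `image` field, and the response envelope are assumptions based on the other serving apps):

    import base64

    import requests

    with open("demo.jpg", "rb") as f:
        payload = {"image": base64.b64encode(f.read()).decode("ascii")}
    resp = requests.post("http://localhost:8080/image-anomaly-detection", json=payload)
    print(resp.json()["result"])
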
+ 107 - 0
paddlex/inference/pipelines/serving/_pipeline_apps/formula_recognition.py

@@ -0,0 +1,107 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Optional
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, TypeAlias
+
+from .....utils import logging
+from ...formula_recognition import FormulaRecognitionPipeline
+from .. import utils as serving_utils
+from ..app import AppConfig, create_app
+from ..models import Response, ResultResponse
+
+
+class InferenceParams(BaseModel):
+    maxLongSide: Optional[Annotated[int, Field(gt=0)]] = None
+
+
+class InferRequest(BaseModel):
+    image: str
+    inferenceParams: Optional[InferenceParams] = None
+
+
+Point: TypeAlias = Annotated[List[float], Field(min_length=2, max_length=2)]
+Polygon: TypeAlias = Annotated[List[Point], Field(min_length=3)]
+
+
+class Formula(BaseModel):
+    poly: Polygon
+    latex: str
+
+
+class InferResult(BaseModel):
+    formulas: List[Formula]
+    image: str
+
+
+def create_pipeline_app(
+    pipeline: FormulaRecognitionPipeline, app_config: AppConfig
+) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @app.post(
+        "/formula-recognition",
+        operation_id="infer",
+        responses={422: {"model": Response}},
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        if request.inferenceParams:
+            max_long_side = request.inferenceParams.maxLongSide
+            if max_long_side:
+                raise HTTPException(
+                    status_code=422,
+                    detail="`maxLongSide` is currently not supported.",
+                )
+
+        try:
+            file_bytes = await serving_utils.get_raw_bytes(
+                request.image, aiohttp_session
+            )
+            image = serving_utils.image_bytes_to_array(file_bytes)
+
+            result = (await pipeline.infer(image))[0]
+
+            formulas: List[Formula] = []
+            for poly, latex in zip(result["dt_polys"], result["rec_formula"]):
+                formulas.append(
+                    Formula(
+                        poly=poly,
+                        latex=latex,
+                    )
+                )
+            output_image_base64 = serving_utils.image_to_base64(result.img)
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=InferResult(
+                    formulas=formulas,
+                    image=output_image_base64,
+                ),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    return app

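A minimal client sketch for the new endpoint, following the `InferRequest` and `InferResult` models above (the host and port are assumptions):

    import base64

    import requests

    with open("formula.jpg", "rb") as f:
        payload = {"image": base64.b64encode(f.read()).decode("ascii")}
    resp = requests.post("http://localhost:8080/formula-recognition", json=payload)
    result = resp.json()["result"]
    for formula in result["formulas"]:
        print(formula["poly"], formula["latex"])
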
+ 275 - 0
paddlex/inference/pipelines/serving/_pipeline_apps/layout_parsing.py

@@ -0,0 +1,275 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import uuid
+from typing import Final, List, Literal, Optional, Tuple
+from urllib.parse import parse_qs, urlparse
+
+import cv2
+import numpy as np
+from fastapi import FastAPI, HTTPException
+from numpy.typing import ArrayLike
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, TypeAlias, assert_never
+
+from .....utils import logging
+from ...layout_parsing import LayoutParsingPipeline
+from .. import file_storage
+from .. import utils as serving_utils
+from ..app import AppConfig, create_app
+from ..models import Response, ResultResponse
+
+_DEFAULT_MAX_IMG_SIZE: Final[Tuple[int, int]] = (2000, 2000)
+_DEFAULT_MAX_NUM_IMGS: Final[int] = 10
+
+
+FileType: TypeAlias = Literal[0, 1]
+
+
+class InferenceParams(BaseModel):
+    maxLongSide: Optional[Annotated[int, Field(gt=0)]] = None
+
+
+class InferRequest(BaseModel):
+    file: str
+    fileType: Optional[FileType] = None
+    useImgOrientationCls: bool = True
+    useImgUnwrapping: bool = True
+    useSealTextDet: bool = True
+    inferenceParams: Optional[InferenceParams] = None
+
+
+BoundingBox: TypeAlias = Annotated[List[float], Field(min_length=4, max_length=4)]
+
+
+class LayoutElement(BaseModel):
+    bbox: BoundingBox
+    label: str
+    text: str
+    layoutType: Literal["single", "double"]
+    image: Optional[str] = None
+
+
+class LayoutParsingResult(BaseModel):
+    layoutElements: List[LayoutElement]
+
+
+class InferResult(BaseModel):
+    layoutParsingResults: List[LayoutParsingResult]
+
+
+def _generate_request_id() -> str:
+    return str(uuid.uuid4())
+
+
+def _infer_file_type(url: str) -> FileType:
+    # Is it more reliable to guess the file type based on the response headers?
+    SUPPORTED_IMG_EXTS: Final[List[str]] = [".jpg", ".jpeg", ".png"]
+
+    url_parts = urlparse(url)
+    ext = os.path.splitext(url_parts.path)[1]
+    # HACK: The support for BOS URLs with query params is implementation-based,
+    # not interface-based.
+    is_bos_url = (
+        re.fullmatch(r"(?:bj|bd|su|gz|cd|hkg|fwh|fsh)\.bcebos\.com", url_parts.netloc)
+        is not None
+    )
+    if is_bos_url and url_parts.query:
+        params = parse_qs(url_parts.query)
+        if (
+            "responseContentDisposition" not in params
+            or len(params["responseContentDisposition"]) != 1
+        ):
+            raise ValueError("`responseContentDisposition` not found")
+        match_ = re.match(
+            r"attachment;filename=(.*)", params["responseContentDisposition"][0]
+        )
+        if not match_ or match_.groups()[0] is None:
+            raise ValueError(
+                "Failed to extract the filename from `responseContentDisposition`"
+            )
+        ext = os.path.splitext(match_.groups()[0])[1]
+    ext = ext.lower()
+    if ext == ".pdf":
+        return 0
+    elif ext in SUPPORTED_IMG_EXTS:
+        return 1
+    else:
+        raise ValueError("Unsupported file type")
+
+
+def _bytes_to_arrays(
+    file_bytes: bytes,
+    file_type: FileType,
+    *,
+    max_img_size: Tuple[int, int],
+    max_num_imgs: int,
+) -> List[np.ndarray]:
+    if file_type == 0:
+        images = serving_utils.read_pdf(
+            file_bytes, resize=True, max_num_imgs=max_num_imgs
+        )
+    elif file_type == 1:
+        images = [serving_utils.image_bytes_to_array(file_bytes)]
+    else:
+        assert_never(file_type)
+    h, w = images[0].shape[0:2]
+    if w > max_img_size[0] or h > max_img_size[1]:
+        if w / h > max_img_size[0] / max_img_size[1]:
+            factor = max_img_size[0] / w
+        else:
+            factor = max_img_size[1] / h
+        images = [cv2.resize(img, (int(factor * w), int(factor * h))) for img in images]
+    return images
+
+
+def _postprocess_image(
+    img: ArrayLike,
+    request_id: str,
+    filename: str,
+    file_storage_config: file_storage.FileStorageConfig,
+) -> str:
+    key = f"{request_id}/{filename}"
+    ext = os.path.splitext(filename)[1]
+    img = np.asarray(img)
+    _, encoded_img = cv2.imencode(ext, img)
+    encoded_img = encoded_img.tobytes()
+    return file_storage.postprocess_file(
+        encoded_img, config=file_storage_config, key=key
+    )
+
+
+def create_pipeline_app(
+    pipeline: LayoutParsingPipeline, app_config: AppConfig
+) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    if "file_storage_config" in ctx.extra:
+        ctx.extra["file_storage_config"] = file_storage.parse_file_storage_config(
+            ctx.extra["file_storage_config"]
+        )
+    else:
+        ctx.extra["file_storage_config"] = file_storage.InMemoryStorageConfig()
+    ctx.extra.setdefault("max_img_size", _DEFAULT_MAX_IMG_SIZE)
+    ctx.extra.setdefault("max_num_imgs", _DEFAULT_MAX_NUM_IMGS)
+
+    @app.post(
+        "/layout-parsing",
+        operation_id="infer",
+        responses={422: {"model": Response}},
+        response_model_exclude_none=True,
+    )
+    async def _infer(
+        request: InferRequest,
+    ) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        request_id = _generate_request_id()
+
+        if request.fileType is None:
+            if serving_utils.is_url(request.file):
+                try:
+                    file_type = _infer_file_type(request.file)
+                except Exception as e:
+                    logging.exception(e)
+                    raise HTTPException(
+                        status_code=422,
+                        detail="The file type cannot be inferred from the URL. Please specify the file type explicitly.",
+                    )
+            else:
+                raise HTTPException(status_code=422, detail="Unknown file type")
+        else:
+            file_type = request.fileType
+
+        if request.inferenceParams:
+            max_long_side = request.inferenceParams.maxLongSide
+            if max_long_side:
+                raise HTTPException(
+                    status_code=422,
+                    detail="`maxLongSide` is currently not supported.",
+                )
+
+        try:
+            file_bytes = await serving_utils.get_raw_bytes(
+                request.file, aiohttp_session
+            )
+            images = await serving_utils.call_async(
+                _bytes_to_arrays,
+                file_bytes,
+                file_type,
+                max_img_size=ctx.extra["max_img_size"],
+                max_num_imgs=ctx.extra["max_num_imgs"],
+            )
+
+            result = await pipeline.infer(
+                images,
+                use_doc_image_ori_cls_model=request.useImgOrientationCls,
+                use_doc_image_unwarp_model=request.useImgUnwrapping,
+                use_seal_text_det_model=request.useSealTextDet,
+            )
+
+            layout_parsing_results: List[LayoutParsingResult] = []
+            for i, item in enumerate(result):
+                layout_elements: List[LayoutElement] = []
+                for j, subitem in enumerate(
+                    item["layout_parsing_result"]["parsing_result"]
+                ):
+                    dyn_keys = subitem.keys() - {"input_path", "layout_bbox", "layout"}
+                    if len(dyn_keys) != 1:
+                        raise RuntimeError(f"Unexpected result: {subitem}")
+                    label = next(iter(dyn_keys))
+                    if label in ("image", "figure", "img", "fig"):
+                        image_ = await serving_utils.call_async(
+                            _postprocess_image,
+                            subitem[label]["img"],
+                            request_id=request_id,
+                            filename=f"image_{i}_{j}.jpg",
+                            file_storage_config=ctx.extra["file_storage_config"],
+                        )
+                        text = subitem[label]["image_text"]
+                    else:
+                        image_ = None
+                        text = subitem[label]
+                    layout_elements.append(
+                        LayoutElement(
+                            bbox=subitem["layout_bbox"],
+                            label=label,
+                            text=text,
+                            layoutType=subitem["layout"],
+                            image=image_,
+                        )
+                    )
+                layout_parsing_results.append(
+                    LayoutParsingResult(layoutElements=layout_elements)
+                )
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=InferResult(
+                    layoutParsingResults=layout_parsing_results,
+                ),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    return app

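A client sketch for `/layout-parsing` (host and port assumed; `file` is taken to accept a Base64-encoded document, as in the other apps). `fileType` is 0 for PDF and 1 for images and may be omitted only for URLs whose type can be inferred; each element of `layoutParsingResults` corresponds to one page:

    import base64

    import requests

    with open("doc.pdf", "rb") as f:
        payload = {
            "file": base64.b64encode(f.read()).decode("ascii"),
            "fileType": 0,  # 0: PDF, 1: image
            "useImgOrientationCls": True,
            "useImgUnwrapping": True,
            "useSealTextDet": True,
        }
    resp = requests.post("http://localhost:8080/layout-parsing", json=payload)
    for page in resp.json()["result"]["layoutParsingResults"]:
        for elem in page["layoutElements"]:
            print(elem["label"], elem["bbox"], elem["text"][:40])
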
+ 22 - 20
paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py

@@ -48,9 +48,9 @@ class InferenceParams(BaseModel):
 class AnalyzeImageRequest(BaseModel):
     file: str
     fileType: Optional[FileType] = None
-    useOricls: bool = True
-    useCurve: bool = True
-    useUvdoc: bool = True
+    useImgOrientationCls: bool = True
+    useImgUnwrapping: bool = True
+    useSealTextDet: bool = True
     inferenceParams: Optional[InferenceParams] = None
 
 
@@ -117,18 +117,18 @@ class BuildVectorStoreRequest(BaseModel):
 
 
 class BuildVectorStoreResult(BaseModel):
-    vectorStore: dict
+    vectorStore: str
 
 
 class RetrieveKnowledgeRequest(BaseModel):
     keys: List[str]
-    vectorStore: dict
+    vectorStore: str
     llmName: Optional[LLMName] = None
     llmParams: Optional[Annotated[LLMParams, Field(discriminator="apiType")]] = None
 
 
 class RetrieveKnowledgeResult(BaseModel):
-    retrievalResult: dict
+    retrievalResult: str
 
 
 class ChatRequest(BaseModel):
@@ -137,8 +137,8 @@ class ChatRequest(BaseModel):
     taskDescription: Optional[str] = None
     rules: Optional[str] = None
     fewShot: Optional[str] = None
-    vectorStore: Optional[dict] = None
-    retrievalResult: Optional[dict] = None
+    vectorStore: Optional[str] = None
+    retrievalResult: Optional[str] = None
     returnPrompts: bool = True
     llmName: Optional[LLMName] = None
     llmParams: Optional[Annotated[LLMParams, Field(discriminator="apiType")]] = None
@@ -151,7 +151,7 @@ class Prompts(BaseModel):
 
 
 class ChatResult(BaseModel):
-    chatResult: str
+    chatResult: dict
     prompts: Optional[Prompts] = None
 
 
@@ -311,11 +311,12 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
                 max_num_imgs=ctx.extra["max_num_imgs"],
             )
 
-            result = await pipeline.infer(
+            result = await pipeline.call(
+                pipeline.pipeline.visual_predict,
                 images,
-                use_doc_image_ori_cls_model=request.useOricls,
-                use_doc_image_unwarp_model=request.useCurve,
-                use_seal_text_det_model=request.useUvdoc,
+                use_doc_image_ori_cls_model=request.useImgOrientationCls,
+                use_doc_image_unwarp_model=request.useImgUnwrapping,
+                use_seal_text_det_model=request.useSealTextDet,
             )
 
             vision_results: List[VisionResult] = []
@@ -404,14 +405,14 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
                 kwargs["llm_params"] = _llm_params_to_dict(request.llmParams)
 
             result = await serving_utils.call_async(
-                pipeline.pipeline.get_vector_text, **kwargs
+                pipeline.pipeline.build_vector, **kwargs
             )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
                 errorCode=0,
                 errorMsg="Success",
-                result=BuildVectorStoreResult(vectorStore=result),
+                result=BuildVectorStoreResult(vectorStore=result["vector"]),
             )
 
         except Exception as e:
@@ -431,7 +432,7 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
         try:
             kwargs = {
                 "key_list": request.keys,
-                "vector": results.VectorResult(request.vectorStore),
+                "vector": results.VectorResult({"vector": request.vectorStore}),
             }
             if request.llmName is not None:
                 kwargs["llm_name"] = request.llmName
@@ -439,14 +440,14 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
                 kwargs["llm_params"] = _llm_params_to_dict(request.llmParams)
 
             result = await serving_utils.call_async(
-                pipeline.pipeline.get_retrieval_text, **kwargs
+                pipeline.pipeline.retrieval, **kwargs
             )
 
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
                 errorCode=0,
                 errorMsg="Success",
-                result=RetrieveKnowledgeResult(retrievalResult=result),
+                result=RetrieveKnowledgeResult(retrievalResult=result["retrieval"]),
             )
 
         except Exception as e:
@@ -476,10 +477,10 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
             if request.fewShot is not None:
                 kwargs["few_shot"] = request.fewShot
             if request.vectorStore is not None:
-                kwargs["vector"] = results.VectorResult(request.vectorStore)
+                kwargs["vector"] = results.VectorResult({"vector": request.vectorStore})
             if request.retrievalResult is not None:
                 kwargs["retrieval_result"] = results.RetrievalResult(
-                    request.retrievalResult
+                    {"retrieval": request.retrievalResult}
                 )
             kwargs["save_prompt"] = request.returnPrompts
             if request.llmName is not None:
@@ -501,6 +502,7 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F
                 chatResult=result["chat_res"],
                 prompts=prompts,
             )
+
             return ResultResponse(
                 logId=serving_utils.generate_log_id(),
                 errorCode=0,

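Note the interface change: `vectorStore` and `retrievalResult` are now opaque strings rather than dicts, and `chatResult` is a dict; clients simply round-trip the strings between steps. A sketch of the new payload shapes (field names from the models above; endpoint paths omitted):

    build_response = {"vectorStore": "..."}  # returned by the vector-store build step
    retrieve_request = {
        "keys": ["party A", "total amount"],
        "vectorStore": build_response["vectorStore"],  # pass the string back verbatim
    }
    retrieve_response = {"retrievalResult": "..."}
    chat_request = {
        "taskDescription": "Extract the listed fields from a contract",
        "vectorStore": build_response["vectorStore"],
        "retrievalResult": retrieve_response["retrievalResult"],
        "returnPrompts": True,
    }
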
+ 112 - 0
paddlex/inference/pipelines/serving/_pipeline_apps/seal_recognition.py

@@ -0,0 +1,112 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Optional
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, TypeAlias
+
+from .....utils import logging
+from ...seal_recognition import SealOCRPipeline
+from .. import utils as serving_utils
+from ..app import AppConfig, create_app
+from ..models import Response, ResultResponse
+
+
+class InferenceParams(BaseModel):
+    maxLongSide: Optional[Annotated[int, Field(gt=0)]] = None
+
+
+class InferRequest(BaseModel):
+    image: str
+    inferenceParams: Optional[InferenceParams] = None
+
+
+Point: TypeAlias = Annotated[List[int], Field(min_length=2, max_length=2)]
+Polygon: TypeAlias = Annotated[List[Point], Field(min_length=3)]
+
+
+class Text(BaseModel):
+    poly: Polygon
+    text: str
+    score: float
+
+
+class SealImpression(BaseModel):
+    texts: List[Text]
+
+
+class InferResult(BaseModel):
+    sealImpressions: List[SealImpression]
+    layoutImage: str
+
+
+def create_pipeline_app(pipeline: SealOCRPipeline, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @app.post(
+        "/seal-recognition", operation_id="infer", responses={422: {"model": Response}}
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        if request.inferenceParams:
+            max_long_side = request.inferenceParams.maxLongSide
+            if max_long_side:
+                raise HTTPException(
+                    status_code=422,
+                    detail="`maxLongSide` is currently not supported.",
+                )
+
+        try:
+            file_bytes = await serving_utils.get_raw_bytes(
+                request.image, aiohttp_session
+            )
+            image = serving_utils.image_bytes_to_array(file_bytes)
+
+            result = (await pipeline.infer(image))[0]
+
+            seal_impressions: List[SealImpression] = []
+            for item in result["ocr_result"]:
+                texts: List[Text] = []
+                for poly, text, score in zip(
+                    item["dt_polys"], item["rec_text"], item["rec_score"]
+                ):
+                    texts.append(Text(poly=poly, text=text, score=score))
+                seal_impressions.append(SealImpression(texts=texts))
+            layout_image_base64 = serving_utils.image_to_base64(
+                result["layout_result"].img
+            )
+
+            # TODO: OCR image
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=InferResult(
+                    sealImpressions=seal_impressions,
+                    layoutImage=layout_image_base64,
+                ),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    return app

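A client sketch mirroring the models above (host and port assumed); `layoutImage` is a Base64-encoded visualization of the layout detection stage:

    import base64

    import requests

    with open("seal.jpg", "rb") as f:
        payload = {"image": base64.b64encode(f.read()).decode("ascii")}
    resp = requests.post("http://localhost:8080/seal-recognition", json=payload)
    result = resp.json()["result"]
    for impression in result["sealImpressions"]:
        for text in impression["texts"]:
            print(text["text"], text["score"])
    with open("layout.jpg", "wb") as f:
        f.write(base64.b64decode(result["layoutImage"]))
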
+ 88 - 0
paddlex/inference/pipelines/serving/_pipeline_apps/small_object_detection.py

@@ -0,0 +1,88 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated, TypeAlias
+
+from .....utils import logging
+from ...single_model_pipeline import SmallObjDet
+from .. import utils as serving_utils
+from ..app import AppConfig, create_app
+from ..models import Response, ResultResponse
+
+
+class InferRequest(BaseModel):
+    image: str
+
+
+BoundingBox: TypeAlias = Annotated[List[float], Field(min_length=4, max_length=4)]
+
+
+class DetectedObject(BaseModel):
+    bbox: BoundingBox
+    categoryId: int
+    score: float
+
+
+class InferResult(BaseModel):
+    detectedObjects: List[DetectedObject]
+    image: str
+
+
+def create_pipeline_app(pipeline: SmallObjDet, app_config: AppConfig) -> FastAPI:
+    app, ctx = create_app(
+        pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
+    )
+
+    @app.post(
+        "/object-detection", operation_id="infer", responses={422: {"model": Response}}
+    )
+    async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
+        pipeline = ctx.pipeline
+        aiohttp_session = ctx.aiohttp_session
+
+        try:
+            file_bytes = await serving_utils.get_raw_bytes(
+                request.image, aiohttp_session
+            )
+            image = serving_utils.image_bytes_to_array(file_bytes)
+
+            result = (await pipeline.infer(image))[0]
+
+            objects: List[DetectedObject] = []
+            for obj in result["boxes"]:
+                objects.append(
+                    DetectedObject(
+                        bbox=obj["coordinate"],
+                        categoryId=obj["cls_id"],
+                        score=obj["score"],
+                    )
+                )
+            output_image_base64 = serving_utils.image_to_base64(result.img)
+
+            return ResultResponse(
+                logId=serving_utils.generate_log_id(),
+                errorCode=0,
+                errorMsg="Success",
+                result=InferResult(detectedObjects=objects, image=output_image_base64),
+            )
+
+        except Exception as e:
+            logging.exception(e)
+            raise HTTPException(status_code=500, detail="Internal server error")
+
+    return app

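The dedicated small object detection app replaces the earlier reuse of the generic object detection app but keeps the `/object-detection` path. A client sketch (host and port assumed; `image` accepts a URL or a Base64 string):

    import base64

    import requests

    payload = {"image": "https://example.com/demo.jpg"}  # illustrative URL
    resp = requests.post("http://localhost:8080/object-detection", json=payload)
    result = resp.json()["result"]
    for obj in result["detectedObjects"]:
        print(obj["categoryId"], obj["score"], obj["bbox"])
    with open("vis.jpg", "wb") as f:
        f.write(base64.b64decode(result["image"]))
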
+ 6 - 5
paddlex/inference/pipelines/serving/app.py

@@ -121,12 +121,13 @@ def create_app(
     async def _app_lifespan(app: fastapi.FastAPI) -> AsyncGenerator[None, None]:
         ctx.pipeline = PipelineWrapper[_PipelineT](pipeline)
         if app_aiohttp_session:
-            ctx.aiohttp_session = aiohttp.ClientSession(
+            async with aiohttp.ClientSession(
                 cookie_jar=aiohttp.DummyCookieJar()
-            )
-        yield
-        if app_aiohttp_session:
-            await ctx.aiohttp_session.close()
+            ) as aiohttp_session:
+                ctx.aiohttp_session = aiohttp_session
+                yield
+        else:
+            yield
 
     app = fastapi.FastAPI(lifespan=_app_lifespan)
     ctx = AppContext[_PipelineT](config=app_config)

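Handing the session to `async with` guarantees it is closed even if shutdown raises partway through. The shape of the pattern as a standalone sketch (`app.state.session` is illustrative, not the PaddleX attribute):

    from contextlib import asynccontextmanager

    import aiohttp
    import fastapi

    @asynccontextmanager
    async def lifespan(app: fastapi.FastAPI):
        async with aiohttp.ClientSession(
            cookie_jar=aiohttp.DummyCookieJar()
        ) as session:
            app.state.session = session  # closed automatically on shutdown
            yield

    app = fastapi.FastAPI(lifespan=lifespan)
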
+ 4 - 0
paddlex/inference/pipelines/serving/server.py

@@ -18,4 +18,8 @@ from fastapi import FastAPI
 
 def run_server(app: FastAPI, *, host: str, port: int, debug: bool) -> None:
     # XXX: Currently, `debug` is not used.
+    # HACK: Fix duplicate logs
+    uvicorn_version = tuple(int(x) for x in uvicorn.__version__.split(".")[:2])
+    if uvicorn_version < (0, 19):
+        uvicorn.config.LOGGING_CONFIG["loggers"]["uvicorn"]["propagate"] = False
     uvicorn.run(app, host=host, port=port, log_level="info")

+ 1 - 1
paddlex/inference/pipelines/serving/utils.py

@@ -91,7 +91,7 @@ def read_pdf(
             image = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(
                 pixmap.h, pixmap.w, pixmap.n
             )
-            image = np.ascontiguousarray(image[..., ::-1])
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
             if resize:
                 if img_size is None:
                     img_size = (image.shape[1], image.shape[0])

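`cv2.cvtColor` replaces the manual channel reversal; for a 3-channel pixmap the two are equivalent, and cvtColor returns a contiguous array in one step instead of two. A quick check:

    import cv2
    import numpy as np

    rgb = np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8)
    manual = np.ascontiguousarray(rgb[..., ::-1])
    converted = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    assert (manual == converted).all()
    assert converted.flags["C_CONTIGUOUS"]
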
+ 15 - 1
paddlex/inference/results/seal_rec.py

@@ -1,3 +1,17 @@
+# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from pathlib import Path
 from .base import BaseResult, CVResult
 
@@ -16,4 +30,4 @@ class SealOCRResult(CVResult):
         layout_result.save_to_img(layout_save_path)
         for idx, seal_result in enumerate(self["ocr_result"]):
             ocr_save_path = f"{save_path}_{idx}_seal_ocr.jpg"
-            seal_result.save_to_img(ocr_save_path)
+            seal_result.save_to_img(ocr_save_path)