소스 검색

update doc (#3452)

* update

* fix

* fix format
zhang-prog 8 달 전
부모
커밋
56ab2c2d6e
27개의 변경된 파일에서 89개의 추가작업 그리고 401개의 삭제
  1. 1 0
      docs/pipeline_deploy/serving.en.md
  2. 1 0
      docs/pipeline_deploy/serving.md
  3. 11 9
      docs/pipeline_usage/tutorials/cv_pipelines/3d_bev_detection.en.md
  4. 11 9
      docs/pipeline_usage/tutorials/cv_pipelines/3d_bev_detection.md
  5. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/image_classification.en.md
  6. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/image_classification.md
  7. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.en.md
  8. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.md
  9. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/object_detection.en.md
  10. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/object_detection.md
  11. 1 3
      docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_segmentation.en.md
  12. 1 3
      docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_segmentation.md
  13. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.en.md
  14. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.md
  15. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md
  16. 2 0
      docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md
  17. 0 1
      docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.en.md
  18. 0 1
      docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md
  19. 5 2
      docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing_v2.en.md
  20. 5 3
      docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing_v2.md
  21. 6 9
      docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition_v2.en.md
  22. 6 9
      docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition_v2.md
  23. 11 343
      docs/pipeline_usage/tutorials/speech_pipelines/multilingual_speech_recognition.en.md
  24. 9 6
      docs/pipeline_usage/tutorials/speech_pipelines/multilingual_speech_recognition.md
  25. 0 1
      docs/pipeline_usage/tutorials/video_pipelines/video_classification.en.md
  26. 0 1
      docs/pipeline_usage/tutorials/video_pipelines/video_classification.md
  27. 1 1
      paddlex/inference/serving/infra/utils.py

+ 1 - 0
docs/pipeline_deploy/serving.en.md

@@ -335,6 +335,7 @@ docker run \
     -w /workspace \
     --rm \
     --gpus all \
+    --init \
     --network host \
     --shm-size 8g \
     {image name} \

+ 1 - 0
docs/pipeline_deploy/serving.md

@@ -336,6 +336,7 @@ docker run \
     -w /workspace \
     --rm \
     --gpus all \
+    --init \
     --network host \
     --shm-size 8g \
     {镜像名称} \

+ 11 - 9
docs/pipeline_usage/tutorials/cv_pipelines/3d_bev_detection.en.md

@@ -437,24 +437,26 @@ In addition, PaddleX also provides three other deployment methods, detailed as f
 <summary>Python</summary>
 
 <pre><code class="language-python">
+import base64
 import requests
 
-API_URL = &quot;http://localhost:8080/bev-3d-object-detection&quot; # Service URL
-tar_path = &quot;./nuscenes_demo_infer.tar&quot;
+API_URL = "http://localhost:8080/bev-3d-object-detection" # Service URL
+tar_path = "./nuscenes_demo_infer.tar"
 
-payload = {&quot;tar&quot;: tar_path}
+with open(tar_path, "rb") as file:
+    tar_bytes = file.read()
+    tar_data = base64.b64encode(tar_bytes).decode("ascii")
+
+payload = {"tar": tar_data}
 
 # Call the API
 response = requests.post(API_URL, json=payload)
 
 # Process the response data
 assert response.status_code == 200
-result = response.json()[&quot;result&quot;]
-with open(output_image_path, &quot;wb&quot;) as file:
-    file.write(base64.b64decode(result[&quot;image&quot;]))
-print(f&quot;Output image saved at {output_image_path}&quot;)
-print(&quot;Detected objects:&quot;)
-print(result[&quot;detectedObjects&quot;])
+result = response.json()["result"]
+print("Detected objects:")
+print(result["detectedObjects"])
 </code></pre></details>
 </details>
 <br/>

+ 11 - 9
docs/pipeline_usage/tutorials/cv_pipelines/3d_bev_detection.md

@@ -429,24 +429,26 @@ for res in output:
 
 
 <pre><code class="language-python">
+import base64
 import requests
 
-API_URL = &quot;http://localhost:8080/bev-3d-object-detection&quot; # 服务URL
-tar_path = &quot;./nuscenes_demo_infer.tar&quot;
+API_URL = "http://localhost:8080/bev-3d-object-detection" # 服务URL
+tar_path = "./nuscenes_demo_infer.tar"
 
-payload = {&quot;tar&quot;: tar_path}
+with open(tar_path, "rb") as file:
+    tar_bytes = file.read()
+    tar_data = base64.b64encode(tar_bytes).decode("ascii")
+
+payload = {"tar": tar_data}
 
 # 调用API
 response = requests.post(API_URL, json=payload)
 
 # 处理接口返回数据
 assert response.status_code == 200
-result = response.json()[&quot;result&quot;]
-with open(output_image_path, &quot;wb&quot;) as file:
-    file.write(base64.b64decode(result[&quot;image&quot;]))
-print(f&quot;Output image saved at {output_image_path}&quot;)
-print(&quot;Detected objects:&quot;)
-print(result[&quot;detectedObjects&quot;])
+result = response.json()["result"]
+print("Detected objects:")
+print(result["detectedObjects"])
 </code></pre></details>
 </details>
 <br/>

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/image_classification.en.md

@@ -1326,6 +1326,8 @@ Below are the API references for basic service deployment and examples of multi-
 <details><summary>Multi-language Service Call Examples</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/image_classification.md

@@ -1142,6 +1142,8 @@ for res in output:
 <details><summary>多语言调用服务示例</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.en.md

@@ -679,6 +679,8 @@ Below are the API references for basic service deployment and examples of multi-
 <details><summary>Multi-Language Service Call Examples</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/instance_segmentation.md

@@ -676,6 +676,8 @@ for res in output:
 <details><summary>多语言调用服务示例</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/object_detection.en.md

@@ -817,6 +817,8 @@ Below is the API reference for basic service deployment and multi-language servi
 <details><summary>Multilingual API Call Examples</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/object_detection.md

@@ -831,6 +831,8 @@ for res in output:
 <details><summary>多语言调用服务示例</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 1 - 3
docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_segmentation.en.md

@@ -541,7 +541,7 @@ with open(image_path, "rb") as file:
 
 payload = {
     "image": image_data, # Base64-encoded file content or image URL
-    "prompt_type": "box",
+    "promptType": "box",
     "prompt": [[112.9,118.4,513.8,382.1],[4.6,263.6,92.2,336.6],[592.4,260.9,607.2,294.2]]
 }
 
@@ -556,8 +556,6 @@ image = base64.b64decode(image_base64)
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nresult(with rle encoded binary mask):")
-print(result)
 </code></pre></details>
 
 </details>

+ 1 - 3
docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_segmentation.md

@@ -539,7 +539,7 @@ with open(image_path, "rb") as file:
 
 payload = {
     "image": image_data, # Base64编码的文件内容或者图像URL
-    "prompt_type": "box",
+    "promptType": "box",
     "prompt": [[112.9,118.4,513.8,382.1],[4.6,263.6,92.2,336.6],[592.4,260.9,607.2,294.2]]
 }
 
@@ -554,8 +554,6 @@ image = base64.b64decode(image_base64)
 with open(output_image_path, "wb") as file:
     file.write(base64.b64decode(result["image"]))
 print(f"Output image saved at {output_image_path}")
-print("\nresult(with rle encoded binary mask):")
-print(result)
 </code></pre></details>
 </details>
 <br/>

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.en.md

@@ -643,6 +643,8 @@ Below are the API references for basic service deployment and examples of multi-
 <details><summary>Multi-language Service Call Example</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/semantic_segmentation.md

@@ -645,6 +645,8 @@ for res in output:
 <details><summary>多语言调用服务示例</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.en.md

@@ -539,6 +539,8 @@ Below is the API reference for basic service deployment and multi-language servi
 <details><summary>Multi-language Service Call Examples</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 2 - 0
docs/pipeline_usage/tutorials/cv_pipelines/small_object_detection.md

@@ -537,6 +537,8 @@ for res in output:
 <details><summary>多语言调用服务示例</summary>
 <details>
 <summary>Python</summary>
+
+
 <pre><code class="language-python">import base64
 import requests
 

+ 0 - 1
docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.en.md

@@ -1450,7 +1450,6 @@ response = requests.post(API_URL, json=payload)
 # Process the response data
 assert response.status_code == 200
 result = response.json()["result"]
-print("\nDetected layout elements:")
 for i, res in enumerate(result["layoutParsingResults"]):
     print(res["prunedResult"])
     for img_name, img in res["outputImages"].items():

+ 0 - 1
docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md

@@ -1501,7 +1501,6 @@ response = requests.post(API_URL, json=payload)
 # 处理接口返回数据
 assert response.status_code == 200
 result = response.json()["result"]
-print("\nDetected layout elements:")
 for i, res in enumerate(result["layoutParsingResults"]):
     print(res["prunedResult"])
     for img_name, img in res["outputImages"].items():

+ 5 - 2
docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing_v2.en.md

@@ -1658,8 +1658,10 @@ Below is the API reference for basic service-oriented deployment and examples of
 <details>
 <summary>Python</summary>
 
-<pre><code class="language-python">import base64
+<pre><code class="language-python">
+import base64
 import requests
+import pathlib
 
 API_URL = "http://localhost:8080/layout-parsing" # Service URL
 
@@ -1672,6 +1674,7 @@ with open(image_path, "rb") as file:
 
 payload = {
     "file": image_data, # Base64-encoded file content or file URL
+    "fileType": 1, # file type, 1 represents image file
 }
 
 # Call the API
@@ -1681,7 +1684,7 @@ response = requests.post(API_URL, json=payload)
 assert response.status_code == 200
 result = response.json()["result"]
 print("\nDetected layout elements:")
-for res in result["layoutParsingResults"]:
+for i, res in enumerate(result["layoutParsingResults"]):
     print(res["prunedResult"])
     md_dir = pathlib.Path(f"markdown_{i}")
     md_dir.mkdir(exist_ok=True)

+ 5 - 3
docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing_v2.md

@@ -1554,8 +1554,10 @@ for res in output:
 <details>
 <summary>Python</summary>
 
-<pre><code class="language-python">import base64
+<pre><code class="language-python">
+import base64
 import requests
+import pathlib
 
 API_URL = "http://localhost:8080/layout-parsing" # 服务URL
 
@@ -1568,6 +1570,7 @@ with open(image_path, "rb") as file:
 
 payload = {
     "file": image_data, # Base64编码的文件内容或者文件URL
+    "fileType": 1, # 文件类型,1表示图像文件
 }
 
 # 调用API
@@ -1576,8 +1579,7 @@ response = requests.post(API_URL, json=payload)
 # 处理接口返回数据
 assert response.status_code == 200
 result = response.json()["result"]
-print("\nDetected layout elements:")
-for res in result["layoutParsingResults"]:
+for i, res in enumerate(result["layoutParsingResults"]):
     print(res["prunedResult"])
     md_dir = pathlib.Path(f"markdown_{i}")
     md_dir.mkdir(exist_ok=True)

+ 6 - 9
docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition_v2.en.md

@@ -1481,15 +1481,12 @@ response = requests.post(API_URL, json=payload)
 assert response.status_code == 200
 result = response.json()["result"]
 for i, res in enumerate(result["tableRecResults"]):
-    print("Detected tables:")
-    print(res["tables"])
-    layout_img_path = f"layout_{i}.jpg"
-    with open(layout_img_path, "wb") as f:
-        f.write(base64.b64decode(res["layoutImage"]))
-    ocr_img_path = f"ocr_{i}.jpg"
-    with open(ocr_img_path, "wb") as f:
-        f.write(base64.b64decode(res["ocrImage"]))
-    print(f"Output images saved at {layout_img_path} and {ocr_img_path}")
+    print(res["prunedResult"])
+    for img_name, img in res["outputImages"].items():
+        img_path = f"{img_name}_{i}.jpg"
+        with open(img_path, "wb") as f:
+            f.write(base64.b64decode(img))
+        print(f"Output image saved at {img_path}")
 </code></pre></details>
 </details>
 <br/>

+ 6 - 9
docs/pipeline_usage/tutorials/ocr_pipelines/table_recognition_v2.md

@@ -1328,15 +1328,12 @@ response = requests.post(API_URL, json=payload)
 assert response.status_code == 200
 result = response.json()["result"]
 for i, res in enumerate(result["tableRecResults"]):
-    print("Detected tables:")
-    print(res["tables"])
-    layout_img_path = f"layout_{i}.jpg"
-    with open(layout_img_path, "wb") as f:
-        f.write(base64.b64decode(res["layoutImage"]))
-    ocr_img_path = f"ocr_{i}.jpg"
-    with open(ocr_img_path, "wb") as f:
-        f.write(base64.b64decode(res["ocrImage"]))
-    print(f"Output images saved at {layout_img_path} and {ocr_img_path}")
+    print(res["prunedResult"])
+    for img_name, img in res["outputImages"].items():
+        img_path = f"{img_name}_{i}.jpg"
+        with open(img_path, "wb") as f:
+            f.write(base64.b64decode(img))
+        print(f"Output image saved at {img_path}")
 </code></pre></details>
 </details>
 <br/>

+ 11 - 343
docs/pipeline_usage/tutorials/speech_pipelines/multilingual_speech_recognition.en.md

@@ -291,356 +291,24 @@ for res in output:
 <summary>Python</summary>
 
 
-<pre><code class="language-python">import base64
+<pre><code class="language-python">
+import base64
 import requests
 
-API_URL = &quot;http://localhost:8080/video-classification&quot; # Service URL
-video_path = &quot;./demo.mp4&quot;
-output_video_path = &quot;./out.mp4&quot;
+API_URL = "http://localhost:8080/multilingual-speech-recognition"
+audio_path = "./zh.wav"
 
-# Encode local video to Base64
-with open(video_path, &quot;rb&quot;) as file:
-    video_bytes = file.read()
-    video_data = base64.b64encode(video_bytes).decode(&quot;ascii&quot;)
+with open(audio_path, "rb") as file:
+    audio_bytes = file.read()
+    audio_data = base64.b64encode(audio_bytes).decode("ascii")
 
-payload = {&quot;video&quot;: video_data}  # Base64 encoded file content or video URL
+payload = {"audio": audio_data}
 
-# Call API
 response = requests.post(API_URL, json=payload)
 
-# Process API response
 assert response.status_code == 200
-result = response.json()[&quot;result&quot;]
-with open(output_video_path, &quot;wb&quot;) as file:
-    file.write(base64.b64decode(result[&quot;video&quot;]))
-print(f&quot;Output video saved at {output_video_path}&quot;)
-print(&quot;\nCategories:&quot;)
-print(result[&quot;categories&quot;])
-</code></pre></details>
-<details><summary>C++</summary>
-
-<pre><code class="language-cpp">#include &lt;iostream&gt;
-#include &quot;cpp-httplib/httplib.h&quot; // https://github.com/Huiyicc/cpp-httplib
-#include &quot;nlohmann/json.hpp&quot; // https://github.com/nlohmann/json
-#include &quot;base64.hpp&quot; // https://github.com/tobiaslocker/base64
-
-int main() {
-    httplib::Client client(&quot;localhost:8080&quot;);
-    const std::string videoPath = &quot;./demo.mp4&quot;;
-    const std::string outputImagePath = &quot;./out.mp4&quot;;
-
-    httplib::Headers headers = {
-        {&quot;Content-Type&quot;, &quot;application/json&quot;}
-    };
-
-    // Encode local video to Base64
-    std::ifstream file(videoPath, std::ios::binary | std::ios::ate);
-    std::streamsize size = file.tellg();
-    file.seekg(0, std::ios::beg);
-
-    std::vector&lt;char&gt; buffer(size);
-    if (!file.read(buffer.data(), size)) {
-        std::cerr &lt;&lt; &quot;Error reading file.&quot; &lt;&lt; std::endl;
-        return 1;
-    }
-    std::string bufferStr(reinterpret_cast&lt;const char*&gt;(buffer.data()), buffer.size());
-    std::string encodedImage = base64::to_base64(bufferStr);
-
-    nlohmann::json jsonObj;
-    jsonObj[&quot;video&quot;] = encodedImage;
-    std::string body = jsonObj.dump();
-
-    // Call API
-    auto response = client.Post(&quot;/video-classification&quot;, headers, body, &quot;application/json&quot;);
-    // Process API response
-    if (response &amp;&amp; response-&gt;status == 200) {
-        nlohmann::json jsonResponse = nlohmann::json::parse(response-&gt;body);
-        auto result = jsonResponse[&quot;result&quot;];
-
-        encodedImage = result[&quot;video&quot;];
-        std::string decodedString = base64::from_base64(encodedImage);
-        std::vector&lt;unsigned char&gt; decodedImage(decodedString.begin(), decodedString.end());
-        std::ofstream outputImage(outPutImagePath, std::ios::binary | std::ios::out);
-        if (outputImage.is_open()) {
-            outputImage.write(reinterpret_cast&lt;char*&gt;(decodedImage.data()), decodedImage.size());
-            outputImage.close();
-            std::cout &lt;&lt; &quot;Output video saved at &quot; &lt;&lt; outPutImagePath &lt;&lt; std::endl;
-        } else {
-            std::cerr &lt;&lt; &quot;Unable to open file for writing: &quot; &lt;&lt; outPutImagePath &lt;&lt; std::endl;
-        }
-
-        auto categories = result[&quot;categories&quot;];
-        std::cout &lt;&lt; &quot;\nCategories:&quot; &lt;&lt; std::endl;
-        for (const auto&amp; category : categories) {
-            std::cout &lt;&lt; category &lt;&lt; std::endl;
-        }
-    } else {
-        std::cout &lt;&lt; &quot;Failed to send HTTP request.&quot; &lt;&lt; std::endl;
-        return 1;
-    }
-
-    return 0;
-}
-</code></pre></details>
-
-<details><summary>Java</summary>
-
-<pre><code class="language-java">import okhttp3.*;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.node.ObjectNode;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.Base64;
-
-public class Main {
-    public static void main(String[] args) throws IOException {
-        String API_URL = &quot;http://localhost:8080/video-classification&quot;; // Service URL
-        String videoPath = &quot;./demo.mp4&quot;; // Local video
-        String outputImagePath = &quot;./out.mp4&quot;; // Output video
-
-        // Encode local video to Base64
-        File file = new File(videoPath);
-        byte[] fileContent = java.nio.file.Files.readAllBytes(file.toPath());
-        String videoData = Base64.getEncoder().encodeToString(fileContent);
-
-        ObjectMapper objectMapper = new ObjectMapper();
-        ObjectNode params = objectMapper.createObjectNode();
-        params.put(&quot;video&quot;, videoData); // Base64 encoded file content or video URL
-
-        // Create OkHttpClient instance
-        OkHttpClient client = new OkHttpClient();
-        MediaType JSON = MediaType.Companion.get(&quot;application/json; charset=utf-8&quot;);
-        RequestBody body = RequestBody.Companion.create(params.toString(), JSON);
-        Request request = new Request.Builder()
-                .url(API_URL)
-                .post(body)
-                .build();
-
-        // Call API and process API response
-        try (Response response = client.newCall(request).execute()) {
-            if (response.isSuccessful()) {
-                String responseBody = response.body().string();
-                JsonNode resultNode = objectMapper.readTree(responseBody);
-                JsonNode result = resultNode.get(&quot;result&quot;);
-                String base64Image = result.get(&quot;video&quot;).asText();
-                JsonNode categories = result.get(&quot;categories&quot;);
-
-                byte[] videoBytes = Base64.getDecoder().decode(base64Image);
-                try (FileOutputStream fos = new FileOutputStream(outputImagePath)) {
-                    fos.write(videoBytes);
-                }
-                System.out.println(&quot;Output video saved at &quot; + outputImagePath);
-                System.out.println(&quot;\nCategories: &quot; + categories.toString());
-            } else {
-                System.err.println(&quot;Request failed with code: &quot; + response.code());
-            }
-        }
-    }
-}
-</code></pre></details>
-
-<details><summary>Go</summary>
-
-<pre><code class="language-go">package main
-
-import (
-    "bytes"
-    "encoding/base64"
-    "encoding/json"
-    "fmt"
-    "io/ioutil"
-    "net/http"
-)
-
-func main() {
-    API_URL := "http://localhost:8080/video-classification"
-    videoPath := "./demo.mp4"
-    outputImagePath := "./out.mp4"
-
-    // Base64 encode the local video
-    videoBytes, err := ioutil.ReadFile(videoPath)
-    if err != nil {
-        fmt.Println("Error reading video file:", err)
-        return
-    }
-    videoData := base64.StdEncoding.EncodeToString(videoBytes)
-
-    payload := map[string]string{"video": videoData} // Base64 encoded file content or video URL
-    payloadBytes, err := json.Marshal(payload)
-    if err != nil {
-        fmt.Println("Error marshaling payload:", err)
-        return
-    }
-
-    // Call the API
-    client := &http.Client{}
-    req, err := http.NewRequest("POST", API_URL, bytes.NewBuffer(payloadBytes))
-    if err != nil {
-        fmt.Println("Error creating request:", err)
-        return
-    }
-
-    res, err := client.Do(req)
-    if err != nil {
-        fmt.Println("Error sending request:", err)
-        return
-    }
-    defer res.Body.Close()
-
-    // Handle the API response
-    body, err := ioutil.ReadAll(res.Body)
-    if err != nil {
-        fmt.Println("Error reading response body:", err)
-        return
-    }
-    type Response struct {
-        Result struct {
-            Image      string   `json:"video"`
-            Categories []map[string]interface{} `json:"categories"`
-        } `json:"result"`
-    }
-    var respData Response
-    err = json.Unmarshal([]byte(string(body)), &respData)
-    if err != nil {
-        fmt.Println("Error unmarshaling response body:", err)
-        return
-    }
-
-    outputImageData, err := base64.StdEncoding.DecodeString(respData.Result.Image)
-    if err != nil {
-        fmt.Println("Error decoding base64 video data:", err)
-        return
-    }
-    err = ioutil.WriteFile(outputImagePath, outputImageData, 0644)
-    if err != nil {
-        fmt.Println("Error writing video to file:", err)
-        return
-    }
-    fmt.Printf("Image saved at %s.mp4\n", outputImagePath)
-    fmt.Println("\nCategories:")
-    for _, category := range respData.Result.Categories {
-        fmt.Println(category)
-    }
-}
-</code></pre></details>
-
-<details><summary>C#</summary>
-
-<pre><code class="language-csharp">using System;
-using System.IO;
-using System.Net.Http;
-using System.Net.Http.Headers;
-using System.Text;
-using System.Threading.Tasks;
-using Newtonsoft.Json.Linq;
-
-class Program
-{
-    static readonly string API_URL = "http://localhost:8080/video-classification";
-    static readonly string videoPath = "./demo.mp4";
-    static readonly string outputImagePath = "./out.mp4";
-
-    static async Task Main(string[] args)
-    {
-        var httpClient = new HttpClient();
-
-        // Base64 encode the local video
-        byte[] videoBytes = File.ReadAllBytes(videoPath);
-        string video_data = Convert.ToBase64String(videoBytes);
-
-        var payload = new JObject{ { "video", video_data } }; // Base64 encoded file content or video URL
-        var content = new StringContent(payload.ToString(), Encoding.UTF8, "application/json");
-
-        // Call the API
-        HttpResponseMessage response = await httpClient.PostAsync(API_URL, content);
-        response.EnsureSuccessStatusCode();
-
-        // Handle the API response
-        string responseBody = await response.Content.ReadAsStringAsync();
-        JObject jsonResponse = JObject.Parse(responseBody);
-
-        string base64Image = jsonResponse["result"]["video"].ToString();
-        byte[] outputImageBytes = Convert.FromBase64String(base64Image);
-
-        File.WriteAllBytes(outputImagePath, outputImageBytes);
-        Console.WriteLine($"Output video saved at {outputImagePath}");
-        Console.WriteLine("\nCategories:");
-        Console.WriteLine(jsonResponse["result"]["categories"].ToString());
-    }
-}
-</code></pre></details>
-
-<details><summary>Node.js</summary>
-
-<pre><code class="language-js">const axios = require('axios');
-const fs = require('fs');
-
-const API_URL = 'http://localhost:8080/video-classification'
-const videoPath = './demo.mp4'
-const outputImagePath = &quot;./out.mp4&quot;;
-
-let config = {
-   method: 'POST',
-   maxBodyLength: Infinity,
-   url: API_URL,
-   data: JSON.stringify({
-    'video': encodeImageToBase64(videoPath)  // Base64 encoded file content or video URL
-  })
-};
-
-// Base64 encode the local video
-function encodeImageToBase64(filePath) {
-  const bitmap = fs.readFileSync(filePath);
-  return Buffer.from(bitmap).toString('base64');
-}
-
-// Call the API
-axios.request(config)
-.then((response) =&gt; {
-    // Process the API response
-    const result = response.data[&quot;result&quot;];
-    const videoBuffer = Buffer.from(result[&quot;video&quot;], 'base64');
-    fs.writeFile(outputImagePath, videoBuffer, (err) =&gt; {
-      if (err) throw err;
-      console.log(`Output video saved at ${outputImagePath}`);
-    });
-    console.log(&quot;\nCategories:&quot;);
-    console.log(result[&quot;categories&quot;]);
-})
-.catch((error) =&gt; {
-  console.log(error);
-});
-</code></pre></details>
-<details><summary>PHP</summary>
-
-<pre><code class="language-php">&lt;?php
-
-$API_URL = &quot;http://localhost:8080/video-classification&quot;; // Service URL
-$video_path = &quot;./demo.mp4&quot;;
-$output_video_path = &quot;./out.mp4&quot;;
-
-// Base64 encode the local video
-$video_data = base64_encode(file_get_contents($video_path));
-$payload = array(&quot;video&quot; =&gt; $video_data); // Base64 encoded file content or video URL
-
-// Call the API
-$ch = curl_init($API_URL);
-curl_setopt($ch, CURLOPT_POST, true);
-curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
-curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
-$response = curl_exec($ch);
-curl_close($ch);
-
-// Process the API response
-$result = json_decode($response, true)[&quot;result&quot;];
-file_put_contents($output_video_path, base64_decode($result[&quot;video&quot;]));
-echo &quot;Output video saved at &quot; . $output_video_path . &quot;\n&quot;;
-echo &quot;\nCategories:\n&quot;;
-print_r($result[&quot;categories&quot;]);
-?&gt;
+result = response.json()["result"]
+print(result)
 </code></pre></details>
 </details>
 <br/>
@@ -923,4 +591,4 @@ Subsequently, refer to the command-line method or Python script method in the lo
 ## 5. Multi-Hardware Support
 PaddleX supports a variety of mainstream hardware devices, including NVIDIA GPU, Kunlunxin XPU, Ascend NPU, and Cambricon MLU. <b>Simply modify the `--device` parameter</b> to seamlessly switch between different hardware devices.
 
-For example, if you use Ascend NPU for video classification in the pipeline, the Python command used is:
+For example, if you use Ascend NPU for video classification in the pipeline, the Python command used is:

+ 9 - 6
docs/pipeline_usage/tutorials/speech_pipelines/multilingual_speech_recognition.md

@@ -493,19 +493,22 @@ for res in output:
 
 
 <pre><code class="language-python">
+import base64
 import requests
 
-API_URL = &quot;http://localhost:8080/multilingual-speech-recognition&quot; # 服务URL
-audio_path = &quot;./zh.wav&quot;
+API_URL = "http://localhost:8080/multilingual-speech-recognition" # 服务URL
+audio_path = "./zh.wav"
 
-payload = {&quot;audio&quot;: audio_path}
+with open(audio_path, "rb") as file:
+    audio_bytes = file.read()
+    audio_data = base64.b64encode(audio_bytes).decode("ascii")
+
+payload = {"audio": audio_data}
 
-# 调用API
 response = requests.post(API_URL, json=payload)
 
-# 处理接口返回数据
 assert response.status_code == 200
-result = response.json()[&quot;result&quot;]
+result = response.json()["result"]
 print(result)
 </code></pre></details>
 </details>

+ 0 - 1
docs/pipeline_usage/tutorials/video_pipelines/video_classification.en.md

@@ -494,7 +494,6 @@ import requests
 
 API_URL = "http://localhost:8080/video-classification" # Service URL
 video_path = "./demo.mp4"
-output_video_path = "./out.mp4"
 
 # Encode the local video using Base64
 with open(video_path, "rb") as file:

+ 0 - 1
docs/pipeline_usage/tutorials/video_pipelines/video_classification.md

@@ -494,7 +494,6 @@ import requests
 
 API_URL = &quot;http://localhost:8080/video-classification&quot; # 服务URL
 video_path = &quot;./demo.mp4&quot;
-output_video_path = &quot;./out.mp4&quot;
 
 # 对本地视频进行Base64编码
 with open(video_path, &quot;rb&quot;) as file:

+ 1 - 1
paddlex/inference/serving/infra/utils.py

@@ -113,7 +113,7 @@ def infer_file_ext(file: str) -> Optional[str]:
         return mimetypes.guess_extension(mime_type)
     else:
         bytes_ = base64.b64decode(file)
-        return filetype.guess_extension(bytes_)
+        return "." + filetype.guess_extension(bytes_)
 
 
 def image_bytes_to_array(data: bytes) -> np.ndarray: