test_api_call.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. import base64
  2. import requests
  3. import pathlib
  4. def test_ocr_api():
  5. API_URL = "http://localhost:28888/ocr"
  6. file_path = "./sample_data/PictureCheckCode.jpeg"
  7. with open(file_path, "rb") as file:
  8. file_bytes = file.read()
  9. file_data = base64.b64encode(file_bytes).decode("ascii")
  10. payload = {"file": file_data, "fileType": 1}
  11. response = requests.post(API_URL, json=payload)
  12. assert response.status_code == 200
  13. result = response.json()["result"]
  14. for i, res in enumerate(result["ocrResults"]):
  15. print(res["prunedResult"])
  16. ocr_img_path = f"sample_data/验证码_ocr_{i}.jpg"
  17. with open(ocr_img_path, "wb") as f:
  18. f.write(base64.b64decode(res["ocrImage"]))
  19. print(f"Output image saved at {ocr_img_path}")
  20. print(f"rec_texts: {res['prunedResult']['rec_texts'][0]}")
  21. def test_ppstructurev3_api(image_path: str, API_URL: str):
  22. # 对本地图像进行Base64编码
  23. with open(image_path, "rb") as file:
  24. image_bytes = file.read()
  25. image_data = base64.b64encode(image_bytes).decode("ascii")
  26. payload = {
  27. "file": image_data, # Base64编码的文件内容或者文件URL
  28. "fileType": 1, # 文件类型,1表示图像文件
  29. }
  30. # 调用API
  31. response = requests.post(API_URL, json=payload)
  32. # 处理接口返回数据
  33. assert response.status_code == 200
  34. result = response.json()["result"]
  35. for i, res in enumerate(result["layoutParsingResults"]):
  36. print(res["prunedResult"])
  37. md_dir = pathlib.Path(f"markdown_{i}")
  38. md_dir.mkdir(exist_ok=True)
  39. (md_dir / "doc.md").write_text(res["markdown"]["text"])
  40. for img_path, img in res["markdown"]["images"].items():
  41. img_path = md_dir / img_path
  42. img_path.parent.mkdir(parents=True, exist_ok=True)
  43. img_path.write_bytes(base64.b64decode(img))
  44. print(f"Markdown document saved at {md_dir / 'doc.md'}")
  45. for img_name, img in res["outputImages"].items():
  46. img_path = f"{img_name}_{i}.jpg"
  47. with open(img_path, "wb") as f:
  48. f.write(base64.b64decode(img))
  49. print(f"Output image saved at {img_path}")
  50. if __name__ == "__main__":
  51. # OCR API测试
  52. # test_ocr_api()
  53. # PP-StructureV3 API测试
  54. # paddlex --serve --pipeline zhch/PP-StructureV3-zhch.yaml # 启动服务
  55. API_URL = "http://localhost:8080/layout-parsing" # 服务URL
  56. image_path = "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results/2023年度报告母公司/2023年度报告母公司_page_004.png"
  57. test_ppstructurev3_api(image_path, API_URL)