|
|
@@ -1,23 +1,67 @@
|
|
|
import base64
|
|
|
import requests
|
|
|
+import pathlib
|
|
|
|
|
|
-API_URL = "http://localhost:28888/ocr"
|
|
|
-file_path = "./sample_data/PictureCheckCode.jpeg"
|
|
|
+def test_ocr_api():
|
|
|
+    API_URL = "http://localhost:28888/ocr"
|
|
|
+    file_path = "./sample_data/PictureCheckCode.jpeg"
|
|
|
+    with open(file_path, "rb") as file:
|
|
|
+        file_bytes = file.read()
|
|
|
+        file_data = base64.b64encode(file_bytes).decode("ascii")
|
|
|
|
|
|
-with open(file_path, "rb") as file:
|
|
|
-    file_bytes = file.read()
|
|
|
-    file_data = base64.b64encode(file_bytes).decode("ascii")
|
|
|
+    payload = {"file": file_data, "fileType": 1}
|
|
|
|
|
|
-payload = {"file": file_data, "fileType": 1}
|
|
|
+    response = requests.post(API_URL, json=payload, timeout=60)
|
|
|
|
|
|
-response = requests.post(API_URL, json=payload)
|
|
|
+    assert response.status_code == 200
|
|
|
+    result = response.json()["result"]
|
|
|
+    for i, res in enumerate(result["ocrResults"]):
|
|
|
+        print(res["prunedResult"])
|
|
|
+        ocr_img_path = f"sample_data/验证码_ocr_{i}.jpg"
|
|
|
+        with open(ocr_img_path, "wb") as f:
|
|
|
+            f.write(base64.b64decode(res["ocrImage"]))
|
|
|
+        print(f"Output image saved at {ocr_img_path}")
|
|
|
+        print(f"rec_texts: {res['prunedResult']['rec_texts'][0]}")
|
|
|
|
|
|
-assert response.status_code == 200
|
|
|
-result = response.json()["result"]
|
|
|
-for i, res in enumerate(result["ocrResults"]):
|
|
|
- print(res["prunedResult"])
|
|
|
- ocr_img_path = f"sample_data/验证码_ocr_{i}.jpg"
|
|
|
- with open(ocr_img_path, "wb") as f:
|
|
|
- f.write(base64.b64decode(res["ocrImage"]))
|
|
|
- print(f"Output image saved at {ocr_img_path}")
|
|
|
- print(f"rec_texts: {res['prunedResult']['rec_texts'][0]}")
|
|
|
+def test_ppstructurev3_api(image_path: str, API_URL: str):
|
|
|
+    # Base64-encode the local image
|
|
|
+    with open(image_path, "rb") as file:
|
|
|
+        image_bytes = file.read()
|
|
|
+        image_data = base64.b64encode(image_bytes).decode("ascii")
|
|
|
+
|
|
|
+    payload = {
|
|
|
+        "file": image_data,  # Base64-encoded file content or a file URL
|
|
|
+        "fileType": 1,  # file type; 1 means an image file
|
|
|
+    }
|
|
|
+
|
|
|
+    # Call the API; the timeout keeps a stalled service from hanging the run
|
|
|
+    response = requests.post(API_URL, json=payload, timeout=300)
|
|
|
+
|
|
|
+    # Process the data returned by the API
|
|
|
+    assert response.status_code == 200
|
|
|
+    result = response.json()["result"]
|
|
|
+    for i, res in enumerate(result["layoutParsingResults"]):
|
|
|
+        print(res["prunedResult"])
|
|
|
+        md_dir = pathlib.Path(f"markdown_{i}")
|
|
|
+        md_dir.mkdir(exist_ok=True)
|
|
|
+        (md_dir / "doc.md").write_text(res["markdown"]["text"], encoding="utf-8")
|
|
|
+        for img_path, img in res["markdown"]["images"].items():
|
|
|
+            img_path = md_dir / img_path
|
|
|
+            img_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
+            img_path.write_bytes(base64.b64decode(img))
|
|
|
+        print(f"Markdown document saved at {md_dir / 'doc.md'}")
|
|
|
+        for img_name, img in res["outputImages"].items():
|
|
|
+            img_path = f"{img_name}_{i}.jpg"
|
|
|
+            with open(img_path, "wb") as f:
|
|
|
+                f.write(base64.b64decode(img))
|
|
|
+            print(f"Output image saved at {img_path}")
|
|
|
+
|
|
|
+if __name__ == "__main__":
|
|
|
+    # OCR API test
|
|
|
+    # test_ocr_api()
|
|
|
+
|
|
|
+    # PP-StructureV3 API test
|
|
|
+    # paddlex --serve --pipeline zhch/PP-StructureV3-zhch.yaml  # start the service
|
|
|
+    API_URL = "http://localhost:8080/layout-parsing"  # service URL
|
|
|
+    image_path = "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results/2023年度报告母公司/2023年度报告母公司_page_004.png"
|
|
|
+    test_ppstructurev3_api(image_path, API_URL)
|