"""Manual smoke tests for locally served OCR and PP-StructureV3 HTTP APIs.

Each test Base64-encodes a local image, POSTs it as JSON to the service,
prints the pruned results and writes the returned images / markdown to disk.
"""

import base64
import pathlib

import requests


def _encode_file(path: str) -> str:
    """Return the bytes of the file at *path* as an ASCII Base64 string."""
    with open(path, "rb") as file:
        return base64.b64encode(file.read()).decode("ascii")


def _post_file(api_url: str, path: str) -> dict:
    """POST the file at *path* to *api_url* and return the ``result`` field.

    Raises:
        AssertionError: if the service does not answer with HTTP 200.
    """
    payload = {
        "file": _encode_file(path),  # Base64-encoded file content (or a file URL)
        "fileType": 1,  # File type; 1 means an image file
    }
    response = requests.post(api_url, json=payload)
    assert response.status_code == 200, (
        f"{api_url} returned HTTP {response.status_code}: {response.text[:200]}"
    )
    return response.json()["result"]


def test_ocr_api():
    """Send the sample captcha image to the OCR service and save its output.

    For every entry in ``ocrResults`` the pruned result is printed, the
    returned Base64 image is decoded into ``sample_data/`` and the first
    recognised text (if any) is printed.
    """
    API_URL = "http://localhost:28888/ocr"
    file_path = "./sample_data/PictureCheckCode.jpeg"

    result = _post_file(API_URL, file_path)

    for i, res in enumerate(result["ocrResults"]):
        pruned = res["prunedResult"]
        print(pruned)

        ocr_img_path = f"sample_data/验证码_ocr_{i}.jpg"
        with open(ocr_img_path, "wb") as f:
            f.write(base64.b64decode(res["ocrImage"]))
        print(f"Output image saved at {ocr_img_path}")

        # Guard against an empty recognition list instead of raising IndexError.
        rec_texts = pruned["rec_texts"]
        if rec_texts:
            print(f"rec_texts: {rec_texts[0]}")
        else:
            print("rec_texts: <no text recognized>")


def test_ppstructurev3_api(image_path: str, API_URL: str):
    """Send *image_path* to a layout-parsing service and save its output.

    Args:
        image_path: Path of the local image to upload.
        API_URL: URL of the layout-parsing endpoint.

    For every entry in ``layoutParsingResults`` this writes
    ``markdown_<i>/doc.md`` plus the images referenced by the markdown, and
    one ``<name>_<i>.jpg`` per entry of ``outputImages`` in the current
    working directory.

    Raises:
        AssertionError: if the service does not answer with HTTP 200.
    """
    # Base64-encode the local image, call the API and unwrap the result.
    result = _post_file(API_URL, image_path)

    for i, res in enumerate(result["layoutParsingResults"]):
        print(res["prunedResult"])

        md_dir = pathlib.Path(f"markdown_{i}")
        md_dir.mkdir(exist_ok=True)
        md_file = md_dir / "doc.md"
        # Explicit UTF-8 so non-ASCII markdown does not depend on the locale.
        md_file.write_text(res["markdown"]["text"], encoding="utf-8")

        for rel_path, img in res["markdown"]["images"].items():
            target = md_dir / rel_path
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(base64.b64decode(img))
        print(f"Markdown document saved at {md_file}")

        for img_name, img in res["outputImages"].items():
            img_path = f"{img_name}_{i}.jpg"
            with open(img_path, "wb") as f:
                f.write(base64.b64decode(img))
            print(f"Output image saved at {img_path}")


if __name__ == "__main__":
    # OCR API test (disabled by default):
    # test_ocr_api()

    # PP-StructureV3 API test. Start the service first with:
    #   paddlex --serve --pipeline zhch/PP-StructureV3-zhch.yaml
    API_URL = "http://localhost:8080/layout-parsing"  # Service URL
    image_path = "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_DotsOCR_Results/2023年度报告母公司/2023年度报告母公司_page_004.png"
    test_ppstructurev3_api(image_path, API_URL)