# client.py
  1. #!/usr/bin/env python
  2. import argparse
  3. import sys
  4. from pathlib import Path
  5. from paddlex_hps_client import triton_request, utils
  6. from tritonclient import grpc as triton_grpc
  def ensure_no_error(output, additional_msg):
      """Abort the program when a service response reports an error.

      Args:
          output: Response mapping. ``output["errorCode"]`` is always read;
              ``output["errorMsg"]`` is read only when the code is non-zero.
          additional_msg: Context line printed to stderr ahead of the error
              code and message.

      Raises:
          SystemExit: With exit status 1 when ``output["errorCode"]`` is
              non-zero. Returns ``None`` otherwise.
      """
      if output["errorCode"] != 0:
          # Diagnostics go to stderr so stdout stays reserved for results.
          print(additional_msg, file=sys.stderr)
          print(f"Error code: {output['errorCode']}", file=sys.stderr)
          print(f"Error message: {output['errorMsg']}", file=sys.stderr)
          sys.exit(1)
  def main():
      """Run the two-stage document translation client.

      Command-line options:
          --file: Input file, passed through ``utils.prepare_input_file``.
          --target-language: Sent as ``targetLanguage`` (default ``"zh"``).
          --file-type: Optional ``fileType`` value (0 or 1); only forwarded
              when given.
          --no-visualization: Sends ``visualize=False`` with the first request.
          --url: Address of the Triton gRPC endpoint
              (default ``localhost:8001``).

      Flow:
          1. Request ``doctrans-visual`` and, for each layout parsing result
             ``i``, print its pruned result, write ``markdown_{i}/doc.md``
             plus the markdown's images, and save any output images as
             ``{img_name}_{i}.jpg`` in the working directory.
          2. Request ``doctrans-translate`` with the collected markdown and
             write each result to ``markdown_{i}/doc_translated.md``.

      The process exits with status 1 (via ``ensure_no_error``) if either
      response carries a non-zero ``errorCode``.
      """
      parser = argparse.ArgumentParser()
      parser.add_argument("--file", type=str, required=True)
      parser.add_argument("--target-language", type=str, default="zh")
      parser.add_argument("--file-type", type=int, choices=[0, 1])
      parser.add_argument("--no-visualization", action="store_true")
      parser.add_argument("--url", type=str, default="localhost:8001")
      args = parser.parse_args()

      client = triton_grpc.InferenceServerClient(args.url)

      # Stage 1: layout analysis / markdown extraction.
      input_ = {"file": utils.prepare_input_file(args.file)}
      if args.file_type is not None:
          input_["fileType"] = args.file_type
      if args.no_visualization:
          input_["visualize"] = False
      output = triton_request(client, "doctrans-visual", input_)
      ensure_no_error(output, "Failed to analyze the images")
      result_visual = output["result"]

      markdown_list = []
      for i, res in enumerate(result_visual["layoutParsingResults"]):
          print(res["prunedResult"])

          md_dir = Path(f"markdown_{i}")
          md_dir.mkdir(exist_ok=True)
          # Explicit UTF-8: the platform default encoding may be unable to
          # represent the document text and would make write_text raise.
          (md_dir / "doc.md").write_text(
              res["markdown"]["text"], encoding="utf-8"
          )
          for rel_path, img in res["markdown"]["images"].items():
              img_path = md_dir / rel_path
              img_path.parent.mkdir(parents=True, exist_ok=True)
              utils.save_output_file(img, img_path)
          print(f"Markdown document to be translated is saved at {md_dir / 'doc.md'}")

          # Images are already on disk; drop them so only the remaining
          # markdown fields are sent with the translation request.
          del res["markdown"]["images"]
          markdown_list.append(res["markdown"])

          # NOTE(review): presumably "outputImages" can be null or missing
          # when visualization is disabled -- confirm against the server
          # schema. Treat that case as "nothing to save" instead of crashing.
          output_images = res.get("outputImages") or {}
          for img_name, img in output_images.items():
              img_path = f"{img_name}_{i}.jpg"
              utils.save_output_file(img, img_path)
              print(f"Output image saved at {img_path}")

      # Stage 2: translate the collected markdown documents.
      input_ = {
          "markdownList": markdown_list,
          "targetLanguage": args.target_language,
      }
      output = triton_request(client, "doctrans-translate", input_)
      ensure_no_error(output, "Failed to translate the markdown")
      result_translate = output["result"]

      for i, res in enumerate(result_translate["translationResults"]):
          md_dir = Path(f"markdown_{i}")
          # Normally created in stage 1; creating it here keeps the write
          # from failing if the result count differs from the input count.
          md_dir.mkdir(exist_ok=True)
          (md_dir / "doc_translated.md").write_text(
              res["markdown"]["text"], encoding="utf-8"
          )
          print(f"Translated markdown document saved at {md_dir / 'doc_translated.md'}")
  # Run the client only when this file is executed as a script, so that
  # importing the module has no side effects.
  if __name__ == "__main__":
      main()