client.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. #!/usr/bin/env python
  2. import argparse
  3. import sys
  4. from paddlex_hps_client import triton_request, utils
  5. from tritonclient import grpc as triton_grpc
  6. def main():
  7. parser = argparse.ArgumentParser()
  8. parser.add_argument("--image", type=str, required=True)
  9. parser.add_argument("--query", type=str, required=True)
  10. parser.add_argument("--max-image-tokens", type=int, default=None)
  11. parser.add_argument("--url", type=str, default="localhost:8001")
  12. args = parser.parse_args()
  13. client = triton_grpc.InferenceServerClient(
  14. args.url,
  15. # HACK
  16. keepalive_options=triton_grpc.KeepAliveOptions(keepalive_timeout_ms=1000000),
  17. )
  18. image = utils.prepare_input_file(args.image, include_header=True)
  19. input_ = {
  20. "model": "pp-docbee",
  21. "messages": [
  22. {"role": "system", "content": "You are a helpful assistant."},
  23. {
  24. "role": "user",
  25. "content": [
  26. {"type": "text", "text": args.query},
  27. {"type": "image_url", "image_url": {"url": image}},
  28. ],
  29. },
  30. ],
  31. "max_image_tokens": args.max_image_tokens,
  32. }
  33. output = triton_request(client, "document-understanding", input_)
  34. if output["errorCode"] != 0:
  35. print(f"Error code: {output['errorCode']}", file=sys.stderr)
  36. print(f"Error message: {output['errorMsg']}", file=sys.stderr)
  37. sys.exit(1)
  38. result = output["result"]
  39. print("Final result:")
  40. print(result["choices"][0]["message"]["content"])
  41. if __name__ == "__main__":
  42. main()