inference.py

import os

from openai import OpenAI, OpenAIError

from dots_ocr.utils.image_utils import PILimage_to_base64


def inference_with_vllm(
    image,
    prompt,
    protocol="http",
    ip="localhost",
    port=8000,
    temperature=0.1,
    top_p=0.9,
    max_completion_tokens=32768,
    model_name="rednote-hilab/dots.ocr",
):
    # Build the OpenAI-compatible endpoint exposed by the vLLM server.
    addr = f"{protocol}://{ip}:{port}/v1"
    client = OpenAI(api_key=os.environ.get("API_KEY", "0"), base_url=addr)
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": PILimage_to_base64(image)},
                },
                # Prefix the prompt with the image tokens explicitly; without
                # "<|img|><|imgpad|><|endofimg|>", vLLM v1 inserts a "\n"
                # between the image and the prompt.
                {"type": "text", "text": f"<|img|><|imgpad|><|endofimg|>{prompt}"},
            ],
        }
    ]
    try:
        response = client.chat.completions.create(
            messages=messages,
            model=model_name,
            max_completion_tokens=max_completion_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return response.choices[0].message.content
    except OpenAIError as e:
        # The OpenAI v1 client raises its own exception hierarchy (it uses
        # httpx internally, not requests), so catching
        # requests.exceptions.RequestException would never fire.
        print(f"request error: {e}")
        return None
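
# Minimal usage sketch, assuming a vLLM server is already serving
# rednote-hilab/dots.ocr at localhost:8000 and that "demo.png" exists
# locally; the prompt text here is illustrative.
if __name__ == "__main__":
    from PIL import Image

    image = Image.open("demo.png")
    result = inference_with_vllm(
        image,
        prompt="Extract the text content from this image.",
    )
    print(result)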