doc_understanding.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from enum import Enum
  15. from typing import Final, List, Literal, Optional, Union
  16. from pydantic import BaseModel, HttpUrl
  17. from ....utils.deps import is_dep_available
  18. from ..infra.models import PrimaryOperations
  # Import ChatCompletion only when the optional "openai" dependency is
  # reported as available. When the check is false the name is left unbound
  # in this module.
  if is_dep_available("openai"):
      from openai.types.chat import ChatCompletion
  # Names exported by a star-import of this module.
  __all__ = [
      "INFER_ENDPOINT",
      "InferRequest",
      "PRIMARY_OPERATIONS",
  ]
  # URL path registered for the "infer" operation in PRIMARY_OPERATIONS.
  INFER_ENDPOINT: Final[str] = "/document-understanding"
  # Discriminator values for the ``type`` field of TextContent and
  # ImageContent. Members subclass ``str``, so they compare equal to their
  # raw string values.
  class ContentType(str, Enum):
      TEXT = "text"
      IMAGE_URL = "image_url"
  # Role names as a ``str``-backed enum.
  # NOTE(review): nothing in the visible part of this module references
  # RoleType; Message.role is declared as a plain ``str``.
  class RoleType(str, Enum):
      USER = "user"
      ASSISTANT = "assistant"
      SYSTEM = "system"
  # Image reference: a location plus an optional detail setting.
  class ImageUrl(BaseModel):
      # Either an HttpUrl or any other string.
      url: Union[HttpUrl, str]
      # One of "low", "high", "auto", or None; defaults to "auto".
      detail: Optional[Literal["low", "high", "auto"]] = "auto"
  # Text part of a message's content list.
  class TextContent(BaseModel):
      # Fixed discriminator; only ContentType.TEXT is accepted and it is
      # also the default.
      type: Literal[ContentType.TEXT] = ContentType.TEXT
      # The text payload (required).
      text: str
  # Image part of a message's content list.
  class ImageContent(BaseModel):
      # Fixed discriminator; only ContentType.IMAGE_URL is accepted and it
      # is also the default.
      type: Literal[ContentType.IMAGE_URL] = ContentType.IMAGE_URL
      # Either a bare HttpUrl or a full ImageUrl object (required).
      image_url: Union[HttpUrl, ImageUrl]
  # One message in an InferRequest.
  class Message(BaseModel):
      # Free-form string; this model does not restrict it to RoleType values.
      role: str
      # Either a plain string or a list mixing TextContent and ImageContent
      # parts.
      content: Union[str, List[Union[TextContent, ImageContent]]]
  # Request body model paired with INFER_ENDPOINT in PRIMARY_OPERATIONS.
  class InferRequest(BaseModel):
      # Required fields.
      model: str
      messages: List[Message]
      # Optional settings; each accepts None and falls back to the default
      # shown when omitted.
      # NOTE(review): names look like OpenAI chat-completion request
      # parameters -- confirm the intended semantics with the handler.
      max_tokens: Optional[int] = 1024
      temperature: Optional[float] = 0.1
      top_p: Optional[float] = 0.95
      stream: Optional[bool] = False
  # Maps the operation name "infer" to a 3-tuple of endpoint path, request
  # model, and ChatCompletion (presumably the response type -- confirm
  # against the PrimaryOperations definition).
  # NOTE(review): ChatCompletion is imported only under
  # ``if is_dep_available("openai")``; when that check is false, evaluating
  # this literal raises NameError while the module is being imported.
  PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
      "infer": (INFER_ENDPOINT, InferRequest, ChatCompletion),
  }