azure.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. """Azure OpenAI embeddings wrapper."""
  2. from __future__ import annotations
  3. from collections.abc import Awaitable, Callable
  4. from typing import cast
  5. import openai
  6. from langchain_core.utils import from_env, secret_from_env
  7. from pydantic import Field, SecretStr, model_validator
  8. from typing_extensions import Self
  9. from langchain_openai.embeddings.base import OpenAIEmbeddings
  10. class AzureOpenAIEmbeddings(OpenAIEmbeddings): # type: ignore[override]
  11. """AzureOpenAI embedding model integration.
  12. Setup:
  13. To access AzureOpenAI embedding models you'll need to create an Azure account,
  14. get an API key, and install the `langchain-openai` integration package.
  15. You'll need to have an Azure OpenAI instance deployed.
  16. You can deploy a version on Azure Portal following this
  17. [guide](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal).
  18. Once you have your instance running, make sure you have the name of your
  19. instance and key. You can find the key in the Azure Portal,
  20. under the “Keys and Endpoint” section of your instance.
  21. ```bash
  22. pip install -U langchain_openai
  23. # Set up your environment variables (or pass them directly to the model)
  24. export AZURE_OPENAI_API_KEY="your-api-key"
  25. export AZURE_OPENAI_ENDPOINT="https://<your-endpoint>.openai.azure.com/"
  26. export AZURE_OPENAI_API_VERSION="2024-02-01"
  27. ```
  28. Key init args — completion params:
  29. model:
  30. Name of `AzureOpenAI` model to use.
  31. dimensions:
  32. Number of dimensions for the embeddings. Can be specified only if the
  33. underlying model supports it.
  34. See full list of supported init args and their descriptions in the params section.
  35. Instantiate:
  36. ```python
  37. from langchain_openai import AzureOpenAIEmbeddings
  38. embeddings = AzureOpenAIEmbeddings(
  39. model="text-embedding-3-large"
  40. # dimensions: int | None = None, # Can specify dimensions with new text-embedding-3 models
  41. # azure_endpoint="https://<your-endpoint>.openai.azure.com/", If not provided, will read env variable AZURE_OPENAI_ENDPOINT
  42. # api_key=... # Can provide an API key directly. If missing read env variable AZURE_OPENAI_API_KEY
  43. # openai_api_version=..., # If not provided, will read env variable AZURE_OPENAI_API_VERSION
  44. )
  45. ```
  46. Embed single text:
  47. ```python
  48. input_text = "The meaning of life is 42"
  49. vector = embed.embed_query(input_text)
  50. print(vector[:3])
  51. ```
  52. ```python
  53. [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]
  54. ```
  55. Embed multiple texts:
  56. ```python
  57. input_texts = ["Document 1...", "Document 2..."]
  58. vectors = embed.embed_documents(input_texts)
  59. print(len(vectors))
  60. # The first 3 coordinates for the first vector
  61. print(vectors[0][:3])
  62. ```
  63. ```python
  64. 2
  65. [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915]
  66. ```
  67. Async:
  68. ```python
  69. vector = await embed.aembed_query(input_text)
  70. print(vector[:3])
  71. # multiple:
  72. # await embed.aembed_documents(input_texts)
  73. ```
  74. ```python
  75. [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188]
  76. ```
  77. """ # noqa: E501
  78. azure_endpoint: str | None = Field(
  79. default_factory=from_env("AZURE_OPENAI_ENDPOINT", default=None)
  80. )
  81. """Your Azure endpoint, including the resource.
  82. Automatically inferred from env var `AZURE_OPENAI_ENDPOINT` if not provided.
  83. Example: `https://example-resource.azure.openai.com/`
  84. """
  85. deployment: str | None = Field(default=None, alias="azure_deployment")
  86. """A model deployment.
  87. If given sets the base client URL to include `/deployments/{azure_deployment}`.
  88. !!! note
  89. This means you won't be able to use non-deployment endpoints.
  90. """
  91. # Check OPENAI_KEY for backwards compatibility.
  92. # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using
  93. # other forms of azure credentials.
  94. openai_api_key: SecretStr | None = Field(
  95. alias="api_key",
  96. default_factory=secret_from_env(
  97. ["AZURE_OPENAI_API_KEY", "OPENAI_API_KEY"], default=None
  98. ),
  99. )
  100. """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided."""
  101. openai_api_version: str | None = Field(
  102. default_factory=from_env("OPENAI_API_VERSION", default="2023-05-15"),
  103. alias="api_version",
  104. )
  105. """Automatically inferred from env var `OPENAI_API_VERSION` if not provided.
  106. Set to `'2023-05-15'` by default if env variable `OPENAI_API_VERSION` is not
  107. set.
  108. """
  109. azure_ad_token: SecretStr | None = Field(
  110. default_factory=secret_from_env("AZURE_OPENAI_AD_TOKEN", default=None)
  111. )
  112. """Your Azure Active Directory token.
  113. Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided.
  114. [For more, see this page.](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id)
  115. """
  116. azure_ad_token_provider: Callable[[], str] | None = None
  117. """A function that returns an Azure Active Directory token.
  118. Will be invoked on every sync request. For async requests,
  119. will be invoked if `azure_ad_async_token_provider` is not provided.
  120. """
  121. azure_ad_async_token_provider: Callable[[], Awaitable[str]] | None = None
  122. """A function that returns an Azure Active Directory token.
  123. Will be invoked on every async request.
  124. """
  125. openai_api_type: str | None = Field(
  126. default_factory=from_env("OPENAI_API_TYPE", default="azure")
  127. )
  128. validate_base_url: bool = True
  129. chunk_size: int = 2048
  130. """Maximum number of texts to embed in each batch"""
  131. @model_validator(mode="after")
  132. def validate_environment(self) -> Self:
  133. """Validate that api key and python package exists in environment."""
  134. # For backwards compatibility. Before openai v1, no distinction was made
  135. # between azure_endpoint and base_url (openai_api_base).
  136. openai_api_base = self.openai_api_base
  137. if openai_api_base and self.validate_base_url:
  138. # Only validate openai_api_base if azure_endpoint is not provided
  139. if not self.azure_endpoint and "/openai" not in openai_api_base:
  140. self.openai_api_base = cast(str, self.openai_api_base) + "/openai"
  141. msg = (
  142. "As of openai>=1.0.0, Azure endpoints should be specified via "
  143. "the `azure_endpoint` param not `openai_api_base` "
  144. "(or alias `base_url`). "
  145. )
  146. raise ValueError(msg)
  147. if self.deployment:
  148. msg = (
  149. "As of openai>=1.0.0, if `deployment` (or alias "
  150. "`azure_deployment`) is specified then "
  151. "`openai_api_base` (or alias `base_url`) should not be. "
  152. "Instead use `deployment` (or alias `azure_deployment`) "
  153. "and `azure_endpoint`."
  154. )
  155. raise ValueError(msg)
  156. client_params: dict = {
  157. "api_version": self.openai_api_version,
  158. "azure_endpoint": self.azure_endpoint,
  159. "azure_deployment": self.deployment,
  160. "api_key": (
  161. self.openai_api_key.get_secret_value() if self.openai_api_key else None
  162. ),
  163. "azure_ad_token": (
  164. self.azure_ad_token.get_secret_value() if self.azure_ad_token else None
  165. ),
  166. "azure_ad_token_provider": self.azure_ad_token_provider,
  167. "organization": self.openai_organization,
  168. "base_url": self.openai_api_base,
  169. "timeout": self.request_timeout,
  170. "max_retries": self.max_retries,
  171. "default_headers": {
  172. **(self.default_headers or {}),
  173. "User-Agent": "langchain-partner-python-azure-openai",
  174. },
  175. "default_query": self.default_query,
  176. }
  177. if not self.client:
  178. sync_specific: dict = {"http_client": self.http_client}
  179. self.client = openai.AzureOpenAI(
  180. **client_params, # type: ignore[arg-type]
  181. **sync_specific,
  182. ).embeddings
  183. if not self.async_client:
  184. async_specific: dict = {"http_client": self.http_async_client}
  185. if self.azure_ad_async_token_provider:
  186. client_params["azure_ad_token_provider"] = (
  187. self.azure_ad_async_token_provider
  188. )
  189. self.async_client = openai.AsyncAzureOpenAI(
  190. **client_params, # type: ignore[arg-type]
  191. **async_specific,
  192. ).embeddings
  193. return self
  194. @property
  195. def _llm_type(self) -> str:
  196. return "azure-openai-chat"