  1. # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2. from __future__ import annotations
  3. from typing import Union, Iterable, Optional
  4. from typing_extensions import Literal
  5. import httpx
  6. from ... import _legacy_response
  7. from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
  8. from ..._utils import maybe_transform, async_maybe_transform
  9. from ..._compat import cached_property
  10. from ..._resource import SyncAPIResource, AsyncAPIResource
  11. from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
  12. from ...pagination import SyncCursorPage, AsyncCursorPage
  13. from ...types.beta import (
  14. assistant_list_params,
  15. assistant_create_params,
  16. assistant_update_params,
  17. )
  18. from ..._base_client import AsyncPaginator, make_request_options
  19. from ...types.beta.assistant import Assistant
  20. from ...types.shared.chat_model import ChatModel
  21. from ...types.beta.assistant_deleted import AssistantDeleted
  22. from ...types.shared_params.metadata import Metadata
  23. from ...types.shared.reasoning_effort import ReasoningEffort
  24. from ...types.beta.assistant_tool_param import AssistantToolParam
  25. from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
# Public names exported by this module.
__all__ = ["Assistants", "AsyncAssistants"]
class Assistants(SyncAPIResource):
    """Synchronous API resource for the beta Assistants endpoints.

    Exposes create/retrieve/update/list/delete operations under ``/assistants``.
    Every request adds the ``OpenAI-Beta: assistants=v2`` header.
    """

    @cached_property
    def with_raw_response(self) -> AssistantsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AssistantsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AssistantsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AssistantsWithStreamingResponse(self)

    def create(
        self,
        *,
        model: Union[str, ChatModel],
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """
        Create an assistant with a model and instructions.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          description: The description of the assistant. The maximum length is 512 characters.

          instructions: The system instructions that the assistant uses. The maximum length is 256,000
              characters.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          name: The name of the assistant. The maximum length is 256 characters.

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is supported for all models after `gpt-5.1-codex-max`.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
              assistant. Tools can be of types `code_interpreter`, `file_search`, or
              `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            "/assistants",
            body=maybe_transform(
                {
                    "model": model,
                    "description": description,
                    "instructions": instructions,
                    "metadata": metadata,
                    "name": name,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                },
                assistant_create_params.AssistantCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )

    def retrieve(
        self,
        assistant_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """
        Retrieves an assistant.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._get(
            f"/assistants/{assistant_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )

    def update(
        self,
        assistant_id: str,
        *,
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[
            str,
            Literal[
                "gpt-5",
                "gpt-5-mini",
                "gpt-5-nano",
                "gpt-5-2025-08-07",
                "gpt-5-mini-2025-08-07",
                "gpt-5-nano-2025-08-07",
                "gpt-4.1",
                "gpt-4.1-mini",
                "gpt-4.1-nano",
                "gpt-4.1-2025-04-14",
                "gpt-4.1-mini-2025-04-14",
                "gpt-4.1-nano-2025-04-14",
                "o3-mini",
                "o3-mini-2025-01-31",
                "o1",
                "o1-2024-12-17",
                "gpt-4o",
                "gpt-4o-2024-11-20",
                "gpt-4o-2024-08-06",
                "gpt-4o-2024-05-13",
                "gpt-4o-mini",
                "gpt-4o-mini-2024-07-18",
                "gpt-4.5-preview",
                "gpt-4.5-preview-2025-02-27",
                "gpt-4-turbo",
                "gpt-4-turbo-2024-04-09",
                "gpt-4-0125-preview",
                "gpt-4-turbo-preview",
                "gpt-4-1106-preview",
                "gpt-4-vision-preview",
                "gpt-4",
                "gpt-4-0314",
                "gpt-4-0613",
                "gpt-4-32k",
                "gpt-4-32k-0314",
                "gpt-4-32k-0613",
                "gpt-3.5-turbo",
                "gpt-3.5-turbo-16k",
                "gpt-3.5-turbo-0613",
                "gpt-3.5-turbo-1106",
                "gpt-3.5-turbo-0125",
                "gpt-3.5-turbo-16k-0613",
            ],
        ]
        | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """Modifies an assistant.

        Args:
          description: The description of the assistant. The maximum length is 512 characters.

          instructions: The system instructions that the assistant uses. The maximum length is 256,000
              characters.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          name: The name of the assistant. The maximum length is 256 characters.

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is supported for all models after `gpt-5.1-codex-max`.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
              assistant. Tools can be of types `code_interpreter`, `file_search`, or
              `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            f"/assistants/{assistant_id}",
            body=maybe_transform(
                {
                    "description": description,
                    "instructions": instructions,
                    "metadata": metadata,
                    "model": model,
                    "name": name,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                },
                assistant_update_params.AssistantUpdateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )

    def list(
        self,
        *,
        after: str | Omit = omit,
        before: str | Omit = omit,
        limit: int | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncCursorPage[Assistant]:
        """Returns a list of assistants.

        Args:
          after: A cursor for use in pagination. `after` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              ending with obj_foo, your subsequent call can include after=obj_foo in order to
              fetch the next page of the list.

          before: A cursor for use in pagination. `before` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              starting with obj_foo, your subsequent call can include before=obj_foo in order
              to fetch the previous page of the list.

          limit: A limit on the number of objects to be returned. Limit can range between 1 and
              100, and the default is 20.

          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
              order and `desc` for descending order.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._get_api_list(
            "/assistants",
            page=SyncCursorPage[Assistant],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "after": after,
                        "before": before,
                        "limit": limit,
                        "order": order,
                    },
                    assistant_list_params.AssistantListParams,
                ),
            ),
            model=Assistant,
        )

    def delete(
        self,
        assistant_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AssistantDeleted:
        """
        Delete an assistant.

        Args:
          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._delete(
            f"/assistants/{assistant_id}",
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=AssistantDeleted,
        )
class AsyncAssistants(AsyncAPIResource):
    """Asynchronous API resource for the beta Assistants endpoints.

    Async counterpart of `Assistants`; every request adds the
    ``OpenAI-Beta: assistants=v2`` header.
    """

    @cached_property
    def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncAssistantsWithRawResponse(self)
  435. @cached_property
  436. def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
  437. """
  438. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  439. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  440. """
  441. return AsyncAssistantsWithStreamingResponse(self)
    async def create(
        self,
        *,
        model: Union[str, ChatModel],
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """
        Create an assistant with a model and instructions.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          description: The description of the assistant. The maximum length is 512 characters.

          instructions: The system instructions that the assistant uses. The maximum length is 256,000
              characters.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          name: The name of the assistant. The maximum length is 256 characters.

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is supported for all models after `gpt-5.1-codex-max`.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
              assistant. Tools can be of types `code_interpreter`, `file_search`, or
              `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return await self._post(
            "/assistants",
            body=await async_maybe_transform(
                {
                    "model": model,
                    "description": description,
                    "instructions": instructions,
                    "metadata": metadata,
                    "name": name,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                },
                assistant_create_params.AssistantCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )
  552. async def retrieve(
  553. self,
  554. assistant_id: str,
  555. *,
  556. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  557. # The extra values given here take precedence over values defined on the client or passed to this method.
  558. extra_headers: Headers | None = None,
  559. extra_query: Query | None = None,
  560. extra_body: Body | None = None,
  561. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  562. ) -> Assistant:
  563. """
  564. Retrieves an assistant.
  565. Args:
  566. extra_headers: Send extra headers
  567. extra_query: Add additional query parameters to the request
  568. extra_body: Add additional JSON properties to the request
  569. timeout: Override the client-level default timeout for this request, in seconds
  570. """
  571. if not assistant_id:
  572. raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
  573. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  574. return await self._get(
  575. f"/assistants/{assistant_id}",
  576. options=make_request_options(
  577. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  578. ),
  579. cast_to=Assistant,
  580. )
async def update(
    self,
    assistant_id: str,
    *,
    description: Optional[str] | Omit = omit,
    instructions: Optional[str] | Omit = omit,
    metadata: Optional[Metadata] | Omit = omit,
    model: Union[
        str,
        Literal[
            "gpt-5",
            "gpt-5-mini",
            "gpt-5-nano",
            "gpt-5-2025-08-07",
            "gpt-5-mini-2025-08-07",
            "gpt-5-nano-2025-08-07",
            "gpt-4.1",
            "gpt-4.1-mini",
            "gpt-4.1-nano",
            "gpt-4.1-2025-04-14",
            "gpt-4.1-mini-2025-04-14",
            "gpt-4.1-nano-2025-04-14",
            "o3-mini",
            "o3-mini-2025-01-31",
            "o1",
            "o1-2024-12-17",
            "gpt-4o",
            "gpt-4o-2024-11-20",
            "gpt-4o-2024-08-06",
            "gpt-4o-2024-05-13",
            "gpt-4o-mini",
            "gpt-4o-mini-2024-07-18",
            "gpt-4.5-preview",
            "gpt-4.5-preview-2025-02-27",
            "gpt-4-turbo",
            "gpt-4-turbo-2024-04-09",
            "gpt-4-0125-preview",
            "gpt-4-turbo-preview",
            "gpt-4-1106-preview",
            "gpt-4-vision-preview",
            "gpt-4",
            "gpt-4-0314",
            "gpt-4-0613",
            "gpt-4-32k",
            "gpt-4-32k-0314",
            "gpt-4-32k-0613",
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-16k",
            "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-1106",
            "gpt-3.5-turbo-0125",
            "gpt-3.5-turbo-16k-0613",
        ],
    ]
    | Omit = omit,
    name: Optional[str] | Omit = omit,
    reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
    response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
    temperature: Optional[float] | Omit = omit,
    tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit,
    tools: Iterable[AssistantToolParam] | Omit = omit,
    top_p: Optional[float] | Omit = omit,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> Assistant:
    """Modifies an assistant.

    Every body parameter defaults to the `omit` sentinel, so only the fields the
    caller explicitly passes are sent to the API (a partial update).

    Args:
      description: The description of the assistant. The maximum length is 512 characters.

      instructions: The system instructions that the assistant uses. The maximum length is 256,000
          characters.

      metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
          for storing additional information about the object in a structured format, and
          querying for objects via API or the dashboard.

          Keys are strings with a maximum length of 64 characters. Values are strings with
          a maximum length of 512 characters.

      model: ID of the model to use. You can use the
          [List models](https://platform.openai.com/docs/api-reference/models/list) API to
          see all of your available models, or see our
          [Model overview](https://platform.openai.com/docs/models) for descriptions of
          them.

      name: The name of the assistant. The maximum length is 256 characters.

      reasoning_effort: Constrains effort on reasoning for
          [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
          supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
          Reducing reasoning effort can result in faster responses and fewer tokens used
          on reasoning in a response.

          - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
            reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
            calls are supported for all reasoning values in gpt-5.1.
          - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
            support `none`.
          - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
          - `xhigh` is supported for all models after `gpt-5.1-codex-max`.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
          [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
          Outputs which ensures the model will match your supplied JSON schema. Learn more
          in the
          [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

          Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
          message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model to
          produce JSON yourself via a system or user message. Without this, the model may
          generate an unending stream of whitespace until the generation reaches the token
          limit, resulting in a long-running and seemingly "stuck" request. Also note that
          the message content may be partially cut off if `finish_reason="length"`, which
          indicates the generation exceeded `max_tokens` or the conversation exceeded the
          max context length.

      temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
          make the output more random, while lower values like 0.2 will make it more
          focused and deterministic.

      tool_resources: A set of resources that are used by the assistant's tools. The resources are
          specific to the type of tool. For example, the `code_interpreter` tool requires
          a list of file IDs, while the `file_search` tool requires a list of vector store
          IDs.

      tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
          assistant. Tools can be of types `code_interpreter`, `file_search`, or
          `function`.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where the
          model considers the results of the tokens with top_p probability mass. So 0.1
          means only the tokens comprising the top 10% probability mass are considered.

          We generally recommend altering this or temperature but not both.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # Guard against an empty path segment, which would POST to "/assistants/" instead.
    if not assistant_id:
        raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
    # The Assistants API requires the v2 beta opt-in header; caller-supplied headers take precedence.
    extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    return await self._post(
        f"/assistants/{assistant_id}",
        # async_maybe_transform drops `omit` sentinels and serializes the rest per the params schema.
        body=await async_maybe_transform(
            {
                "description": description,
                "instructions": instructions,
                "metadata": metadata,
                "model": model,
                "name": name,
                "reasoning_effort": reasoning_effort,
                "response_format": response_format,
                "temperature": temperature,
                "tool_resources": tool_resources,
                "tools": tools,
                "top_p": top_p,
            },
            assistant_update_params.AssistantUpdateParams,
        ),
        options=make_request_options(
            extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
        ),
        cast_to=Assistant,
    )
  741. def list(
  742. self,
  743. *,
  744. after: str | Omit = omit,
  745. before: str | Omit = omit,
  746. limit: int | Omit = omit,
  747. order: Literal["asc", "desc"] | Omit = omit,
  748. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  749. # The extra values given here take precedence over values defined on the client or passed to this method.
  750. extra_headers: Headers | None = None,
  751. extra_query: Query | None = None,
  752. extra_body: Body | None = None,
  753. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  754. ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]:
  755. """Returns a list of assistants.
  756. Args:
  757. after: A cursor for use in pagination.
  758. `after` is an object ID that defines your place
  759. in the list. For instance, if you make a list request and receive 100 objects,
  760. ending with obj_foo, your subsequent call can include after=obj_foo in order to
  761. fetch the next page of the list.
  762. before: A cursor for use in pagination. `before` is an object ID that defines your place
  763. in the list. For instance, if you make a list request and receive 100 objects,
  764. starting with obj_foo, your subsequent call can include before=obj_foo in order
  765. to fetch the previous page of the list.
  766. limit: A limit on the number of objects to be returned. Limit can range between 1 and
  767. 100, and the default is 20.
  768. order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
  769. order and `desc` for descending order.
  770. extra_headers: Send extra headers
  771. extra_query: Add additional query parameters to the request
  772. extra_body: Add additional JSON properties to the request
  773. timeout: Override the client-level default timeout for this request, in seconds
  774. """
  775. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  776. return self._get_api_list(
  777. "/assistants",
  778. page=AsyncCursorPage[Assistant],
  779. options=make_request_options(
  780. extra_headers=extra_headers,
  781. extra_query=extra_query,
  782. extra_body=extra_body,
  783. timeout=timeout,
  784. query=maybe_transform(
  785. {
  786. "after": after,
  787. "before": before,
  788. "limit": limit,
  789. "order": order,
  790. },
  791. assistant_list_params.AssistantListParams,
  792. ),
  793. ),
  794. model=Assistant,
  795. )
  796. async def delete(
  797. self,
  798. assistant_id: str,
  799. *,
  800. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  801. # The extra values given here take precedence over values defined on the client or passed to this method.
  802. extra_headers: Headers | None = None,
  803. extra_query: Query | None = None,
  804. extra_body: Body | None = None,
  805. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  806. ) -> AssistantDeleted:
  807. """
  808. Delete an assistant.
  809. Args:
  810. extra_headers: Send extra headers
  811. extra_query: Add additional query parameters to the request
  812. extra_body: Add additional JSON properties to the request
  813. timeout: Override the client-level default timeout for this request, in seconds
  814. """
  815. if not assistant_id:
  816. raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
  817. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  818. return await self._delete(
  819. f"/assistants/{assistant_id}",
  820. options=make_request_options(
  821. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  822. ),
  823. cast_to=AssistantDeleted,
  824. )
  825. class AssistantsWithRawResponse:
  826. def __init__(self, assistants: Assistants) -> None:
  827. self._assistants = assistants
  828. self.create = _legacy_response.to_raw_response_wrapper(
  829. assistants.create,
  830. )
  831. self.retrieve = _legacy_response.to_raw_response_wrapper(
  832. assistants.retrieve,
  833. )
  834. self.update = _legacy_response.to_raw_response_wrapper(
  835. assistants.update,
  836. )
  837. self.list = _legacy_response.to_raw_response_wrapper(
  838. assistants.list,
  839. )
  840. self.delete = _legacy_response.to_raw_response_wrapper(
  841. assistants.delete,
  842. )
  843. class AsyncAssistantsWithRawResponse:
  844. def __init__(self, assistants: AsyncAssistants) -> None:
  845. self._assistants = assistants
  846. self.create = _legacy_response.async_to_raw_response_wrapper(
  847. assistants.create,
  848. )
  849. self.retrieve = _legacy_response.async_to_raw_response_wrapper(
  850. assistants.retrieve,
  851. )
  852. self.update = _legacy_response.async_to_raw_response_wrapper(
  853. assistants.update,
  854. )
  855. self.list = _legacy_response.async_to_raw_response_wrapper(
  856. assistants.list,
  857. )
  858. self.delete = _legacy_response.async_to_raw_response_wrapper(
  859. assistants.delete,
  860. )
  861. class AssistantsWithStreamingResponse:
  862. def __init__(self, assistants: Assistants) -> None:
  863. self._assistants = assistants
  864. self.create = to_streamed_response_wrapper(
  865. assistants.create,
  866. )
  867. self.retrieve = to_streamed_response_wrapper(
  868. assistants.retrieve,
  869. )
  870. self.update = to_streamed_response_wrapper(
  871. assistants.update,
  872. )
  873. self.list = to_streamed_response_wrapper(
  874. assistants.list,
  875. )
  876. self.delete = to_streamed_response_wrapper(
  877. assistants.delete,
  878. )
  879. class AsyncAssistantsWithStreamingResponse:
  880. def __init__(self, assistants: AsyncAssistants) -> None:
  881. self._assistants = assistants
  882. self.create = async_to_streamed_response_wrapper(
  883. assistants.create,
  884. )
  885. self.retrieve = async_to_streamed_response_wrapper(
  886. assistants.retrieve,
  887. )
  888. self.update = async_to_streamed_response_wrapper(
  889. assistants.update,
  890. )
  891. self.list = async_to_streamed_response_wrapper(
  892. assistants.list,
  893. )
  894. self.delete = async_to_streamed_response_wrapper(
  895. assistants.delete,
  896. )