- # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
- from __future__ import annotations
- from typing import Union, Iterable, Optional
- from typing_extensions import Literal
- import httpx
- from ... import _legacy_response
- from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
- from ..._utils import maybe_transform, async_maybe_transform
- from ..._compat import cached_property
- from ..._resource import SyncAPIResource, AsyncAPIResource
- from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
- from ...pagination import SyncCursorPage, AsyncCursorPage
- from ...types.beta import (
- assistant_list_params,
- assistant_create_params,
- assistant_update_params,
- )
- from ..._base_client import AsyncPaginator, make_request_options
- from ...types.beta.assistant import Assistant
- from ...types.shared.chat_model import ChatModel
- from ...types.beta.assistant_deleted import AssistantDeleted
- from ...types.shared_params.metadata import Metadata
- from ...types.shared.reasoning_effort import ReasoningEffort
- from ...types.beta.assistant_tool_param import AssistantToolParam
- from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
- __all__ = ["Assistants", "AsyncAssistants"]
class Assistants(SyncAPIResource):
    # Synchronous resource for the beta Assistants API (`/assistants` endpoints).
    # NOTE(review): the wrapper classes returned below (AssistantsWithRawResponse /
    # AssistantsWithStreamingResponse) appear to be defined later in this file —
    # not visible in this chunk.

    @cached_property
    def with_raw_response(self) -> AssistantsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AssistantsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AssistantsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AssistantsWithStreamingResponse(self)
    def create(
        self,
        *,
        model: Union[str, ChatModel],
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """
        Create an assistant with a model and instructions.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          description: The description of the assistant. The maximum length is 512 characters.

          instructions: The system instructions that the assistant uses. The maximum length is 256,000
              characters.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          name: The name of the assistant. The maximum length is 256 characters.

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is supported for all models after `gpt-5.1-codex-max`.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
              assistant. Tools can be of types `code_interpreter`, `file_search`, or
              `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Assistants endpoints require the beta opt-in header; caller-supplied
        # headers are splatted last, so they win on any key collision.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            "/assistants",
            # maybe_transform drops omitted keys and coerces values per the
            # AssistantCreateParams TypedDict before serialization.
            body=maybe_transform(
                {
                    "model": model,
                    "description": description,
                    "instructions": instructions,
                    "metadata": metadata,
                    "name": name,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                },
                assistant_create_params.AssistantCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )
- def retrieve(
- self,
- assistant_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Assistant:
- """
- Retrieves an assistant.
- Args:
- extra_headers: Send extra headers
- extra_query: Add additional query parameters to the request
- extra_body: Add additional JSON properties to the request
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not assistant_id:
- raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
- extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
- return self._get(
- f"/assistants/{assistant_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Assistant,
- )
    def update(
        self,
        assistant_id: str,
        *,
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        model: Union[
            str,
            Literal[
                "gpt-5",
                "gpt-5-mini",
                "gpt-5-nano",
                "gpt-5-2025-08-07",
                "gpt-5-mini-2025-08-07",
                "gpt-5-nano-2025-08-07",
                "gpt-4.1",
                "gpt-4.1-mini",
                "gpt-4.1-nano",
                "gpt-4.1-2025-04-14",
                "gpt-4.1-mini-2025-04-14",
                "gpt-4.1-nano-2025-04-14",
                "o3-mini",
                "o3-mini-2025-01-31",
                "o1",
                "o1-2024-12-17",
                "gpt-4o",
                "gpt-4o-2024-11-20",
                "gpt-4o-2024-08-06",
                "gpt-4o-2024-05-13",
                "gpt-4o-mini",
                "gpt-4o-mini-2024-07-18",
                "gpt-4.5-preview",
                "gpt-4.5-preview-2025-02-27",
                "gpt-4-turbo",
                "gpt-4-turbo-2024-04-09",
                "gpt-4-0125-preview",
                "gpt-4-turbo-preview",
                "gpt-4-1106-preview",
                "gpt-4-vision-preview",
                "gpt-4",
                "gpt-4-0314",
                "gpt-4-0613",
                "gpt-4-32k",
                "gpt-4-32k-0314",
                "gpt-4-32k-0613",
                "gpt-3.5-turbo",
                "gpt-3.5-turbo-16k",
                "gpt-3.5-turbo-0613",
                "gpt-3.5-turbo-1106",
                "gpt-3.5-turbo-0125",
                "gpt-3.5-turbo-16k-0613",
            ],
        ]
        | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """Modifies an assistant.

        Args:
          description: The description of the assistant. The maximum length is 512 characters.

          instructions: The system instructions that the assistant uses. The maximum length is 256,000
              characters.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          name: The name of the assistant. The maximum length is 256 characters.

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is supported for all models after `gpt-5.1-codex-max`.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
              assistant. Tools can be of types `code_interpreter`, `file_search`, or
              `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        # Modification uses POST (not PATCH) per the Assistants API; omitted
        # fields are stripped by maybe_transform and left unchanged server-side.
        return self._post(
            f"/assistants/{assistant_id}",
            body=maybe_transform(
                {
                    "description": description,
                    "instructions": instructions,
                    "metadata": metadata,
                    "model": model,
                    "name": name,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                },
                assistant_update_params.AssistantUpdateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )
    def list(
        self,
        *,
        after: str | Omit = omit,
        before: str | Omit = omit,
        limit: int | Omit = omit,
        order: Literal["asc", "desc"] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncCursorPage[Assistant]:
        """Returns a list of assistants.

        Args:
          after: A cursor for use in pagination. `after` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              ending with obj_foo, your subsequent call can include after=obj_foo in order to
              fetch the next page of the list.

          before: A cursor for use in pagination. `before` is an object ID that defines your place
              in the list. For instance, if you make a list request and receive 100 objects,
              starting with obj_foo, your subsequent call can include before=obj_foo in order
              to fetch the previous page of the list.

          limit: A limit on the number of objects to be returned. Limit can range between 1 and
              100, and the default is 20.

          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
              order and `desc` for descending order.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        # Returns a lazy cursor page; subsequent pages are fetched on iteration
        # using the `after`/`before` cursors.
        return self._get_api_list(
            "/assistants",
            page=SyncCursorPage[Assistant],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                # Pagination params travel in the query string, not the body.
                query=maybe_transform(
                    {
                        "after": after,
                        "before": before,
                        "limit": limit,
                        "order": order,
                    },
                    assistant_list_params.AssistantListParams,
                ),
            ),
            model=Assistant,
        )
- def delete(
- self,
- assistant_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AssistantDeleted:
- """
- Delete an assistant.
- Args:
- extra_headers: Send extra headers
- extra_query: Add additional query parameters to the request
- extra_body: Add additional JSON properties to the request
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not assistant_id:
- raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
- extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
- return self._delete(
- f"/assistants/{assistant_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=AssistantDeleted,
- )
class AsyncAssistants(AsyncAPIResource):
    # Asynchronous counterpart of `Assistants`; same endpoints, awaitable methods.
    # NOTE(review): the wrapper classes returned below appear to be defined later
    # in this file — not visible in this chunk.

    @cached_property
    def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncAssistantsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncAssistantsWithStreamingResponse(self)
    async def create(
        self,
        *,
        model: Union[str, ChatModel],
        description: Optional[str] | Omit = omit,
        instructions: Optional[str] | Omit = omit,
        metadata: Optional[Metadata] | Omit = omit,
        name: Optional[str] | Omit = omit,
        reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
        response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        tool_resources: Optional[assistant_create_params.ToolResources] | Omit = omit,
        tools: Iterable[AssistantToolParam] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Assistant:
        """
        Create an assistant with a model and instructions.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          description: The description of the assistant. The maximum length is 512 characters.

          instructions: The system instructions that the assistant uses. The maximum length is 256,000
              characters.

          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
              for storing additional information about the object in a structured format, and
              querying for objects via API or the dashboard.

              Keys are strings with a maximum length of 64 characters. Values are strings with
              a maximum length of 512 characters.

          name: The name of the assistant. The maximum length is 256 characters.

          reasoning_effort: Constrains effort on reasoning for
              [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
              supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
              Reducing reasoning effort can result in faster responses and fewer tokens used
              on reasoning in a response.

              - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
                reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
                calls are supported for all reasoning values in gpt-5.1.
              - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
                support `none`.
              - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
              - `xhigh` is supported for all models after `gpt-5.1-codex-max`.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
              [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which ensures the model will match your supplied JSON schema. Learn more
              in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The resources are
              specific to the type of tool. For example, the `code_interpreter` tool requires
              a list of file IDs, while the `file_search` tool requires a list of vector store
              IDs.

          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
              assistant. Tools can be of types `code_interpreter`, `file_search`, or
              `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Assistants endpoints require the beta opt-in header; caller-supplied
        # headers are splatted last, so they win on any key collision.
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return await self._post(
            "/assistants",
            # async variant of maybe_transform — same omission/coercion semantics.
            body=await async_maybe_transform(
                {
                    "model": model,
                    "description": description,
                    "instructions": instructions,
                    "metadata": metadata,
                    "name": name,
                    "reasoning_effort": reasoning_effort,
                    "response_format": response_format,
                    "temperature": temperature,
                    "tool_resources": tool_resources,
                    "tools": tools,
                    "top_p": top_p,
                },
                assistant_create_params.AssistantCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Assistant,
        )
- async def retrieve(
- self,
- assistant_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Assistant:
- """
- Retrieves an assistant.
- Args:
- extra_headers: Send extra headers
- extra_query: Add additional query parameters to the request
- extra_body: Add additional JSON properties to the request
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not assistant_id:
- raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
- extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
- return await self._get(
- f"/assistants/{assistant_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Assistant,
- )
- async def update(
- self,
- assistant_id: str,
- *,
- description: Optional[str] | Omit = omit,
- instructions: Optional[str] | Omit = omit,
- metadata: Optional[Metadata] | Omit = omit,
- model: Union[
- str,
- Literal[
- "gpt-5",
- "gpt-5-mini",
- "gpt-5-nano",
- "gpt-5-2025-08-07",
- "gpt-5-mini-2025-08-07",
- "gpt-5-nano-2025-08-07",
- "gpt-4.1",
- "gpt-4.1-mini",
- "gpt-4.1-nano",
- "gpt-4.1-2025-04-14",
- "gpt-4.1-mini-2025-04-14",
- "gpt-4.1-nano-2025-04-14",
- "o3-mini",
- "o3-mini-2025-01-31",
- "o1",
- "o1-2024-12-17",
- "gpt-4o",
- "gpt-4o-2024-11-20",
- "gpt-4o-2024-08-06",
- "gpt-4o-2024-05-13",
- "gpt-4o-mini",
- "gpt-4o-mini-2024-07-18",
- "gpt-4.5-preview",
- "gpt-4.5-preview-2025-02-27",
- "gpt-4-turbo",
- "gpt-4-turbo-2024-04-09",
- "gpt-4-0125-preview",
- "gpt-4-turbo-preview",
- "gpt-4-1106-preview",
- "gpt-4-vision-preview",
- "gpt-4",
- "gpt-4-0314",
- "gpt-4-0613",
- "gpt-4-32k",
- "gpt-4-32k-0314",
- "gpt-4-32k-0613",
- "gpt-3.5-turbo",
- "gpt-3.5-turbo-16k",
- "gpt-3.5-turbo-0613",
- "gpt-3.5-turbo-1106",
- "gpt-3.5-turbo-0125",
- "gpt-3.5-turbo-16k-0613",
- ],
- ]
- | Omit = omit,
- name: Optional[str] | Omit = omit,
- reasoning_effort: Optional[ReasoningEffort] | Omit = omit,
- response_format: Optional[AssistantResponseFormatOptionParam] | Omit = omit,
- temperature: Optional[float] | Omit = omit,
- tool_resources: Optional[assistant_update_params.ToolResources] | Omit = omit,
- tools: Iterable[AssistantToolParam] | Omit = omit,
- top_p: Optional[float] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Assistant:
- """Modifies an assistant.
- Args:
- description: The description of the assistant.
- The maximum length is 512 characters.
- instructions: The system instructions that the assistant uses. The maximum length is 256,000
- characters.
- metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
- for storing additional information about the object in a structured format, and
- querying for objects via API or the dashboard.
- Keys are strings with a maximum length of 64 characters. Values are strings with
- a maximum length of 512 characters.
- model: ID of the model to use. You can use the
- [List models](https://platform.openai.com/docs/api-reference/models/list) API to
- see all of your available models, or see our
- [Model overview](https://platform.openai.com/docs/models) for descriptions of
- them.
- name: The name of the assistant. The maximum length is 256 characters.
- reasoning_effort: Constrains effort on reasoning for
- [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
- Reducing reasoning effort can result in faster responses and fewer tokens used
- on reasoning in a response.
- - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
- reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
- calls are supported for all reasoning values in gpt-5.1.
- - All models before `gpt-5.1` default to `medium` reasoning effort, and do not
- support `none`.
- - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
- - `xhigh` is supported for all models after `gpt-5.1-codex-max`.
- response_format: Specifies the format that the model must output. Compatible with
- [GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
- [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4),
- and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
- Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
- Outputs which ensures the model will match your supplied JSON schema. Learn more
- in the
- [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
- Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the
- message the model generates is valid JSON.
- **Important:** when using JSON mode, you **must** also instruct the model to
- produce JSON yourself via a system or user message. Without this, the model may
- generate an unending stream of whitespace until the generation reaches the token
- limit, resulting in a long-running and seemingly "stuck" request. Also note that
- the message content may be partially cut off if `finish_reason="length"`, which
- indicates the generation exceeded `max_tokens` or the conversation exceeded the
- max context length.
- temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
- make the output more random, while lower values like 0.2 will make it more
- focused and deterministic.
- tool_resources: A set of resources that are used by the assistant's tools. The resources are
- specific to the type of tool. For example, the `code_interpreter` tool requires
- a list of file IDs, while the `file_search` tool requires a list of vector store
- IDs.
- tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
- assistant. Tools can be of types `code_interpreter`, `file_search`, or
- `function`.
- top_p: An alternative to sampling with temperature, called nucleus sampling, where the
- model considers the results of the tokens with top_p probability mass. So 0.1
- means only the tokens comprising the top 10% probability mass are considered.
- We generally recommend altering this or temperature but not both.
- extra_headers: Send extra headers
- extra_query: Add additional query parameters to the request
- extra_body: Add additional JSON properties to the request
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not assistant_id:
- raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
- extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
- return await self._post(
- f"/assistants/{assistant_id}",
- body=await async_maybe_transform(
- {
- "description": description,
- "instructions": instructions,
- "metadata": metadata,
- "model": model,
- "name": name,
- "reasoning_effort": reasoning_effort,
- "response_format": response_format,
- "temperature": temperature,
- "tool_resources": tool_resources,
- "tools": tools,
- "top_p": top_p,
- },
- assistant_update_params.AssistantUpdateParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Assistant,
- )
- def list(
- self,
- *,
- after: str | Omit = omit,
- before: str | Omit = omit,
- limit: int | Omit = omit,
- order: Literal["asc", "desc"] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]:
- """Returns a list of assistants.
- Args:
- after: A cursor for use in pagination.
- `after` is an object ID that defines your place
- in the list. For instance, if you make a list request and receive 100 objects,
- ending with obj_foo, your subsequent call can include after=obj_foo in order to
- fetch the next page of the list.
- before: A cursor for use in pagination. `before` is an object ID that defines your place
- in the list. For instance, if you make a list request and receive 100 objects,
- starting with obj_foo, your subsequent call can include before=obj_foo in order
- to fetch the previous page of the list.
- limit: A limit on the number of objects to be returned. Limit can range between 1 and
- 100, and the default is 20.
- order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
- order and `desc` for descending order.
- extra_headers: Send extra headers
- extra_query: Add additional query parameters to the request
- extra_body: Add additional JSON properties to the request
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
- return self._get_api_list(
- "/assistants",
- page=AsyncCursorPage[Assistant],
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=maybe_transform(
- {
- "after": after,
- "before": before,
- "limit": limit,
- "order": order,
- },
- assistant_list_params.AssistantListParams,
- ),
- ),
- model=Assistant,
- )
- async def delete(
- self,
- assistant_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AssistantDeleted:
- """
- Delete an assistant.
- Args:
- extra_headers: Send extra headers
- extra_query: Add additional query parameters to the request
- extra_body: Add additional JSON properties to the request
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not assistant_id:
- raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
- extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
- return await self._delete(
- f"/assistants/{assistant_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=AssistantDeleted,
- )
class AssistantsWithRawResponse:
    """Wraps each `Assistants` method so calls return the raw HTTP response."""

    def __init__(self, assistants: Assistants) -> None:
        self._assistants = assistants

        # Bind the wrapper once; every method gets the same raw-response treatment.
        wrap = _legacy_response.to_raw_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
class AsyncAssistantsWithRawResponse:
    """Wraps each `AsyncAssistants` method so calls return the raw HTTP response."""

    def __init__(self, assistants: AsyncAssistants) -> None:
        self._assistants = assistants

        # Bind the wrapper once; every method gets the same raw-response treatment.
        wrap = _legacy_response.async_to_raw_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
class AssistantsWithStreamingResponse:
    """Wraps each `Assistants` method so calls return a streamed response."""

    def __init__(self, assistants: Assistants) -> None:
        self._assistants = assistants

        # Bind the wrapper once; every method gets the same streaming treatment.
        wrap = to_streamed_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
class AsyncAssistantsWithStreamingResponse:
    """Wraps each `AsyncAssistants` method so calls return a streamed response."""

    def __init__(self, assistants: AsyncAssistants) -> None:
        self._assistants = assistants

        # Bind the wrapper once; every method gets the same streaming treatment.
        wrap = async_to_streamed_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
|