realtime.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079
  1. # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2. from __future__ import annotations
  3. import json
  4. import logging
  5. from types import TracebackType
  6. from typing import TYPE_CHECKING, Any, Iterator, cast
  7. from typing_extensions import AsyncIterator
  8. import httpx
  9. from pydantic import BaseModel
  10. from .calls import (
  11. Calls,
  12. AsyncCalls,
  13. CallsWithRawResponse,
  14. AsyncCallsWithRawResponse,
  15. CallsWithStreamingResponse,
  16. AsyncCallsWithStreamingResponse,
  17. )
  18. from ..._types import Omit, Query, Headers, omit
  19. from ..._utils import (
  20. is_azure_client,
  21. maybe_transform,
  22. strip_not_given,
  23. async_maybe_transform,
  24. is_async_azure_client,
  25. )
  26. from ..._compat import cached_property
  27. from ..._models import construct_type_unchecked
  28. from ..._resource import SyncAPIResource, AsyncAPIResource
  29. from ..._exceptions import OpenAIError
  30. from ..._base_client import _merge_mappings
  31. from .client_secrets import (
  32. ClientSecrets,
  33. AsyncClientSecrets,
  34. ClientSecretsWithRawResponse,
  35. AsyncClientSecretsWithRawResponse,
  36. ClientSecretsWithStreamingResponse,
  37. AsyncClientSecretsWithStreamingResponse,
  38. )
  39. from ...types.realtime import session_update_event_param
  40. from ...types.websocket_connection_options import WebsocketConnectionOptions
  41. from ...types.realtime.realtime_client_event import RealtimeClientEvent
  42. from ...types.realtime.realtime_server_event import RealtimeServerEvent
  43. from ...types.realtime.conversation_item_param import ConversationItemParam
  44. from ...types.realtime.realtime_client_event_param import RealtimeClientEventParam
  45. from ...types.realtime.realtime_response_create_params_param import RealtimeResponseCreateParamsParam
  46. if TYPE_CHECKING:
  47. from websockets.sync.client import ClientConnection as WebsocketConnection
  48. from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection
  49. from ..._client import OpenAI, AsyncOpenAI
  50. __all__ = ["Realtime", "AsyncRealtime"]
  51. log: logging.Logger = logging.getLogger(__name__)
  52. class Realtime(SyncAPIResource):
  53. @cached_property
  54. def client_secrets(self) -> ClientSecrets:
  55. return ClientSecrets(self._client)
  56. @cached_property
  57. def calls(self) -> Calls:
  58. from ...lib._realtime import _Calls
  59. return _Calls(self._client)
  60. @cached_property
  61. def with_raw_response(self) -> RealtimeWithRawResponse:
  62. """
  63. This property can be used as a prefix for any HTTP method call to return
  64. the raw response object instead of the parsed content.
  65. For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  66. """
  67. return RealtimeWithRawResponse(self)
  68. @cached_property
  69. def with_streaming_response(self) -> RealtimeWithStreamingResponse:
  70. """
  71. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  72. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  73. """
  74. return RealtimeWithStreamingResponse(self)
  75. def connect(
  76. self,
  77. *,
  78. call_id: str | Omit = omit,
  79. model: str | Omit = omit,
  80. extra_query: Query = {},
  81. extra_headers: Headers = {},
  82. websocket_connection_options: WebsocketConnectionOptions = {},
  83. ) -> RealtimeConnectionManager:
  84. """
  85. The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
  86. Some notable benefits of the API include:
  87. - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
  88. - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
  89. - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
  90. The Realtime API is a stateful, event-based API that communicates over a WebSocket.
  91. """
  92. return RealtimeConnectionManager(
  93. client=self._client,
  94. extra_query=extra_query,
  95. extra_headers=extra_headers,
  96. websocket_connection_options=websocket_connection_options,
  97. call_id=call_id,
  98. model=model,
  99. )
  100. class AsyncRealtime(AsyncAPIResource):
  101. @cached_property
  102. def client_secrets(self) -> AsyncClientSecrets:
  103. return AsyncClientSecrets(self._client)
  104. @cached_property
  105. def calls(self) -> AsyncCalls:
  106. from ...lib._realtime import _AsyncCalls
  107. return _AsyncCalls(self._client)
  108. @cached_property
  109. def with_raw_response(self) -> AsyncRealtimeWithRawResponse:
  110. """
  111. This property can be used as a prefix for any HTTP method call to return
  112. the raw response object instead of the parsed content.
  113. For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  114. """
  115. return AsyncRealtimeWithRawResponse(self)
  116. @cached_property
  117. def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse:
  118. """
  119. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  120. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  121. """
  122. return AsyncRealtimeWithStreamingResponse(self)
  123. def connect(
  124. self,
  125. *,
  126. call_id: str | Omit = omit,
  127. model: str | Omit = omit,
  128. extra_query: Query = {},
  129. extra_headers: Headers = {},
  130. websocket_connection_options: WebsocketConnectionOptions = {},
  131. ) -> AsyncRealtimeConnectionManager:
  132. """
  133. The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling.
  134. Some notable benefits of the API include:
  135. - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output.
  136. - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction.
  137. - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback.
  138. The Realtime API is a stateful, event-based API that communicates over a WebSocket.
  139. """
  140. return AsyncRealtimeConnectionManager(
  141. client=self._client,
  142. extra_query=extra_query,
  143. extra_headers=extra_headers,
  144. websocket_connection_options=websocket_connection_options,
  145. call_id=call_id,
  146. model=model,
  147. )
  148. class RealtimeWithRawResponse:
  149. def __init__(self, realtime: Realtime) -> None:
  150. self._realtime = realtime
  151. @cached_property
  152. def client_secrets(self) -> ClientSecretsWithRawResponse:
  153. return ClientSecretsWithRawResponse(self._realtime.client_secrets)
  154. @cached_property
  155. def calls(self) -> CallsWithRawResponse:
  156. return CallsWithRawResponse(self._realtime.calls)
  157. class AsyncRealtimeWithRawResponse:
  158. def __init__(self, realtime: AsyncRealtime) -> None:
  159. self._realtime = realtime
  160. @cached_property
  161. def client_secrets(self) -> AsyncClientSecretsWithRawResponse:
  162. return AsyncClientSecretsWithRawResponse(self._realtime.client_secrets)
  163. @cached_property
  164. def calls(self) -> AsyncCallsWithRawResponse:
  165. return AsyncCallsWithRawResponse(self._realtime.calls)
  166. class RealtimeWithStreamingResponse:
  167. def __init__(self, realtime: Realtime) -> None:
  168. self._realtime = realtime
  169. @cached_property
  170. def client_secrets(self) -> ClientSecretsWithStreamingResponse:
  171. return ClientSecretsWithStreamingResponse(self._realtime.client_secrets)
  172. @cached_property
  173. def calls(self) -> CallsWithStreamingResponse:
  174. return CallsWithStreamingResponse(self._realtime.calls)
  175. class AsyncRealtimeWithStreamingResponse:
  176. def __init__(self, realtime: AsyncRealtime) -> None:
  177. self._realtime = realtime
  178. @cached_property
  179. def client_secrets(self) -> AsyncClientSecretsWithStreamingResponse:
  180. return AsyncClientSecretsWithStreamingResponse(self._realtime.client_secrets)
  181. @cached_property
  182. def calls(self) -> AsyncCallsWithStreamingResponse:
  183. return AsyncCallsWithStreamingResponse(self._realtime.calls)
  184. class AsyncRealtimeConnection:
  185. """Represents a live WebSocket connection to the Realtime API"""
  186. session: AsyncRealtimeSessionResource
  187. response: AsyncRealtimeResponseResource
  188. input_audio_buffer: AsyncRealtimeInputAudioBufferResource
  189. conversation: AsyncRealtimeConversationResource
  190. output_audio_buffer: AsyncRealtimeOutputAudioBufferResource
  191. _connection: AsyncWebsocketConnection
  192. def __init__(self, connection: AsyncWebsocketConnection) -> None:
  193. self._connection = connection
  194. self.session = AsyncRealtimeSessionResource(self)
  195. self.response = AsyncRealtimeResponseResource(self)
  196. self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
  197. self.conversation = AsyncRealtimeConversationResource(self)
  198. self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)
  199. async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
  200. """
  201. An infinite-iterator that will continue to yield events until
  202. the connection is closed.
  203. """
  204. from websockets.exceptions import ConnectionClosedOK
  205. try:
  206. while True:
  207. yield await self.recv()
  208. except ConnectionClosedOK:
  209. return
  210. async def recv(self) -> RealtimeServerEvent:
  211. """
  212. Receive the next message from the connection and parses it into a `RealtimeServerEvent` object.
  213. Canceling this method is safe. There's no risk of losing data.
  214. """
  215. return self.parse_event(await self.recv_bytes())
  216. async def recv_bytes(self) -> bytes:
  217. """Receive the next message from the connection as raw bytes.
  218. Canceling this method is safe. There's no risk of losing data.
  219. If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
  220. then you can call `.parse_event(data)`.
  221. """
  222. message = await self._connection.recv(decode=False)
  223. log.debug(f"Received websocket message: %s", message)
  224. return message
  225. async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
  226. data = (
  227. event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
  228. if isinstance(event, BaseModel)
  229. else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam))
  230. )
  231. await self._connection.send(data)
  232. async def close(self, *, code: int = 1000, reason: str = "") -> None:
  233. await self._connection.close(code=code, reason=reason)
  234. def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
  235. """
  236. Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
  237. This is helpful if you're using `.recv_bytes()`.
  238. """
  239. return cast(
  240. RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
  241. )
  242. class AsyncRealtimeConnectionManager:
  243. """
  244. Context manager over a `AsyncRealtimeConnection` that is returned by `realtime.connect()`
  245. This context manager ensures that the connection will be closed when it exits.
  246. ---
  247. Note that if your application doesn't work well with the context manager approach then you
  248. can call the `.enter()` method directly to initiate a connection.
  249. **Warning**: You must remember to close the connection with `.close()`.
  250. ```py
  251. connection = await client.realtime.connect(...).enter()
  252. # ...
  253. await connection.close()
  254. ```
  255. """
  256. def __init__(
  257. self,
  258. *,
  259. client: AsyncOpenAI,
  260. call_id: str | Omit = omit,
  261. model: str | Omit = omit,
  262. extra_query: Query,
  263. extra_headers: Headers,
  264. websocket_connection_options: WebsocketConnectionOptions,
  265. ) -> None:
  266. self.__client = client
  267. self.__call_id = call_id
  268. self.__model = model
  269. self.__connection: AsyncRealtimeConnection | None = None
  270. self.__extra_query = extra_query
  271. self.__extra_headers = extra_headers
  272. self.__websocket_connection_options = websocket_connection_options
  273. async def __aenter__(self) -> AsyncRealtimeConnection:
  274. """
  275. 👋 If your application doesn't work well with the context manager approach then you
  276. can call this method directly to initiate a connection.
  277. **Warning**: You must remember to close the connection with `.close()`.
  278. ```py
  279. connection = await client.realtime.connect(...).enter()
  280. # ...
  281. await connection.close()
  282. ```
  283. """
  284. try:
  285. from websockets.asyncio.client import connect
  286. except ImportError as exc:
  287. raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
  288. extra_query = self.__extra_query
  289. await self.__client._refresh_api_key()
  290. auth_headers = self.__client.auth_headers
  291. extra_query = self.__extra_query
  292. if self.__call_id is not omit:
  293. extra_query = {**extra_query, "call_id": self.__call_id}
  294. if is_async_azure_client(self.__client):
  295. model = self.__model
  296. if not model:
  297. raise OpenAIError("`model` is required for Azure Realtime API")
  298. else:
  299. url, auth_headers = await self.__client._configure_realtime(model, extra_query)
  300. else:
  301. url = self._prepare_url().copy_with(
  302. params={
  303. **self.__client.base_url.params,
  304. **({"model": self.__model} if self.__model is not omit else {}),
  305. **extra_query,
  306. },
  307. )
  308. log.debug("Connecting to %s", url)
  309. if self.__websocket_connection_options:
  310. log.debug("Connection options: %s", self.__websocket_connection_options)
  311. self.__connection = AsyncRealtimeConnection(
  312. await connect(
  313. str(url),
  314. user_agent_header=self.__client.user_agent,
  315. additional_headers=_merge_mappings(
  316. {
  317. **auth_headers,
  318. },
  319. self.__extra_headers,
  320. ),
  321. **self.__websocket_connection_options,
  322. )
  323. )
  324. return self.__connection
  325. enter = __aenter__
  326. def _prepare_url(self) -> httpx.URL:
  327. if self.__client.websocket_base_url is not None:
  328. base_url = httpx.URL(self.__client.websocket_base_url)
  329. else:
  330. base_url = self.__client._base_url.copy_with(scheme="wss")
  331. merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
  332. return base_url.copy_with(raw_path=merge_raw_path)
  333. async def __aexit__(
  334. self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
  335. ) -> None:
  336. if self.__connection is not None:
  337. await self.__connection.close()
  338. class RealtimeConnection:
  339. """Represents a live WebSocket connection to the Realtime API"""
  340. session: RealtimeSessionResource
  341. response: RealtimeResponseResource
  342. input_audio_buffer: RealtimeInputAudioBufferResource
  343. conversation: RealtimeConversationResource
  344. output_audio_buffer: RealtimeOutputAudioBufferResource
  345. _connection: WebsocketConnection
  346. def __init__(self, connection: WebsocketConnection) -> None:
  347. self._connection = connection
  348. self.session = RealtimeSessionResource(self)
  349. self.response = RealtimeResponseResource(self)
  350. self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
  351. self.conversation = RealtimeConversationResource(self)
  352. self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)
  353. def __iter__(self) -> Iterator[RealtimeServerEvent]:
  354. """
  355. An infinite-iterator that will continue to yield events until
  356. the connection is closed.
  357. """
  358. from websockets.exceptions import ConnectionClosedOK
  359. try:
  360. while True:
  361. yield self.recv()
  362. except ConnectionClosedOK:
  363. return
  364. def recv(self) -> RealtimeServerEvent:
  365. """
  366. Receive the next message from the connection and parses it into a `RealtimeServerEvent` object.
  367. Canceling this method is safe. There's no risk of losing data.
  368. """
  369. return self.parse_event(self.recv_bytes())
  370. def recv_bytes(self) -> bytes:
  371. """Receive the next message from the connection as raw bytes.
  372. Canceling this method is safe. There's no risk of losing data.
  373. If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does,
  374. then you can call `.parse_event(data)`.
  375. """
  376. message = self._connection.recv(decode=False)
  377. log.debug(f"Received websocket message: %s", message)
  378. return message
  379. def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
  380. data = (
  381. event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True)
  382. if isinstance(event, BaseModel)
  383. else json.dumps(maybe_transform(event, RealtimeClientEventParam))
  384. )
  385. self._connection.send(data)
  386. def close(self, *, code: int = 1000, reason: str = "") -> None:
  387. self._connection.close(code=code, reason=reason)
  388. def parse_event(self, data: str | bytes) -> RealtimeServerEvent:
  389. """
  390. Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object.
  391. This is helpful if you're using `.recv_bytes()`.
  392. """
  393. return cast(
  394. RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent))
  395. )
  396. class RealtimeConnectionManager:
  397. """
  398. Context manager over a `RealtimeConnection` that is returned by `realtime.connect()`
  399. This context manager ensures that the connection will be closed when it exits.
  400. ---
  401. Note that if your application doesn't work well with the context manager approach then you
  402. can call the `.enter()` method directly to initiate a connection.
  403. **Warning**: You must remember to close the connection with `.close()`.
  404. ```py
  405. connection = client.realtime.connect(...).enter()
  406. # ...
  407. connection.close()
  408. ```
  409. """
  410. def __init__(
  411. self,
  412. *,
  413. client: OpenAI,
  414. call_id: str | Omit = omit,
  415. model: str | Omit = omit,
  416. extra_query: Query,
  417. extra_headers: Headers,
  418. websocket_connection_options: WebsocketConnectionOptions,
  419. ) -> None:
  420. self.__client = client
  421. self.__call_id = call_id
  422. self.__model = model
  423. self.__connection: RealtimeConnection | None = None
  424. self.__extra_query = extra_query
  425. self.__extra_headers = extra_headers
  426. self.__websocket_connection_options = websocket_connection_options
  427. def __enter__(self) -> RealtimeConnection:
  428. """
  429. 👋 If your application doesn't work well with the context manager approach then you
  430. can call this method directly to initiate a connection.
  431. **Warning**: You must remember to close the connection with `.close()`.
  432. ```py
  433. connection = client.realtime.connect(...).enter()
  434. # ...
  435. connection.close()
  436. ```
  437. """
  438. try:
  439. from websockets.sync.client import connect
  440. except ImportError as exc:
  441. raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc
  442. extra_query = self.__extra_query
  443. self.__client._refresh_api_key()
  444. auth_headers = self.__client.auth_headers
  445. extra_query = self.__extra_query
  446. if self.__call_id is not omit:
  447. extra_query = {**extra_query, "call_id": self.__call_id}
  448. if is_azure_client(self.__client):
  449. model = self.__model
  450. if not model:
  451. raise OpenAIError("`model` is required for Azure Realtime API")
  452. else:
  453. url, auth_headers = self.__client._configure_realtime(model, extra_query)
  454. else:
  455. url = self._prepare_url().copy_with(
  456. params={
  457. **self.__client.base_url.params,
  458. **({"model": self.__model} if self.__model is not omit else {}),
  459. **extra_query,
  460. },
  461. )
  462. log.debug("Connecting to %s", url)
  463. if self.__websocket_connection_options:
  464. log.debug("Connection options: %s", self.__websocket_connection_options)
  465. self.__connection = RealtimeConnection(
  466. connect(
  467. str(url),
  468. user_agent_header=self.__client.user_agent,
  469. additional_headers=_merge_mappings(
  470. {
  471. **auth_headers,
  472. },
  473. self.__extra_headers,
  474. ),
  475. **self.__websocket_connection_options,
  476. )
  477. )
  478. return self.__connection
  479. enter = __enter__
  480. def _prepare_url(self) -> httpx.URL:
  481. if self.__client.websocket_base_url is not None:
  482. base_url = httpx.URL(self.__client.websocket_base_url)
  483. else:
  484. base_url = self.__client._base_url.copy_with(scheme="wss")
  485. merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime"
  486. return base_url.copy_with(raw_path=merge_raw_path)
  487. def __exit__(
  488. self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None
  489. ) -> None:
  490. if self.__connection is not None:
  491. self.__connection.close()
  492. class BaseRealtimeConnectionResource:
  493. def __init__(self, connection: RealtimeConnection) -> None:
  494. self._connection = connection
  495. class RealtimeSessionResource(BaseRealtimeConnectionResource):
  496. def update(self, *, session: session_update_event_param.Session, event_id: str | Omit = omit) -> None:
  497. """
  498. Send this event to update the session’s configuration.
  499. The client may send this event at any time to update any field
  500. except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.
  501. When the server receives a `session.update`, it will respond
  502. with a `session.updated` event showing the full, effective configuration.
  503. Only the fields that are present in the `session.update` are updated. To clear a field like
  504. `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array.
  505. To clear a field like `turn_detection`, pass `null`.
  506. """
  507. self._connection.send(
  508. cast(
  509. RealtimeClientEventParam,
  510. strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
  511. )
  512. )
  513. class RealtimeResponseResource(BaseRealtimeConnectionResource):
  514. def create(self, *, event_id: str | Omit = omit, response: RealtimeResponseCreateParamsParam | Omit = omit) -> None:
  515. """
  516. This event instructs the server to create a Response, which means triggering
  517. model inference. When in Server VAD mode, the server will create Responses
  518. automatically.
  519. A Response will include at least one Item, and may have two, in which case
  520. the second will be a function call. These Items will be appended to the
  521. conversation history by default.
  522. The server will respond with a `response.created` event, events for Items
  523. and content created, and finally a `response.done` event to indicate the
  524. Response is complete.
  525. The `response.create` event includes inference configuration like
  526. `instructions` and `tools`. If these are set, they will override the Session's
  527. configuration for this Response only.
  528. Responses can be created out-of-band of the default Conversation, meaning that they can
  529. have arbitrary input, and it's possible to disable writing the output to the Conversation.
  530. Only one Response can write to the default Conversation at a time, but otherwise multiple
  531. Responses can be created in parallel. The `metadata` field is a good way to disambiguate
  532. multiple simultaneous Responses.
  533. Clients can set `conversation` to `none` to create a Response that does not write to the default
  534. Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting
  535. raw Items and references to existing Items.
  536. """
  537. self._connection.send(
  538. cast(
  539. RealtimeClientEventParam,
  540. strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
  541. )
  542. )
  543. def cancel(self, *, event_id: str | Omit = omit, response_id: str | Omit = omit) -> None:
  544. """Send this event to cancel an in-progress response.
  545. The server will respond
  546. with a `response.done` event with a status of `response.status=cancelled`. If
  547. there is no response to cancel, the server will respond with an error. It's safe
  548. to call `response.cancel` even if no response is in progress, an error will be
  549. returned the session will remain unaffected.
  550. """
  551. self._connection.send(
  552. cast(
  553. RealtimeClientEventParam,
  554. strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
  555. )
  556. )
  557. class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource):
  558. def clear(self, *, event_id: str | Omit = omit) -> None:
  559. """Send this event to clear the audio bytes in the buffer.
  560. The server will
  561. respond with an `input_audio_buffer.cleared` event.
  562. """
  563. self._connection.send(
  564. cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
  565. )
  566. def commit(self, *, event_id: str | Omit = omit) -> None:
  567. """
  568. Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.
  569. Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event.
  570. """
  571. self._connection.send(
  572. cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
  573. )
  574. def append(self, *, audio: str, event_id: str | Omit = omit) -> None:
  575. """Send this event to append audio bytes to the input audio buffer.
  576. The audio
  577. buffer is temporary storage you can write to and later commit. A "commit" will create a new
  578. user message item in the conversation history from the buffer content and clear the buffer.
  579. Input audio transcription (if enabled) will be generated when the buffer is committed.
  580. If VAD is enabled the audio buffer is used to detect speech and the server will decide
  581. when to commit. When Server VAD is disabled, you must commit the audio buffer
  582. manually. Input audio noise reduction operates on writes to the audio buffer.
  583. The client may choose how much audio to place in each event up to a maximum
  584. of 15 MiB, for example streaming smaller chunks from the client may allow the
  585. VAD to be more responsive. Unlike most other client events, the server will
  586. not send a confirmation response to this event.
  587. """
  588. self._connection.send(
  589. cast(
  590. RealtimeClientEventParam,
  591. strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
  592. )
  593. )
  594. class RealtimeConversationResource(BaseRealtimeConnectionResource):
  595. @cached_property
  596. def item(self) -> RealtimeConversationItemResource:
  597. return RealtimeConversationItemResource(self._connection)
  598. class RealtimeConversationItemResource(BaseRealtimeConnectionResource):
  599. def delete(self, *, item_id: str, event_id: str | Omit = omit) -> None:
  600. """Send this event when you want to remove any item from the conversation
  601. history.
  602. The server will respond with a `conversation.item.deleted` event,
  603. unless the item does not exist in the conversation history, in which case the
  604. server will respond with an error.
  605. """
  606. self._connection.send(
  607. cast(
  608. RealtimeClientEventParam,
  609. strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
  610. )
  611. )
  612. def create(
  613. self, *, item: ConversationItemParam, event_id: str | Omit = omit, previous_item_id: str | Omit = omit
  614. ) -> None:
  615. """
  616. Add a new Item to the Conversation's context, including messages, function
  617. calls, and function call responses. This event can be used both to populate a
  618. "history" of the conversation and to add new items mid-stream, but has the
  619. current limitation that it cannot populate assistant audio messages.
  620. If successful, the server will respond with a `conversation.item.created`
  621. event, otherwise an `error` event will be sent.
  622. """
  623. self._connection.send(
  624. cast(
  625. RealtimeClientEventParam,
  626. strip_not_given(
  627. {
  628. "type": "conversation.item.create",
  629. "item": item,
  630. "event_id": event_id,
  631. "previous_item_id": previous_item_id,
  632. }
  633. ),
  634. )
  635. )
  636. def truncate(self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | Omit = omit) -> None:
  637. """Send this event to truncate a previous assistant message’s audio.
  638. The server
  639. will produce audio faster than realtime, so this event is useful when the user
  640. interrupts to truncate audio that has already been sent to the client but not
  641. yet played. This will synchronize the server's understanding of the audio with
  642. the client's playback.
  643. Truncating audio will delete the server-side text transcript to ensure there
  644. is not text in the context that hasn't been heard by the user.
  645. If successful, the server will respond with a `conversation.item.truncated`
  646. event.
  647. """
  648. self._connection.send(
  649. cast(
  650. RealtimeClientEventParam,
  651. strip_not_given(
  652. {
  653. "type": "conversation.item.truncate",
  654. "audio_end_ms": audio_end_ms,
  655. "content_index": content_index,
  656. "item_id": item_id,
  657. "event_id": event_id,
  658. }
  659. ),
  660. )
  661. )
  662. def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
  663. """
  664. Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
  665. The server will respond with a `conversation.item.retrieved` event,
  666. unless the item does not exist in the conversation history, in which case the
  667. server will respond with an error.
  668. """
  669. self._connection.send(
  670. cast(
  671. RealtimeClientEventParam,
  672. strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
  673. )
  674. )
  675. class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
  676. def clear(self, *, event_id: str | Omit = omit) -> None:
  677. """**WebRTC/SIP Only:** Emit to cut off the current audio response.
  678. This will trigger the server to
  679. stop generating audio and emit a `output_audio_buffer.cleared` event. This
  680. event should be preceded by a `response.cancel` client event to stop the
  681. generation of the current response.
  682. [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
  683. """
  684. self._connection.send(
  685. cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
  686. )
  687. class BaseAsyncRealtimeConnectionResource:
  688. def __init__(self, connection: AsyncRealtimeConnection) -> None:
  689. self._connection = connection
  690. class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource):
  691. async def update(self, *, session: session_update_event_param.Session, event_id: str | Omit = omit) -> None:
  692. """
  693. Send this event to update the session’s configuration.
  694. The client may send this event at any time to update any field
  695. except for `voice` and `model`. `voice` can be updated only if there have been no other audio outputs yet.
  696. When the server receives a `session.update`, it will respond
  697. with a `session.updated` event showing the full, effective configuration.
  698. Only the fields that are present in the `session.update` are updated. To clear a field like
  699. `instructions`, pass an empty string. To clear a field like `tools`, pass an empty array.
  700. To clear a field like `turn_detection`, pass `null`.
  701. """
  702. await self._connection.send(
  703. cast(
  704. RealtimeClientEventParam,
  705. strip_not_given({"type": "session.update", "session": session, "event_id": event_id}),
  706. )
  707. )
  708. class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource):
  709. async def create(
  710. self, *, event_id: str | Omit = omit, response: RealtimeResponseCreateParamsParam | Omit = omit
  711. ) -> None:
  712. """
  713. This event instructs the server to create a Response, which means triggering
  714. model inference. When in Server VAD mode, the server will create Responses
  715. automatically.
  716. A Response will include at least one Item, and may have two, in which case
  717. the second will be a function call. These Items will be appended to the
  718. conversation history by default.
  719. The server will respond with a `response.created` event, events for Items
  720. and content created, and finally a `response.done` event to indicate the
  721. Response is complete.
  722. The `response.create` event includes inference configuration like
  723. `instructions` and `tools`. If these are set, they will override the Session's
  724. configuration for this Response only.
  725. Responses can be created out-of-band of the default Conversation, meaning that they can
  726. have arbitrary input, and it's possible to disable writing the output to the Conversation.
  727. Only one Response can write to the default Conversation at a time, but otherwise multiple
  728. Responses can be created in parallel. The `metadata` field is a good way to disambiguate
  729. multiple simultaneous Responses.
  730. Clients can set `conversation` to `none` to create a Response that does not write to the default
  731. Conversation. Arbitrary input can be provided with the `input` field, which is an array accepting
  732. raw Items and references to existing Items.
  733. """
  734. await self._connection.send(
  735. cast(
  736. RealtimeClientEventParam,
  737. strip_not_given({"type": "response.create", "event_id": event_id, "response": response}),
  738. )
  739. )
  740. async def cancel(self, *, event_id: str | Omit = omit, response_id: str | Omit = omit) -> None:
  741. """Send this event to cancel an in-progress response.
  742. The server will respond
  743. with a `response.done` event with a status of `response.status=cancelled`. If
  744. there is no response to cancel, the server will respond with an error. It's safe
  745. to call `response.cancel` even if no response is in progress, an error will be
  746. returned the session will remain unaffected.
  747. """
  748. await self._connection.send(
  749. cast(
  750. RealtimeClientEventParam,
  751. strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}),
  752. )
  753. )
  754. class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
  755. async def clear(self, *, event_id: str | Omit = omit) -> None:
  756. """Send this event to clear the audio bytes in the buffer.
  757. The server will
  758. respond with an `input_audio_buffer.cleared` event.
  759. """
  760. await self._connection.send(
  761. cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id}))
  762. )
  763. async def commit(self, *, event_id: str | Omit = omit) -> None:
  764. """
  765. Send this event to commit the user input audio buffer, which will create a new user message item in the conversation. This event will produce an error if the input audio buffer is empty. When in Server VAD mode, the client does not need to send this event, the server will commit the audio buffer automatically.
  766. Committing the input audio buffer will trigger input audio transcription (if enabled in session configuration), but it will not create a response from the model. The server will respond with an `input_audio_buffer.committed` event.
  767. """
  768. await self._connection.send(
  769. cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id}))
  770. )
  771. async def append(self, *, audio: str, event_id: str | Omit = omit) -> None:
  772. """Send this event to append audio bytes to the input audio buffer.
  773. The audio
  774. buffer is temporary storage you can write to and later commit. A "commit" will create a new
  775. user message item in the conversation history from the buffer content and clear the buffer.
  776. Input audio transcription (if enabled) will be generated when the buffer is committed.
  777. If VAD is enabled the audio buffer is used to detect speech and the server will decide
  778. when to commit. When Server VAD is disabled, you must commit the audio buffer
  779. manually. Input audio noise reduction operates on writes to the audio buffer.
  780. The client may choose how much audio to place in each event up to a maximum
  781. of 15 MiB, for example streaming smaller chunks from the client may allow the
  782. VAD to be more responsive. Unlike most other client events, the server will
  783. not send a confirmation response to this event.
  784. """
  785. await self._connection.send(
  786. cast(
  787. RealtimeClientEventParam,
  788. strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}),
  789. )
  790. )
  791. class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource):
  792. @cached_property
  793. def item(self) -> AsyncRealtimeConversationItemResource:
  794. return AsyncRealtimeConversationItemResource(self._connection)
  795. class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource):
  796. async def delete(self, *, item_id: str, event_id: str | Omit = omit) -> None:
  797. """Send this event when you want to remove any item from the conversation
  798. history.
  799. The server will respond with a `conversation.item.deleted` event,
  800. unless the item does not exist in the conversation history, in which case the
  801. server will respond with an error.
  802. """
  803. await self._connection.send(
  804. cast(
  805. RealtimeClientEventParam,
  806. strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}),
  807. )
  808. )
  809. async def create(
  810. self, *, item: ConversationItemParam, event_id: str | Omit = omit, previous_item_id: str | Omit = omit
  811. ) -> None:
  812. """
  813. Add a new Item to the Conversation's context, including messages, function
  814. calls, and function call responses. This event can be used both to populate a
  815. "history" of the conversation and to add new items mid-stream, but has the
  816. current limitation that it cannot populate assistant audio messages.
  817. If successful, the server will respond with a `conversation.item.created`
  818. event, otherwise an `error` event will be sent.
  819. """
  820. await self._connection.send(
  821. cast(
  822. RealtimeClientEventParam,
  823. strip_not_given(
  824. {
  825. "type": "conversation.item.create",
  826. "item": item,
  827. "event_id": event_id,
  828. "previous_item_id": previous_item_id,
  829. }
  830. ),
  831. )
  832. )
  833. async def truncate(
  834. self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | Omit = omit
  835. ) -> None:
  836. """Send this event to truncate a previous assistant message’s audio.
  837. The server
  838. will produce audio faster than realtime, so this event is useful when the user
  839. interrupts to truncate audio that has already been sent to the client but not
  840. yet played. This will synchronize the server's understanding of the audio with
  841. the client's playback.
  842. Truncating audio will delete the server-side text transcript to ensure there
  843. is not text in the context that hasn't been heard by the user.
  844. If successful, the server will respond with a `conversation.item.truncated`
  845. event.
  846. """
  847. await self._connection.send(
  848. cast(
  849. RealtimeClientEventParam,
  850. strip_not_given(
  851. {
  852. "type": "conversation.item.truncate",
  853. "audio_end_ms": audio_end_ms,
  854. "content_index": content_index,
  855. "item_id": item_id,
  856. "event_id": event_id,
  857. }
  858. ),
  859. )
  860. )
  861. async def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
  862. """
  863. Send this event when you want to retrieve the server's representation of a specific item in the conversation history. This is useful, for example, to inspect user audio after noise cancellation and VAD.
  864. The server will respond with a `conversation.item.retrieved` event,
  865. unless the item does not exist in the conversation history, in which case the
  866. server will respond with an error.
  867. """
  868. await self._connection.send(
  869. cast(
  870. RealtimeClientEventParam,
  871. strip_not_given({"type": "conversation.item.retrieve", "item_id": item_id, "event_id": event_id}),
  872. )
  873. )
  874. class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
  875. async def clear(self, *, event_id: str | Omit = omit) -> None:
  876. """**WebRTC/SIP Only:** Emit to cut off the current audio response.
  877. This will trigger the server to
  878. stop generating audio and emit a `output_audio_buffer.cleared` event. This
  879. event should be preceded by a `response.cancel` client event to stop the
  880. generation of the current response.
  881. [Learn more](https://platform.openai.com/docs/guides/realtime-conversations#client-and-server-events-for-audio-in-webrtc).
  882. """
  883. await self._connection.send(
  884. cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
  885. )