# completions.py

# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict, Union, Iterable, Optional
from typing_extensions import Literal, overload

import httpx

from .. import _legacy_response
from ..types import completion_create_params
from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
from .._utils import required_args, maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from .._streaming import Stream, AsyncStream
from .._base_client import (
    make_request_options,
)
from ..types.completion import Completion
from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam

__all__ = ["Completions", "AsyncCompletions"]


class Completions(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> CompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return CompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return CompletionsWithStreamingResponse(self)
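    # Illustrative sketch (not part of the generated client): the wrappers above are
    # typically used as call prefixes. Assuming an `OpenAI` client constructed as
    # `client`, raw-response access might look like:
    #
    #   raw = client.completions.with_raw_response.create(
    #       model="gpt-3.5-turbo-instruct",
    #       prompt="Say hello",
    #   )
    #   print(raw.headers.get("x-request-id"))
    #   completion = raw.parse()  # parse into a `Completion` object when needed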

    @overload
    def create(
        self,
        *,
        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
        prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
        best_of: Optional[int] | Omit = omit,
        echo: Optional[bool] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        n: Optional[int] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        seed: Optional[int] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        stream: Optional[Literal[False]] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        suffix: Optional[str] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Completion:
        """
        Creates a completion for the provided prompt and parameters.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          prompt: The prompt(s) to generate completions for, encoded as a string, array of
              strings, array of tokens, or array of token arrays.
              Note that <|endoftext|> is the document separator that the model sees during
              training, so if a prompt is not specified the model will generate as if from the
              beginning of a new document.

          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
              the highest log probability per token). Results cannot be streamed.
              When used with `n`, `best_of` controls the number of candidate completions and
              `n` specifies how many to return – `best_of` must be greater than `n`.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          echo: Echo back the prompt in addition to the completion

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
              tokenizer) to an associated bias value from -100 to 100. You can use this
              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
              Mathematically, the bias is added to the logits generated by the model prior to
              sampling. The exact effect will vary per model, but values between -1 and 1
              should decrease or increase likelihood of selection; values like -100 or 100
              should result in a ban or exclusive selection of the relevant token.
              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
              from being generated.

          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
              list of the 5 most likely tokens. The API will always return the `logprob` of
              the sampled token, so there may be up to `logprobs+1` elements in the response.
              The maximum value for `logprobs` is 5.

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
              completion.
              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
              for counting tokens.

          n: How many completions to generate for each prompt.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          seed: If specified, our system will make a best effort to sample deterministically,
              such that repeated requests with the same `seed` and parameters should return
              the same result.
              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
              response parameter to monitor changes in the backend.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.
              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          stream: Whether to stream back partial progress. If set, tokens will be sent as
              data-only
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
              as they become available, with the stream terminated by a `data: [DONE]`
              message.
              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          suffix: The suffix that comes after a completion of inserted text.
              This parameter is only supported for `gpt-3.5-turbo-instruct`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.
              We generally recommend altering this or `top_p` but not both.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.
              We generally recommend altering this or `temperature` but not both.

          user: A unique identifier representing your end-user, which can help OpenAI to monitor
              and detect abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...
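    # A minimal non-streaming usage sketch (assumption: an `OpenAI` client is already
    # constructed as `client`, with an API key available in the environment):
    #
    #   completion = client.completions.create(
    #       model="gpt-3.5-turbo-instruct",
    #       prompt="Write a tagline for an ice cream shop.",
    #       max_tokens=16,
    #   )
    #   print(completion.choices[0].text)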

    @overload
    def create(
        self,
        *,
        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
        prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
        stream: Literal[True],
        best_of: Optional[int] | Omit = omit,
        echo: Optional[bool] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        n: Optional[int] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        seed: Optional[int] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        suffix: Optional[str] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Stream[Completion]:
        """
        Creates a completion for the provided prompt and parameters.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          prompt: The prompt(s) to generate completions for, encoded as a string, array of
              strings, array of tokens, or array of token arrays.
              Note that <|endoftext|> is the document separator that the model sees during
              training, so if a prompt is not specified the model will generate as if from the
              beginning of a new document.

          stream: Whether to stream back partial progress. If set, tokens will be sent as
              data-only
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
              as they become available, with the stream terminated by a `data: [DONE]`
              message.
              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).

          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
              the highest log probability per token). Results cannot be streamed.
              When used with `n`, `best_of` controls the number of candidate completions and
              `n` specifies how many to return – `best_of` must be greater than `n`.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          echo: Echo back the prompt in addition to the completion

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
              tokenizer) to an associated bias value from -100 to 100. You can use this
              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
              Mathematically, the bias is added to the logits generated by the model prior to
              sampling. The exact effect will vary per model, but values between -1 and 1
              should decrease or increase likelihood of selection; values like -100 or 100
              should result in a ban or exclusive selection of the relevant token.
              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
              from being generated.

          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
              list of the 5 most likely tokens. The API will always return the `logprob` of
              the sampled token, so there may be up to `logprobs+1` elements in the response.
              The maximum value for `logprobs` is 5.

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
              completion.
              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
              for counting tokens.

          n: How many completions to generate for each prompt.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          seed: If specified, our system will make a best effort to sample deterministically,
              such that repeated requests with the same `seed` and parameters should return
              the same result.
              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
              response parameter to monitor changes in the backend.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.
              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          suffix: The suffix that comes after a completion of inserted text.
              This parameter is only supported for `gpt-3.5-turbo-instruct`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.
              We generally recommend altering this or `top_p` but not both.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.
              We generally recommend altering this or `temperature` but not both.

          user: A unique identifier representing your end-user, which can help OpenAI to monitor
              and detect abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...
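    # A streaming usage sketch (assumption: same `client` as above). With `stream=True`
    # the call returns a `Stream[Completion]` of server-sent event chunks rather than a
    # single parsed object:
    #
    #   stream = client.completions.create(
    #       model="gpt-3.5-turbo-instruct",
    #       prompt="Count to five.",
    #       stream=True,
    #   )
    #   for chunk in stream:
    #       print(chunk.choices[0].text, end="")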

    @overload
    def create(
        self,
        *,
        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
        prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
        stream: bool,
        best_of: Optional[int] | Omit = omit,
        echo: Optional[bool] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        n: Optional[int] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        seed: Optional[int] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        suffix: Optional[str] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Completion | Stream[Completion]:
        """
        Creates a completion for the provided prompt and parameters.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          prompt: The prompt(s) to generate completions for, encoded as a string, array of
              strings, array of tokens, or array of token arrays.
              Note that <|endoftext|> is the document separator that the model sees during
              training, so if a prompt is not specified the model will generate as if from the
              beginning of a new document.

          stream: Whether to stream back partial progress. If set, tokens will be sent as
              data-only
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
              as they become available, with the stream terminated by a `data: [DONE]`
              message.
              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).

          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
              the highest log probability per token). Results cannot be streamed.
              When used with `n`, `best_of` controls the number of candidate completions and
              `n` specifies how many to return – `best_of` must be greater than `n`.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          echo: Echo back the prompt in addition to the completion

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
              tokenizer) to an associated bias value from -100 to 100. You can use this
              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
              Mathematically, the bias is added to the logits generated by the model prior to
              sampling. The exact effect will vary per model, but values between -1 and 1
              should decrease or increase likelihood of selection; values like -100 or 100
              should result in a ban or exclusive selection of the relevant token.
              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
              from being generated.

          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
              list of the 5 most likely tokens. The API will always return the `logprob` of
              the sampled token, so there may be up to `logprobs+1` elements in the response.
              The maximum value for `logprobs` is 5.

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
              completion.
              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
              for counting tokens.

          n: How many completions to generate for each prompt.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          seed: If specified, our system will make a best effort to sample deterministically,
              such that repeated requests with the same `seed` and parameters should return
              the same result.
              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
              response parameter to monitor changes in the backend.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.
              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          suffix: The suffix that comes after a completion of inserted text.
              This parameter is only supported for `gpt-3.5-turbo-instruct`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.
              We generally recommend altering this or `top_p` but not both.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.
              We generally recommend altering this or `temperature` but not both.

          user: A unique identifier representing your end-user, which can help OpenAI to monitor
              and detect abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @required_args(["model", "prompt"], ["model", "prompt", "stream"])
    def create(
        self,
        *,
        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
        prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
        best_of: Optional[int] | Omit = omit,
        echo: Optional[bool] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        n: Optional[int] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        seed: Optional[int] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        suffix: Optional[str] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Completion | Stream[Completion]:
        return self._post(
            "/completions",
            body=maybe_transform(
                {
                    "model": model,
                    "prompt": prompt,
                    "best_of": best_of,
                    "echo": echo,
                    "frequency_penalty": frequency_penalty,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_tokens": max_tokens,
                    "n": n,
                    "presence_penalty": presence_penalty,
                    "seed": seed,
                    "stop": stop,
                    "stream": stream,
                    "stream_options": stream_options,
                    "suffix": suffix,
                    "temperature": temperature,
                    "top_p": top_p,
                    "user": user,
                },
                completion_create_params.CompletionCreateParamsStreaming
                if stream
                else completion_create_params.CompletionCreateParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Completion,
            stream=stream or False,
            stream_cls=Stream[Completion],
        )
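    # Note: the single runtime implementation above serves all three overloads. The
    # request body is validated against the streaming params type when `stream` is
    # truthy, and `stream=stream or False` together with `stream_cls=Stream[Completion]`
    # determines whether `_post` returns a parsed `Completion` or a `Stream[Completion]`.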


class AsyncCompletions(AsyncAPIResource):
    @cached_property
    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncCompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncCompletionsWithStreamingResponse(self)

    @overload
    async def create(
        self,
        *,
        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
        prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
        best_of: Optional[int] | Omit = omit,
        echo: Optional[bool] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        n: Optional[int] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        seed: Optional[int] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        stream: Optional[Literal[False]] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        suffix: Optional[str] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Completion:
        """
        Creates a completion for the provided prompt and parameters.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          prompt: The prompt(s) to generate completions for, encoded as a string, array of
              strings, array of tokens, or array of token arrays.
              Note that <|endoftext|> is the document separator that the model sees during
              training, so if a prompt is not specified the model will generate as if from the
              beginning of a new document.

          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
              the highest log probability per token). Results cannot be streamed.
              When used with `n`, `best_of` controls the number of candidate completions and
              `n` specifies how many to return – `best_of` must be greater than `n`.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          echo: Echo back the prompt in addition to the completion

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
              tokenizer) to an associated bias value from -100 to 100. You can use this
              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
              Mathematically, the bias is added to the logits generated by the model prior to
              sampling. The exact effect will vary per model, but values between -1 and 1
              should decrease or increase likelihood of selection; values like -100 or 100
              should result in a ban or exclusive selection of the relevant token.
              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
              from being generated.

          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
              list of the 5 most likely tokens. The API will always return the `logprob` of
              the sampled token, so there may be up to `logprobs+1` elements in the response.
              The maximum value for `logprobs` is 5.

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
              completion.
              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
              for counting tokens.

          n: How many completions to generate for each prompt.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          seed: If specified, our system will make a best effort to sample deterministically,
              such that repeated requests with the same `seed` and parameters should return
              the same result.
              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
              response parameter to monitor changes in the backend.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.
              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          stream: Whether to stream back partial progress. If set, tokens will be sent as
              data-only
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
              as they become available, with the stream terminated by a `data: [DONE]`
              message.
              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          suffix: The suffix that comes after a completion of inserted text.
              This parameter is only supported for `gpt-3.5-turbo-instruct`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.
              We generally recommend altering this or `top_p` but not both.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.
              We generally recommend altering this or `temperature` but not both.

          user: A unique identifier representing your end-user, which can help OpenAI to monitor
              and detect abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    async def create(
        self,
        *,
        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
        prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
        stream: Literal[True],
        best_of: Optional[int] | Omit = omit,
        echo: Optional[bool] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        n: Optional[int] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        seed: Optional[int] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        suffix: Optional[str] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncStream[Completion]:
        """
        Creates a completion for the provided prompt and parameters.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          prompt: The prompt(s) to generate completions for, encoded as a string, array of
              strings, array of tokens, or array of token arrays.
              Note that <|endoftext|> is the document separator that the model sees during
              training, so if a prompt is not specified the model will generate as if from the
              beginning of a new document.

          stream: Whether to stream back partial progress. If set, tokens will be sent as
              data-only
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
              as they become available, with the stream terminated by a `data: [DONE]`
              message.
              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).

          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
              the highest log probability per token). Results cannot be streamed.
              When used with `n`, `best_of` controls the number of candidate completions and
              `n` specifies how many to return – `best_of` must be greater than `n`.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          echo: Echo back the prompt in addition to the completion

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
              tokenizer) to an associated bias value from -100 to 100. You can use this
              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
              Mathematically, the bias is added to the logits generated by the model prior to
              sampling. The exact effect will vary per model, but values between -1 and 1
              should decrease or increase likelihood of selection; values like -100 or 100
              should result in a ban or exclusive selection of the relevant token.
              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
              from being generated.

          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
              list of the 5 most likely tokens. The API will always return the `logprob` of
              the sampled token, so there may be up to `logprobs+1` elements in the response.
              The maximum value for `logprobs` is 5.

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
              completion.
              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
              for counting tokens.

          n: How many completions to generate for each prompt.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          seed: If specified, our system will make a best effort to sample deterministically,
              such that repeated requests with the same `seed` and parameters should return
              the same result.
              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
              response parameter to monitor changes in the backend.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.
              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          suffix: The suffix that comes after a completion of inserted text.
              This parameter is only supported for `gpt-3.5-turbo-instruct`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.
              We generally recommend altering this or `top_p` but not both.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.
              We generally recommend altering this or `temperature` but not both.

          user: A unique identifier representing your end-user, which can help OpenAI to monitor
              and detect abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    async def create(
        self,
        *,
        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
        prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
        stream: bool,
        best_of: Optional[int] | Omit = omit,
        echo: Optional[bool] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        n: Optional[int] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        seed: Optional[int] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        suffix: Optional[str] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Completion | AsyncStream[Completion]:
        """
        Creates a completion for the provided prompt and parameters.

        Args:
          model: ID of the model to use. You can use the
              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
              see all of your available models, or see our
              [Model overview](https://platform.openai.com/docs/models) for descriptions of
              them.

          prompt: The prompt(s) to generate completions for, encoded as a string, array of
              strings, array of tokens, or array of token arrays.
              Note that <|endoftext|> is the document separator that the model sees during
              training, so if a prompt is not specified the model will generate as if from the
              beginning of a new document.

          stream: Whether to stream back partial progress. If set, tokens will be sent as
              data-only
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
              as they become available, with the stream terminated by a `data: [DONE]`
              message.
              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).

          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
              the highest log probability per token). Results cannot be streamed.
              When used with `n`, `best_of` controls the number of candidate completions and
              `n` specifies how many to return – `best_of` must be greater than `n`.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          echo: Echo back the prompt in addition to the completion

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
              tokenizer) to an associated bias value from -100 to 100. You can use this
              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
              Mathematically, the bias is added to the logits generated by the model prior to
              sampling. The exact effect will vary per model, but values between -1 and 1
              should decrease or increase likelihood of selection; values like -100 or 100
              should result in a ban or exclusive selection of the relevant token.
              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
              from being generated.

          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
              well as the chosen tokens. For example, if `logprobs` is 5, the API will return a
              list of the 5 most likely tokens. The API will always return the `logprob` of
              the sampled token, so there may be up to `logprobs+1` elements in the response.
              The maximum value for `logprobs` is 5.

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
              completion.
              The token count of your prompt plus `max_tokens` cannot exceed the model's
              context length.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
              for counting tokens.

          n: How many completions to generate for each prompt.
              **Note:** Because this parameter generates many completions, it can quickly
              consume your token quota. Use carefully and ensure that you have reasonable
              settings for `max_tokens` and `stop`.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.
              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation)

          seed: If specified, our system will make a best effort to sample deterministically,
              such that repeated requests with the same `seed` and parameters should return
              the same result.
              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
              response parameter to monitor changes in the backend.

          stop: Not supported with latest reasoning models `o3` and `o4-mini`.
              Up to 4 sequences where the API will stop generating further tokens. The
              returned text will not contain the stop sequence.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          suffix: The suffix that comes after a completion of inserted text.
              This parameter is only supported for `gpt-3.5-turbo-instruct`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.
              We generally recommend altering this or `top_p` but not both.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.
              We generally recommend altering this or `temperature` but not both.

          user: A unique identifier representing your end-user, which can help OpenAI to monitor
              and detect abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @required_args(["model", "prompt"], ["model", "prompt", "stream"])
    async def create(
        self,
        *,
        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
        prompt: Union[str, SequenceNotStr[str], Iterable[int], Iterable[Iterable[int]], None],
        best_of: Optional[int] | Omit = omit,
        echo: Optional[bool] | Omit = omit,
        frequency_penalty: Optional[float] | Omit = omit,
        logit_bias: Optional[Dict[str, int]] | Omit = omit,
        logprobs: Optional[int] | Omit = omit,
        max_tokens: Optional[int] | Omit = omit,
        n: Optional[int] | Omit = omit,
        presence_penalty: Optional[float] | Omit = omit,
        seed: Optional[int] | Omit = omit,
        stop: Union[Optional[str], SequenceNotStr[str], None] | Omit = omit,
        stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | Omit = omit,
        suffix: Optional[str] | Omit = omit,
        temperature: Optional[float] | Omit = omit,
        top_p: Optional[float] | Omit = omit,
        user: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Completion | AsyncStream[Completion]:
        return await self._post(
            "/completions",
            body=await async_maybe_transform(
                {
                    "model": model,
                    "prompt": prompt,
                    "best_of": best_of,
                    "echo": echo,
                    "frequency_penalty": frequency_penalty,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_tokens": max_tokens,
                    "n": n,
                    "presence_penalty": presence_penalty,
                    "seed": seed,
                    "stop": stop,
                    "stream": stream,
                    "stream_options": stream_options,
                    "suffix": suffix,
                    "temperature": temperature,
                    "top_p": top_p,
                    "user": user,
                },
                completion_create_params.CompletionCreateParamsStreaming
                if stream
                else completion_create_params.CompletionCreateParamsNonStreaming,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Completion,
            stream=stream or False,
            stream_cls=AsyncStream[Completion],
        )
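    # Async usage sketch (assumption: an `AsyncOpenAI` client constructed as `client`,
    # called from within a running event loop):
    #
    #   completion = await client.completions.create(
    #       model="gpt-3.5-turbo-instruct",
    #       prompt="Say hello",
    #   )
    #   # or, with stream=True, iterate with `async for chunk in ...`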


class CompletionsWithRawResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.create = _legacy_response.to_raw_response_wrapper(
            completions.create,
        )


class AsyncCompletionsWithRawResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.create = _legacy_response.async_to_raw_response_wrapper(
            completions.create,
        )


class CompletionsWithStreamingResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.create = to_streamed_response_wrapper(
            completions.create,
        )


class AsyncCompletionsWithStreamingResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.create = async_to_streamed_response_wrapper(
            completions.create,
        )
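# Illustrative sketch (not generated code): `with_streaming_response` wraps `create`
# so the HTTP response body is not read eagerly. Assuming a configured `client`, it is
# typically used as a context manager, per the README section linked above:
#
#   with client.completions.with_streaming_response.create(
#       model="gpt-3.5-turbo-instruct",
#       prompt="Say hello",
#   ) as response:
#       for line in response.iter_lines():
#           print(line)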