evals.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2. from __future__ import annotations
  3. from typing import Iterable, Optional
  4. from typing_extensions import Literal
  5. import httpx
  6. from ... import _legacy_response
  7. from ...types import eval_list_params, eval_create_params, eval_update_params
  8. from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
  9. from ..._utils import maybe_transform, async_maybe_transform
  10. from ..._compat import cached_property
  11. from .runs.runs import (
  12. Runs,
  13. AsyncRuns,
  14. RunsWithRawResponse,
  15. AsyncRunsWithRawResponse,
  16. RunsWithStreamingResponse,
  17. AsyncRunsWithStreamingResponse,
  18. )
  19. from ..._resource import SyncAPIResource, AsyncAPIResource
  20. from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
  21. from ...pagination import SyncCursorPage, AsyncCursorPage
  22. from ..._base_client import AsyncPaginator, make_request_options
  23. from ...types.eval_list_response import EvalListResponse
  24. from ...types.eval_create_response import EvalCreateResponse
  25. from ...types.eval_delete_response import EvalDeleteResponse
  26. from ...types.eval_update_response import EvalUpdateResponse
  27. from ...types.eval_retrieve_response import EvalRetrieveResponse
  28. from ...types.shared_params.metadata import Metadata
  29. __all__ = ["Evals", "AsyncEvals"]
  30. class Evals(SyncAPIResource):
  31. @cached_property
  32. def runs(self) -> Runs:
  33. return Runs(self._client)
  34. @cached_property
  35. def with_raw_response(self) -> EvalsWithRawResponse:
  36. """
  37. This property can be used as a prefix for any HTTP method call to return
  38. the raw response object instead of the parsed content.
  39. For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  40. """
  41. return EvalsWithRawResponse(self)
  42. @cached_property
  43. def with_streaming_response(self) -> EvalsWithStreamingResponse:
  44. """
  45. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  46. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  47. """
  48. return EvalsWithStreamingResponse(self)
  49. def create(
  50. self,
  51. *,
  52. data_source_config: eval_create_params.DataSourceConfig,
  53. testing_criteria: Iterable[eval_create_params.TestingCriterion],
  54. metadata: Optional[Metadata] | Omit = omit,
  55. name: str | Omit = omit,
  56. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  57. # The extra values given here take precedence over values defined on the client or passed to this method.
  58. extra_headers: Headers | None = None,
  59. extra_query: Query | None = None,
  60. extra_body: Body | None = None,
  61. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  62. ) -> EvalCreateResponse:
  63. """
  64. Create the structure of an evaluation that can be used to test a model's
  65. performance. An evaluation is a set of testing criteria and the config for a
  66. data source, which dictates the schema of the data used in the evaluation. After
  67. creating an evaluation, you can run it on different models and model parameters.
  68. We support several types of graders and datasources. For more information, see
  69. the [Evals guide](https://platform.openai.com/docs/guides/evals).
  70. Args:
  71. data_source_config: The configuration for the data source used for the evaluation runs. Dictates the
  72. schema of the data used in the evaluation.
  73. testing_criteria: A list of graders for all eval runs in this group. Graders can reference
  74. variables in the data source using double curly braces notation, like
  75. `{{item.variable_name}}`. To reference the model's output, use the `sample`
  76. namespace (ie, `{{sample.output_text}}`).
  77. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  78. for storing additional information about the object in a structured format, and
  79. querying for objects via API or the dashboard.
  80. Keys are strings with a maximum length of 64 characters. Values are strings with
  81. a maximum length of 512 characters.
  82. name: The name of the evaluation.
  83. extra_headers: Send extra headers
  84. extra_query: Add additional query parameters to the request
  85. extra_body: Add additional JSON properties to the request
  86. timeout: Override the client-level default timeout for this request, in seconds
  87. """
  88. return self._post(
  89. "/evals",
  90. body=maybe_transform(
  91. {
  92. "data_source_config": data_source_config,
  93. "testing_criteria": testing_criteria,
  94. "metadata": metadata,
  95. "name": name,
  96. },
  97. eval_create_params.EvalCreateParams,
  98. ),
  99. options=make_request_options(
  100. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  101. ),
  102. cast_to=EvalCreateResponse,
  103. )
  104. def retrieve(
  105. self,
  106. eval_id: str,
  107. *,
  108. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  109. # The extra values given here take precedence over values defined on the client or passed to this method.
  110. extra_headers: Headers | None = None,
  111. extra_query: Query | None = None,
  112. extra_body: Body | None = None,
  113. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  114. ) -> EvalRetrieveResponse:
  115. """
  116. Get an evaluation by ID.
  117. Args:
  118. extra_headers: Send extra headers
  119. extra_query: Add additional query parameters to the request
  120. extra_body: Add additional JSON properties to the request
  121. timeout: Override the client-level default timeout for this request, in seconds
  122. """
  123. if not eval_id:
  124. raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
  125. return self._get(
  126. f"/evals/{eval_id}",
  127. options=make_request_options(
  128. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  129. ),
  130. cast_to=EvalRetrieveResponse,
  131. )
  132. def update(
  133. self,
  134. eval_id: str,
  135. *,
  136. metadata: Optional[Metadata] | Omit = omit,
  137. name: str | Omit = omit,
  138. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  139. # The extra values given here take precedence over values defined on the client or passed to this method.
  140. extra_headers: Headers | None = None,
  141. extra_query: Query | None = None,
  142. extra_body: Body | None = None,
  143. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  144. ) -> EvalUpdateResponse:
  145. """
  146. Update certain properties of an evaluation.
  147. Args:
  148. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  149. for storing additional information about the object in a structured format, and
  150. querying for objects via API or the dashboard.
  151. Keys are strings with a maximum length of 64 characters. Values are strings with
  152. a maximum length of 512 characters.
  153. name: Rename the evaluation.
  154. extra_headers: Send extra headers
  155. extra_query: Add additional query parameters to the request
  156. extra_body: Add additional JSON properties to the request
  157. timeout: Override the client-level default timeout for this request, in seconds
  158. """
  159. if not eval_id:
  160. raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
  161. return self._post(
  162. f"/evals/{eval_id}",
  163. body=maybe_transform(
  164. {
  165. "metadata": metadata,
  166. "name": name,
  167. },
  168. eval_update_params.EvalUpdateParams,
  169. ),
  170. options=make_request_options(
  171. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  172. ),
  173. cast_to=EvalUpdateResponse,
  174. )
  175. def list(
  176. self,
  177. *,
  178. after: str | Omit = omit,
  179. limit: int | Omit = omit,
  180. order: Literal["asc", "desc"] | Omit = omit,
  181. order_by: Literal["created_at", "updated_at"] | Omit = omit,
  182. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  183. # The extra values given here take precedence over values defined on the client or passed to this method.
  184. extra_headers: Headers | None = None,
  185. extra_query: Query | None = None,
  186. extra_body: Body | None = None,
  187. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  188. ) -> SyncCursorPage[EvalListResponse]:
  189. """
  190. List evaluations for a project.
  191. Args:
  192. after: Identifier for the last eval from the previous pagination request.
  193. limit: Number of evals to retrieve.
  194. order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
  195. descending order.
  196. order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
  197. creation time or `updated_at` for last updated time.
  198. extra_headers: Send extra headers
  199. extra_query: Add additional query parameters to the request
  200. extra_body: Add additional JSON properties to the request
  201. timeout: Override the client-level default timeout for this request, in seconds
  202. """
  203. return self._get_api_list(
  204. "/evals",
  205. page=SyncCursorPage[EvalListResponse],
  206. options=make_request_options(
  207. extra_headers=extra_headers,
  208. extra_query=extra_query,
  209. extra_body=extra_body,
  210. timeout=timeout,
  211. query=maybe_transform(
  212. {
  213. "after": after,
  214. "limit": limit,
  215. "order": order,
  216. "order_by": order_by,
  217. },
  218. eval_list_params.EvalListParams,
  219. ),
  220. ),
  221. model=EvalListResponse,
  222. )
  223. def delete(
  224. self,
  225. eval_id: str,
  226. *,
  227. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  228. # The extra values given here take precedence over values defined on the client or passed to this method.
  229. extra_headers: Headers | None = None,
  230. extra_query: Query | None = None,
  231. extra_body: Body | None = None,
  232. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  233. ) -> EvalDeleteResponse:
  234. """
  235. Delete an evaluation.
  236. Args:
  237. extra_headers: Send extra headers
  238. extra_query: Add additional query parameters to the request
  239. extra_body: Add additional JSON properties to the request
  240. timeout: Override the client-level default timeout for this request, in seconds
  241. """
  242. if not eval_id:
  243. raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
  244. return self._delete(
  245. f"/evals/{eval_id}",
  246. options=make_request_options(
  247. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  248. ),
  249. cast_to=EvalDeleteResponse,
  250. )
  251. class AsyncEvals(AsyncAPIResource):
  252. @cached_property
  253. def runs(self) -> AsyncRuns:
  254. return AsyncRuns(self._client)
  255. @cached_property
  256. def with_raw_response(self) -> AsyncEvalsWithRawResponse:
  257. """
  258. This property can be used as a prefix for any HTTP method call to return
  259. the raw response object instead of the parsed content.
  260. For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  261. """
  262. return AsyncEvalsWithRawResponse(self)
  263. @cached_property
  264. def with_streaming_response(self) -> AsyncEvalsWithStreamingResponse:
  265. """
  266. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  267. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  268. """
  269. return AsyncEvalsWithStreamingResponse(self)
  270. async def create(
  271. self,
  272. *,
  273. data_source_config: eval_create_params.DataSourceConfig,
  274. testing_criteria: Iterable[eval_create_params.TestingCriterion],
  275. metadata: Optional[Metadata] | Omit = omit,
  276. name: str | Omit = omit,
  277. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  278. # The extra values given here take precedence over values defined on the client or passed to this method.
  279. extra_headers: Headers | None = None,
  280. extra_query: Query | None = None,
  281. extra_body: Body | None = None,
  282. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  283. ) -> EvalCreateResponse:
  284. """
  285. Create the structure of an evaluation that can be used to test a model's
  286. performance. An evaluation is a set of testing criteria and the config for a
  287. data source, which dictates the schema of the data used in the evaluation. After
  288. creating an evaluation, you can run it on different models and model parameters.
  289. We support several types of graders and datasources. For more information, see
  290. the [Evals guide](https://platform.openai.com/docs/guides/evals).
  291. Args:
  292. data_source_config: The configuration for the data source used for the evaluation runs. Dictates the
  293. schema of the data used in the evaluation.
  294. testing_criteria: A list of graders for all eval runs in this group. Graders can reference
  295. variables in the data source using double curly braces notation, like
  296. `{{item.variable_name}}`. To reference the model's output, use the `sample`
  297. namespace (ie, `{{sample.output_text}}`).
  298. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  299. for storing additional information about the object in a structured format, and
  300. querying for objects via API or the dashboard.
  301. Keys are strings with a maximum length of 64 characters. Values are strings with
  302. a maximum length of 512 characters.
  303. name: The name of the evaluation.
  304. extra_headers: Send extra headers
  305. extra_query: Add additional query parameters to the request
  306. extra_body: Add additional JSON properties to the request
  307. timeout: Override the client-level default timeout for this request, in seconds
  308. """
  309. return await self._post(
  310. "/evals",
  311. body=await async_maybe_transform(
  312. {
  313. "data_source_config": data_source_config,
  314. "testing_criteria": testing_criteria,
  315. "metadata": metadata,
  316. "name": name,
  317. },
  318. eval_create_params.EvalCreateParams,
  319. ),
  320. options=make_request_options(
  321. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  322. ),
  323. cast_to=EvalCreateResponse,
  324. )
  325. async def retrieve(
  326. self,
  327. eval_id: str,
  328. *,
  329. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  330. # The extra values given here take precedence over values defined on the client or passed to this method.
  331. extra_headers: Headers | None = None,
  332. extra_query: Query | None = None,
  333. extra_body: Body | None = None,
  334. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  335. ) -> EvalRetrieveResponse:
  336. """
  337. Get an evaluation by ID.
  338. Args:
  339. extra_headers: Send extra headers
  340. extra_query: Add additional query parameters to the request
  341. extra_body: Add additional JSON properties to the request
  342. timeout: Override the client-level default timeout for this request, in seconds
  343. """
  344. if not eval_id:
  345. raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
  346. return await self._get(
  347. f"/evals/{eval_id}",
  348. options=make_request_options(
  349. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  350. ),
  351. cast_to=EvalRetrieveResponse,
  352. )
  353. async def update(
  354. self,
  355. eval_id: str,
  356. *,
  357. metadata: Optional[Metadata] | Omit = omit,
  358. name: str | Omit = omit,
  359. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  360. # The extra values given here take precedence over values defined on the client or passed to this method.
  361. extra_headers: Headers | None = None,
  362. extra_query: Query | None = None,
  363. extra_body: Body | None = None,
  364. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  365. ) -> EvalUpdateResponse:
  366. """
  367. Update certain properties of an evaluation.
  368. Args:
  369. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
  370. for storing additional information about the object in a structured format, and
  371. querying for objects via API or the dashboard.
  372. Keys are strings with a maximum length of 64 characters. Values are strings with
  373. a maximum length of 512 characters.
  374. name: Rename the evaluation.
  375. extra_headers: Send extra headers
  376. extra_query: Add additional query parameters to the request
  377. extra_body: Add additional JSON properties to the request
  378. timeout: Override the client-level default timeout for this request, in seconds
  379. """
  380. if not eval_id:
  381. raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
  382. return await self._post(
  383. f"/evals/{eval_id}",
  384. body=await async_maybe_transform(
  385. {
  386. "metadata": metadata,
  387. "name": name,
  388. },
  389. eval_update_params.EvalUpdateParams,
  390. ),
  391. options=make_request_options(
  392. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  393. ),
  394. cast_to=EvalUpdateResponse,
  395. )
  396. def list(
  397. self,
  398. *,
  399. after: str | Omit = omit,
  400. limit: int | Omit = omit,
  401. order: Literal["asc", "desc"] | Omit = omit,
  402. order_by: Literal["created_at", "updated_at"] | Omit = omit,
  403. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  404. # The extra values given here take precedence over values defined on the client or passed to this method.
  405. extra_headers: Headers | None = None,
  406. extra_query: Query | None = None,
  407. extra_body: Body | None = None,
  408. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  409. ) -> AsyncPaginator[EvalListResponse, AsyncCursorPage[EvalListResponse]]:
  410. """
  411. List evaluations for a project.
  412. Args:
  413. after: Identifier for the last eval from the previous pagination request.
  414. limit: Number of evals to retrieve.
  415. order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
  416. descending order.
  417. order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
  418. creation time or `updated_at` for last updated time.
  419. extra_headers: Send extra headers
  420. extra_query: Add additional query parameters to the request
  421. extra_body: Add additional JSON properties to the request
  422. timeout: Override the client-level default timeout for this request, in seconds
  423. """
  424. return self._get_api_list(
  425. "/evals",
  426. page=AsyncCursorPage[EvalListResponse],
  427. options=make_request_options(
  428. extra_headers=extra_headers,
  429. extra_query=extra_query,
  430. extra_body=extra_body,
  431. timeout=timeout,
  432. query=maybe_transform(
  433. {
  434. "after": after,
  435. "limit": limit,
  436. "order": order,
  437. "order_by": order_by,
  438. },
  439. eval_list_params.EvalListParams,
  440. ),
  441. ),
  442. model=EvalListResponse,
  443. )
  444. async def delete(
  445. self,
  446. eval_id: str,
  447. *,
  448. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  449. # The extra values given here take precedence over values defined on the client or passed to this method.
  450. extra_headers: Headers | None = None,
  451. extra_query: Query | None = None,
  452. extra_body: Body | None = None,
  453. timeout: float | httpx.Timeout | None | NotGiven = not_given,
  454. ) -> EvalDeleteResponse:
  455. """
  456. Delete an evaluation.
  457. Args:
  458. extra_headers: Send extra headers
  459. extra_query: Add additional query parameters to the request
  460. extra_body: Add additional JSON properties to the request
  461. timeout: Override the client-level default timeout for this request, in seconds
  462. """
  463. if not eval_id:
  464. raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
  465. return await self._delete(
  466. f"/evals/{eval_id}",
  467. options=make_request_options(
  468. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  469. ),
  470. cast_to=EvalDeleteResponse,
  471. )
  472. class EvalsWithRawResponse:
  473. def __init__(self, evals: Evals) -> None:
  474. self._evals = evals
  475. self.create = _legacy_response.to_raw_response_wrapper(
  476. evals.create,
  477. )
  478. self.retrieve = _legacy_response.to_raw_response_wrapper(
  479. evals.retrieve,
  480. )
  481. self.update = _legacy_response.to_raw_response_wrapper(
  482. evals.update,
  483. )
  484. self.list = _legacy_response.to_raw_response_wrapper(
  485. evals.list,
  486. )
  487. self.delete = _legacy_response.to_raw_response_wrapper(
  488. evals.delete,
  489. )
  490. @cached_property
  491. def runs(self) -> RunsWithRawResponse:
  492. return RunsWithRawResponse(self._evals.runs)
  493. class AsyncEvalsWithRawResponse:
  494. def __init__(self, evals: AsyncEvals) -> None:
  495. self._evals = evals
  496. self.create = _legacy_response.async_to_raw_response_wrapper(
  497. evals.create,
  498. )
  499. self.retrieve = _legacy_response.async_to_raw_response_wrapper(
  500. evals.retrieve,
  501. )
  502. self.update = _legacy_response.async_to_raw_response_wrapper(
  503. evals.update,
  504. )
  505. self.list = _legacy_response.async_to_raw_response_wrapper(
  506. evals.list,
  507. )
  508. self.delete = _legacy_response.async_to_raw_response_wrapper(
  509. evals.delete,
  510. )
  511. @cached_property
  512. def runs(self) -> AsyncRunsWithRawResponse:
  513. return AsyncRunsWithRawResponse(self._evals.runs)
  514. class EvalsWithStreamingResponse:
  515. def __init__(self, evals: Evals) -> None:
  516. self._evals = evals
  517. self.create = to_streamed_response_wrapper(
  518. evals.create,
  519. )
  520. self.retrieve = to_streamed_response_wrapper(
  521. evals.retrieve,
  522. )
  523. self.update = to_streamed_response_wrapper(
  524. evals.update,
  525. )
  526. self.list = to_streamed_response_wrapper(
  527. evals.list,
  528. )
  529. self.delete = to_streamed_response_wrapper(
  530. evals.delete,
  531. )
  532. @cached_property
  533. def runs(self) -> RunsWithStreamingResponse:
  534. return RunsWithStreamingResponse(self._evals.runs)
  535. class AsyncEvalsWithStreamingResponse:
  536. def __init__(self, evals: AsyncEvals) -> None:
  537. self._evals = evals
  538. self.create = async_to_streamed_response_wrapper(
  539. evals.create,
  540. )
  541. self.retrieve = async_to_streamed_response_wrapper(
  542. evals.retrieve,
  543. )
  544. self.update = async_to_streamed_response_wrapper(
  545. evals.update,
  546. )
  547. self.list = async_to_streamed_response_wrapper(
  548. evals.list,
  549. )
  550. self.delete = async_to_streamed_response_wrapper(
  551. evals.delete,
  552. )
  553. @cached_property
  554. def runs(self) -> AsyncRunsWithStreamingResponse:
  555. return AsyncRunsWithStreamingResponse(self._evals.runs)