schemas.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382
  1. """Schemas for the LangSmith API."""
  2. from __future__ import annotations
  3. from collections.abc import Iterator
  4. from datetime import datetime, timedelta, timezone
  5. from decimal import Decimal
  6. from enum import Enum
  7. from typing import (
  8. Annotated,
  9. Any,
  10. NamedTuple,
  11. Optional,
  12. Protocol,
  13. Union,
  14. runtime_checkable,
  15. )
  16. from uuid import UUID
  17. from typing_extensions import NotRequired, TypedDict
  18. try:
  19. from pydantic.v1 import (
  20. BaseModel,
  21. Field, # type: ignore[import]
  22. PrivateAttr,
  23. StrictBool,
  24. StrictFloat,
  25. StrictInt,
  26. )
  27. except ImportError:
  28. from pydantic import ( # type: ignore[assignment]
  29. BaseModel,
  30. Field,
  31. PrivateAttr,
  32. StrictBool,
  33. StrictFloat,
  34. StrictInt,
  35. )
  36. from pathlib import Path
  37. from typing_extensions import Literal
  # Feedback score: a strict bool, int or float, or None when no score is given.
  SCORE_TYPE = Optional[Union[StrictBool, StrictInt, StrictFloat]]
  # Feedback display value: a dict, a string, or None.
  VALUE_TYPE = Optional[Union[dict, str]]
  class Attachment(NamedTuple):
      """Marker type: an argument annotated with it is stored as an attachment.

      Examples:
          ```python
          from langsmith import traceable
          from langsmith.schemas import Attachment


          @traceable
          def my_function(bar: int, my_val: Attachment):
              # my_val will be stored as an attachment
              # bar will be stored as inputs
              return bar
          ```
      """

      # MIME type describing the payload.
      mime_type: str
      # Raw bytes, or a filesystem path pointing at the payload.
      data: Union[bytes, Path]
  Attachments = dict[str, Union[tuple[str, bytes], Attachment, tuple[str, Path]]]
  """Mapping of attachment name to attachment payload for a run.

  Each payload is an `Attachment`, a `(mime_type, bytes)` tuple, or a
  `(mime_type, file_path)` tuple.
  """
  @runtime_checkable
  class BinaryIOLike(Protocol):
      """Structural type for binary streams exposing read/seek/getvalue."""

      def read(self, size: int = -1) -> bytes:
          """Read bytes from the stream; `size` defaults to -1."""
          ...

      def seek(self, offset: int, whence: int = 0) -> int:
          """Move the stream position to `offset`, interpreted per `whence`."""
          ...

      def getvalue(self) -> bytes:
          """Return the stream's contents as bytes."""
          ...
  class ExampleBase(BaseModel):
      """Fields shared by example models: the owning dataset and payload dicts."""

      dataset_id: UUID
      inputs: Optional[dict[str, Any]] = None
      outputs: Optional[dict[str, Any]] = None
      metadata: Optional[dict[str, Any]] = None

      class Config:
          """Pydantic configuration for the model."""

          frozen = True
          arbitrary_types_allowed = True
  class _AttachmentDict(TypedDict):
      # Dict form of an attachment, with the same two keys as `Attachment`.
      mime_type: str
      data: Union[bytes, Path]
  # Any accepted spelling of an attachment: the named tuple, its dict form,
  # or a bare (mime_type, bytes) / (mime_type, path) tuple.
  _AttachmentLike = Union[
      Attachment,
      _AttachmentDict,
      tuple[str, bytes],
      tuple[str, Path],
  ]
  class ExampleCreate(BaseModel):
      """Payload for creating an example, optionally with attachments."""

      id: Optional[UUID]
      # Defaults to the current time in UTC when not supplied.
      created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
      inputs: Optional[dict[str, Any]] = None
      outputs: Optional[dict[str, Any]] = None
      metadata: Optional[dict[str, Any]] = None
      split: Optional[Union[str, list[str]]] = None
      attachments: Optional[dict[str, _AttachmentLike]] = None
      use_source_run_io: bool = False
      use_source_run_attachments: Optional[list[str]] = None
      source_run_id: Optional[UUID] = None

      def __init__(self, **data):
          """Build the model from keyword arguments."""
          super().__init__(**data)


  # Alternate name bound to the same class.
  ExampleUploadWithAttachments = ExampleCreate
  class ExampleUpsertWithAttachments(ExampleCreate):
      """`ExampleCreate` payload that additionally requires a `dataset_id`."""

      dataset_id: UUID
  class AttachmentInfo(TypedDict):
      """Describes one attachment: its URL, a reader, and its MIME type."""

      presigned_url: str
      reader: BinaryIOLike
      mime_type: Optional[str]
  class Example(ExampleBase):
      """A dataset example, able to build a link to itself in the app."""

      id: UUID
      # Falls back to the Unix epoch (UTC) when no creation time is given.
      created_at: datetime = Field(
          default_factory=lambda: datetime.fromtimestamp(0, tz=timezone.utc)
      )
      # Falls back to the all-zero UUID when no dataset is given.
      dataset_id: UUID = Field(default=UUID("00000000-0000-0000-0000-000000000000"))
      modified_at: Optional[datetime] = None
      source_run_id: Optional[UUID] = None
      attachments: Optional[dict[str, AttachmentInfo]] = None
      """Attachment names mapped to their `AttachmentInfo` entries."""
      _host_url: Optional[str] = PrivateAttr(default=None)
      _tenant_id: Optional[UUID] = PrivateAttr(default=None)

      def __init__(
          self,
          _host_url: Optional[str] = None,
          _tenant_id: Optional[UUID] = None,
          **kwargs: Any,
      ) -> None:
          """Initialize an Example and record the host/tenant used for its URL."""
          super().__init__(**kwargs)
          self._host_url = _host_url
          self._tenant_id = _tenant_id

      @property
      def url(self) -> Optional[str]:
          """URL of this example within the app, or None without a host URL."""
          host = self._host_url
          if not host:
              return None
          path = f"/datasets/{self.dataset_id}/e/{self.id}"
          tenant = self._tenant_id
          if tenant:
              # Tenant-scoped links live under /o/<tenant_id>.
              return f"{host}/o/{str(tenant)}{path}"
          return f"{host}{path}"

      def __repr__(self):
          """Return a string representation of the Example object."""
          details = f"id={self.id}, dataset_id={self.dataset_id}, link='{self.url}'"
          return f"{self.__class__}({details})"
  class ExampleSearch(ExampleBase):
      """An example as returned by search; adds the example `id`."""

      id: UUID
  class AttachmentsOperations(BaseModel):
      """Rename/retain instructions to apply to an example's attachments."""

      rename: dict[str, str] = Field(
          description="Mapping of old attachment names to new names",
          default_factory=dict,
      )
      retain: list[str] = Field(
          description="List of attachment names to keep",
          default_factory=list,
      )
  class ExampleUpdate(BaseModel):
      """Payload for updating an existing example, optionally with attachments."""

      id: UUID
      dataset_id: Optional[UUID] = None
      inputs: Optional[dict[str, Any]] = None
      outputs: Optional[dict[str, Any]] = None
      metadata: Optional[dict[str, Any]] = None
      split: Optional[Union[str, list[str]]] = None
      attachments: Optional[Attachments] = None
      attachments_operations: Optional[AttachmentsOperations] = None

      class Config:
          """Pydantic configuration for the model."""

          frozen = True

      def __init__(self, **data):
          """Build the model from keyword arguments."""
          super().__init__(**data)


  # Alternate name bound to the same class.
  ExampleUpdateWithAttachments = ExampleUpdate
  class DataType(str, Enum):
      """String-valued enum of the supported dataset data types."""

      kv = "kv"
      llm = "llm"
      chat = "chat"
  class DatasetBase(BaseModel):
      """Core dataset fields: name, optional description and data type."""

      name: str
      description: Optional[str] = None
      data_type: Optional[DataType] = None

      class Config:
          """Pydantic configuration for the model."""

          frozen = True
  # Names of the known dataset transformations.
  DatasetTransformationType = Literal[
      "remove_system_messages",
      "convert_to_openai_message",
      "convert_to_openai_tool",
      "remove_extra_fields",
      "extract_tools_from_run",
  ]
  class DatasetTransformation(TypedDict, total=False):
      """A dataset transformation: a path and the transformation type to apply."""

      path: list[str]
      # A known transformation name, or any other string.
      transformation_type: Union[DatasetTransformationType, str]
  class Dataset(DatasetBase):
      """Dataset record with counts, schemas and an app URL helper."""

      id: UUID
      created_at: datetime
      modified_at: Optional[datetime] = None
      example_count: Optional[int] = None
      session_count: Optional[int] = None
      last_session_start_time: Optional[datetime] = None
      inputs_schema: Optional[dict[str, Any]] = None
      outputs_schema: Optional[dict[str, Any]] = None
      transformations: Optional[list[DatasetTransformation]] = None
      metadata: Optional[dict[str, Any]] = None
      _host_url: Optional[str] = PrivateAttr(default=None)
      _tenant_id: Optional[UUID] = PrivateAttr(default=None)
      _public_path: Optional[str] = PrivateAttr(default=None)

      def __init__(
          self,
          _host_url: Optional[str] = None,
          _tenant_id: Optional[UUID] = None,
          _public_path: Optional[str] = None,
          **kwargs: Any,
      ) -> None:
          """Initialize a Dataset, accepting `*_schema_definition` key spellings."""
          # A `*_schema_definition` key, when present, replaces the `*_schema` one.
          for alias, field_name in (
              ("inputs_schema_definition", "inputs_schema"),
              ("outputs_schema_definition", "outputs_schema"),
          ):
              if alias in kwargs:
                  kwargs[field_name] = kwargs.pop(alias)
          super().__init__(**kwargs)
          self._host_url = _host_url
          self._tenant_id = _tenant_id
          self._public_path = _public_path

      @property
      def url(self) -> Optional[str]:
          """URL of this dataset within the app, or None without a host URL."""
          host = self._host_url
          if not host:
              return None
          # A public path takes precedence over the tenant-scoped link.
          if self._public_path:
              return f"{host}{self._public_path}"
          if self._tenant_id:
              return f"{host}/o/{str(self._tenant_id)}/datasets/{self.id}"
          return f"{host}/datasets/{self.id}"
  class DatasetVersion(BaseModel):
      """A dataset version: an `as_of` timestamp with optional tags."""

      tags: Optional[list[str]] = None
      as_of: datetime
  def _default_extra():
      # Return a new dict on every call, holding an empty "metadata" mapping.
      return dict(metadata={})
  class RunBase(BaseModel):
      """Base Run schema.

      A Run is a span: one unit of work or operation inside your LLM app, such as
      a single LLM or chain call, a prompt formatting call, or a runnable lambda
      invocation. If you know OpenTelemetry, a run corresponds to a span.
      """

      id: UUID
      """Unique identifier of the run."""
      name: str
      """Human-readable run name."""
      start_time: datetime
      """When the run started."""
      run_type: str
      """Kind of run, e.g. tool, chain, llm, retriever,
      embedding, prompt, parser."""
      end_time: Optional[datetime] = None
      """When the run ended, if it has."""
      extra: Optional[dict] = Field(default_factory=_default_extra)
      """Additional metadata or settings for the run."""
      error: Optional[str] = None
      """Error message, if the run hit a problem."""
      serialized: Optional[dict] = None
      """Serialized form of the object that executed the run."""
      events: Optional[list[dict]] = None
      """Events associated with the run, such as
      start and end events."""
      inputs: dict = Field(default_factory=dict)
      """Inputs the run was given."""
      outputs: Optional[dict] = None
      """Outputs the run produced, if any."""
      reference_example_id: Optional[UUID] = None
      """Example this run may be based on."""
      parent_run_id: Optional[UUID] = None
      """Parent run identifier, when this run is a sub-run."""
      tags: Optional[list[str]] = None
      """Tags used to categorize or annotate the run."""
      attachments: Union[Attachments, dict[str, AttachmentInfo]] = Field(
          default_factory=dict
      )
      """Attachments associated with the run.

      Either an `Attachments` mapping or a mapping of names to `AttachmentInfo`.
      """

      @property
      def metadata(self) -> dict[str, Any]:
          """Return the run metadata, creating the backing dicts when missing."""
          if self.extra is None:
              self.extra = {}
          extra = self.extra
          # setdefault stores the empty dict so later mutations are retained.
          return extra.setdefault("metadata", {})

      @property
      def revision_id(self) -> Optional[UUID]:
          """Return the `revision_id` metadata entry, if any."""
          return self.metadata.get("revision_id")

      @property
      def latency(self) -> Optional[float]:
          """Seconds between start and end, or None while there is no end time."""
          finished = self.end_time
          if finished is None:
              return None
          elapsed = finished - self.start_time
          return elapsed.total_seconds()

      def __repr__(self):
          """Return a string representation of the RunBase object."""
          details = f"id={self.id}, name='{self.name}', run_type='{self.run_type}'"
          return f"{self.__class__}({details})"

      class Config:
          """Pydantic configuration for the model."""

          arbitrary_types_allowed = True
  class Run(RunBase):
      """Run schema when loading from the DB."""

      session_id: Optional[UUID] = None
      """The project ID this run belongs to."""
      child_run_ids: Optional[list[UUID]] = None
      """Deprecated: The child run IDs of this run."""
      child_runs: Optional[list[Run]] = None
      """The child runs of this run, if instructed to load using the client.
      These are not populated by default, as it is a heavier query to make."""
      feedback_stats: Optional[dict[str, Any]] = None
      """Feedback stats for this run."""
      app_path: Optional[str] = None
      """Relative URL path of this run within the app."""
      manifest_id: Optional[UUID] = None
      """Unique ID of the serialized object for this run."""
      status: Optional[str] = None
      """Status of the run (e.g., 'success')."""
      prompt_tokens: Optional[int] = None
      """Number of tokens used for the prompt."""
      completion_tokens: Optional[int] = None
      """Number of tokens generated as output."""
      total_tokens: Optional[int] = None
      """Total tokens for prompt and completion."""
      prompt_token_details: Optional[dict[str, int]] = None
      """Breakdown of prompt (input) token counts.

      Does *not* need to sum to full prompt token count.
      """
      completion_token_details: Optional[dict[str, int]] = None
      """Breakdown of completion (output) token counts.

      Does *not* need to sum to full completion token count.
      """
      first_token_time: Optional[datetime] = None
      """Time the first token was processed."""
      total_cost: Optional[Decimal] = None
      """The total estimated LLM cost associated with the completion tokens."""
      prompt_cost: Optional[Decimal] = None
      """The estimated cost associated with the prompt (input) tokens."""
      completion_cost: Optional[Decimal] = None
      """The estimated cost associated with the completion tokens."""
      prompt_cost_details: Optional[dict[str, Decimal]] = None
      """Breakdown of prompt (input) token costs.

      Does *not* need to sum to full prompt token cost.
      """
      completion_cost_details: Optional[dict[str, Decimal]] = None
      """Breakdown of completion (output) token costs.

      Does *not* need to sum to full completion token cost.
      """
      parent_run_ids: Optional[list[UUID]] = None
      """List of parent run IDs."""
      trace_id: UUID
      """Unique ID assigned to every run within this nested trace."""
      dotted_order: str = Field(default="")
      """Dotted order for the run.

      This is a string composed of {time}{run-uuid}.* so that a trace can be
      sorted in the order it was executed.

      Example:
          - Parent: 20230914T223155647Z1b64098b-4ab7-43f6-afee-992304f198d8
          - Children:
              - 20230914T223155647Z1b64098b-4ab7-43f6-afee-992304f198d8.20230914T223155649Z809ed3a2-0172-4f4d-8a02-a64e9b7a0f8a
              - 20230915T223155647Z1b64098b-4ab7-43f6-afee-992304f198d8.20230914T223155650Zc8d9f4c5-6c5a-4b2d-9b1c-3d9d7a7c5c7c
      """  # noqa: E501
      in_dataset: Optional[bool] = None
      """Whether this run is in a dataset."""
      _host_url: Optional[str] = PrivateAttr(default=None)

      def __init__(self, _host_url: Optional[str] = None, **kwargs: Any) -> None:
          """Initialize a Run object.

          Args:
              _host_url: Base URL of the app, used by `url`; kept as a private
                  attribute rather than a model field.
              **kwargs: Field values for the model. A missing or falsy
                  `trace_id` falls back to `id`; missing or falsy `inputs`
                  becomes an empty dict.
          """
          if not kwargs.get("trace_id"):
              # The fallback must come last: in a dict display later keys win,
              # so an explicit `trace_id=None` in kwargs would otherwise
              # overwrite the default taken from `id`.
              kwargs = {**kwargs, "trace_id": kwargs.get("id")}
          inputs = kwargs.pop("inputs", None) or {}
          super().__init__(**kwargs, inputs=inputs)
          self._host_url = _host_url
          # Only root runs (no parent) get a dotted order derived here.
          if not self.dotted_order.strip() and not self.parent_run_id:
              self.dotted_order = f"{self.start_time.isoformat()}{self.id}"

      @property
      def url(self) -> Optional[str]:
          """URL of this run within the app, or None without host URL/app path."""
          if self._host_url and self.app_path:
              return f"{self._host_url}{self.app_path}"
          return None

      @property
      def input_tokens(self) -> int | None:
          """Alias for prompt_tokens."""
          return self.prompt_tokens

      @property
      def output_tokens(self) -> int | None:
          """Alias for completion_tokens."""
          return self.completion_tokens

      @property
      def input_cost(self) -> Decimal | None:
          """Alias for prompt_cost."""
          return self.prompt_cost

      @property
      def output_cost(self) -> Decimal | None:
          """Alias for completion_cost."""
          return self.completion_cost

      @property
      def input_token_details(self) -> dict[str, int] | None:
          """Alias for prompt_token_details."""
          return self.prompt_token_details

      @property
      def output_token_details(self) -> dict[str, int] | None:
          """Alias for completion_token_details."""
          return self.completion_token_details

      @property
      def input_cost_details(self) -> dict[str, Decimal] | None:
          """Alias for prompt_cost_details."""
          return self.prompt_cost_details

      @property
      def output_cost_details(self) -> dict[str, Decimal] | None:
          """Alias for completion_cost_details."""
          return self.completion_cost_details
  class RunTypeEnum(str, Enum):
      """(Deprecated) String-valued enum of run types; pass plain strings instead."""

      tool = "tool"
      chain = "chain"
      llm = "llm"
      retriever = "retriever"
      embedding = "embedding"
      prompt = "prompt"
      parser = "parser"
  class RunLikeDict(TypedDict, total=False):
      """Type hint for dictionaries shaped like a run; every key is optional."""

      # Identity and timing
      name: str
      run_type: RunTypeEnum
      start_time: datetime
      # Payloads and outcome
      inputs: Optional[dict]
      outputs: Optional[dict]
      end_time: Optional[datetime]
      extra: Optional[dict]
      error: Optional[str]
      serialized: Optional[dict]
      # Relationships and annotations
      parent_run_id: Optional[UUID]
      manifest_id: Optional[UUID]
      events: Optional[list[dict]]
      tags: Optional[list[str]]
      inputs_s3_urls: Optional[dict]
      outputs_s3_urls: Optional[dict]
      id: Optional[UUID]
      session_id: Optional[UUID]
      session_name: Optional[str]
      reference_example_id: Optional[UUID]
      input_attachments: Optional[dict]
      output_attachments: Optional[dict]
      # Trace placement
      trace_id: UUID
      dotted_order: str
      attachments: Attachments
  class RunWithAnnotationQueueInfo(RunBase):
      """`RunBase` extended with annotation-queue timestamps."""

      last_reviewed_time: Optional[datetime] = None
      """When this run was last reviewed."""
      added_at: Optional[datetime] = None
      """When this run was added to the queue."""
  class FeedbackSourceBase(BaseModel):
      """Base class for feedback sources.

      Records where feedback came from: the API, a model, a human labeler,
      etc.
      """

      type: str
      """Feedback source type."""
      metadata: Optional[dict[str, Any]] = Field(default_factory=dict)
      """Extra metadata about the feedback source."""
      user_id: Optional[Union[UUID, str]] = None
      """ID of the user tied to the feedback source."""
      user_name: Optional[str] = None
      """Name of the user tied to the feedback source."""
  class APIFeedbackSource(FeedbackSourceBase):
      """Feedback source whose `type` is fixed to "api"."""

      type: Literal["api"] = "api"
  class ModelFeedbackSource(FeedbackSourceBase):
      """Feedback source whose `type` is fixed to "model"."""

      type: Literal["model"] = "model"
  class FeedbackSourceType(Enum):
      """Enumerates the kinds of feedback source."""

      API = "api"
      """Feedback submitted through the API."""
      MODEL = "model"
      """Feedback produced with model assistance."""
  class FeedbackBase(BaseModel):
      """Fields common to the feedback schemas."""

      id: UUID
      """Unique ID of the feedback."""
      created_at: Optional[datetime] = None
      """When the feedback was created."""
      modified_at: Optional[datetime] = None
      """When the feedback was last modified."""
      run_id: Optional[UUID]
      """ID of the run this feedback is logged for."""
      trace_id: Optional[UUID]
      """ID of the trace this feedback is logged for."""
      key: str
      """Metric name, tag, or aspect the feedback is about."""
      score: SCORE_TYPE = None
      """Value or score assigned to the run."""
      value: VALUE_TYPE = None
      """Display value, tag or other value for feedback that is not a metric."""
      comment: Optional[str] = None
      """Comment or explanation accompanying the feedback."""
      correction: Union[str, dict, None] = None
      """Correction for the run."""
      feedback_source: Optional[FeedbackSourceBase] = None
      """Where the feedback came from."""
      session_id: Optional[UUID] = None
      """ID of the project (Session = Project) this feedback is logged for."""
      comparative_experiment_id: Optional[UUID] = None
      """ID of the 'comparative experiment' the feedback was logged within, if any."""
      feedback_group_id: Optional[UUID] = None
      """For preference scoring: group ID shared by the feedbacks of every
      run in the group that was compared."""
      extra: Optional[dict] = None
      """Metadata of the feedback."""

      class Config:
          """Pydantic configuration for the model."""

          frozen = True
  class FeedbackCategory(TypedDict, total=False):
      """One feedback category: a numeric value and an optional label."""

      value: float
      """Numeric value of this category."""
      label: Optional[str]
      """Optional label explaining what the value means."""
  class FeedbackConfig(TypedDict, total=False):
      """Describes _how_ a feedback value should be interpreted."""

      type: Literal["continuous", "categorical", "freeform"]
      """Kind of feedback."""
      min: Optional[float]
      """Lower bound for continuous feedback."""
      max: Optional[float]
      """Upper bound for continuous feedback."""
      categories: Optional[list[FeedbackCategory]]
      """For categorical feedback, the valid categories the server will accept.

      Not applicable to continuous or freeform feedback types.
      """
  class FeedbackCreate(FeedbackBase):
      """Feedback payload used on creation; `feedback_source` is required."""

      feedback_source: FeedbackSourceBase
      """Where the feedback came from."""
      feedback_config: Optional[FeedbackConfig] = None
      """Configuration describing how to interpret the feedback."""
      error: Optional[bool] = None
  class Feedback(FeedbackBase):
      """Feedback as returned when reading; both timestamps are required."""

      id: UUID
      created_at: datetime
      """When the feedback was created."""
      modified_at: datetime
      """When the feedback was last modified."""
      feedback_source: Optional[FeedbackSourceBase] = None
      """Where the feedback came from, if known."""
  class TracerSession(BaseModel):
      """TracerSession schema for the API.

      The UI calls sessions "Projects".
      """

      id: UUID
      """ID of the project."""
      start_time: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
      """When the project was created."""
      end_time: Optional[datetime] = None
      """When the project was ended."""
      description: Optional[str] = None
      """Description of the project."""
      name: Optional[str] = None
      """Name of the session."""
      extra: Optional[dict[str, Any]] = None
      """Extra metadata for the project."""
      tenant_id: UUID
      """ID of the tenant this project belongs to."""
      reference_dataset_id: Optional[UUID]
      """The reference dataset IDs this project's runs were generated on."""
      _host_url: Optional[str] = PrivateAttr(default=None)

      def __init__(self, _host_url: Optional[str] = None, **kwargs: Any) -> None:
          """Initialize a TracerSession and remember the host used for its URL."""
          super().__init__(**kwargs)
          self._host_url = _host_url
          # A naive start time is tagged as UTC; its clock values are unchanged.
          if self.start_time.tzinfo is None:
              self.start_time = self.start_time.replace(tzinfo=timezone.utc)

      @property
      def url(self) -> Optional[str]:
          """URL of this project within the app, or None without a host URL."""
          if not self._host_url:
              return None
          return f"{self._host_url}/o/{self.tenant_id}/projects/p/{self.id}"

      @property
      def metadata(self) -> dict[str, Any]:
          """Return `extra["metadata"]`, or an empty dict when it is absent."""
          if self.extra is not None and "metadata" in self.extra:
              return self.extra["metadata"]
          return {}

      @property
      def tags(self) -> list[str]:
          """Return `extra["tags"]`, or an empty list when it is absent."""
          if self.extra is not None and "tags" in self.extra:
              return self.extra["tags"]
          return []
  class TracerSessionResult(TracerSession):
      """A project together with its aggregate statistics.

      The UI calls sessions "Projects".
      """

      run_count: Optional[int]
      """Number of runs in the project."""
      latency_p50: Optional[timedelta]
      """Median (50th percentile) latency for the project."""
      latency_p99: Optional[timedelta]
      """99th percentile latency for the project."""
      total_tokens: Optional[int]
      """Total number of tokens consumed in the project."""
      prompt_tokens: Optional[int]
      """Total number of prompt tokens consumed in the project."""
      completion_tokens: Optional[int]
      """Total number of completion tokens consumed in the project."""
      last_run_start_time: Optional[datetime]
      """Start time of the last run in the project."""
      feedback_stats: Optional[dict[str, Any]]
      """Feedback stats for the project."""
      session_feedback_stats: Optional[dict[str, Any]]
      """Summary feedback stats for the project."""
      run_facets: Optional[list[dict[str, Any]]]
      """Facets for the runs in the project."""
      total_cost: Optional[Decimal]
      """The total estimated LLM cost associated with the completion tokens."""
      prompt_cost: Optional[Decimal]
      """Estimated cost associated with the prompt (input) tokens."""
      completion_cost: Optional[Decimal]
      """Estimated cost associated with the completion tokens."""
      first_token_p50: Optional[timedelta]
      """Median (50th percentile) time to process the first token."""
      first_token_p99: Optional[timedelta]
      """99th percentile time to process the first token."""
      error_rate: Optional[float]
      """Error rate for the project."""
  @runtime_checkable
  class BaseMessageLike(Protocol):
      """Structural type for objects that look like a BaseMessage."""

      content: str
      """Message content."""
      additional_kwargs: dict[Any, Any]
      """Extra keyword arguments attached to the message."""

      @property
      def type(self) -> str:
          """Type of the Message, used for serialization."""
          ...
  class DatasetShareSchema(TypedDict, total=False):
      """Shape of a dataset share record."""

      dataset_id: UUID
      """ID of the dataset."""
      share_token: UUID
      """Token for sharing the dataset."""
      url: str
      """URL of the shared dataset."""
  656. class AnnotationQueue(BaseModel):
  657. """Represents an annotation queue."""
  658. id: UUID
  659. """The unique identifier of the annotation queue."""
  660. name: str
  661. """The name of the annotation queue."""
  662. description: Optional[str] = None
  663. """An optional description of the annotation queue."""
  664. created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
  665. """The timestamp when the annotation queue was created."""
  666. updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
  667. """The timestamp when the annotation queue was last updated."""
  668. tenant_id: UUID
  669. """The ID of the tenant associated with the annotation queue."""
  670. class AnnotationQueueWithDetails(AnnotationQueue):
  671. """Represents an annotation queue with details."""
  672. rubric_instructions: Optional[str] = None
  673. """The rubric instructions for the annotation queue."""
  674. class BatchIngestConfig(TypedDict, total=False):
  675. """Configuration for batch ingestion."""
  676. use_multipart_endpoint: bool
  677. """Whether to use the multipart endpoint for batch ingestion."""
  678. scale_up_qsize_trigger: int
  679. """The queue size threshold that triggers scaling up."""
  680. scale_up_nthreads_limit: int
  681. """The maximum number of threads to scale up to."""
  682. scale_down_nempty_trigger: int
  683. """The number of empty threads that triggers scaling down."""
  684. size_limit: int
  685. """The maximum size limit for the batch."""
  686. size_limit_bytes: Optional[int]
  687. """The maximum size limit in bytes for the batch."""
  688. class LangSmithInfo(BaseModel):
  689. """Information about the LangSmith server."""
  690. version: str = ""
  691. """The version of the LangSmith server."""
  692. license_expiration_time: Optional[datetime] = None
  693. """The time the license will expire."""
  694. batch_ingest_config: Optional[BatchIngestConfig] = None
  695. """The instance flags."""
  696. instance_flags: Optional[dict[str, Any]] = None
# Ask the ``Example`` model to resolve its forward-referenced annotations.
# NOTE(review): presumably required because ``Example`` refers to types that
# are only defined further down the module — confirm against its definition.
Example.update_forward_refs()
  698. class LangSmithSettings(BaseModel):
  699. """Settings for the LangSmith tenant."""
  700. id: str
  701. """The ID of the tenant."""
  702. display_name: str
  703. """The display name of the tenant."""
  704. created_at: datetime
  705. """The creation time of the tenant."""
  706. tenant_handle: Optional[str] = None
  707. class FeedbackIngestToken(BaseModel):
  708. """Represents the schema for a feedback ingest token."""
  709. id: UUID
  710. """The ID of the feedback ingest token."""
  711. url: str
  712. """The URL to GET when logging the feedback."""
  713. expires_at: datetime
  714. """The expiration time of the token."""
  715. class RunEvent(TypedDict, total=False):
  716. """Run event schema."""
  717. name: str
  718. """Type of event."""
  719. time: Union[datetime, str]
  720. """Time of the event."""
  721. kwargs: Optional[dict[str, Any]]
  722. """Additional metadata for the event."""
  723. class TimeDeltaInput(TypedDict, total=False):
  724. """Timedelta input schema."""
  725. days: int
  726. """Number of days."""
  727. hours: int
  728. """Number of hours."""
  729. minutes: int
  730. """Number of minutes."""
  731. class DatasetDiffInfo(BaseModel):
  732. """Represents the difference information between two datasets."""
  733. examples_modified: list[UUID]
  734. """A list of UUIDs representing the modified examples."""
  735. examples_added: list[UUID]
  736. """A list of UUIDs representing the added examples."""
  737. examples_removed: list[UUID]
  738. """A list of UUIDs representing the removed examples."""
  739. class ComparativeExperiment(BaseModel):
  740. """Represents a comparative experiment.
  741. This information summarizes evaluation results comparing
  742. two or more models on a given dataset.
  743. """
  744. id: UUID
  745. """The unique identifier for the comparative experiment."""
  746. name: Optional[str] = None
  747. """The optional name of the comparative experiment."""
  748. description: Optional[str] = None
  749. """An optional description of the comparative experiment."""
  750. tenant_id: UUID
  751. """The identifier of the tenant associated with this experiment."""
  752. created_at: datetime
  753. """The timestamp when the comparative experiment was created."""
  754. modified_at: datetime
  755. """The timestamp when the comparative experiment was last modified."""
  756. reference_dataset_id: UUID
  757. """The identifier of the reference dataset used in this experiment."""
  758. extra: Optional[dict[str, Any]] = None
  759. """Optional additional information about the experiment."""
  760. experiments_info: Optional[list[dict]] = None
  761. """Optional list of dictionaries containing information about individual experiments."""
  762. feedback_stats: Optional[dict[str, Any]] = None
  763. """Optional dictionary containing feedback statistics for the experiment."""
  764. @property
  765. def metadata(self) -> dict[str, Any]:
  766. """Retrieve the metadata (if any)."""
  767. if self.extra is None or "metadata" not in self.extra:
  768. return {}
  769. return self.extra["metadata"]
  770. class PromptCommit(BaseModel):
  771. """Represents a Prompt with a manifest."""
  772. owner: str
  773. """The handle of the owner of the prompt."""
  774. repo: str
  775. """The name of the prompt."""
  776. commit_hash: str
  777. """The commit hash of the prompt."""
  778. manifest: dict[str, Any]
  779. """The manifest of the prompt."""
  780. examples: list[dict]
  781. """The list of examples."""
  782. class ListedPromptCommit(BaseModel):
  783. """Represents a listed prompt commit with associated metadata."""
  784. id: UUID
  785. """The unique identifier for the prompt commit."""
  786. owner: str
  787. """The owner of the prompt commit."""
  788. repo: str
  789. """The repository name of the prompt commit."""
  790. manifest_id: Optional[UUID] = None
  791. """The optional identifier for the manifest associated with this commit."""
  792. repo_id: Optional[UUID] = None
  793. """The optional identifier for the repository."""
  794. parent_id: Optional[UUID] = None
  795. """The optional identifier for the parent commit."""
  796. commit_hash: Optional[str] = None
  797. """The optional hash of the commit."""
  798. created_at: Optional[datetime] = None
  799. """The optional timestamp when the commit was created."""
  800. updated_at: Optional[datetime] = None
  801. """The optional timestamp when the commit was last updated."""
  802. example_run_ids: Optional[list[UUID]] = Field(default_factory=list)
  803. """A list of example run identifiers associated with this commit."""
  804. num_downloads: Optional[int] = 0
  805. """The number of times this commit has been downloaded."""
  806. num_views: Optional[int] = 0
  807. """The number of times this commit has been viewed."""
  808. parent_commit_hash: Optional[str] = None
  809. """The optional hash of the parent commit."""
  810. class Prompt(BaseModel):
  811. """Represents a Prompt with metadata."""
  812. repo_handle: str
  813. """The name of the prompt."""
  814. description: Optional[str] = None
  815. """The description of the prompt."""
  816. readme: Optional[str] = None
  817. """The README of the prompt."""
  818. id: str
  819. """The ID of the prompt."""
  820. tenant_id: str
  821. """The tenant ID of the prompt owner."""
  822. created_at: datetime
  823. """The creation time of the prompt."""
  824. updated_at: datetime
  825. """The last update time of the prompt."""
  826. is_public: bool
  827. """Whether the prompt is public."""
  828. is_archived: bool
  829. """Whether the prompt is archived."""
  830. tags: list[str]
  831. """The tags associated with the prompt."""
  832. original_repo_id: Optional[str] = None
  833. """The ID of the original prompt, if forked."""
  834. upstream_repo_id: Optional[str] = None
  835. """The ID of the upstream prompt, if forked."""
  836. owner: Optional[str]
  837. """The handle of the owner of the prompt."""
  838. full_name: str
  839. """The full name of the prompt. (owner + repo_handle)"""
  840. num_likes: int
  841. """The number of likes."""
  842. num_downloads: int
  843. """The number of downloads."""
  844. num_views: int
  845. """The number of views."""
  846. liked_by_auth_user: Optional[bool] = None
  847. """Whether the prompt is liked by the authenticated user."""
  848. last_commit_hash: Optional[str] = None
  849. """The hash of the last commit."""
  850. num_commits: int
  851. """The number of commits."""
  852. original_repo_full_name: Optional[str] = None
  853. """The full name of the original prompt, if forked."""
  854. upstream_repo_full_name: Optional[str] = None
  855. """The full name of the upstream prompt, if forked."""
  856. class ListPromptsResponse(BaseModel):
  857. """A list of prompts with metadata."""
  858. repos: list[Prompt]
  859. """The list of prompts."""
  860. total: int
  861. """The total number of prompts."""
  862. class PromptSortField(str, Enum):
  863. """Enum for sorting fields for prompts."""
  864. num_downloads = "num_downloads"
  865. """Number of downloads."""
  866. num_views = "num_views"
  867. """Number of views."""
  868. updated_at = "updated_at"
  869. """Last updated time."""
  870. num_likes = "num_likes"
  871. """Number of likes."""
  872. class InputTokenDetails(TypedDict, total=False):
  873. """Breakdown of input token counts.
  874. Does *not* need to sum to full input token count. Does *not* need to have all keys.
  875. """
  876. audio: int
  877. """Audio input tokens."""
  878. cache_creation: int
  879. """Input tokens that were cached and there was a cache miss.
  880. Since there was a cache miss, the cache was created from these tokens.
  881. """
  882. cache_read: int
  883. """Input tokens that were cached and there was a cache hit.
  884. Since there was a cache hit, the tokens were read from the cache. More precisely,
  885. the model state given these tokens was read from the cache.
  886. """
  887. class OutputTokenDetails(TypedDict, total=False):
  888. """Breakdown of output token counts.
  889. Does *not* need to sum to full output token count. Does *not* need to have all keys.
  890. """
  891. audio: int
  892. """Audio output tokens."""
  893. reasoning: int
  894. """Reasoning output tokens.
  895. Tokens generated by the model in a chain of thought process (i.e. by OpenAI's o1
  896. models) that are not returned as part of model output.
  897. """
  898. class InputCostDetails(TypedDict, total=False):
  899. """Breakdown of input token costs.
  900. Does *not* need to sum to full input cost. Does *not* need to have all keys.
  901. """
  902. audio: float
  903. """Cost of audio input tokens."""
  904. cache_creation: float
  905. """Cost of input tokens that were cached and there was a cache miss.
  906. Since there was a cache miss, the cache was created from these tokens.
  907. """
  908. cache_read: float
  909. """Cost of input tokens that were cached and there was a cache hit.
  910. Since there was a cache hit, the tokens were read from the cache. More precisely,
  911. the model state given these tokens was read from the cache.
  912. """
  913. class OutputCostDetails(TypedDict, total=False):
  914. """Breakdown of output token costs.
  915. Does *not* need to sum to full output cost. Does *not* need to have all keys.
  916. """
  917. audio: float
  918. """Cost of audio output tokens."""
  919. reasoning: float
  920. """Cost of reasoning output tokens.
  921. Tokens generated by the model in a chain of thought process (i.e. by OpenAI's o1
  922. models) that are not returned as part of model output.
  923. """
  924. class UsageMetadata(TypedDict):
  925. """Usage metadata for a message, such as token counts.
  926. This is a standard representation of token usage that is consistent across models.
  927. """
  928. input_tokens: int
  929. """Count of input (or prompt) tokens. Sum of all input token types."""
  930. output_tokens: int
  931. """Count of output (or completion) tokens. Sum of all output token types."""
  932. total_tokens: int
  933. """Total token count. Sum of input_tokens + output_tokens."""
  934. input_token_details: NotRequired[InputTokenDetails]
  935. """Breakdown of input token counts.
  936. Does *not* need to sum to full input token count. Does *not* need to have all keys.
  937. """
  938. output_token_details: NotRequired[OutputTokenDetails]
  939. """Breakdown of output token counts.
  940. Does *not* need to sum to full output token count. Does *not* need to have all keys.
  941. """
  942. input_cost: NotRequired[float]
  943. """The cost of the input tokens."""
  944. output_cost: NotRequired[float]
  945. """The cost of the output tokens."""
  946. total_cost: NotRequired[float]
  947. """The total cost of the tokens."""
  948. input_cost_details: NotRequired[InputCostDetails]
  949. """The cost details of the input tokens."""
  950. output_cost_details: NotRequired[OutputCostDetails]
  951. """The cost details of the output tokens."""
  952. class ExtractedUsageMetadata(TypedDict, total=False):
  953. """Usage metadata dictionary extracted from a run.
  954. Should be the same as UsageMetadata, but does not require all
  955. keys to be present.
  956. """
  957. input_tokens: int
  958. """The number of tokens used for the prompt."""
  959. output_tokens: int
  960. """The number of tokens generated as output."""
  961. total_tokens: int
  962. """The total number of tokens used."""
  963. input_token_details: InputTokenDetails
  964. """The details of the input tokens."""
  965. output_token_details: OutputTokenDetails
  966. """The details of the output tokens."""
  967. input_cost: float
  968. """The cost of the input tokens."""
  969. output_cost: float
  970. """The cost of the output tokens."""
  971. total_cost: float
  972. """The total cost of the tokens."""
  973. input_cost_details: InputCostDetails
  974. """The cost details of the input tokens."""
  975. output_cost_details: OutputCostDetails
  976. """The cost details of the output tokens."""
  977. class UpsertExamplesResponse(TypedDict):
  978. """Response object returned from the upsert_examples_multipart method."""
  979. count: int
  980. """The number of examples that were upserted."""
  981. example_ids: list[str]
  982. """The ids of the examples that were upserted."""
  983. class ExampleWithRuns(Example):
  984. """Example with runs."""
  985. runs: list[Run] = Field(default_factory=list)
  986. """The runs of the example."""
  987. class ExperimentRunStats(TypedDict):
  988. """Run statistics for an experiment."""
  989. run_count: Optional[int]
  990. """The number of runs in the project."""
  991. latency_p50: Optional[timedelta]
  992. """The median (50th percentile) latency for the project."""
  993. latency_p99: Optional[timedelta]
  994. """The 99th percentile latency for the project."""
  995. total_tokens: Optional[int]
  996. """The total number of tokens consumed in the project."""
  997. prompt_tokens: Optional[int]
  998. """The total number of prompt tokens consumed in the project."""
  999. completion_tokens: Optional[int]
  1000. """The total number of completion tokens consumed in the project."""
  1001. last_run_start_time: Optional[datetime]
  1002. """The start time of the last run in the project."""
  1003. run_facets: Optional[list[dict[str, Any]]]
  1004. """Facets for the runs in the project."""
  1005. total_cost: Optional[Decimal]
  1006. """The total estimated LLM cost associated with the completion tokens."""
  1007. prompt_cost: Optional[Decimal]
  1008. """The estimated cost associated with the prompt (input) tokens."""
  1009. completion_cost: Optional[Decimal]
  1010. """The estimated cost associated with the completion tokens."""
  1011. first_token_p50: Optional[timedelta]
  1012. """The median (50th percentile) time to process the first token."""
  1013. first_token_p99: Optional[timedelta]
  1014. """The 99th percentile time to process the first token."""
  1015. error_rate: Optional[float]
  1016. """The error rate for the project."""
  1017. class ExperimentResults(TypedDict):
  1018. """Results container for experiment data with stats and examples.
  1019. Breaking change in v0.4.32:
  1020. The 'stats' field has been split into 'feedback_stats' and 'run_stats'.
  1021. """
  1022. feedback_stats: dict
  1023. """Feedback statistics for the experiment."""
  1024. run_stats: ExperimentRunStats
  1025. """Run statistics (latency, token count, etc.)."""
  1026. examples_with_runs: Iterator[ExampleWithRuns]
  1027. class InsightsReport(BaseModel):
  1028. """An Insights Report created by the Insights Agent over a tracing project."""
  1029. id: UUID | str
  1030. name: str
  1031. status: str
  1032. error: str | None = None
  1033. project_id: UUID | str
  1034. host_url: str
  1035. tenant_id: UUID | str
  1036. @property
  1037. def link(self) -> str:
  1038. """URL to view this Insights Report in LangSmith UI."""
  1039. return f"{self.host_url}/o/{str(self.tenant_id)}/projects/p/{str(self.project_id)}?tab=4&clusterJobId={str(self.id)}"
  1040. def _repr_html_(self) -> str:
  1041. return f'<a href="{self.link}", target="_blank" rel="noopener">InsightsReport(\'{self.name}\')</a>'
  1042. class FeedbackFormulaWeightedVariable(BaseModel):
  1043. """A feedback key and weight used when calculating feedback formulas."""
  1044. part_type: Literal["weighted_key"]
  1045. weight: float
  1046. key: Annotated[str, Field(min_length=1)]
  1047. class FeedbackFormulaCreate(BaseModel):
  1048. """Schema used for creating a feedback formula."""
  1049. dataset_id: Optional[UUID] = None
  1050. session_id: Optional[UUID] = None
  1051. feedback_key: str
  1052. aggregation_type: Literal["sum", "avg"]
  1053. formula_parts: list[FeedbackFormulaWeightedVariable] = Field(
  1054. ..., min_items=1, max_items=50
  1055. )
  1056. class FeedbackFormulaUpdate(BaseModel):
  1057. """Schema used for updating a feedback formula."""
  1058. feedback_key: str
  1059. aggregation_type: Literal["sum", "avg"]
  1060. formula_parts: list[FeedbackFormulaWeightedVariable] = Field(
  1061. ..., min_items=1, max_items=50
  1062. )
  1063. class FeedbackFormula(FeedbackFormulaCreate):
  1064. """Schema for getting feedback formulas."""
  1065. id: UUID
  1066. created_at: datetime
  1067. modified_at: datetime