client.py 356 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335
  1. """Client for interacting with the LangSmith API.
  2. Use the client to customize API keys / workspace connections, SSL certs,
  3. etc. for tracing.
  4. Also used to create, read, update, and delete LangSmith resources
  5. such as runs (~trace spans), datasets, examples (~records),
  6. feedback (~metrics), projects (tracer sessions/groups), etc.
  7. For detailed API documentation, visit the [LangSmith docs](https://docs.langchain.com/langsmith/home).
  8. """
  9. from __future__ import annotations
  10. import atexit
  11. import collections
  12. import concurrent.futures as cf
  13. import contextlib
  14. import datetime
  15. import functools
  16. import importlib
  17. import importlib.metadata
  18. import io
  19. import itertools
  20. import json
  21. import logging
  22. import os
  23. import random
  24. import threading
  25. import time
  26. import traceback
  27. import typing
  28. import uuid
  29. import warnings
  30. import weakref
  31. from collections.abc import AsyncIterable, Iterable, Iterator, Mapping, Sequence
  32. from inspect import signature
  33. from pathlib import Path
  34. from queue import PriorityQueue
  35. from typing import (
  36. TYPE_CHECKING,
  37. Annotated,
  38. Any,
  39. Callable,
  40. Literal,
  41. Optional,
  42. Union,
  43. cast,
  44. )
  45. from urllib import parse as urllib_parse
  46. import requests
  47. from pydantic import Field
  48. from requests import adapters as requests_adapters
  49. from requests_toolbelt import ( # type: ignore[import-untyped]
  50. multipart as rqtb_multipart,
  51. )
  52. from typing_extensions import TypeGuard, overload
  53. from urllib3.poolmanager import PoolKey # type: ignore[attr-defined, import-untyped]
  54. from urllib3.util import Retry # type: ignore[import-untyped]
  55. import langsmith
  56. from langsmith import env as ls_env
  57. from langsmith import schemas as ls_schemas
  58. from langsmith import utils as ls_utils
  59. from langsmith._internal import _orjson
  60. from langsmith._internal._background_thread import (
  61. TracingQueueItem,
  62. )
  63. from langsmith._internal._background_thread import (
  64. tracing_control_thread_func as _tracing_control_thread_func,
  65. )
  66. from langsmith._internal._beta_decorator import warn_beta
  67. from langsmith._internal._compressed_traces import CompressedTraces
  68. from langsmith._internal._constants import (
  69. _AUTO_SCALE_UP_NTHREADS_LIMIT,
  70. _BLOCKSIZE_BYTES,
  71. _BOUNDARY,
  72. _SIZE_LIMIT_BYTES,
  73. )
  74. from langsmith._internal._multipart import (
  75. MultipartPart,
  76. MultipartPartsAndContext,
  77. join_multipart_parts_and_context,
  78. )
  79. from langsmith._internal._operations import (
  80. SerializedFeedbackOperation,
  81. SerializedRunOperation,
  82. combine_serialized_queue_operations,
  83. compress_multipart_parts_and_context,
  84. serialize_feedback_dict,
  85. serialize_run_dict,
  86. serialized_feedback_operation_to_multipart_parts_and_context,
  87. serialized_run_operation_to_multipart_parts_and_context,
  88. )
  89. from langsmith._internal._serde import dumps_json as _dumps_json
  90. from langsmith._internal._uuid import uuid7
  91. from langsmith.schemas import AttachmentInfo, ExampleWithRuns
# Environment-variable names for third-party model-provider API keys.
_OPENAI_API_KEY = "OPENAI_API_KEY"
_ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY"
  94. def _check_otel_enabled() -> bool:
  95. """Check if OTEL is enabled and imports are available."""
  96. return ls_utils.is_env_var_truish("OTEL_ENABLED")
  97. def _import_otel():
  98. """Dynamically import OTEL modules when needed."""
  99. try:
  100. from opentelemetry import trace as otel_trace # type: ignore[import]
  101. from opentelemetry.trace import set_span_in_context # type: ignore[import]
  102. from langsmith._internal.otel._otel_client import (
  103. get_otlp_tracer_provider,
  104. )
  105. from langsmith._internal.otel._otel_exporter import OTELExporter
  106. return otel_trace, set_span_in_context, get_otlp_tracer_provider, OTELExporter
  107. except ImportError:
  108. raise ImportError(
  109. "To use OTEL tracing, you must install it with `pip install langsmith[otel]`"
  110. )
# Optional-dependency shims: define minimal placeholder classes so that
# type references elsewhere in this module still resolve when the optional
# packages are not installed.
try:
    from zoneinfo import ZoneInfo  # type: ignore[import-not-found]
except ImportError:

    class ZoneInfo:  # type: ignore[no-redef]
        """Introduced in python 3.9."""


try:
    from opentelemetry.sdk.trace import TracerProvider  # type: ignore[import-not-found]
except ImportError:

    class TracerProvider:  # type: ignore[no-redef]
        """Used for optional OTEL tracing."""
  121. if TYPE_CHECKING:
  122. import pandas as pd # type: ignore
  123. from langchain_core.runnables import Runnable
  124. from langsmith import schemas
  125. # OTEL imports for type hints
  126. try:
  127. from opentelemetry import trace as otel_trace # type: ignore[import]
  128. from langsmith._internal.otel._otel_exporter import OTELExporter
  129. except ImportError:
  130. otel_trace = Any # type: ignore[assignment, misc]
  131. OTELExporter = Any # type: ignore[assignment, misc]
  132. from langsmith.evaluation import evaluator as ls_evaluator
  133. from langsmith.evaluation._arunner import (
  134. AEVALUATOR_T,
  135. ATARGET_T,
  136. AsyncExperimentResults,
  137. )
  138. from langsmith.evaluation._runner import (
  139. COMPARATIVE_EVALUATOR_T,
  140. DATA_T,
  141. EVALUATOR_T,
  142. EXPERIMENT_T,
  143. SUMMARY_EVALUATOR_T,
  144. TARGET_T,
  145. ComparativeExperimentResults,
  146. ExperimentResults,
  147. )
logger = logging.getLogger(__name__)
# Module-level handle to urllib3's connection-pool logger.
_urllib3_logger = logging.getLogger("urllib3.connectionpool")

# Header name used to pass the API key to the LangSmith API.
X_API_KEY = "x-api-key"
# Shared immutable empty default for sequence-of-dict parameters.
EMPTY_SEQ: tuple[dict, ...] = ()
# urllib3 >= 2.0 includes `key_blocksize` in its pool key; detect support at
# import time so the HTTP adapter only forwards `blocksize` when accepted.
URLLIB3_SUPPORTS_BLOCKSIZE = "key_blocksize" in signature(PoolKey).parameters
DEFAULT_INSTRUCTIONS = "How are people using my agent? What are they asking about?"
  154. def _parse_token_or_url(
  155. url_or_token: Union[str, uuid.UUID],
  156. api_url: str,
  157. num_parts: int = 2,
  158. kind: str = "dataset",
  159. ) -> tuple[str, str]:
  160. """Parse a public dataset URL or share token."""
  161. try:
  162. if isinstance(url_or_token, uuid.UUID) or uuid.UUID(url_or_token):
  163. return api_url, str(url_or_token)
  164. except ValueError:
  165. pass
  166. # Then it's a URL
  167. parsed_url = urllib_parse.urlparse(str(url_or_token))
  168. # Extract the UUID from the path
  169. path_parts = parsed_url.path.split("/")
  170. if len(path_parts) >= num_parts:
  171. token_uuid = path_parts[-num_parts]
  172. _as_uuid(token_uuid, var="token parts")
  173. else:
  174. raise ls_utils.LangSmithUserError(f"Invalid public {kind} URL: {url_or_token}")
  175. if parsed_url.netloc == "smith.langchain.com":
  176. api_url = "https://api.smith.langchain.com"
  177. elif parsed_url.netloc == "beta.smith.langchain.com":
  178. api_url = "https://beta.api.smith.langchain.com"
  179. return api_url, token_uuid
  180. def _is_langchain_hosted(url: str) -> bool:
  181. """Check if the URL is langchain hosted.
  182. Args:
  183. url (str): The URL to check.
  184. Returns:
  185. bool: True if the URL is langchain hosted, False otherwise.
  186. """
  187. try:
  188. netloc = urllib_parse.urlsplit(url).netloc.split(":")[0]
  189. return netloc == "langchain.com" or netloc.endswith(".langchain.com")
  190. except Exception:
  191. return False
# Accepted forms for resource identifiers throughout the client API.
ID_TYPE = Union[uuid.UUID, str]
# Run types accepted when creating runs.
RUN_TYPE_T = Literal[
    "tool", "chain", "llm", "retriever", "embedding", "prompt", "parser"
]
  196. @functools.lru_cache(maxsize=1)
  197. def _default_retry_config() -> Retry:
  198. """Get the default retry configuration.
  199. If urllib3 version is 1.26 or greater, retry on all methods.
  200. Returns:
  201. Retry: The default retry configuration.
  202. """
  203. retry_params = dict(
  204. total=3,
  205. status_forcelist=[502, 503, 504, 408, 425],
  206. backoff_factor=0.5,
  207. # Sadly urllib3 1.x doesn't support backoff_jitter
  208. raise_on_redirect=False,
  209. raise_on_status=False,
  210. respect_retry_after_header=True,
  211. )
  212. # the `allowed_methods` keyword is not available in urllib3 < 1.26
  213. # check to see if urllib3 version is 1.26 or greater
  214. urllib3_version = importlib.metadata.version("urllib3")
  215. use_allowed_methods = tuple(map(int, urllib3_version.split("."))) >= (1, 26)
  216. if use_allowed_methods:
  217. # Retry on all methods
  218. retry_params["allowed_methods"] = None
  219. return ls_utils.LangSmithRetry(**retry_params) # type: ignore
  220. def close_session(session: requests.Session) -> None:
  221. """Close the session.
  222. Args:
  223. session (requests.Session): The session to close.
  224. """
  225. logger.debug("Closing Client.session")
  226. session.close()
  227. def _validate_api_key_if_hosted(api_url: str, api_key: Optional[str]) -> None:
  228. """Verify API key is provided if url not localhost.
  229. Args:
  230. api_url (str): The API URL.
  231. api_key (Optional[str]): The API key.
  232. Returns:
  233. None
  234. Raises:
  235. LangSmithUserError: If the API key is not provided when using the hosted service.
  236. """
  237. # If the domain is langchain.com, raise error if no api_key
  238. if not api_key:
  239. if (
  240. _is_langchain_hosted(api_url)
  241. and not ls_utils.is_env_var_truish("OTEL_ENABLED")
  242. and ls_utils.tracing_is_enabled()
  243. ):
  244. warnings.warn(
  245. "API key must be provided when using hosted LangSmith API",
  246. ls_utils.LangSmithMissingAPIKeyWarning,
  247. )
  248. def _format_feedback_score(score: Union[float, int, bool, None]):
  249. """Format a feedback score by truncating numerical values to 4 decimal places.
  250. Args:
  251. score: The score to format, can be a number or any other type
  252. Returns:
  253. The formatted score
  254. """
  255. if isinstance(score, float):
  256. # Truncate at 4 decimal places
  257. return round(score, 4)
  258. return score
  259. def _get_tracing_sampling_rate(
  260. tracing_sampling_rate: Optional[float] = None,
  261. ) -> float | None:
  262. """Get the tracing sampling rate.
  263. Returns:
  264. Optional[float]: The tracing sampling rate.
  265. """
  266. if tracing_sampling_rate is None:
  267. sampling_rate_str = ls_utils.get_env_var("TRACING_SAMPLING_RATE")
  268. if not sampling_rate_str:
  269. return None
  270. else:
  271. sampling_rate_str = str(tracing_sampling_rate)
  272. sampling_rate = float(sampling_rate_str)
  273. if sampling_rate < 0 or sampling_rate > 1:
  274. raise ls_utils.LangSmithUserError(
  275. "LANGSMITH_TRACING_SAMPLING_RATE must be between 0 and 1 if set."
  276. f" Got: {sampling_rate}"
  277. )
  278. return sampling_rate
  279. def _get_write_api_urls(_write_api_urls: Optional[dict[str, str]]) -> dict[str, str]:
  280. # Note: LANGSMITH_RUNS_ENDPOINTS is now handled via replicas, not _write_api_urls
  281. _write_api_urls = _write_api_urls or {}
  282. processed_write_api_urls = {}
  283. for url, api_key in _write_api_urls.items():
  284. processed_url = url.strip()
  285. if not processed_url:
  286. raise ls_utils.LangSmithUserError("LangSmith runs API URL cannot be empty")
  287. processed_url = processed_url.strip().strip('"').strip("'").rstrip("/")
  288. processed_api_key = api_key.strip().strip('"').strip("'")
  289. _validate_api_key_if_hosted(processed_url, processed_api_key)
  290. processed_write_api_urls[processed_url] = processed_api_key
  291. return processed_write_api_urls
  292. def _as_uuid(value: ID_TYPE, var: Optional[str] = None) -> uuid.UUID:
  293. try:
  294. return uuid.UUID(value) if not isinstance(value, uuid.UUID) else value
  295. except ValueError as e:
  296. var = var or "value"
  297. raise ls_utils.LangSmithUserError(
  298. f"{var} must be a valid UUID or UUID string. Got {value}"
  299. ) from e
  300. @typing.overload
  301. def _ensure_uuid(value: Optional[Union[str, uuid.UUID]]) -> uuid.UUID: ...
  302. @typing.overload
  303. def _ensure_uuid(
  304. value: Optional[Union[str, uuid.UUID]], *, accept_null: bool = True
  305. ) -> Optional[uuid.UUID]: ...
  306. def _ensure_uuid(value: Optional[Union[str, uuid.UUID]], *, accept_null: bool = False):
  307. if value is None:
  308. if accept_null:
  309. return None
  310. return uuid7()
  311. return _as_uuid(value)
  312. @functools.lru_cache(maxsize=1)
  313. def _parse_url(url):
  314. parsed_url = urllib_parse.urlparse(url)
  315. host = parsed_url.netloc.split(":")[0]
  316. return host
class _LangSmithHttpAdapter(requests_adapters.HTTPAdapter):
    """HTTPAdapter that forwards a custom socket blocksize to urllib3.

    urllib3 >= 2.0 accepts a ``blocksize`` pool keyword; earlier versions do
    not, so it is only passed when ``URLLIB3_SUPPORTS_BLOCKSIZE`` is True.
    """

    # `_blocksize` is added so requests' pickling/copy machinery preserves it.
    __attrs__ = [
        "max_retries",
        "config",
        "_pool_connections",
        "_pool_maxsize",
        "_pool_block",
        "_blocksize",
    ]

    def __init__(
        self,
        pool_connections: int = requests_adapters.DEFAULT_POOLSIZE,
        pool_maxsize: int = requests_adapters.DEFAULT_POOLSIZE,
        max_retries: Union[Retry, int, None] = requests_adapters.DEFAULT_RETRIES,
        pool_block: bool = requests_adapters.DEFAULT_POOLBLOCK,
        blocksize: int = 16384,  # default from urllib3.BaseHTTPSConnection
    ) -> None:
        # Must be set BEFORE super().__init__, which calls init_poolmanager().
        self._blocksize = blocksize
        super().__init__(pool_connections, pool_maxsize, max_retries, pool_block)

    def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
        if URLLIB3_SUPPORTS_BLOCKSIZE:
            # urllib3 before 2.0 doesn't support blocksize
            pool_kwargs["blocksize"] = self._blocksize
        return super().init_poolmanager(connections, maxsize, block, **pool_kwargs)
class Client:
    """Client for interacting with the LangSmith API."""

    # __slots__ prevents accidental attribute creation and reduces
    # per-instance memory. "__weakref__" is included because __init__
    # registers weakref finalizers / weak references to the client.
    # NOTE(review): __init__ assigns `self.api_key`, but only "_api_key" is
    # slotted — presumably an `api_key` property with a setter exists later
    # in this class; confirm.
    __slots__ = [
        "__weakref__",
        "api_url",
        "_api_key",
        "_workspace_id",
        "_headers",
        "_custom_headers",
        "retry_config",
        "timeout_ms",
        "_timeout",
        "session",
        "_get_data_type_cached",
        "_web_url",
        "_tenant_id",
        "tracing_sample_rate",
        "_filtered_post_uuids",
        "tracing_queue",
        "_anonymizer",
        "_hide_inputs",
        "_hide_outputs",
        "_hide_metadata",
        "_omit_traced_runtime_info",
        "_process_buffered_run_ops",
        "_run_ops_buffer_size",
        "_run_ops_buffer_timeout_ms",
        "_run_ops_buffer_last_flush_time",
        "_info",
        "_write_api_urls",
        "_settings",
        "_manual_cleanup",
        "_pyo3_client",
        "compressed_traces",
        "_data_available_event",
        "_futures",
        "_run_ops_buffer",
        "_run_ops_buffer_lock",
        "otel_exporter",
        "_otel_trace",
        "_set_span_in_context",
        "_max_batch_size_bytes",
        "_tracing_error_callback",
    ]

    # Type declarations for a subset of the slots above.
    _api_key: Optional[str]
    _headers: dict[str, str]
    _custom_headers: dict[str, str]
    _timeout: tuple[float, float]
    _manual_cleanup: bool
    def __init__(
        self,
        api_url: Optional[str] = None,
        *,
        api_key: Optional[str] = None,
        retry_config: Optional[Retry] = None,
        timeout_ms: Optional[Union[int, tuple[int, int]]] = None,
        web_url: Optional[str] = None,
        session: Optional[requests.Session] = None,
        auto_batch_tracing: bool = True,
        anonymizer: Optional[Callable[[dict], dict]] = None,
        hide_inputs: Optional[Union[Callable[[dict], dict], bool]] = None,
        hide_outputs: Optional[Union[Callable[[dict], dict], bool]] = None,
        hide_metadata: Optional[Union[Callable[[dict], dict], bool]] = None,
        omit_traced_runtime_info: bool = False,
        process_buffered_run_ops: Optional[
            Callable[[Sequence[dict]], Sequence[dict]]
        ] = None,
        run_ops_buffer_size: Optional[int] = None,
        run_ops_buffer_timeout_ms: Optional[float] = None,
        info: Optional[Union[dict, ls_schemas.LangSmithInfo]] = None,
        api_urls: Optional[dict[str, str]] = None,
        otel_tracer_provider: Optional[TracerProvider] = None,
        otel_enabled: Optional[bool] = None,
        tracing_sampling_rate: Optional[float] = None,
        workspace_id: Optional[str] = None,
        max_batch_size_bytes: Optional[int] = None,
        headers: Optional[dict[str, str]] = None,
        tracing_error_callback: Optional[Callable[[Exception], None]] = None,
    ) -> None:
        """Initialize a `Client` instance.

        Args:
            api_url (Optional[str]): URL for the LangSmith API. Defaults to the `LANGCHAIN_ENDPOINT`
                environment variable or `https://api.smith.langchain.com` if not set.
            api_key (Optional[str]): API key for the LangSmith API. Defaults to the `LANGCHAIN_API_KEY`
                environment variable.
            retry_config (Optional[Retry]): Retry configuration for the `HTTPAdapter`.
            timeout_ms (Optional[Union[int, Tuple[int, int]]]): Timeout for the `HTTPAdapter`.
                Can also be a 2-tuple of `(connect timeout, read timeout)` to set them separately.
            web_url (Optional[str]): URL for the LangSmith web app. Default is auto-inferred from
                the `ENDPOINT`.
            session (Optional[requests.Session]): The session to use for requests.
                If `None`, a new session will be created.
            auto_batch_tracing (bool, default=True): Whether to automatically batch tracing.
            anonymizer (Optional[Callable[[dict], dict]]): A function applied for masking serialized run inputs and outputs,
                before sending to the API.
            hide_inputs (Optional[Union[Callable[[dict], dict], bool]]): Whether to hide run inputs when tracing with this client.
                If `True`, hides the entire inputs.
                If a function, applied to all run inputs when creating runs.
            hide_outputs (Optional[Union[Callable[[dict], dict], bool]]): Whether to hide run outputs when tracing with this client.
                If `True`, hides the entire outputs.
                If a function, applied to all run outputs when creating runs.
            hide_metadata (Optional[Union[Callable[[dict], dict], bool]]): Whether to hide run metadata when tracing with this client.
                If `True`, hides the entire metadata.
                If a function, applied to all run metadata when creating runs.
            omit_traced_runtime_info (bool): Whether to omit runtime information from traced runs.
                If `True`, runtime information (SDK version, platform, Python version, etc.)
                will not be stored in the `extra.runtime` field of runs.
                Defaults to `False`.
            process_buffered_run_ops (Optional[Callable[[Sequence[dict]], Sequence[dict]]]): A function applied to buffered run operations
                that allows for modification of the raw run dicts before they are converted to multipart and compressed.
                Useful specifically for high throughput tracing where you need to apply a rate-limited API or other
                costly process to the runs before they are sent to the API.
                Note that the buffer will only flush automatically when `run_ops_buffer_size` is reached or a new run is added to the
                buffer after `run_ops_buffer_timeout_ms` has elapsed - it will not flush outside of these conditions unless you manually
                call `client.flush()`, so be sure to do this before your code exits.
            run_ops_buffer_size (Optional[int]): Maximum number of run operations to collect in the buffer before applying
                `process_buffered_run_ops` and sending to the API.
                Required when `process_buffered_run_ops` is provided.
            run_ops_buffer_timeout_ms (Optional[int]): Maximum time in milliseconds to wait before flushing the run ops buffer
                when new runs are added.
                Defaults to `5000`.
                Only used when `process_buffered_run_ops` is provided.
            info: The information about the LangSmith API.
                If not provided, it will be fetched from the API.
            api_urls (Optional[Dict[str, str]]): A dictionary of write API URLs and their corresponding API keys.
                Useful for multi-tenant setups. Data is only read from the first
                URL in the dictionary. However, ONLY Runs are written (`POST` and `PATCH`)
                to all URLs in the dictionary. Feedback, sessions, datasets, examples,
                annotation queues and evaluation results are only written to the first.
            otel_tracer_provider (Optional[TracerProvider]): Optional tracer provider for OpenTelemetry integration.
                If not provided, a LangSmith-specific tracer provider will be used.
            otel_enabled (Optional[bool]): Whether to enable OTEL export. If not set,
                falls back to the `OTEL_ENABLED` environment variable.
            tracing_sampling_rate (Optional[float]): The sampling rate for tracing.
                If provided, overrides the `LANGCHAIN_TRACING_SAMPLING_RATE` environment variable.
                Should be a float between `0` and `1`, where `1` means trace everything
                and `0` means trace nothing.
            workspace_id (Optional[str]): The workspace ID.
                Required for org-scoped API keys.
            max_batch_size_bytes (Optional[int]): The maximum size of a batch of runs in bytes.
                If not provided, the default is set by the server.
            headers (Optional[Dict[str, str]]): Additional HTTP headers to include in all requests.
                These headers will be merged with the default headers (User-Agent, Accept, x-api-key, etc.).
                Custom headers will not override the default required headers.
            tracing_error_callback (Optional[Callable[[Exception], None]]): Optional callback function to handle errors.
                Called when exceptions occur during tracing operations.

        Raises:
            LangSmithUserError: If the API key is not provided when using the hosted service.
            LangSmithUserError: If both `api_url` and `api_urls` are provided.
        """
        # --- Mutually exclusive configuration checks -------------------------
        if api_url and api_urls:
            raise ls_utils.LangSmithUserError(
                "You cannot provide both api_url and api_urls."
            )
        if (
            os.getenv("LANGSMITH_ENDPOINT") or os.getenv("LANGCHAIN_ENDPOINT")
        ) and os.getenv("LANGSMITH_RUNS_ENDPOINTS"):
            raise ls_utils.LangSmithUserError(
                "You cannot provide both LANGSMITH_ENDPOINT / LANGCHAIN_ENDPOINT "
                "and LANGSMITH_RUNS_ENDPOINTS."
            )
        self.tracing_sample_rate = _get_tracing_sampling_rate(tracing_sampling_rate)
        # Runs dropped by sampling are remembered so later ops can be skipped.
        self._filtered_post_uuids: set[uuid.UUID] = set()
        self._write_api_urls: Mapping[str, Optional[str]] = _get_write_api_urls(
            api_urls
        )
        # Initialize workspace attribute first
        self._workspace_id = ls_utils.get_workspace_id(workspace_id)
        # Store custom headers
        self._custom_headers = headers or {}
        # --- Resolve API URL / key ------------------------------------------
        # When multiple write URLs are configured, reads go to the first one.
        if self._write_api_urls:
            self.api_url = next(iter(self._write_api_urls))
            self.api_key = self._write_api_urls[self.api_url]
        else:
            self.api_url = ls_utils.get_api_url(api_url)
            self.api_key = ls_utils.get_api_key(api_key)
            _validate_api_key_if_hosted(self.api_url, self.api_key)
            self._write_api_urls = {self.api_url: self.api_key}
        self.retry_config = retry_config or _default_retry_config()
        # Normalize timeout to a (connect, read) pair in milliseconds.
        self.timeout_ms = (
            (timeout_ms, timeout_ms)
            if isinstance(timeout_ms, int)
            else (timeout_ms or (10_000, 90_001))
        )
        self._timeout = (self.timeout_ms[0] / 1000, self.timeout_ms[1] / 1000)
        self._web_url = web_url
        self._tenant_id: Optional[uuid.UUID] = None
        # Create a session and register a finalizer to close it
        session_ = session if session else requests.Session()
        self.session = session_
        self._info = (
            info
            if info is None or isinstance(info, ls_schemas.LangSmithInfo)
            else ls_schemas.LangSmithInfo(**info)
        )
        # Close the session both on garbage collection and at interpreter exit.
        weakref.finalize(self, close_session, self.session)
        atexit.register(close_session, session_)
        self.compressed_traces: Optional[CompressedTraces] = None
        self._data_available_event: Optional[threading.Event] = None
        self._futures: Optional[weakref.WeakSet[cf.Future]] = None
        self._run_ops_buffer: list[tuple[str, dict]] = []
        self._run_ops_buffer_lock = threading.Lock()
        self.otel_exporter: Optional[OTELExporter] = None
        self._max_batch_size_bytes = max_batch_size_bytes
        # Initialize auto batching
        if auto_batch_tracing:
            self.tracing_queue: Optional[PriorityQueue] = PriorityQueue()
            threading.Thread(
                target=_tracing_control_thread_func,
                # arg must be a weakref to self to avoid the Thread object
                # preventing garbage collection of the Client object
                args=(weakref.ref(self),),
            ).start()
        else:
            self.tracing_queue = None
        # Mount the HTTPAdapter with the retry configuration.
        adapter = _LangSmithHttpAdapter(
            max_retries=self.retry_config,
            blocksize=_BLOCKSIZE_BYTES,
            # We need to set the pool_maxsize to a value greater than the
            # number of threads used for batch tracing, plus 1 for other
            # requests.
            pool_maxsize=_AUTO_SCALE_UP_NTHREADS_LIMIT + 1,
        )
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)
        self._get_data_type_cached = functools.lru_cache(maxsize=10)(
            self._get_data_type
        )
        self._anonymizer = anonymizer
        # Privacy controls: explicit arguments win over environment variables.
        self._hide_inputs = (
            hide_inputs
            if hide_inputs is not None
            else ls_utils.get_env_var("HIDE_INPUTS") == "true"
        )
        self._hide_outputs = (
            hide_outputs
            if hide_outputs is not None
            else ls_utils.get_env_var("HIDE_OUTPUTS") == "true"
        )
        self._hide_metadata = (
            hide_metadata
            if hide_metadata is not None
            else ls_utils.get_env_var("HIDE_METADATA") == "true"
        )
        self._omit_traced_runtime_info = omit_traced_runtime_info
        self._process_buffered_run_ops = process_buffered_run_ops
        self._run_ops_buffer_size = run_ops_buffer_size
        self._run_ops_buffer_timeout_ms = run_ops_buffer_timeout_ms or 5000
        self._run_ops_buffer_last_flush_time = time.time()
        # Validate that run_ops_buffer_size is provided when process_buffered_run_ops is used
        if process_buffered_run_ops is not None and run_ops_buffer_size is None:
            raise ValueError(
                "run_ops_buffer_size must be provided when process_buffered_run_ops is specified"
            )
        if process_buffered_run_ops is None and run_ops_buffer_size is not None:
            raise ValueError(
                "process_buffered_run_ops must be provided when run_ops_buffer_size is specified"
            )
        # To trigger this code, set the `LANGSMITH_USE_PYO3_CLIENT` env var to any value.
        self._pyo3_client = None
        if ls_utils.get_env_var("USE_PYO3_CLIENT") is not None:
            langsmith_pyo3 = None
            try:
                import langsmith_pyo3  # type: ignore[import-not-found, no-redef]
            except ImportError as e:
                logger.warning(
                    "Failed to import `langsmith_pyo3` when PyO3 client was requested, "
                    "falling back to Python impl: %s",
                    repr(e),
                )
            if langsmith_pyo3:
                # TODO: tweak these constants as needed
                queue_capacity = 1_000_000
                batch_size = 100
                batch_timeout_millis = 1000
                worker_threads = 1
                try:
                    self._pyo3_client = langsmith_pyo3.BlockingTracingClient(
                        self.api_url,
                        self.api_key,
                        queue_capacity,
                        batch_size,
                        batch_timeout_millis,
                        worker_threads,
                    )
                except Exception as e:
                    logger.warning(
                        "Failed to instantiate `langsmith_pyo3.BlockingTracingClient` "
                        "when PyO3 client was requested, falling back to Python impl: %s",
                        repr(e),
                    )
        self._settings: Union[ls_schemas.LangSmithSettings, None] = None
        self._manual_cleanup = False
        # --- Optional OpenTelemetry export ----------------------------------
        # NOTE(review): an env-var OTEL_ENABLED=true cannot be overridden by
        # passing otel_enabled=False here — confirm whether that is intended.
        if _check_otel_enabled() or otel_enabled:
            try:
                (
                    otel_trace,
                    set_span_in_context,
                    get_otlp_tracer_provider,
                    OTELExporter,
                ) = _import_otel()
                existing_provider = otel_trace.get_tracer_provider()
                tracer = existing_provider.get_tracer(__name__)
                if otel_tracer_provider is None:
                    # Use existing global provider if available
                    if not (
                        isinstance(existing_provider, otel_trace.ProxyTracerProvider)
                        and hasattr(tracer, "_tracer")
                        and isinstance(
                            cast(
                                otel_trace.ProxyTracer,  # type: ignore[attr-defined, name-defined]
                                tracer,
                            )._tracer,
                            otel_trace.NoOpTracer,
                        )
                    ):
                        otel_tracer_provider = cast(TracerProvider, existing_provider)
                    else:
                        # No real global provider configured: create LangSmith's
                        # own OTLP provider and install it globally.
                        otel_tracer_provider = get_otlp_tracer_provider()
                        otel_trace.set_tracer_provider(otel_tracer_provider)
                self.otel_exporter = OTELExporter(tracer_provider=otel_tracer_provider)
                # Store imports for later use
                self._otel_trace = otel_trace
                self._set_span_in_context = set_span_in_context
            except ImportError:
                # NOTE(review): the message below is missing a closing backtick
                # after "[otel]" — it is runtime text, so left untouched here.
                warnings.warn(
                    "LANGSMITH_OTEL_ENABLED is set but OpenTelemetry packages are not installed: Install with `pip install langsmith[otel]"
                )
                self.otel_exporter = None
        else:
            self.otel_exporter = None
        self._tracing_error_callback = tracing_error_callback
  672. def _repr_html_(self) -> str:
  673. """Return an HTML representation of the instance with a link to the URL.
  674. Returns:
  675. str: The HTML representation of the instance.
  676. """
  677. link = self._host_url
  678. return f'<a href="{link}", target="_blank" rel="noopener">LangSmith Client</a>'
  679. def _invoke_tracing_error_callback(self, error: Exception) -> None:
  680. """Invoke the background tracing error callback if configured.
  681. Args:
  682. error: The exception that occurred during background tracing.
  683. """
  684. if self._tracing_error_callback:
  685. try:
  686. self._tracing_error_callback(error)
  687. except Exception:
  688. logger.error(
  689. "Error in tracing_error_callback:\n",
  690. exc_info=True,
  691. )
  692. def __repr__(self) -> str:
  693. """Return a string representation of the instance with a link to the URL.
  694. Returns:
  695. str: The string representation of the instance.
  696. """
  697. return f"Client (API URL: {self.api_url})"
    @property
    def _host(self) -> str:
        """Return the host parsed out of the configured API URL via `_parse_url`."""
        return _parse_url(self.api_url)
    @property
    def _host_url(self) -> str:
        """The web host url.

        Delegates to `ls_utils.get_host_url`, combining the configured web
        URL (if any) with the API URL.
        """
        return ls_utils.get_host_url(self._web_url, self.api_url)
  705. def _compute_headers(self) -> dict[str, str]:
  706. headers = {
  707. "User-Agent": f"langsmith-py/{langsmith.__version__}",
  708. "Accept": "application/json",
  709. }
  710. # Merge custom headers first so they don't override required headers
  711. headers.update(self._custom_headers)
  712. # Required headers that should not be overridden
  713. if self.api_key:
  714. headers[X_API_KEY] = self.api_key
  715. if self._workspace_id:
  716. headers["X-Tenant-Id"] = self._workspace_id
  717. return headers
  718. def _set_header_affecting_attr(self, attr_name: str, value: Any) -> None:
  719. """Set attributes that affect headers and recalculate them."""
  720. object.__setattr__(self, attr_name, value)
  721. object.__setattr__(self, "_headers", self._compute_headers())
    @property
    def api_key(self) -> Optional[str]:
        """Return the API key used for authentication."""
        return self._api_key

    @api_key.setter
    def api_key(self, value: Optional[str]) -> None:
        # Setting the key must also refresh the cached request headers,
        # since the key is embedded in them.
        self._set_header_affecting_attr("_api_key", value)
    @property
    def workspace_id(self) -> Optional[str]:
        """Return the workspace ID used for API requests."""
        return self._workspace_id

    @workspace_id.setter
    def workspace_id(self, value: Optional[str]) -> None:
        # Refresh cached headers: the workspace id is sent as X-Tenant-Id.
        self._set_header_affecting_attr("_workspace_id", value)
    @property
    def info(self) -> ls_schemas.LangSmithInfo:
        """Get the information about the LangSmith API.

        The result is fetched once and cached on the instance; failures fall
        back to an empty `LangSmithInfo` rather than raising.

        Returns:
            The information about the LangSmith API (an empty
            `LangSmithInfo` when the API is unavailable or in OTEL-only mode).
        """
        # Cached from a previous access.
        if self._info is not None:
            return self._info
        # Skip API call when using OTEL-only mode
        otel_only_mode = ls_utils.is_env_var_truish(
            "OTEL_ENABLED"
        ) and ls_utils.is_env_var_truish("OTEL_ONLY")
        if otel_only_mode:
            self._info = ls_schemas.LangSmithInfo()
            return self._info
        # Fetch info from API
        try:
            response = self.request_with_retries(
                "GET",
                "/info",
                headers={"Accept": "application/json"},
                timeout=self._timeout,
            )
            ls_utils.raise_for_status_with_text(response)
            self._info = ls_schemas.LangSmithInfo(**response.json())
        # NOTE(review): BaseException also catches KeyboardInterrupt/SystemExit;
        # presumably deliberate so info lookup can never crash the caller — confirm.
        except BaseException as e:
            logger.warning(
                f"Failed to get info from {self.api_url}: {repr(e)}",
            )
            self._info = ls_schemas.LangSmithInfo()
        return self._info
  767. def _get_settings(self) -> ls_schemas.LangSmithSettings:
  768. """Get the settings for the current tenant.
  769. Returns:
  770. dict: The settings for the current tenant.
  771. """
  772. if self._settings is None:
  773. response = self.request_with_retries("GET", "/settings")
  774. ls_utils.raise_for_status_with_text(response)
  775. self._settings = ls_schemas.LangSmithSettings(**response.json())
  776. return self._settings
  777. def _content_above_size(self, content_length: Optional[int]) -> Optional[str]:
  778. if content_length is None or self._info is None:
  779. return None
  780. info = cast(ls_schemas.LangSmithInfo, self._info)
  781. bic = info.batch_ingest_config
  782. if not bic:
  783. return None
  784. size_limit = self._max_batch_size_bytes or bic.get("size_limit_bytes")
  785. if size_limit is None:
  786. return None
  787. if content_length > size_limit:
  788. return (
  789. f"The content length of {content_length} bytes exceeds the "
  790. f"maximum size limit of {size_limit} bytes."
  791. )
  792. return None
    def request_with_retries(
        self,
        /,
        method: Literal["GET", "POST", "PUT", "PATCH", "DELETE"],
        pathname: str,
        *,
        request_kwargs: Optional[Mapping] = None,
        stop_after_attempt: int = 1,
        retry_on: Optional[Sequence[type[BaseException]]] = None,
        to_ignore: Optional[Sequence[type[BaseException]]] = None,
        handle_response: Optional[Callable[[requests.Response, int], Any]] = None,
        _context: str = "",
        **kwargs: Any,
    ) -> requests.Response:
        """Send a request with retries.

        Args:
            method (str): The HTTP request method.
            pathname (str): The pathname of the request URL. Will be appended
                to the API URL.
            request_kwargs (Mapping): Additional request parameters.
            stop_after_attempt (int, default=1): The number of attempts to make.
            retry_on (Optional[Sequence[Type[BaseException]]]): The exceptions
                to retry on. In addition to:
                `[LangSmithConnectionError, LangSmithAPIError]`.
            to_ignore (Optional[Sequence[Type[BaseException]]]): The exceptions
                to ignore / pass on.
            handle_response (Optional[Callable[[requests.Response, int], Any]]):
                A function to handle the response and return whether to
                continue retrying.
            _context (str, default=""): The context of the request.
            **kwargs (Any): Additional keyword arguments to pass to the request.

        Returns:
            The response object.

        Raises:
            LangSmithAPIError: If a server error occurs.
            LangSmithUserError: If the request fails.
            LangSmithConnectionError: If a connection error occurs.
            LangSmithError: If the request fails.
        """
        request_kwargs = request_kwargs or {}
        # Merge order: explicit kwargs win over request_kwargs, which win over
        # the default timeout; headers are merged separately so the client's
        # standard headers are always present but can be overridden per-call.
        request_kwargs = {
            "timeout": self._timeout,
            **request_kwargs,
            **kwargs,
            "headers": {
                **self._headers,
                **request_kwargs.get("headers", {}),
                **kwargs.get("headers", {}),
            },
        }
        # Default Content-Type to JSON for non-GET body requests unless the
        # caller set one (or is sending multipart `files`).
        if (
            method != "GET"
            and "data" in request_kwargs
            and "files" not in request_kwargs
            and not request_kwargs["headers"].get("Content-Type")
        ):
            request_kwargs["headers"]["Content-Type"] = "application/json"
        # Silence noisy urllib3 retry / pool-full warnings for the duration
        # of the request.
        logging_filters = [
            ls_utils.FilterLangSmithRetry(),
            ls_utils.FilterPoolFullWarning(host=str(self._host)),
        ]
        # Exceptions that trigger a retry (caller-supplied plus the defaults).
        retry_on_: tuple[type[BaseException], ...] = (
            *(retry_on or ()),
            *(
                ls_utils.LangSmithConnectionError,
                ls_utils.LangSmithRequestTimeout,  # 408
                ls_utils.LangSmithAPIError,  # 500
            ),
        )
        to_ignore_: tuple[type[BaseException], ...] = (*(to_ignore or ()),)
        response = None
        for idx in range(stop_after_attempt):
            try:
                # Inner try: perform the request and translate HTTP/transport
                # failures into LangSmith exception types. The outer try then
                # decides whether to ignore, retry, or propagate them.
                try:
                    with ls_utils.filter_logs(_urllib3_logger, logging_filters):
                        response = self.session.request(
                            method,
                            _construct_url(self.api_url, pathname),
                            stream=False,
                            **request_kwargs,
                        )
                    ls_utils.raise_for_status_with_text(response)
                    return response
                except requests.exceptions.ReadTimeout as e:
                    logger.debug("Passing on exception %s", e)
                    if idx + 1 == stop_after_attempt:
                        raise
                    # Exponential backoff with jitter before the next attempt.
                    sleep_time = 2**idx + (random.random() * 0.5)
                    time.sleep(sleep_time)
                    continue
                except requests.HTTPError as e:
                    if response is not None:
                        # Give the caller's handler a chance to decide whether
                        # to retry (only while attempts remain).
                        if handle_response is not None:
                            if idx + 1 < stop_after_attempt:
                                should_continue = handle_response(response, idx + 1)
                                if should_continue:
                                    continue
                        # Map each status code to a specific exception type so
                        # callers (and the outer handlers) can react precisely.
                        if response.status_code == 500:
                            raise ls_utils.LangSmithAPIError(
                                f"Server error caused failure to {method}"
                                f" {pathname} in"
                                f" LangSmith API. {repr(e)}"
                                f"{_context}"
                            )
                        elif response.status_code == 408:
                            raise ls_utils.LangSmithRequestTimeout(
                                f"Client took too long to send request to {method}"
                                f"{pathname} {_context}"
                            )
                        elif response.status_code == 429:
                            raise ls_utils.LangSmithRateLimitError(
                                f"Rate limit exceeded for {pathname}. {repr(e)}"
                                f"{_context}"
                            )
                        elif response.status_code == 401:
                            raise ls_utils.LangSmithAuthError(
                                f"Authentication failed for {pathname}. {repr(e)}"
                                f"{_context}"
                            )
                        elif response.status_code == 404:
                            raise ls_utils.LangSmithNotFoundError(
                                f"Resource not found for {pathname}. {repr(e)}"
                                f"{_context}"
                            )
                        elif response.status_code == 409:
                            raise ls_utils.LangSmithConflictError(
                                f"Conflict for {pathname}. {repr(e)}{_context}"
                            )
                        elif response.status_code == 403:
                            # 403 may carry a structured error explaining that
                            # an org-scoped key needs a workspace id.
                            try:
                                error_data = response.json()
                                error_code = error_data.get("error", "")
                                if error_code == "org_scoped_key_requires_workspace":
                                    raise ls_utils.LangSmithUserError(
                                        "This API key is org-scoped and requires workspace specification. "
                                        "Please provide 'workspace_id' parameter, "
                                        "or set LANGSMITH_WORKSPACE_ID environment variable."
                                    )
                            except (ValueError, KeyError):
                                # Body wasn't JSON / missing keys: fall through
                                # to the generic error below.
                                pass
                            raise ls_utils.LangSmithError(
                                f"Failed to {method} {pathname} in LangSmith"
                                f" API. {repr(e)}"
                            )
                        else:
                            raise ls_utils.LangSmithError(
                                f"Failed to {method} {pathname} in LangSmith"
                                f" API. {repr(e)}"
                            )
                    else:
                        raise ls_utils.LangSmithUserError(
                            f"Failed to {method} {pathname} in LangSmith API. {repr(e)}"
                        )
                except requests.ConnectionError as e:
                    recommendation = (
                        "Please confirm your LANGCHAIN_ENDPOINT."
                        if self.api_url != "https://api.smith.langchain.com"
                        else "Please confirm your internet connection."
                    )
                    # If the payload was over the batch size limit, surface
                    # that as the likely cause instead of a generic hint.
                    try:
                        content_length = int(
                            str(e.request.headers.get("Content-Length"))
                            if e.request
                            else ""
                        )
                        size_rec = self._content_above_size(content_length)
                        if size_rec:
                            recommendation = size_rec
                    except ValueError:
                        content_length = None
                    # Mask the API key for the error message: keep the first 5
                    # and last 2 characters, star out the middle.
                    api_key = (
                        e.request.headers.get("x-api-key") or "" if e.request else ""
                    )
                    prefix, suffix = api_key[:5], api_key[-2:]
                    filler = "*" * (max(0, len(api_key) - 7))
                    masked_api_key = f"{prefix}{filler}{suffix}"
                    raise ls_utils.LangSmithConnectionError(
                        f"Connection error caused failure to {method} {pathname}"
                        f" in LangSmith API. {recommendation}"
                        f" {repr(e)}"
                        f"\nContent-Length: {content_length}"
                        f"\nAPI Key: {masked_api_key}"
                        f"{_context}"
                    ) from e
                except Exception as e:
                    # Rewrap anything else as a LangSmithError, rewriting
                    # "session" to "session (project)" for clearer messages.
                    args = list(e.args)
                    msg = args[1] if len(args) > 1 else ""
                    msg = msg.replace("session", "session (project)")
                    if args:
                        emsg = "\n".join(
                            [str(args[0])]
                            + [msg]
                            + [str(arg) for arg in (args[2:] if len(args) > 2 else [])]
                        )
                    else:
                        emsg = msg
                    raise ls_utils.LangSmithError(
                        f"Failed to {method} {pathname} in LangSmith API. {emsg}"
                        f"{_context}"
                    ) from e
            except to_ignore_ as e:
                # Caller asked to pass on these; return the raw response if we
                # have one (otherwise fall through to the next attempt).
                if response is not None:
                    logger.debug("Passing on exception %s", e)
                    return response
            except ls_utils.LangSmithRateLimitError:
                if idx + 1 == stop_after_attempt:
                    raise
                # Honor the server's retry-after hint when parseable,
                # defaulting to 30 seconds.
                if response is not None:
                    try:
                        retry_after = float(response.headers.get("retry-after", "30"))
                    except Exception as e:
                        logger.warning(
                            "Invalid retry-after header: %s",
                            repr(e),
                        )
                        retry_after = 30
                    # Add exponential backoff
                    retry_after = retry_after * 2**idx + random.random()
                    time.sleep(retry_after)
            except retry_on_:
                # Handle other exceptions more immediately
                if idx + 1 == stop_after_attempt:
                    raise
                sleep_time = 2**idx + (random.random() * 0.5)
                time.sleep(sleep_time)
                continue
        # Else we still raise an error
        raise ls_utils.LangSmithError(
            f"Failed to {method} {pathname} in LangSmith API."
        )
  1018. def _get_paginated_list(
  1019. self, path: str, *, params: Optional[dict] = None
  1020. ) -> Iterator[dict]:
  1021. """Get a paginated list of items.
  1022. Args:
  1023. path (str): The path of the request URL.
  1024. params (Optional[dict]): The query parameters.
  1025. Yields:
  1026. The items in the paginated list.
  1027. """
  1028. params_ = params.copy() if params else {}
  1029. offset = params_.get("offset", 0)
  1030. params_["limit"] = params_.get("limit", 100)
  1031. while True:
  1032. params_["offset"] = offset
  1033. response = self.request_with_retries(
  1034. "GET",
  1035. path,
  1036. params=params_,
  1037. )
  1038. items = response.json()
  1039. if not items:
  1040. break
  1041. yield from items
  1042. if len(items) < params_["limit"]:
  1043. # offset and limit isn't respected if we're
  1044. # querying for specific values
  1045. break
  1046. offset += len(items)
  1047. def _get_cursor_paginated_list(
  1048. self,
  1049. path: str,
  1050. *,
  1051. body: Optional[dict] = None,
  1052. request_method: Literal["GET", "POST"] = "POST",
  1053. data_key: str = "runs",
  1054. ) -> Iterator[dict]:
  1055. """Get a cursor paginated list of items.
  1056. Args:
  1057. path (str): The path of the request URL.
  1058. body (Optional[dict]): The query body.
  1059. request_method (Literal["GET", "POST"], default="POST"): The HTTP request method.
  1060. data_key (str, default="runs"): The key in the response body that contains the items.
  1061. Yields:
  1062. The items in the paginated list.
  1063. """
  1064. params_ = body.copy() if body else {}
  1065. while True:
  1066. response = self.request_with_retries(
  1067. request_method,
  1068. path,
  1069. request_kwargs={
  1070. "data": _dumps_json(params_),
  1071. },
  1072. )
  1073. response_body = response.json()
  1074. if not response_body:
  1075. break
  1076. if not response_body.get(data_key):
  1077. break
  1078. yield from response_body[data_key]
  1079. cursors = response_body.get("cursors")
  1080. if not cursors:
  1081. break
  1082. if not cursors.get("next"):
  1083. break
  1084. params_["cursor"] = cursors["next"]
  1085. def upload_dataframe(
  1086. self,
  1087. df: pd.DataFrame,
  1088. name: str,
  1089. input_keys: Sequence[str],
  1090. output_keys: Sequence[str],
  1091. *,
  1092. description: Optional[str] = None,
  1093. data_type: Optional[ls_schemas.DataType] = ls_schemas.DataType.kv,
  1094. ) -> ls_schemas.Dataset:
  1095. """Upload a dataframe as individual examples to the LangSmith API.
  1096. Args:
  1097. df (pd.DataFrame): The dataframe to upload.
  1098. name (str): The name of the dataset.
  1099. input_keys (Sequence[str]): The input keys.
  1100. output_keys (Sequence[str]): The output keys.
  1101. description (Optional[str]): The description of the dataset.
  1102. data_type (Optional[DataType]): The data type of the dataset.
  1103. Returns:
  1104. Dataset: The uploaded dataset.
  1105. Raises:
  1106. ValueError: If the `csv_file` is not a `str` or `tuple`.
  1107. Example:
  1108. ```python
  1109. from langsmith import Client
  1110. import os
  1111. import pandas as pd
  1112. client = Client()
  1113. df = pd.read_parquet("path/to/your/myfile.parquet")
  1114. input_keys = ["column1", "column2"] # replace with your input column names
  1115. output_keys = ["output1", "output2"] # replace with your output column names
  1116. dataset = client.upload_dataframe(
  1117. df=df,
  1118. input_keys=input_keys,
  1119. output_keys=output_keys,
  1120. name="My Parquet Dataset",
  1121. description="Dataset created from a parquet file",
  1122. data_type="kv", # The default
  1123. )
  1124. ```
  1125. """
  1126. csv_file = io.BytesIO()
  1127. df.to_csv(csv_file, index=False)
  1128. csv_file.seek(0)
  1129. return self.upload_csv(
  1130. ("data.csv", csv_file),
  1131. input_keys=input_keys,
  1132. output_keys=output_keys,
  1133. description=description,
  1134. name=name,
  1135. data_type=data_type,
  1136. )
  1137. def upload_csv(
  1138. self,
  1139. csv_file: Union[str, tuple[str, io.BytesIO]],
  1140. input_keys: Sequence[str],
  1141. output_keys: Sequence[str],
  1142. *,
  1143. name: Optional[str] = None,
  1144. description: Optional[str] = None,
  1145. data_type: Optional[ls_schemas.DataType] = ls_schemas.DataType.kv,
  1146. ) -> ls_schemas.Dataset:
  1147. """Upload a CSV file to the LangSmith API.
  1148. Args:
  1149. csv_file (Union[str, Tuple[str, io.BytesIO]]): The CSV file to upload.
  1150. If a string, it should be the path.
  1151. If a tuple, it should be a tuple containing the filename
  1152. and a `BytesIO` object.
  1153. input_keys (Sequence[str]): The input keys.
  1154. output_keys (Sequence[str]): The output keys.
  1155. name (Optional[str]): The name of the dataset.
  1156. description (Optional[str]): The description of the dataset.
  1157. data_type (Optional[ls_schemas.DataType]): The data type of the dataset.
  1158. Returns:
  1159. Dataset: The uploaded dataset.
  1160. Raises:
  1161. ValueError: If the `csv_file` is not a string or tuple.
  1162. Example:
  1163. ```python
  1164. from langsmith import Client
  1165. import os
  1166. client = Client()
  1167. csv_file = "path/to/your/myfile.csv"
  1168. input_keys = ["column1", "column2"] # replace with your input column names
  1169. output_keys = ["output1", "output2"] # replace with your output column names
  1170. dataset = client.upload_csv(
  1171. csv_file=csv_file,
  1172. input_keys=input_keys,
  1173. output_keys=output_keys,
  1174. name="My CSV Dataset",
  1175. description="Dataset created from a CSV file",
  1176. data_type="kv", # The default
  1177. )
  1178. ```
  1179. """
  1180. data = {
  1181. "input_keys": input_keys,
  1182. "output_keys": output_keys,
  1183. }
  1184. if name:
  1185. data["name"] = name
  1186. if description:
  1187. data["description"] = description
  1188. if data_type:
  1189. data["data_type"] = ls_utils.get_enum_value(data_type)
  1190. data["id"] = str(uuid.uuid4())
  1191. if isinstance(csv_file, str):
  1192. with open(csv_file, "rb") as f:
  1193. file_ = {"file": f}
  1194. response = self.request_with_retries(
  1195. "POST",
  1196. "/datasets/upload",
  1197. data=data,
  1198. files=file_,
  1199. )
  1200. elif isinstance(csv_file, tuple):
  1201. response = self.request_with_retries(
  1202. "POST",
  1203. "/datasets/upload",
  1204. data=data,
  1205. files={"file": csv_file},
  1206. )
  1207. else:
  1208. raise ValueError("csv_file must be a string or tuple")
  1209. ls_utils.raise_for_status_with_text(response)
  1210. result = response.json()
  1211. # TODO: Make this more robust server-side
  1212. if "detail" in result and "already exists" in result["detail"]:
  1213. file_name = csv_file if isinstance(csv_file, str) else csv_file[0]
  1214. file_name = file_name.split("/")[-1]
  1215. raise ValueError(f"Dataset {file_name} already exists")
  1216. return ls_schemas.Dataset(
  1217. **result,
  1218. _host_url=self._host_url,
  1219. _tenant_id=self._get_optional_tenant_id(),
  1220. )
  1221. def _run_transform(
  1222. self,
  1223. run: Union[ls_schemas.Run, dict, ls_schemas.RunLikeDict],
  1224. update: bool = False,
  1225. copy: bool = False,
  1226. ) -> dict:
  1227. """Transform the given run object into a dictionary representation.
  1228. Args:
  1229. run (Union[ls_schemas.Run, dict]): The run object to transform.
  1230. update (Optional[bool]): Whether the payload is for an "update" event.
  1231. copy (Optional[bool]): Whether to deepcopy run inputs/outputs.
  1232. Returns:
  1233. dict: The transformed run object as a dictionary.
  1234. """
  1235. if hasattr(run, "dict") and callable(getattr(run, "dict")):
  1236. run_create: dict = run.dict() # type: ignore
  1237. else:
  1238. run_create = cast(dict, run)
  1239. if "id" not in run_create:
  1240. run_create["id"] = uuid.uuid4()
  1241. elif isinstance(run_create["id"], str):
  1242. run_create["id"] = uuid.UUID(run_create["id"])
  1243. if "inputs" in run_create and run_create["inputs"] is not None:
  1244. if copy:
  1245. run_create["inputs"] = ls_utils.deepish_copy(run_create["inputs"])
  1246. run_create["inputs"] = self._hide_run_inputs(run_create["inputs"])
  1247. if "outputs" in run_create and run_create["outputs"] is not None:
  1248. if copy:
  1249. run_create["outputs"] = ls_utils.deepish_copy(run_create["outputs"])
  1250. run_create["outputs"] = self._hide_run_outputs(run_create["outputs"])
  1251. # Hide metadata in extra if present
  1252. if "extra" in run_create and isinstance(run_create["extra"], dict):
  1253. extra = run_create["extra"]
  1254. if "metadata" in extra and extra["metadata"] is not None:
  1255. if copy:
  1256. extra["metadata"] = ls_utils.deepish_copy(extra["metadata"])
  1257. extra["metadata"] = self._hide_run_metadata(extra["metadata"])
  1258. if not update and not run_create.get("start_time"):
  1259. run_create["start_time"] = datetime.datetime.now(datetime.timezone.utc)
  1260. # Only retain LLM & Prompt manifests
  1261. if "serialized" in run_create:
  1262. if run_create.get("run_type") not in ("llm", "prompt"):
  1263. # Drop completely
  1264. run_create.pop("serialized", None)
  1265. elif run_create.get("serialized"):
  1266. # Drop graph
  1267. run_create["serialized"].pop("graph", None)
  1268. return run_create
  1269. def _insert_runtime_env(self, runs: Sequence[dict]) -> None:
  1270. if self._omit_traced_runtime_info:
  1271. return
  1272. runtime_env = ls_env.get_runtime_environment()
  1273. for run_create in runs:
  1274. run_extra = cast(dict, run_create.setdefault("extra", {}))
  1275. # update runtime
  1276. runtime: dict = run_extra.setdefault("runtime", {})
  1277. run_extra["runtime"] = {**runtime_env, **runtime}
  1278. # update metadata
  1279. metadata: dict = run_extra.setdefault("metadata", {})
  1280. langchain_metadata = ls_env.get_langchain_env_var_metadata()
  1281. metadata.update(
  1282. {k: v for k, v in langchain_metadata.items() if k not in metadata}
  1283. )
  1284. def _should_sample(self) -> bool:
  1285. if self.tracing_sample_rate is None:
  1286. return True
  1287. return random.random() < self.tracing_sample_rate
  1288. def _filter_for_sampling(
  1289. self, runs: Iterable[dict], *, patch: bool = False
  1290. ) -> list[dict]:
  1291. if self.tracing_sample_rate is None:
  1292. return list(runs)
  1293. if patch:
  1294. sampled = []
  1295. for run in runs:
  1296. trace_id = _as_uuid(run["trace_id"])
  1297. if trace_id not in self._filtered_post_uuids:
  1298. sampled.append(run)
  1299. elif run["id"] == trace_id:
  1300. self._filtered_post_uuids.remove(trace_id)
  1301. return sampled
  1302. else:
  1303. sampled = []
  1304. for run in runs:
  1305. trace_id = run.get("trace_id") or run["id"]
  1306. # If we've already made a decision about this trace, follow it
  1307. if trace_id in self._filtered_post_uuids:
  1308. continue
  1309. # For new traces, apply sampling
  1310. if run["id"] == trace_id:
  1311. if self._should_sample():
  1312. sampled.append(run)
  1313. else:
  1314. self._filtered_post_uuids.add(trace_id)
  1315. else:
  1316. # Child runs follow their trace's sampling decision
  1317. sampled.append(run)
  1318. return sampled
  1319. def create_run(
  1320. self,
  1321. name: str,
  1322. inputs: dict[str, Any],
  1323. run_type: RUN_TYPE_T,
  1324. *,
  1325. project_name: Optional[str] = None,
  1326. revision_id: Optional[str] = None,
  1327. dangerously_allow_filesystem: bool = False,
  1328. api_key: Optional[str] = None,
  1329. api_url: Optional[str] = None,
  1330. **kwargs: Any,
  1331. ) -> None:
  1332. """Persist a run to the LangSmith API.
  1333. Args:
  1334. name (str): The name of the run.
  1335. inputs (Dict[str, Any]): The input values for the run.
  1336. run_type (str): The type of the run, such as tool, chain, llm, retriever,
  1337. embedding, prompt, or parser.
  1338. project_name (Optional[str]): The project name of the run.
  1339. revision_id (Optional[Union[UUID, str]]): The revision ID of the run.
  1340. api_key (Optional[str]): The API key to use for this specific run.
  1341. api_url (Optional[str]): The API URL to use for this specific run.
  1342. **kwargs (Any): Additional keyword arguments.
  1343. Returns:
  1344. None
  1345. Raises:
  1346. LangSmithUserError: If the API key is not provided when using the hosted service.
  1347. Example:
  1348. ```python
  1349. from langsmith import Client
  1350. import datetime
  1351. from uuid import uuid4
  1352. client = Client()
  1353. run_id = uuid4()
  1354. client.create_run(
  1355. id=run_id,
  1356. project_name=project_name,
  1357. name="test_run",
  1358. run_type="llm",
  1359. inputs={"prompt": "hello world"},
  1360. outputs={"generation": "hi there"},
  1361. start_time=datetime.datetime.now(datetime.timezone.utc),
  1362. end_time=datetime.datetime.now(datetime.timezone.utc),
  1363. hide_inputs=True,
  1364. hide_outputs=True,
  1365. )
  1366. ```
  1367. """
  1368. project_name = project_name or kwargs.pop(
  1369. "session_name",
  1370. # if the project is not provided, use the environment's project
  1371. ls_utils.get_tracer_project(),
  1372. )
  1373. run_create = {
  1374. **kwargs,
  1375. "session_name": project_name,
  1376. "name": name,
  1377. "inputs": inputs,
  1378. "run_type": run_type,
  1379. }
  1380. if not self._filter_for_sampling([run_create]):
  1381. return
  1382. if revision_id is not None:
  1383. run_create["extra"]["metadata"]["revision_id"] = revision_id
  1384. run_create = self._run_transform(run_create, copy=False)
  1385. self._insert_runtime_env([run_create])
  1386. if run_create.get("attachments") is not None:
  1387. for attachment in run_create["attachments"].values():
  1388. if (
  1389. isinstance(attachment, tuple)
  1390. and isinstance(attachment[1], Path)
  1391. and not dangerously_allow_filesystem
  1392. ):
  1393. raise ValueError(
  1394. "Must set dangerously_allow_filesystem=True to allow passing in Paths for attachments."
  1395. )
  1396. # If process_buffered_run_ops is enabled, collect run ops in batches
  1397. # before batching
  1398. if self._process_buffered_run_ops and not kwargs.get("is_run_ops_buffer_flush"):
  1399. with self._run_ops_buffer_lock:
  1400. self._run_ops_buffer.append(("post", run_create))
  1401. # Process batch when we have enough runs or enough time has passed
  1402. if self._should_flush_run_ops_buffer():
  1403. self._flush_run_ops_buffer()
  1404. return
  1405. else:
  1406. self._create_run(run_create, api_key=api_key, api_url=api_url)
    def _create_run(
        self,
        run_create: dict,
        *,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
    ) -> None:
        """Dispatch a "post" run op to the configured ingestion path.

        Preference order: PyO3 (Rust) client, compressed multipart buffer,
        Python tracing queue, then the non-batch direct POST fallback.
        """
        if (
            # batch ingest requires trace_id and dotted_order to be set
            run_create.get("trace_id") is not None
            and run_create.get("dotted_order") is not None
        ):
            if self._pyo3_client is not None:
                # Rust client handles queueing/batching itself.
                self._pyo3_client.create_run(run_create)
            elif (
                # Compression path only applies to the default endpoint —
                # per-run api_key/api_url overrides must bypass it.
                self.compressed_traces is not None
                and api_key is None
                and api_url is None
            ):
                if self._data_available_event is None:
                    raise ValueError(
                        "Run compression is enabled but threading event is not configured"
                    )
                serialized_op = serialize_run_dict("post", run_create)
                (
                    multipart_form,
                    opened_files,
                ) = serialized_run_operation_to_multipart_parts_and_context(
                    serialized_op
                )
                # Level 5 is below DEBUG; used for very chatty trace logging.
                logger.log(
                    5,
                    "Adding compressed multipart to queue with context: %s",
                    multipart_form.context,
                )
                with self.compressed_traces.lock:
                    enqueued = compress_multipart_parts_and_context(
                        multipart_form,
                        self.compressed_traces,
                        _BOUNDARY,
                    )
                    if enqueued:
                        self.compressed_traces.trace_count += 1
                # Wake the background sender thread.
                self._data_available_event.set()
                # Attachment file handles were opened during serialization.
                _close_files(list(opened_files.values()))
            elif self.tracing_queue is not None:
                serialized_op = serialize_run_dict("post", run_create)
                logger.log(
                    5,
                    "Adding to tracing queue: trace_id=%s, run_id=%s",
                    serialized_op.trace_id,
                    serialized_op.id,
                )
                if self.otel_exporter is not None:
                    # Capture the current OTEL span context so the background
                    # worker can parent the exported span correctly.
                    self.tracing_queue.put(
                        TracingQueueItem(
                            run_create["dotted_order"],
                            serialized_op,
                            api_key=api_key,
                            api_url=api_url,
                            otel_context=self._set_span_in_context(
                                self._otel_trace.get_current_span()
                            ),
                        )
                    )
                else:
                    self.tracing_queue.put(
                        TracingQueueItem(
                            run_create["dotted_order"],
                            serialized_op,
                            api_key=api_key,
                            api_url=api_url,
                        )
                    )
            else:
                # Neither Rust nor Python batch ingestion is configured,
                # fall back to the non-batch approach.
                self._create_run_non_batch(run_create, api_key=api_key, api_url=api_url)
        else:
            self._create_run_non_batch(run_create, api_key=api_key, api_url=api_url)
  1487. def _create_run_non_batch(
  1488. self,
  1489. run_create: dict,
  1490. *,
  1491. api_key: Optional[str] = None,
  1492. api_url: Optional[str] = None,
  1493. ):
  1494. errors = []
  1495. # If specific api_key/api_url provided, use those; otherwise use all configured endpoints
  1496. if api_key is not None or api_url is not None:
  1497. target_api_url = api_url or self.api_url
  1498. target_api_key = api_key or self.api_key
  1499. headers = {**self._headers, X_API_KEY: target_api_key}
  1500. try:
  1501. self.request_with_retries(
  1502. "POST",
  1503. f"{target_api_url}/runs",
  1504. request_kwargs={
  1505. "data": _dumps_json(run_create),
  1506. "headers": headers,
  1507. },
  1508. to_ignore=(ls_utils.LangSmithConflictError,),
  1509. )
  1510. except Exception as e:
  1511. errors.append(e)
  1512. else:
  1513. # Use all configured write API URLs
  1514. for write_api_url, write_api_key in self._write_api_urls.items():
  1515. headers = {**self._headers, X_API_KEY: write_api_key}
  1516. try:
  1517. self.request_with_retries(
  1518. "POST",
  1519. f"{write_api_url}/runs",
  1520. request_kwargs={
  1521. "data": _dumps_json(run_create),
  1522. "headers": headers,
  1523. },
  1524. to_ignore=(ls_utils.LangSmithConflictError,),
  1525. )
  1526. except Exception as e:
  1527. errors.append(e)
  1528. if errors:
  1529. # Invoke callback for the errors
  1530. if len(errors) > 1:
  1531. exception_group = ls_utils.LangSmithExceptionGroup(exceptions=errors)
  1532. self._invoke_tracing_error_callback(exception_group)
  1533. raise exception_group
  1534. else:
  1535. self._invoke_tracing_error_callback(errors[0])
  1536. raise errors[0]
  1537. def _hide_run_inputs(self, inputs: dict):
  1538. if self._hide_inputs is True:
  1539. return {}
  1540. if self._anonymizer:
  1541. json_inputs = _orjson.loads(_dumps_json(inputs))
  1542. return self._anonymizer(json_inputs)
  1543. if self._hide_inputs is False:
  1544. return inputs
  1545. return self._hide_inputs(inputs)
  1546. def _hide_run_outputs(self, outputs: dict):
  1547. if self._hide_outputs is True:
  1548. return {}
  1549. if self._anonymizer:
  1550. json_outputs = _orjson.loads(_dumps_json(outputs))
  1551. return self._anonymizer(json_outputs)
  1552. if self._hide_outputs is False:
  1553. return outputs
  1554. return self._hide_outputs(outputs)
  1555. def _hide_run_metadata(self, metadata: dict) -> dict:
  1556. if self._hide_metadata is True:
  1557. return {}
  1558. if self._hide_metadata is False:
  1559. return metadata
  1560. return self._hide_metadata(metadata)
  1561. def _should_flush_run_ops_buffer(self) -> bool:
  1562. """Check if the run ops buffer should be flushed based on size or time."""
  1563. if not self._run_ops_buffer:
  1564. return False
  1565. # Check size-based flushing
  1566. if (
  1567. self._run_ops_buffer_size is not None
  1568. and len(self._run_ops_buffer) >= self._run_ops_buffer_size
  1569. ):
  1570. return True
  1571. # Check time-based flushing
  1572. if self._run_ops_buffer_timeout_ms is not None:
  1573. time_since_last_flush = time.time() - self._run_ops_buffer_last_flush_time
  1574. if time_since_last_flush >= (self._run_ops_buffer_timeout_ms / 1000):
  1575. return True
  1576. return False
  1577. def _flush_run_ops_buffer(self) -> None:
  1578. """Process and flush run ops buffer in a background thread."""
  1579. if not self._run_ops_buffer:
  1580. return
  1581. # Copy the buffer contents and clear it immediately to avoid blocking
  1582. batch_to_process = list(self._run_ops_buffer)
  1583. self._run_ops_buffer.clear()
  1584. self._run_ops_buffer_last_flush_time = time.time()
  1585. # Submit the processing to processing thread pool
  1586. from langsmith._internal._background_thread import (
  1587. LANGSMITH_CLIENT_THREAD_POOL,
  1588. _process_buffered_run_ops_batch,
  1589. )
  1590. try:
  1591. future = LANGSMITH_CLIENT_THREAD_POOL.submit(
  1592. _process_buffered_run_ops_batch, self, batch_to_process
  1593. )
  1594. # Track the future if we have a futures set
  1595. if self._futures is not None:
  1596. self._futures.add(future)
  1597. except RuntimeError:
  1598. # Thread pool is shut down, process synchronously as fallback
  1599. _process_buffered_run_ops_batch(self, batch_to_process)
    def _batch_ingest_run_ops(
        self,
        ops: list[SerializedRunOperation],
        *,
        api_url: Optional[str] = None,
        api_key: Optional[str] = None,
    ) -> None:
        """Build JSON bodies from run operations and POST them in size-capped batches.

        Args:
            ops: Serialized run operations ("post" or "patch"). Feedback
                operations and unknown types are logged and skipped.
            api_url: Optional API URL overriding the configured endpoints.
            api_key: Optional API key overriding the configured endpoints.
        """
        ids_and_partial_body: dict[
            Literal["post", "patch"], list[tuple[str, bytes]]
        ] = {
            "post": [],
            "patch": [],
        }
        # form the partial body and ids
        for op in ops:
            if isinstance(op, SerializedRunOperation):
                # Start from the baseline fields, then graft the already
                # serialized sub-payloads in as raw JSON fragments so they
                # are not re-serialized.
                curr_dict = _orjson.loads(op._none)
                if op.inputs:
                    curr_dict["inputs"] = _orjson.Fragment(op.inputs)
                if op.outputs:
                    curr_dict["outputs"] = _orjson.Fragment(op.outputs)
                if op.events:
                    curr_dict["events"] = _orjson.Fragment(op.events)
                if op.extra:
                    curr_dict["extra"] = _orjson.Fragment(op.extra)
                if op.error:
                    curr_dict["error"] = _orjson.Fragment(op.error)
                if op.serialized:
                    curr_dict["serialized"] = _orjson.Fragment(op.serialized)
                if op.attachments:
                    # Attachments require the multipart endpoint; dropped here.
                    logger.warning(
                        "Attachments are not supported when use_multipart_endpoint "
                        "is False"
                    )
                ids_and_partial_body[op.operation].append(
                    (f"trace={op.trace_id},id={op.id}", _orjson.dumps(curr_dict))
                )
            elif isinstance(op, SerializedFeedbackOperation):
                logger.warning(
                    "Feedback operations are not supported in non-multipart mode"
                )
            else:
                logger.error("Unknown item type in tracing queue: %s", type(op))
        # send the requests in batches
        info = self.info
        # Per-request byte budget: explicit client setting, then the server's
        # batch ingest config, then the module default.
        size_limit_bytes = (
            self._max_batch_size_bytes
            or (info.batch_ingest_config or {}).get("size_limit_bytes")
            or _SIZE_LIMIT_BYTES
        )
        body_chunks: collections.defaultdict[str, list] = collections.defaultdict(list)
        context_ids: collections.defaultdict[str, list] = collections.defaultdict(list)
        # body_size accumulates across both "post" and "patch" so a single
        # request never exceeds the limit even when it mixes both kinds.
        body_size = 0
        for key in cast(list[Literal["post", "patch"]], ["post", "patch"]):
            body_deque = collections.deque(ids_and_partial_body[key])
            while body_deque:
                # Flush the accumulated chunk before this item would push the
                # request body past the size limit.
                if (
                    body_size > 0
                    and body_size + len(body_deque[0][1]) > size_limit_bytes
                ):
                    self._post_batch_ingest_runs(
                        _orjson.dumps(body_chunks),
                        _context=f"\n{key}: {'; '.join(context_ids[key])}",
                        api_url=api_url,
                        api_key=api_key,
                    )
                    body_size = 0
                    body_chunks.clear()
                    context_ids.clear()
                curr_id, curr_body = body_deque.popleft()
                body_size += len(curr_body)
                body_chunks[key].append(_orjson.Fragment(curr_body))
                context_ids[key].append(curr_id)
        # Flush whatever remains after both operation types are consumed.
        if body_size:
            context = "; ".join(f"{k}: {'; '.join(v)}" for k, v in context_ids.items())
            self._post_batch_ingest_runs(
                _orjson.dumps(body_chunks),
                _context="\n" + context,
                api_url=api_url,
                api_key=api_key,
            )
    def batch_ingest_runs(
        self,
        create: Optional[
            Sequence[Union[ls_schemas.Run, ls_schemas.RunLikeDict, dict]]
        ] = None,
        update: Optional[
            Sequence[Union[ls_schemas.Run, ls_schemas.RunLikeDict, dict]]
        ] = None,
        *,
        pre_sampled: bool = False,
    ) -> None:
        """Batch ingest/upsert multiple runs in the Langsmith system.

        Args:
            create (Optional[Sequence[Union[Run, RunLikeDict]]]):
                A sequence of `Run` objects or equivalent dictionaries representing
                runs to be created / posted.
            update (Optional[Sequence[Union[Run, RunLikeDict]]]):
                A sequence of `Run` objects or equivalent dictionaries representing
                runs that have already been created and should be updated / patched.
            pre_sampled (bool, default=False): Whether the runs have already been subject
                to sampling, and therefore should not be sampled again.

        Raises:
            LangsmithAPIError: If there is an error in the API request.
            LangSmithUserError: If a run is missing `trace_id` or `dotted_order`.

        Returns:
            None

        !!! note
            The run objects MUST contain the `dotted_order` and `trace_id` fields
            to be accepted by the API.

        Example:
            ```python
            import datetime
            from uuid import uuid4

            from langsmith import Client

            client = Client()
            _session = "__test_batch_ingest_runs"
            trace_id = uuid4()
            trace_id_2 = uuid4()
            run_id_2 = uuid4()
            current_time = datetime.datetime.now(datetime.timezone.utc).strftime(
                "%Y%m%dT%H%M%S%fZ"
            )
            later_time = (
                datetime.datetime.now(datetime.timezone.utc)
                + datetime.timedelta(seconds=1)
            ).strftime("%Y%m%dT%H%M%S%fZ")
            runs_to_create = [
                {
                    "id": str(trace_id),
                    "session_name": _session,
                    "name": "run 1",
                    "run_type": "chain",
                    "dotted_order": f"{current_time}{str(trace_id)}",
                    "trace_id": str(trace_id),
                    "inputs": {"input1": 1, "input2": 2},
                    "outputs": {"output1": 3, "output2": 4},
                },
                {
                    "id": str(trace_id_2),
                    "session_name": _session,
                    "name": "run 3",
                    "run_type": "chain",
                    "dotted_order": f"{current_time}{str(trace_id_2)}",
                    "trace_id": str(trace_id_2),
                    "inputs": {"input1": 1, "input2": 2},
                    "error": "error",
                },
                {
                    "id": str(run_id_2),
                    "session_name": _session,
                    "name": "run 2",
                    "run_type": "chain",
                    "dotted_order": f"{current_time}{str(trace_id)}."
                    f"{later_time}{str(run_id_2)}",
                    "trace_id": str(trace_id),
                    "parent_run_id": str(trace_id),
                    "inputs": {"input1": 5, "input2": 6},
                },
            ]
            runs_to_update = [
                {
                    "id": str(run_id_2),
                    "dotted_order": f"{current_time}{str(trace_id)}."
                    f"{later_time}{str(run_id_2)}",
                    "trace_id": str(trace_id),
                    "parent_run_id": str(trace_id),
                    "outputs": {"output1": 4, "output2": 5},
                },
            ]
            client.batch_ingest_runs(create=runs_to_create, update=runs_to_update)
            ```
        """
        if not create and not update:
            return
        # transform and convert to dicts
        create_dicts = [
            self._run_transform(run, copy=False) for run in create or EMPTY_SEQ
        ]
        update_dicts = [
            self._run_transform(run, update=True, copy=False)
            for run in update or EMPTY_SEQ
        ]
        # The batch endpoint cannot accept runs without these fields.
        for run in create_dicts:
            if not run.get("trace_id") or not run.get("dotted_order"):
                raise ls_utils.LangSmithUserError(
                    "Batch ingest requires trace_id and dotted_order to be set."
                )
        for run in update_dicts:
            if not run.get("trace_id") or not run.get("dotted_order"):
                raise ls_utils.LangSmithUserError(
                    "Batch ingest requires trace_id and dotted_order to be set."
                )
        # filter out runs that are not sampled
        if not pre_sampled:
            create_dicts = self._filter_for_sampling(create_dicts)
            update_dicts = self._filter_for_sampling(update_dicts, patch=True)
        if not create_dicts and not update_dicts:
            return
        # Apply process_buffered_run_ops function if provided
        if self._process_buffered_run_ops:
            if create_dicts:
                create_dicts = list(self._process_buffered_run_ops(create_dicts))
            if update_dicts:
                update_dicts = list(self._process_buffered_run_ops(update_dicts))
        self._insert_runtime_env(create_dicts + update_dicts)
        # convert to serialized ops
        serialized_ops = cast(
            list[SerializedRunOperation],
            combine_serialized_queue_operations(
                list(
                    itertools.chain(
                        (serialize_run_dict("post", run) for run in create_dicts),
                        (serialize_run_dict("patch", run) for run in update_dicts),
                    )
                )
            ),
        )
        self._batch_ingest_run_ops(serialized_ops)
  1817. def _post_batch_ingest_runs(
  1818. self,
  1819. body: bytes,
  1820. *,
  1821. _context: str,
  1822. api_url: Optional[str] = None,
  1823. api_key: Optional[str] = None,
  1824. ):
  1825. # Use provided endpoint or fall back to all configured endpoints
  1826. endpoints: Mapping[str, Optional[str]]
  1827. if api_url is not None and api_key is not None:
  1828. endpoints = {api_url: api_key}
  1829. else:
  1830. endpoints = self._write_api_urls
  1831. for target_api_url, target_api_key in endpoints.items():
  1832. try:
  1833. logger.debug(
  1834. f"Sending batch ingest request to {target_api_url} with context: {_context}"
  1835. )
  1836. self.request_with_retries(
  1837. "POST",
  1838. f"{target_api_url}/runs/batch",
  1839. request_kwargs={
  1840. "data": body,
  1841. "headers": {
  1842. **self._headers,
  1843. X_API_KEY: target_api_key,
  1844. },
  1845. },
  1846. to_ignore=(ls_utils.LangSmithConflictError,),
  1847. stop_after_attempt=3,
  1848. _context=_context,
  1849. )
  1850. except Exception as e:
  1851. try:
  1852. exc_desc_lines = traceback.format_exception_only(type(e), e)
  1853. exc_desc = "".join(exc_desc_lines).rstrip()
  1854. logger.warning(f"Failed to batch ingest runs: {exc_desc}")
  1855. except Exception:
  1856. logger.warning(f"Failed to batch ingest runs: {repr(e)}")
  1857. self._invoke_tracing_error_callback(e)
  1858. def _multipart_ingest_ops(
  1859. self,
  1860. ops: list[Union[SerializedRunOperation, SerializedFeedbackOperation]],
  1861. *,
  1862. api_url: Optional[str] = None,
  1863. api_key: Optional[str] = None,
  1864. ) -> None:
  1865. parts: list[MultipartPartsAndContext] = []
  1866. opened_files_dict: dict[str, io.BufferedReader] = {}
  1867. for op in ops:
  1868. if isinstance(op, SerializedRunOperation):
  1869. (
  1870. part,
  1871. opened_files,
  1872. ) = serialized_run_operation_to_multipart_parts_and_context(op)
  1873. parts.append(part)
  1874. opened_files_dict.update(opened_files)
  1875. elif isinstance(op, SerializedFeedbackOperation):
  1876. parts.append(
  1877. serialized_feedback_operation_to_multipart_parts_and_context(op)
  1878. )
  1879. else:
  1880. logger.error("Unknown operation type in tracing queue: %s", type(op))
  1881. acc_multipart = join_multipart_parts_and_context(parts)
  1882. if acc_multipart:
  1883. try:
  1884. self._send_multipart_req(
  1885. acc_multipart, api_url=api_url, api_key=api_key
  1886. )
  1887. finally:
  1888. _close_files(list(opened_files_dict.values()))
    def multipart_ingest(
        self,
        create: Optional[
            Sequence[Union[ls_schemas.Run, ls_schemas.RunLikeDict, dict]]
        ] = None,
        update: Optional[
            Sequence[Union[ls_schemas.Run, ls_schemas.RunLikeDict, dict]]
        ] = None,
        *,
        pre_sampled: bool = False,
        dangerously_allow_filesystem: bool = False,
    ) -> None:
        """Batch ingest/upsert multiple runs in the Langsmith system.

        Args:
            create (Optional[Sequence[Union[ls_schemas.Run, RunLikeDict]]]):
                A sequence of `Run` objects or equivalent dictionaries representing
                runs to be created / posted.
            update (Optional[Sequence[Union[ls_schemas.Run, RunLikeDict]]]):
                A sequence of `Run` objects or equivalent dictionaries representing
                runs that have already been created and should be updated / patched.
            pre_sampled (bool, default=False): Whether the runs have already been subject
                to sampling, and therefore should not be sampled again.
            dangerously_allow_filesystem (bool, default=False): Whether to allow
                filesystem `Path` values in attachments. If False, a `Path`
                attachment raises `ValueError`.

        Raises:
            LangsmithAPIError: If there is an error in the API request.
            LangSmithUserError: If a run is missing `trace_id` or `dotted_order`.
            ValueError: If a `Path` attachment is supplied without
                `dangerously_allow_filesystem=True`.

        !!! note
            The run objects MUST contain the `dotted_order` and `trace_id` fields
            to be accepted by the API.

        Example:
            ```python
            import datetime
            from uuid import uuid4

            from langsmith import Client

            client = Client()
            _session = "__test_batch_ingest_runs"
            trace_id = uuid4()
            trace_id_2 = uuid4()
            run_id_2 = uuid4()
            current_time = datetime.datetime.now(datetime.timezone.utc).strftime(
                "%Y%m%dT%H%M%S%fZ"
            )
            later_time = (
                datetime.datetime.now(datetime.timezone.utc)
                + datetime.timedelta(seconds=1)
            ).strftime("%Y%m%dT%H%M%S%fZ")
            runs_to_create = [
                {
                    "id": str(trace_id),
                    "session_name": _session,
                    "name": "run 1",
                    "run_type": "chain",
                    "dotted_order": f"{current_time}{str(trace_id)}",
                    "trace_id": str(trace_id),
                    "inputs": {"input1": 1, "input2": 2},
                    "outputs": {"output1": 3, "output2": 4},
                },
                {
                    "id": str(trace_id_2),
                    "session_name": _session,
                    "name": "run 3",
                    "run_type": "chain",
                    "dotted_order": f"{current_time}{str(trace_id_2)}",
                    "trace_id": str(trace_id_2),
                    "inputs": {"input1": 1, "input2": 2},
                    "error": "error",
                },
                {
                    "id": str(run_id_2),
                    "session_name": _session,
                    "name": "run 2",
                    "run_type": "chain",
                    "dotted_order": f"{current_time}{str(trace_id)}."
                    f"{later_time}{str(run_id_2)}",
                    "trace_id": str(trace_id),
                    "parent_run_id": str(trace_id),
                    "inputs": {"input1": 5, "input2": 6},
                },
            ]
            runs_to_update = [
                {
                    "id": str(run_id_2),
                    "dotted_order": f"{current_time}{str(trace_id)}."
                    f"{later_time}{str(run_id_2)}",
                    "trace_id": str(trace_id),
                    "parent_run_id": str(trace_id),
                    "outputs": {"output1": 4, "output2": 5},
                },
            ]
            client.multipart_ingest(create=runs_to_create, update=runs_to_update)
            ```
        """
        if not (create or update):
            return
        # transform and convert to dicts
        create_dicts = [self._run_transform(run) for run in create or EMPTY_SEQ]
        update_dicts = [
            self._run_transform(run, update=True) for run in update or EMPTY_SEQ
        ]
        # require trace_id and dotted_order
        if create_dicts:
            for run in create_dicts:
                if not run.get("trace_id") or not run.get("dotted_order"):
                    raise ls_utils.LangSmithUserError(
                        "Multipart ingest requires trace_id and dotted_order"
                        " to be set in create dicts."
                    )
            else:
                # for-else: loop finished without raising; drop the loop var.
                del run
        if update_dicts:
            for run in update_dicts:
                if not run.get("trace_id") or not run.get("dotted_order"):
                    raise ls_utils.LangSmithUserError(
                        "Multipart ingest requires trace_id and dotted_order"
                        " to be set in update dicts."
                    )
            else:
                # for-else: loop finished without raising; drop the loop var.
                del run
        # combine post and patch dicts where possible
        if update_dicts and create_dicts:
            create_by_id = {run["id"]: run for run in create_dicts}
            standalone_updates: list[dict] = []
            for run in update_dicts:
                if run["id"] in create_by_id:
                    # Fold non-None patch fields into the pending create.
                    for k, v in run.items():
                        if v is not None:
                            create_by_id[run["id"]][k] = v
                else:
                    standalone_updates.append(run)
            else:
                del run
            update_dicts = standalone_updates
        # filter out runs that are not sampled
        if not pre_sampled:
            create_dicts = self._filter_for_sampling(create_dicts)
            update_dicts = self._filter_for_sampling(update_dicts, patch=True)
        if not create_dicts and not update_dicts:
            return
        # insert runtime environment
        self._insert_runtime_env(create_dicts)
        self._insert_runtime_env(update_dicts)
        # format as serialized operations
        serialized_ops = combine_serialized_queue_operations(
            list(
                itertools.chain(
                    (serialize_run_dict("post", run) for run in create_dicts),
                    (serialize_run_dict("patch", run) for run in update_dicts),
                )
            )
        )
        # Guard against filesystem access unless explicitly allowed.
        for op in serialized_ops:
            if isinstance(op, SerializedRunOperation) and op.attachments:
                for attachment in op.attachments.values():
                    if (
                        isinstance(attachment, tuple)
                        and isinstance(attachment[1], Path)
                        and not dangerously_allow_filesystem
                    ):
                        raise ValueError(
                            "Must set dangerously_allow_filesystem=True to allow passing in Paths for attachments."
                        )
        # sent the runs in multipart requests
        self._multipart_ingest_ops(serialized_ops)
    def _send_multipart_req(
        self,
        acc: MultipartPartsAndContext,
        *,
        attempts: int = 3,
        api_url: Optional[str] = None,
        api_key: Optional[str] = None,
    ):
        """POST joined multipart parts to ``/runs/multipart``, with retries.

        Args:
            acc: Joined multipart parts plus a logging context string.
            attempts: Max tries per endpoint for retryable transport errors.
            api_url: Optional API URL overriding the configured endpoints.
            api_key: Optional API key overriding the configured endpoints.
        """
        parts = acc.parts
        _context = acc.context
        # Use provided endpoint or fall back to all configured endpoints
        if api_url is not None and api_key is not None:
            endpoints: Mapping[str, str | None] = {api_url: api_key}
        else:
            endpoints = self._write_api_urls
        for target_api_url, target_api_key in endpoints.items():
            for idx in range(1, attempts + 1):
                try:
                    encoder = rqtb_multipart.MultipartEncoder(parts, boundary=_BOUNDARY)
                    # Small payloads go as a fully materialized string; larger
                    # ones stream straight from the encoder.
                    if encoder.len <= 20_000_000:  # ~20 MB
                        data = encoder.to_string()
                    else:
                        data = encoder
                    logger.debug(
                        f"Sending multipart request to {target_api_url} with context: {_context}"
                    )
                    self.request_with_retries(
                        "POST",
                        f"{target_api_url}/runs/multipart",
                        request_kwargs={
                            "data": data,
                            "headers": {
                                **self._headers,
                                X_API_KEY: target_api_key,
                                "Content-Type": encoder.content_type,
                            },
                        },
                        # Retrying is handled by this loop, not the helper.
                        stop_after_attempt=1,
                        _context=_context,
                    )
                    break
                except ls_utils.LangSmithConflictError:
                    # Already ingested server-side; nothing to retry.
                    break
                except (
                    ls_utils.LangSmithConnectionError,
                    ls_utils.LangSmithRequestTimeout,
                    ls_utils.LangSmithAPIError,
                ) as exc:
                    # Transient transport/API errors: retry up to `attempts`.
                    if idx == attempts:
                        logger.warning(f"Failed to multipart ingest runs: {exc}")
                        self._invoke_tracing_error_callback(exc)
                    else:
                        continue
                except Exception as e:
                    # Unexpected errors: log best-effort and report via callback.
                    try:
                        exc_desc_lines = traceback.format_exception_only(type(e), e)
                        exc_desc = "".join(exc_desc_lines).rstrip()
                        logger.warning(f"Failed to multipart ingest runs: {exc_desc}")
                    except Exception:
                        logger.warning(f"Failed to multipart ingest runs: {repr(e)}")
                    self._invoke_tracing_error_callback(e)
                    # do not retry by default
                    break
    def _send_compressed_multipart_req(
        self,
        data_stream: io.BytesIO,
        compressed_traces_info: Optional[tuple[int, int]],
        *,
        attempts: int = 3,
    ):
        """Send a zstd-compressed multipart form data stream to the backend.

        Args:
            data_stream: In-memory compressed multipart body. May carry a
                ``context`` attribute (iterable of strings) used for logging.
            compressed_traces_info: Optional (pre-compression, post-compression)
                byte sizes, forwarded as diagnostic headers.
            attempts: Max tries per endpoint for retryable transport errors.
        """
        _context: str = "; ".join(getattr(data_stream, "context", []))
        for api_url, api_key in self._write_api_urls.items():
            # Rewind so each endpoint reads the full stream from the start.
            data_stream.seek(0)
            for idx in range(1, attempts + 1):
                try:
                    headers = {
                        **self._headers,
                        "X-API-KEY": api_key,
                        "Content-Type": f"multipart/form-data; boundary={_BOUNDARY}",
                        "Content-Encoding": "zstd",
                        "X-Pre-Compressed-Size": (
                            str(compressed_traces_info[0])
                            if compressed_traces_info
                            else ""
                        ),
                        "X-Post-Compressed-Size": (
                            str(compressed_traces_info[1])
                            if compressed_traces_info
                            else ""
                        ),
                    }
                    logger.debug(
                        f"Sending compressed multipart request with context: {_context}"
                    )
                    self.request_with_retries(
                        "POST",
                        f"{api_url}/runs/multipart",
                        request_kwargs={
                            "data": data_stream,
                            "headers": headers,
                        },
                        # Retrying is handled by this loop, not the helper.
                        stop_after_attempt=1,
                        _context=_context,
                    )
                    break
                except ls_utils.LangSmithConflictError:
                    # Already ingested server-side; nothing to retry.
                    break
                except (
                    ls_utils.LangSmithConnectionError,
                    ls_utils.LangSmithRequestTimeout,
                    ls_utils.LangSmithAPIError,
                ) as exc:
                    # Transient transport/API errors: retry up to `attempts`.
                    if idx == attempts:
                        logger.warning(
                            f"Failed to send compressed multipart ingest: {exc}"
                        )
                        self._invoke_tracing_error_callback(exc)
                    else:
                        continue
                except Exception as e:
                    # Unexpected errors: log best-effort and report via callback.
                    try:
                        exc_desc_lines = traceback.format_exception_only(type(e), e)
                        exc_desc = "".join(exc_desc_lines).rstrip()
                        logger.warning(
                            f"Failed to send compressed multipart ingest: {exc_desc}"
                        )
                    except Exception:
                        logger.warning(
                            f"Failed to send compressed multipart ingest: {repr(e)}"
                        )
                    self._invoke_tracing_error_callback(e)
                    # Do not retry by default after unknown exceptions
                    break
    def update_run(
        self,
        run_id: ID_TYPE,
        *,
        name: Optional[str] = None,
        run_type: Optional[RUN_TYPE_T] = None,
        start_time: Optional[datetime.datetime] = None,
        end_time: Optional[datetime.datetime] = None,
        error: Optional[str] = None,
        inputs: Optional[dict] = None,
        outputs: Optional[dict] = None,
        events: Optional[Sequence[dict]] = None,
        extra: Optional[dict] = None,
        tags: Optional[list[str]] = None,
        attachments: Optional[ls_schemas.Attachments] = None,
        dangerously_allow_filesystem: bool = False,
        reference_example_id: str | uuid.UUID | None = None,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Update a run in the LangSmith API.

        Args:
            run_id (Union[UUID, str]): The ID of the run to update.
            name (Optional[str]): The name of the run.
            run_type (Optional[str]): The type of the run (e.g., llm, chain, tool).
            start_time (Optional[datetime.datetime]): The start time of the run.
            end_time (Optional[datetime.datetime]): The end time of the run.
                Defaults to the current UTC time when not provided.
            error (Optional[str]): The error message of the run.
            inputs (Optional[Dict]): The input values for the run.
            outputs (Optional[Dict]): The output values for the run.
            events (Optional[Sequence[dict]]): The events for the run.
            extra (Optional[Dict]): The extra information for the run.
            tags (Optional[List[str]]): The tags for the run.
            attachments (Optional[Dict[str, Attachment]]): A dictionary of attachments to add to the run. The keys are the attachment names,
                and the values are Attachment objects containing the data and mime type.
            dangerously_allow_filesystem (bool): Whether to allow filesystem
                `Path` values in attachments; when False a `Path` attachment
                raises `ValueError`.
            reference_example_id (Optional[Union[str, uuid.UUID]]): ID of the example
                that was the source of the run inputs. Used for runs that were part of
                an experiment.
            api_key (Optional[str]): The API key to use for this specific run.
            api_url (Optional[str]): The API URL to use for this specific run.
            **kwargs (Any): `trace_id`, `parent_run_id`, `dotted_order`,
                `session_id`, and `session_name` are read from kwargs; the
                rest are ignored.

        Returns:
            None

        Raises:
            ValueError: If a `Path` attachment is supplied without
                `dangerously_allow_filesystem=True`.

        Examples:
            ```python
            from langsmith import Client
            import datetime
            from uuid import uuid4

            client = Client()
            project_name = "__test_update_run"
            start_time = datetime.datetime.now()
            revision_id = uuid4()
            run: dict = dict(
                id=uuid4(),
                name="test_run",
                run_type="llm",
                inputs={"text": "hello world"},
                project_name=project_name,
                api_url=os.getenv("LANGCHAIN_ENDPOINT"),
                start_time=start_time,
                extra={"extra": "extra"},
                revision_id=revision_id,
            )
            # Create the run
            client.create_run(**run)
            run["outputs"] = {"output": ["Hi"]}
            run["extra"]["foo"] = "bar"
            run["name"] = "test_run_updated"
            # Update the run
            client.update_run(run["id"], **run)
            ```
        """
        data: dict[str, Any] = {
            "id": _as_uuid(run_id, "run_id"),
            "name": name,
            "run_type": run_type,
            "trace_id": kwargs.pop("trace_id", None),
            "parent_run_id": kwargs.pop("parent_run_id", None),
            "dotted_order": kwargs.pop("dotted_order", None),
            "tags": tags,
            "extra": extra,
            "session_id": kwargs.pop("session_id", None),
            "session_name": kwargs.pop("session_name", None),
        }
        if start_time is not None:
            data["start_time"] = start_time.isoformat()
        if attachments:
            # Reject filesystem paths unless the caller explicitly opted in.
            for _, attachment in attachments.items():
                if (
                    isinstance(attachment, tuple)
                    and isinstance(attachment[1], Path)
                    and not dangerously_allow_filesystem
                ):
                    raise ValueError(
                        "Must set dangerously_allow_filesystem=True to allow passing in Paths for attachments."
                    )
            data["attachments"] = attachments
        use_multipart = (
            (self.tracing_queue is not None or self.compressed_traces is not None)
            # batch ingest requires trace_id and dotted_order to be set
            and data["trace_id"] is not None
            and data["dotted_order"] is not None
        )
        if not self._filter_for_sampling([data], patch=True):
            return
        if end_time is not None:
            data["end_time"] = end_time.isoformat()
        else:
            data["end_time"] = datetime.datetime.now(datetime.timezone.utc).isoformat()
        if error is not None:
            data["error"] = error
        if inputs is not None:
            data["inputs"] = self._hide_run_inputs(inputs)
        if outputs is not None:
            # The non-multipart path serializes later, so copy to avoid
            # surprising the caller by mutating their dict.
            if not use_multipart:
                outputs = ls_utils.deepish_copy(outputs)
            data["outputs"] = self._hide_run_outputs(outputs)
        if events is not None:
            data["events"] = events
        if data["extra"]:
            self._insert_runtime_env([data])
            if metadata := data["extra"].get("metadata"):
                data["extra"]["metadata"] = self._hide_run_metadata(metadata)
        if reference_example_id is not None:
            data["reference_example_id"] = reference_example_id
        # If process_buffered_run_ops is enabled, collect runs in batches
        if self._process_buffered_run_ops and not kwargs.get("is_run_ops_buffer_flush"):
            with self._run_ops_buffer_lock:
                self._run_ops_buffer.append(("patch", data))
                # Process batch when we have enough runs or enough time has passed
                if self._should_flush_run_ops_buffer():
                    self._flush_run_ops_buffer()
            return
        else:
            self._update_run(data, api_key=api_key, api_url=api_url)
    def _update_run(
        self,
        run_update: dict,
        *,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
    ):
        """Dispatch a run patch to the appropriate ingestion transport.

        Prefers, in order: the pyo3 (Rust) client, the compressed-trace
        buffer, the Python tracing queue, and finally a direct (non-batch)
        PATCH request.
        """
        use_multipart = (
            (self.tracing_queue is not None or self.compressed_traces is not None)
            # batch ingest requires trace_id and dotted_order to be set
            and run_update["trace_id"] is not None
            and run_update["dotted_order"] is not None
        )
        if self._pyo3_client is not None:
            self._pyo3_client.update_run(run_update)
        elif use_multipart:
            serialized_op = serialize_run_dict(operation="patch", payload=run_update)
            # The compression path only serves the default endpoints; explicit
            # api_key/api_url overrides go through the tracing queue instead.
            if (
                self.compressed_traces is not None
                and api_key is None
                and api_url is None
            ):
                (
                    multipart_form,
                    opened_files,
                ) = serialized_run_operation_to_multipart_parts_and_context(
                    serialized_op
                )
                logger.log(
                    5,
                    "Adding compressed multipart to queue with context: %s",
                    multipart_form.context,
                )
                with self.compressed_traces.lock:
                    if self._data_available_event is None:
                        raise ValueError(
                            "Run compression is enabled but threading event is not configured"
                        )
                    enqueued = compress_multipart_parts_and_context(
                        multipart_form,
                        self.compressed_traces,
                        _BOUNDARY,
                    )
                    if enqueued:
                        self.compressed_traces.trace_count += 1
                        # Wake the background flusher.
                        self._data_available_event.set()
                _close_files(list(opened_files.values()))
            elif self.tracing_queue is not None:
                logger.log(
                    5,
                    "Adding to tracing queue: trace_id=%s, run_id=%s",
                    serialized_op.trace_id,
                    serialized_op.id,
                )
                # Attach the current OTEL span context when an exporter is set.
                if self.otel_exporter is not None:
                    self.tracing_queue.put(
                        TracingQueueItem(
                            run_update["dotted_order"],
                            serialized_op,
                            api_key=api_key,
                            api_url=api_url,
                            otel_context=self._set_span_in_context(
                                self._otel_trace.get_current_span()
                            ),
                        )
                    )
                else:
                    self.tracing_queue.put(
                        TracingQueueItem(
                            run_update["dotted_order"],
                            serialized_op,
                            api_key=api_key,
                            api_url=api_url,
                        )
                    )
        else:
            self._update_run_non_batch(run_update, api_key=api_key, api_url=api_url)
  2396. def _update_run_non_batch(
  2397. self,
  2398. run_update: dict,
  2399. *,
  2400. api_key: Optional[str] = None,
  2401. api_url: Optional[str] = None,
  2402. ) -> None:
  2403. # If specific api_key/api_url provided, use those; otherwise use all configured endpoints
  2404. if api_key is not None or api_url is not None:
  2405. target_api_url = api_url or self.api_url
  2406. target_api_key = api_key or self.api_key
  2407. headers = {
  2408. **self._headers,
  2409. X_API_KEY: target_api_key,
  2410. }
  2411. self.request_with_retries(
  2412. "PATCH",
  2413. f"{target_api_url}/runs/{run_update['id']}",
  2414. request_kwargs={
  2415. "data": _dumps_json(run_update),
  2416. "headers": headers,
  2417. },
  2418. )
  2419. else:
  2420. # Use all configured write API URLs
  2421. for write_api_url, write_api_key in self._write_api_urls.items():
  2422. headers = {
  2423. **self._headers,
  2424. X_API_KEY: write_api_key,
  2425. }
  2426. self.request_with_retries(
  2427. "PATCH",
  2428. f"{write_api_url}/runs/{run_update['id']}",
  2429. request_kwargs={
  2430. "data": _dumps_json(run_update),
  2431. "headers": headers,
  2432. },
  2433. )
def flush_compressed_traces(self, attempts: int = 3) -> None:
    """Force flush the currently buffered compressed runs.

    Drains whatever is in the compressed-trace buffer, submits it to the
    shared background thread pool (falling back to a synchronous send if
    the pool is already shut down), then blocks until all outstanding
    send futures complete.

    Args:
        attempts: Number of send attempts for the drained payload.

    Raises:
        ValueError: If compression is enabled but the futures set was
            never initialized.
    """
    if self.compressed_traces is None:
        # Compression not enabled; nothing to flush.
        return
    if self._futures is None:
        raise ValueError(
            "Run compression is enabled but request pool futures is not set"
        )
    # Attempt to drain and send any remaining data.
    # Imported lazily to avoid a circular import at module load time.
    from langsmith._internal._background_thread import (
        LANGSMITH_CLIENT_THREAD_POOL,
        _tracing_thread_drain_compressed_buffer,
    )
    # size_limit=1 / size_limit_bytes=1 force a drain of everything buffered.
    (
        final_data_stream,
        compressed_traces_info,
    ) = _tracing_thread_drain_compressed_buffer(
        self, size_limit=1, size_limit_bytes=1
    )
    if final_data_stream is not None:
        # We have data to send
        future = None
        try:
            future = LANGSMITH_CLIENT_THREAD_POOL.submit(
                self._send_compressed_multipart_req,
                final_data_stream,
                compressed_traces_info,
                attempts=attempts,
            )
            self._futures.add(future)
        except RuntimeError:
            # In case the ThreadPoolExecutor is already shutdown:
            # send synchronously on this thread instead.
            self._send_compressed_multipart_req(
                final_data_stream, compressed_traces_info, attempts=attempts
            )
    # If we got a future (here or from earlier sends), wait for completion.
    if self._futures:
        futures = list(self._futures)
        done, _ = cf.wait(futures)
        # Remove completed futures so the set does not grow unbounded.
        self._futures.difference_update(done)
  2475. def flush(self) -> None:
  2476. """Flush either queue or compressed buffer, depending on mode."""
  2477. # Flush any remaining batch items first
  2478. if self._process_buffered_run_ops:
  2479. with self._run_ops_buffer_lock:
  2480. if self._run_ops_buffer:
  2481. self._flush_run_ops_buffer()
  2482. if self.compressed_traces is not None:
  2483. self.flush_compressed_traces()
  2484. elif self.tracing_queue is not None:
  2485. self.tracing_queue.join()
def _load_child_runs(self, run: ls_schemas.Run) -> ls_schemas.Run:
    """Load child runs for a given run.

    Args:
        run (Run): The run to load child runs for.

    Returns:
        Run: The run with loaded child runs attached in ``child_runs``.

    Raises:
        LangSmithError: If a child run has no parent.
    """
    # Fetch every non-root run in the same trace; filtering to true
    # descendants of `run` happens below via dotted_order.
    child_runs = self.list_runs(
        is_root=False, session_id=run.session_id, trace_id=run.trace_id
    )
    # parent_run_id -> direct children, in dotted_order (execution) order.
    treemap: collections.defaultdict[uuid.UUID, list[ls_schemas.Run]] = (
        collections.defaultdict(list)
    )
    runs: dict[uuid.UUID, ls_schemas.Run] = {}
    run_id_str = str(run.id)
    for child_run in sorted(
        child_runs,
        key=lambda r: r.dotted_order,
    ):
        if child_run.parent_run_id is None:
            raise ls_utils.LangSmithError(f"Child run {child_run.id} has no parent")
        # Only track downstream children: each dotted_order segment is
        # "<timestamp>Z<run_id>", so the part after "Z" is an ancestor id.
        ancestor_ids = {
            seg.split("Z", 1)[1]
            for seg in child_run.dotted_order.split(".")
            if "Z" in seg
        }
        if run_id_str in ancestor_ids and child_run.id != run.id:
            treemap[child_run.parent_run_id].append(child_run)
            runs[child_run.id] = child_run
    # Direct children of the requested run, then wire up deeper levels.
    run.child_runs = treemap.pop(run.id, [])
    for run_id, children in treemap.items():
        runs[run_id].child_runs = children
    return run
  2522. def read_run(
  2523. self, run_id: ID_TYPE, load_child_runs: bool = False
  2524. ) -> ls_schemas.Run:
  2525. """Read a run from the LangSmith API.
  2526. Args:
  2527. run_id (Union[UUID, str]):
  2528. The ID of the run to read.
  2529. load_child_runs (bool, default=False):
  2530. Whether to load nested child runs.
  2531. Returns:
  2532. Run: The run read from the LangSmith API.
  2533. Examples:
  2534. ```python
  2535. from langsmith import Client
  2536. # Existing run
  2537. run_id = "your-run-id"
  2538. client = Client()
  2539. stored_run = client.read_run(run_id)
  2540. ```
  2541. """
  2542. response = self.request_with_retries(
  2543. "GET", f"/runs/{_as_uuid(run_id, 'run_id')}"
  2544. )
  2545. attachments = _convert_stored_attachments_to_attachments_dict(
  2546. response.json(), attachments_key="s3_urls", api_url=self.api_url
  2547. )
  2548. run = ls_schemas.Run(
  2549. attachments=attachments, **response.json(), _host_url=self._host_url
  2550. )
  2551. if load_child_runs:
  2552. run = self._load_child_runs(run)
  2553. return run
  2554. def list_runs(
  2555. self,
  2556. *,
  2557. project_id: Optional[Union[ID_TYPE, Sequence[ID_TYPE]]] = None,
  2558. project_name: Optional[Union[str, Sequence[str]]] = None,
  2559. run_type: Optional[str] = None,
  2560. trace_id: Optional[ID_TYPE] = None,
  2561. reference_example_id: Optional[ID_TYPE] = None,
  2562. query: Optional[str] = None,
  2563. filter: Optional[str] = None,
  2564. trace_filter: Optional[str] = None,
  2565. tree_filter: Optional[str] = None,
  2566. is_root: Optional[bool] = None,
  2567. parent_run_id: Optional[ID_TYPE] = None,
  2568. start_time: Optional[datetime.datetime] = None,
  2569. error: Optional[bool] = None,
  2570. run_ids: Optional[Sequence[ID_TYPE]] = None,
  2571. select: Optional[Sequence[str]] = None,
  2572. limit: Optional[int] = None,
  2573. **kwargs: Any,
  2574. ) -> Iterator[ls_schemas.Run]:
  2575. """List runs from the LangSmith API.
  2576. Args:
  2577. project_id (Optional[Union[UUID, str], Sequence[Union[UUID, str]]]):
  2578. The ID(s) of the project to filter by.
  2579. project_name (Optional[Union[str, Sequence[str]]]): The name(s) of the project to filter by.
  2580. run_type (Optional[str]): The type of the runs to filter by.
  2581. trace_id (Optional[Union[UUID, str]]): The ID of the trace to filter by.
  2582. reference_example_id (Optional[Union[UUID, str]]): The ID of the reference example to filter by.
  2583. query (Optional[str]): The query string to filter by.
  2584. filter (Optional[str]): The filter string to filter by.
  2585. trace_filter (Optional[str]): Filter to apply to the ROOT run in the trace tree. This is meant to
  2586. be used in conjunction with the regular `filter` parameter to let you
  2587. filter runs by attributes of the root run within a trace.
  2588. tree_filter (Optional[str]): Filter to apply to OTHER runs in the trace tree, including
  2589. sibling and child runs. This is meant to be used in conjunction with
  2590. the regular `filter` parameter to let you filter runs by attributes
  2591. of any run within a trace.
  2592. is_root (Optional[bool]): Whether to filter by root runs.
  2593. parent_run_id (Optional[Union[UUID, str]]):
  2594. The ID of the parent run to filter by.
  2595. start_time (Optional[datetime.datetime]):
  2596. The start time to filter by.
  2597. error (Optional[bool]): Whether to filter by error status.
  2598. run_ids (Optional[Sequence[Union[UUID, str]]]):
  2599. The IDs of the runs to filter by.
  2600. select (Optional[Sequence[str]]): The fields to select.
  2601. limit (Optional[int]): The maximum number of runs to return.
  2602. **kwargs (Any): Additional keyword arguments.
  2603. Yields:
  2604. The runs.
  2605. Examples:
  2606. ```python
  2607. # List all runs in a project
  2608. project_runs = client.list_runs(project_name="<your_project>")
  2609. # List LLM and Chat runs in the last 24 hours
  2610. todays_llm_runs = client.list_runs(
  2611. project_name="<your_project>",
  2612. start_time=datetime.now() - timedelta(days=1),
  2613. run_type="llm",
  2614. )
  2615. # List root traces in a project
  2616. root_runs = client.list_runs(project_name="<your_project>", is_root=1)
  2617. # List runs without errors
  2618. correct_runs = client.list_runs(project_name="<your_project>", error=False)
  2619. # List runs and only return their inputs/outputs (to speed up the query)
  2620. input_output_runs = client.list_runs(
  2621. project_name="<your_project>", select=["inputs", "outputs"]
  2622. )
  2623. # List runs by run ID
  2624. run_ids = [
  2625. "a36092d2-4ad5-4fb4-9c0d-0dba9a2ed836",
  2626. "9398e6be-964f-4aa4-8ae9-ad78cd4b7074",
  2627. ]
  2628. selected_runs = client.list_runs(id=run_ids)
  2629. # List all "chain" type runs that took more than 10 seconds and had
  2630. # `total_tokens` greater than 5000
  2631. chain_runs = client.list_runs(
  2632. project_name="<your_project>",
  2633. filter='and(eq(run_type, "chain"), gt(latency, 10), gt(total_tokens, 5000))',
  2634. )
  2635. # List all runs called "extractor" whose root of the trace was assigned feedback "user_score" score of 1
  2636. good_extractor_runs = client.list_runs(
  2637. project_name="<your_project>",
  2638. filter='eq(name, "extractor")',
  2639. trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))',
  2640. )
  2641. # List all runs that started after a specific timestamp and either have "error" not equal to null or a "Correctness" feedback score equal to 0
  2642. complex_runs = client.list_runs(
  2643. project_name="<your_project>",
  2644. filter='and(gt(start_time, "2023-07-15T12:34:56Z"), or(neq(error, null), and(eq(feedback_key, "Correctness"), eq(feedback_score, 0.0))))',
  2645. )
  2646. # List all runs where `tags` include "experimental" or "beta" and `latency` is greater than 2 seconds
  2647. tagged_runs = client.list_runs(
  2648. project_name="<your_project>",
  2649. filter='and(or(has(tags, "experimental"), has(tags, "beta")), gt(latency, 2))',
  2650. )
  2651. ```
  2652. """ # noqa: E501
  2653. project_ids = []
  2654. if isinstance(project_id, (uuid.UUID, str)):
  2655. project_ids.append(project_id)
  2656. elif isinstance(project_id, list):
  2657. project_ids.extend(project_id)
  2658. if project_name is not None:
  2659. if isinstance(project_name, str):
  2660. project_name = [project_name]
  2661. project_ids.extend(
  2662. [self.read_project(project_name=name).id for name in project_name]
  2663. )
  2664. default_select = [
  2665. "app_path",
  2666. "completion_cost",
  2667. "completion_tokens",
  2668. "dotted_order",
  2669. "end_time",
  2670. "error",
  2671. "events",
  2672. "extra",
  2673. "feedback_stats",
  2674. "first_token_time",
  2675. "id",
  2676. "inputs",
  2677. "name",
  2678. "outputs",
  2679. "parent_run_id",
  2680. "parent_run_ids",
  2681. "prompt_cost",
  2682. "prompt_tokens",
  2683. "reference_example_id",
  2684. "run_type",
  2685. "session_id",
  2686. "start_time",
  2687. "status",
  2688. "tags",
  2689. "total_cost",
  2690. "total_tokens",
  2691. "trace_id",
  2692. ]
  2693. select = select or default_select
  2694. if "child_run_ids" in select:
  2695. warnings.warn(
  2696. "The child_run_ids field is deprecated and will be removed in following versions",
  2697. DeprecationWarning,
  2698. )
  2699. body_query: dict[str, Any] = {
  2700. "session": project_ids if project_ids else None,
  2701. "run_type": run_type,
  2702. "reference_example": (
  2703. [reference_example_id] if reference_example_id else None
  2704. ),
  2705. "query": query,
  2706. "filter": filter,
  2707. "trace_filter": trace_filter,
  2708. "tree_filter": tree_filter,
  2709. "is_root": is_root,
  2710. "parent_run": parent_run_id,
  2711. "start_time": start_time.isoformat() if start_time else None,
  2712. "error": error,
  2713. "id": run_ids,
  2714. "trace": trace_id,
  2715. "select": select,
  2716. "limit": limit,
  2717. **kwargs,
  2718. }
  2719. body_query = {k: v for k, v in body_query.items() if v is not None}
  2720. for i, run in enumerate(
  2721. self._get_cursor_paginated_list("/runs/query", body=body_query)
  2722. ):
  2723. # Should this be behind a flag?
  2724. attachments = _convert_stored_attachments_to_attachments_dict(
  2725. run, attachments_key="s3_urls", api_url=self.api_url
  2726. )
  2727. yield ls_schemas.Run(
  2728. attachments=attachments, **run, _host_url=self._host_url
  2729. )
  2730. if limit is not None and i + 1 >= limit:
  2731. break
  2732. def get_run_stats(
  2733. self,
  2734. *,
  2735. id: Optional[list[ID_TYPE]] = None,
  2736. trace: Optional[ID_TYPE] = None,
  2737. parent_run: Optional[ID_TYPE] = None,
  2738. run_type: Optional[str] = None,
  2739. project_names: Optional[list[str]] = None,
  2740. project_ids: Optional[list[ID_TYPE]] = None,
  2741. reference_example_ids: Optional[list[ID_TYPE]] = None,
  2742. start_time: Optional[str] = None,
  2743. end_time: Optional[str] = None,
  2744. error: Optional[bool] = None,
  2745. query: Optional[str] = None,
  2746. filter: Optional[str] = None,
  2747. trace_filter: Optional[str] = None,
  2748. tree_filter: Optional[str] = None,
  2749. is_root: Optional[bool] = None,
  2750. data_source_type: Optional[str] = None,
  2751. ) -> dict[str, Any]:
  2752. """Get aggregate statistics over queried runs.
  2753. Takes in similar query parameters to `list_runs` and returns statistics
  2754. based on the runs that match the query.
  2755. Args:
  2756. id (Optional[List[Union[UUID, str]]]): List of run IDs to filter by.
  2757. trace (Optional[Union[UUID, str]]): Trace ID to filter by.
  2758. parent_run (Optional[Union[UUID, str]]): Parent run ID to filter by.
  2759. run_type (Optional[str]): Run type to filter by.
  2760. project_names (Optional[List[str]]): List of project names to filter by.
  2761. project_ids (Optional[List[Union[UUID, str]]]): List of project IDs to filter by.
  2762. reference_example_ids (Optional[List[Union[UUID, str]]]): List of reference example IDs to filter by.
  2763. start_time (Optional[str]): Start time to filter by.
  2764. end_time (Optional[str]): End time to filter by.
  2765. error (Optional[bool]): Filter by error status.
  2766. query (Optional[str]): Query string to filter by.
  2767. filter (Optional[str]): Filter string to apply.
  2768. trace_filter (Optional[str]): Trace filter string to apply.
  2769. tree_filter (Optional[str]): Tree filter string to apply.
  2770. is_root (Optional[bool]): Filter by root run status.
  2771. data_source_type (Optional[str]): Data source type to filter by.
  2772. Returns:
  2773. Dict[str, Any]: A dictionary containing the run statistics.
  2774. """ # noqa: E501
  2775. from concurrent.futures import ThreadPoolExecutor, as_completed # type: ignore
  2776. project_ids = project_ids or []
  2777. if project_names:
  2778. with ThreadPoolExecutor() as executor:
  2779. futures = [
  2780. executor.submit(self.read_project, project_name=name)
  2781. for name in project_names
  2782. ]
  2783. for future in as_completed(futures):
  2784. project_ids.append(future.result().id)
  2785. payload = {
  2786. "id": id,
  2787. "trace": trace,
  2788. "parent_run": parent_run,
  2789. "run_type": run_type,
  2790. "session": project_ids,
  2791. "reference_example": reference_example_ids,
  2792. "start_time": start_time,
  2793. "end_time": end_time,
  2794. "error": error,
  2795. "query": query,
  2796. "filter": filter,
  2797. "trace_filter": trace_filter,
  2798. "tree_filter": tree_filter,
  2799. "is_root": is_root,
  2800. "data_source_type": data_source_type,
  2801. }
  2802. # Remove None values from the payload
  2803. payload = {k: v for k, v in payload.items() if v is not None}
  2804. response = self.request_with_retries(
  2805. "POST",
  2806. "/runs/stats",
  2807. request_kwargs={
  2808. "data": _dumps_json(payload),
  2809. },
  2810. )
  2811. ls_utils.raise_for_status_with_text(response)
  2812. return response.json()
  2813. def get_run_url(
  2814. self,
  2815. *,
  2816. run: ls_schemas.RunBase,
  2817. project_name: Optional[str] = None,
  2818. project_id: Optional[ID_TYPE] = None,
  2819. ) -> str:
  2820. """Get the URL for a run.
  2821. Not recommended for use within your agent runtime.
  2822. More for use interacting with runs after the fact
  2823. for data analysis or ETL workloads.
  2824. Args:
  2825. run (RunBase): The run.
  2826. project_name (Optional[str]): The name of the project.
  2827. project_id (Optional[Union[UUID, str]]): The ID of the project.
  2828. Returns:
  2829. str: The URL for the run.
  2830. """
  2831. if session_id := getattr(run, "session_id", None):
  2832. pass
  2833. elif session_name := getattr(run, "session_name", None):
  2834. session_id = self.read_project(project_name=session_name).id
  2835. elif project_id is not None:
  2836. session_id = project_id
  2837. elif project_name is not None:
  2838. session_id = self.read_project(project_name=project_name).id
  2839. else:
  2840. project_name = ls_utils.get_tracer_project()
  2841. session_id = self.read_project(project_name=project_name).id
  2842. session_id_ = _as_uuid(session_id, "session_id")
  2843. return (
  2844. f"{self._host_url}/o/{self._get_tenant_id()}/projects/p/{session_id_}/"
  2845. f"r/{run.id}?poll=true"
  2846. )
  2847. def share_run(self, run_id: ID_TYPE, *, share_id: Optional[ID_TYPE] = None) -> str:
  2848. """Get a share link for a run.
  2849. Args:
  2850. run_id (Union[UUID, str]): The ID of the run to share.
  2851. share_id (Optional[Union[UUID, str]]): Custom share ID.
  2852. If not provided, a random UUID will be generated.
  2853. Returns:
  2854. str: The URL of the shared run.
  2855. """
  2856. run_id_ = _as_uuid(run_id, "run_id")
  2857. data = {
  2858. "run_id": str(run_id_),
  2859. "share_token": share_id or str(uuid.uuid4()),
  2860. }
  2861. response = self.request_with_retries(
  2862. "PUT",
  2863. f"/runs/{run_id_}/share",
  2864. headers=self._headers,
  2865. json=data,
  2866. )
  2867. ls_utils.raise_for_status_with_text(response)
  2868. share_token = response.json()["share_token"]
  2869. return f"{self._host_url}/public/{share_token}/r"
  2870. def unshare_run(self, run_id: ID_TYPE) -> None:
  2871. """Delete share link for a run.
  2872. Args:
  2873. run_id (Union[UUID, str]): The ID of the run to unshare.
  2874. Returns:
  2875. None
  2876. """
  2877. response = self.request_with_retries(
  2878. "DELETE",
  2879. f"/runs/{_as_uuid(run_id, 'run_id')}/share",
  2880. headers=self._headers,
  2881. )
  2882. ls_utils.raise_for_status_with_text(response)
  2883. def read_run_shared_link(self, run_id: ID_TYPE) -> Optional[str]:
  2884. """Retrieve the shared link for a specific run.
  2885. Args:
  2886. run_id (Union[UUID, str]): The ID of the run.
  2887. Returns:
  2888. Optional[str]: The shared link for the run, or None if the link is not
  2889. available.
  2890. """
  2891. response = self.request_with_retries(
  2892. "GET",
  2893. f"/runs/{_as_uuid(run_id, 'run_id')}/share",
  2894. headers=self._headers,
  2895. )
  2896. ls_utils.raise_for_status_with_text(response)
  2897. result = response.json()
  2898. if result is None or "share_token" not in result:
  2899. return None
  2900. return f"{self._host_url}/public/{result['share_token']}/r"
  2901. def run_is_shared(self, run_id: ID_TYPE) -> bool:
  2902. """Get share state for a run.
  2903. Args:
  2904. run_id (Union[UUID, str]): The ID of the run.
  2905. Returns:
  2906. bool: True if the run is shared, False otherwise.
  2907. """
  2908. link = self.read_run_shared_link(_as_uuid(run_id, "run_id"))
  2909. return link is not None
  2910. def read_shared_run(
  2911. self, share_token: Union[ID_TYPE, str], run_id: Optional[ID_TYPE] = None
  2912. ) -> ls_schemas.Run:
  2913. """Get shared runs.
  2914. Args:
  2915. share_token (Union[UUID, str]): The share token or URL of the shared run.
  2916. run_id (Optional[Union[UUID, str]]): The ID of the specific run to retrieve.
  2917. If not provided, the full shared run will be returned.
  2918. Returns:
  2919. Run: The shared run.
  2920. """
  2921. _, token_uuid = _parse_token_or_url(share_token, "", kind="run")
  2922. path = f"/public/{token_uuid}/run"
  2923. if run_id is not None:
  2924. path += f"/{_as_uuid(run_id, 'run_id')}"
  2925. response = self.request_with_retries(
  2926. "GET",
  2927. path,
  2928. headers=self._headers,
  2929. )
  2930. ls_utils.raise_for_status_with_text(response)
  2931. return ls_schemas.Run(**response.json(), _host_url=self._host_url)
  2932. def list_shared_runs(
  2933. self, share_token: Union[ID_TYPE, str], run_ids: Optional[list[str]] = None
  2934. ) -> Iterator[ls_schemas.Run]:
  2935. """Get shared runs.
  2936. Args:
  2937. share_token (Union[UUID, str]): The share token or URL of the shared run.
  2938. run_ids (Optional[List[str]]): A list of run IDs to filter the results by.
  2939. Yields:
  2940. A shared run.
  2941. """
  2942. body = {"id": run_ids} if run_ids else {}
  2943. _, token_uuid = _parse_token_or_url(share_token, "", kind="run")
  2944. for run in self._get_cursor_paginated_list(
  2945. f"/public/{token_uuid}/runs/query", body=body
  2946. ):
  2947. yield ls_schemas.Run(**run, _host_url=self._host_url)
  2948. def read_dataset_shared_schema(
  2949. self,
  2950. dataset_id: Optional[ID_TYPE] = None,
  2951. *,
  2952. dataset_name: Optional[str] = None,
  2953. ) -> ls_schemas.DatasetShareSchema:
  2954. """Retrieve the shared schema of a dataset.
  2955. Args:
  2956. dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
  2957. Either `dataset_id` or `dataset_name` must be given.
  2958. dataset_name (Optional[str]): The name of the dataset.
  2959. Either `dataset_id` or `dataset_name` must be given.
  2960. Returns:
  2961. ls_schemas.DatasetShareSchema: The shared schema of the dataset.
  2962. Raises:
  2963. ValueError: If neither `dataset_id` nor `dataset_name` is given.
  2964. """
  2965. if dataset_id is None and dataset_name is None:
  2966. raise ValueError("Either dataset_id or dataset_name must be given")
  2967. if dataset_id is None:
  2968. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  2969. response = self.request_with_retries(
  2970. "GET",
  2971. f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/share",
  2972. headers=self._headers,
  2973. )
  2974. ls_utils.raise_for_status_with_text(response)
  2975. d = response.json()
  2976. return cast(
  2977. ls_schemas.DatasetShareSchema,
  2978. {
  2979. **d,
  2980. "url": f"{self._host_url}/public/"
  2981. f"{_as_uuid(d['share_token'], 'response.share_token')}/d",
  2982. },
  2983. )
  2984. def share_dataset(
  2985. self,
  2986. dataset_id: Optional[ID_TYPE] = None,
  2987. *,
  2988. dataset_name: Optional[str] = None,
  2989. ) -> ls_schemas.DatasetShareSchema:
  2990. """Get a share link for a dataset.
  2991. Args:
  2992. dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
  2993. Either `dataset_id` or `dataset_name` must be given.
  2994. dataset_name (Optional[str]): The name of the dataset.
  2995. Either `dataset_id` or `dataset_name` must be given.
  2996. Returns:
  2997. ls_schemas.DatasetShareSchema: The shared schema of the dataset.
  2998. Raises:
  2999. ValueError: If neither `dataset_id` nor `dataset_name` is given.
  3000. """
  3001. if dataset_id is None and dataset_name is None:
  3002. raise ValueError("Either dataset_id or dataset_name must be given")
  3003. if dataset_id is None:
  3004. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  3005. data = {
  3006. "dataset_id": str(dataset_id),
  3007. }
  3008. response = self.request_with_retries(
  3009. "PUT",
  3010. f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/share",
  3011. headers=self._headers,
  3012. json=data,
  3013. )
  3014. ls_utils.raise_for_status_with_text(response)
  3015. d: dict = response.json()
  3016. return cast(
  3017. ls_schemas.DatasetShareSchema,
  3018. {**d, "url": f"{self._host_url}/public/{d['share_token']}/d"},
  3019. )
  3020. def unshare_dataset(self, dataset_id: ID_TYPE) -> None:
  3021. """Delete share link for a dataset.
  3022. Args:
  3023. dataset_id (Union[UUID, str]): The ID of the dataset to unshare.
  3024. Returns:
  3025. None
  3026. """
  3027. response = self.request_with_retries(
  3028. "DELETE",
  3029. f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/share",
  3030. headers=self._headers,
  3031. )
  3032. ls_utils.raise_for_status_with_text(response)
  3033. def read_shared_dataset(
  3034. self,
  3035. share_token: str,
  3036. ) -> ls_schemas.Dataset:
  3037. """Get shared datasets.
  3038. Args:
  3039. share_token (Union[UUID, str]): The share token or URL of the shared dataset.
  3040. Returns:
  3041. Dataset: The shared dataset.
  3042. """
  3043. _, token_uuid = _parse_token_or_url(share_token, self.api_url)
  3044. response = self.request_with_retries(
  3045. "GET",
  3046. f"/public/{token_uuid}/datasets",
  3047. headers=self._headers,
  3048. )
  3049. ls_utils.raise_for_status_with_text(response)
  3050. return ls_schemas.Dataset(
  3051. **response.json(),
  3052. _host_url=self._host_url,
  3053. _public_path=f"/public/{share_token}/d",
  3054. )
  3055. def list_shared_examples(
  3056. self,
  3057. share_token: str,
  3058. *,
  3059. example_ids: Optional[list[ID_TYPE]] = None,
  3060. limit: Optional[int] = None,
  3061. ) -> Iterator[ls_schemas.Example]:
  3062. """Get shared examples.
  3063. Args:
  3064. share_token (Union[UUID, str]): The share token or URL of the shared dataset.
  3065. example_ids (Optional[List[UUID, str]], optional): The IDs of the examples to filter by.
  3066. limit (Optional[int]): Maximum number of examples to return, by default None.
  3067. Returns:
  3068. List[ls_schemas.Example]: The list of shared examples.
  3069. """
  3070. params = {}
  3071. if example_ids is not None:
  3072. params["id"] = [str(id) for id in example_ids]
  3073. for i, example in enumerate(
  3074. self._get_paginated_list(
  3075. f"/public/{_as_uuid(share_token, 'share_token')}/examples",
  3076. params=params,
  3077. )
  3078. ):
  3079. yield ls_schemas.Example(**example, _host_url=self._host_url)
  3080. if limit is not None and i + 1 >= limit:
  3081. break
  3082. def list_shared_projects(
  3083. self,
  3084. *,
  3085. dataset_share_token: str,
  3086. project_ids: Optional[list[ID_TYPE]] = None,
  3087. name: Optional[str] = None,
  3088. name_contains: Optional[str] = None,
  3089. limit: Optional[int] = None,
  3090. ) -> Iterator[ls_schemas.TracerSessionResult]:
  3091. """List shared projects.
  3092. Args:
  3093. dataset_share_token (str): The share token of the dataset.
  3094. project_ids (Optional[List[Union[UUID, str]]]): List of project IDs to filter the results, by default None.
  3095. name (Optional[str]): Name of the project to filter the results, by default None.
  3096. name_contains (Optional[str]): Substring to search for in project names, by default None.
  3097. limit (Optional[int]): Maximum number of projects to return, by default None.
  3098. Yields:
  3099. The shared projects.
  3100. """
  3101. params = {"id": project_ids, "name": name, "name_contains": name_contains}
  3102. share_token = _as_uuid(dataset_share_token, "dataset_share_token")
  3103. for i, project in enumerate(
  3104. self._get_paginated_list(
  3105. f"/public/{share_token}/datasets/sessions",
  3106. params=params,
  3107. )
  3108. ):
  3109. yield ls_schemas.TracerSessionResult(**project, _host_url=self._host_url)
  3110. if limit is not None and i + 1 >= limit:
  3111. break
  3112. def create_project(
  3113. self,
  3114. project_name: str,
  3115. *,
  3116. description: Optional[str] = None,
  3117. metadata: Optional[dict] = None,
  3118. upsert: bool = False,
  3119. project_extra: Optional[dict] = None,
  3120. reference_dataset_id: Optional[ID_TYPE] = None,
  3121. ) -> ls_schemas.TracerSession:
  3122. """Create a project on the LangSmith API.
  3123. Args:
  3124. project_name (str): The name of the project.
  3125. project_extra (Optional[dict]): Additional project information.
  3126. metadata (Optional[dict]): Additional metadata to associate with the project.
  3127. description (Optional[str]): The description of the project.
  3128. upsert (bool, default=False): Whether to update the project if it already exists.
  3129. reference_dataset_id (Optional[Union[UUID, str]): The ID of the reference dataset to associate with the project.
  3130. Returns:
  3131. TracerSession: The created project.
  3132. """
  3133. endpoint = f"{self.api_url}/sessions"
  3134. extra = project_extra
  3135. if metadata:
  3136. extra = {**(extra or {}), "metadata": metadata}
  3137. body: dict[str, Any] = {
  3138. "name": project_name,
  3139. "extra": extra,
  3140. "description": description,
  3141. "id": str(uuid.uuid4()),
  3142. }
  3143. params = {}
  3144. if upsert:
  3145. params["upsert"] = True
  3146. if reference_dataset_id is not None:
  3147. body["reference_dataset_id"] = reference_dataset_id
  3148. response = self.request_with_retries(
  3149. "POST",
  3150. endpoint,
  3151. headers={**self._headers, "Content-Type": "application/json"},
  3152. data=_dumps_json(body),
  3153. )
  3154. ls_utils.raise_for_status_with_text(response)
  3155. return ls_schemas.TracerSession(**response.json(), _host_url=self._host_url)
  3156. def update_project(
  3157. self,
  3158. project_id: ID_TYPE,
  3159. *,
  3160. name: Optional[str] = None,
  3161. description: Optional[str] = None,
  3162. metadata: Optional[dict] = None,
  3163. project_extra: Optional[dict] = None,
  3164. end_time: Optional[datetime.datetime] = None,
  3165. ) -> ls_schemas.TracerSession:
  3166. """Update a LangSmith project.
  3167. Args:
  3168. project_id (Union[UUID, str]):
  3169. The ID of the project to update.
  3170. name (Optional[str]):
  3171. The new name to give the project. This is only valid if the project
  3172. has been assigned an end_time, meaning it has been completed/closed.
  3173. description (Optional[str]):
  3174. The new description to give the project.
  3175. metadata (Optional[dict]):
  3176. Additional metadata to associate with the project.
  3177. project_extra (Optional[dict]):
  3178. Additional project information.
  3179. end_time (Optional[datetime.datetime]):
  3180. The time the project was completed.
  3181. Returns:
  3182. TracerSession: The updated project.
  3183. """
  3184. endpoint = f"{self.api_url}/sessions/{_as_uuid(project_id, 'project_id')}"
  3185. extra = project_extra
  3186. if metadata:
  3187. extra = {**(extra or {}), "metadata": metadata}
  3188. body: dict[str, Any] = {
  3189. "name": name,
  3190. "extra": extra,
  3191. "description": description,
  3192. "end_time": end_time.isoformat() if end_time else None,
  3193. }
  3194. response = self.request_with_retries(
  3195. "PATCH",
  3196. endpoint,
  3197. headers={**self._headers, "Content-Type": "application/json"},
  3198. data=_dumps_json(body),
  3199. )
  3200. ls_utils.raise_for_status_with_text(response)
  3201. return ls_schemas.TracerSession(**response.json(), _host_url=self._host_url)
  3202. def _get_optional_tenant_id(self) -> Optional[uuid.UUID]:
  3203. if self._tenant_id is not None:
  3204. return self._tenant_id
  3205. try:
  3206. response = self.request_with_retries(
  3207. "GET", "/sessions", params={"limit": 1}
  3208. )
  3209. result = response.json()
  3210. if isinstance(result, list) and len(result) > 0:
  3211. tracer_session = ls_schemas.TracerSessionResult(
  3212. **result[0], _host_url=self._host_url
  3213. )
  3214. self._tenant_id = tracer_session.tenant_id
  3215. return self._tenant_id
  3216. except Exception as e:
  3217. logger.debug(
  3218. "Failed to get tenant ID from LangSmith: %s", repr(e), exc_info=True
  3219. )
  3220. return None
  3221. def _get_tenant_id(self) -> uuid.UUID:
  3222. tenant_id = self._get_optional_tenant_id()
  3223. if tenant_id is None:
  3224. raise ls_utils.LangSmithError("No tenant ID found")
  3225. return tenant_id
  3226. @ls_utils.xor_args(("project_id", "project_name"))
  3227. def read_project(
  3228. self,
  3229. *,
  3230. project_id: Optional[str] = None,
  3231. project_name: Optional[str] = None,
  3232. include_stats: bool = False,
  3233. ) -> ls_schemas.TracerSessionResult:
  3234. """Read a project from the LangSmith API.
  3235. Args:
  3236. project_id (Optional[str]):
  3237. The ID of the project to read.
  3238. project_name (Optional[str]): The name of the project to read.
  3239. Only one of project_id or project_name may be given.
  3240. include_stats (bool, default=False):
  3241. Whether to include a project's aggregate statistics in the response.
  3242. Returns:
  3243. TracerSessionResult: The project.
  3244. """
  3245. path = "/sessions"
  3246. params: dict[str, Any] = {"limit": 1}
  3247. if project_id is not None:
  3248. path += f"/{_as_uuid(project_id, 'project_id')}"
  3249. elif project_name is not None:
  3250. params["name"] = project_name
  3251. else:
  3252. raise ValueError("Must provide project_name or project_id")
  3253. params["include_stats"] = include_stats
  3254. response = self.request_with_retries("GET", path, params=params)
  3255. result = response.json()
  3256. if isinstance(result, list):
  3257. if len(result) == 0:
  3258. raise ls_utils.LangSmithNotFoundError(
  3259. f"Project {project_name} not found"
  3260. )
  3261. return ls_schemas.TracerSessionResult(**result[0], _host_url=self._host_url)
  3262. return ls_schemas.TracerSessionResult(
  3263. **response.json(), _host_url=self._host_url
  3264. )
  3265. def has_project(
  3266. self, project_name: str, *, project_id: Optional[str] = None
  3267. ) -> bool:
  3268. """Check if a project exists.
  3269. Args:
  3270. project_name (str):
  3271. The name of the project to check for.
  3272. project_id (Optional[str]):
  3273. The ID of the project to check for.
  3274. Returns:
  3275. bool: Whether the project exists.
  3276. """
  3277. try:
  3278. self.read_project(project_name=project_name)
  3279. except ls_utils.LangSmithNotFoundError:
  3280. return False
  3281. return True
    def get_test_results(
        self,
        *,
        project_id: Optional[ID_TYPE] = None,
        project_name: Optional[str] = None,
    ) -> pd.DataFrame:
        """Read the record-level information from an experiment into a Pandas DF.

        !!! note
            This will fetch whatever data exists in the DB. Results are not
            immediately available in the DB upon evaluation run completion.

        Feedback score values will be returned as an average across all runs for
        the experiment. Non-numeric feedback scores will be omitted.

        Args:
            project_id (Optional[Union[UUID, str]]): The ID of the project.
            project_name (Optional[str]): The name of the project.

        Returns:
            pd.DataFrame: A dataframe containing the test results.
        """
        warnings.warn(
            "Function get_test_results is in beta.", UserWarning, stacklevel=2
        )
        # Imported lazily so pandas is only required when this method is used.
        from concurrent.futures import ThreadPoolExecutor, as_completed  # type: ignore
        import pandas as pd  # type: ignore
        # Only root runs carry the experiment-level inputs/outputs/feedback.
        runs = self.list_runs(
            project_id=project_id,
            project_name=project_name,
            is_root=True,
            select=[
                "id",
                "reference_example_id",
                "inputs",
                "outputs",
                "error",
                "feedback_stats",
                "start_time",
                "end_time",
            ],
        )
        results: list[dict] = []
        example_ids = []
        def fetch_examples(batch):
            # One API call per batch of example IDs; the reference example's
            # outputs are flattened into "reference.<key>" columns.
            examples = self.list_examples(example_ids=batch)
            return [
                {
                    "example_id": example.id,
                    **{f"reference.{k}": v for k, v in (example.outputs or {}).items()},
                }
                for example in examples
            ]
        batch_size = 50
        # cursor tracks how far into example_ids we have already submitted
        # fetch jobs; batches are dispatched while runs are still streaming.
        cursor = 0
        with ThreadPoolExecutor() as executor:
            futures = []
            for r in runs:
                # Flatten run fields into dot-named columns per row.
                row = {
                    "example_id": r.reference_example_id,
                    **{f"input.{k}": v for k, v in r.inputs.items()},
                    **{f"outputs.{k}": v for k, v in (r.outputs or {}).items()},
                    "execution_time": (
                        (r.end_time - r.start_time).total_seconds()
                        if r.end_time
                        else None
                    ),
                    "error": r.error,
                    "id": r.id,
                }
                if r.feedback_stats:
                    # Average numeric feedback per key; "note" entries with
                    # comments are surfaced separately below, not averaged.
                    row.update(
                        {
                            f"feedback.{k}": v.get("avg")
                            for k, v in r.feedback_stats.items()
                            if not (k == "note" and v.get("comments"))
                        }
                    )
                    if r.feedback_stats.get("note") and (
                        comments := r.feedback_stats["note"].get("comments")
                    ):
                        row["notes"] = comments
                if r.reference_example_id:
                    example_ids.append(r.reference_example_id)
                else:
                    logger.warning(f"Run {r.id} has no reference example ID.")
                # Dispatch a fetch whenever a full batch has accumulated.
                if len(example_ids) % batch_size == 0:
                    # Ensure not empty
                    if batch := example_ids[cursor : cursor + batch_size]:
                        futures.append(executor.submit(fetch_examples, batch))
                        cursor += batch_size
                results.append(row)
            # Handle any remaining examples
            if example_ids[cursor:]:
                futures.append(executor.submit(fetch_examples, example_ids[cursor:]))
        result_df = pd.DataFrame(results).set_index("example_id")
        example_outputs = [
            output for future in as_completed(futures) for output in future.result()
        ]
        if example_outputs:
            example_df = pd.DataFrame(example_outputs).set_index("example_id")
            # Join reference outputs onto run rows by example_id.
            result_df = example_df.merge(result_df, left_index=True, right_index=True)
        # Flatten dict columns into dot syntax for easier access
        return pd.json_normalize(result_df.to_dict(orient="records"))
  3382. def list_projects(
  3383. self,
  3384. project_ids: Optional[list[ID_TYPE]] = None,
  3385. name: Optional[str] = None,
  3386. name_contains: Optional[str] = None,
  3387. reference_dataset_id: Optional[ID_TYPE] = None,
  3388. reference_dataset_name: Optional[str] = None,
  3389. reference_free: Optional[bool] = None,
  3390. include_stats: Optional[bool] = None,
  3391. dataset_version: Optional[str] = None,
  3392. limit: Optional[int] = None,
  3393. metadata: Optional[dict[str, Any]] = None,
  3394. ) -> Iterator[ls_schemas.TracerSessionResult]:
  3395. """List projects from the LangSmith API.
  3396. Args:
  3397. project_ids (Optional[List[Union[UUID, str]]]):
  3398. A list of project IDs to filter by, by default None
  3399. name (Optional[str]):
  3400. The name of the project to filter by, by default None
  3401. name_contains (Optional[str]):
  3402. A string to search for in the project name, by default None
  3403. reference_dataset_id (Optional[List[Union[UUID, str]]]):
  3404. A dataset ID to filter by, by default None
  3405. reference_dataset_name (Optional[str]):
  3406. The name of the reference dataset to filter by, by default None
  3407. reference_free (Optional[bool]):
  3408. Whether to filter for only projects not associated with a dataset.
  3409. limit (Optional[int]):
  3410. The maximum number of projects to return, by default None
  3411. metadata (Optional[Dict[str, Any]]):
  3412. Metadata to filter by.
  3413. Yields:
  3414. The projects.
  3415. Raises:
  3416. ValueError: If both reference_dataset_id and reference_dataset_name are given.
  3417. """
  3418. params: dict[str, Any] = {
  3419. "limit": min(limit, 100) if limit is not None else 100
  3420. }
  3421. if project_ids is not None:
  3422. params["id"] = project_ids
  3423. if name is not None:
  3424. params["name"] = name
  3425. if name_contains is not None:
  3426. params["name_contains"] = name_contains
  3427. if reference_dataset_id is not None:
  3428. if reference_dataset_name is not None:
  3429. raise ValueError(
  3430. "Only one of reference_dataset_id or"
  3431. " reference_dataset_name may be given"
  3432. )
  3433. params["reference_dataset"] = reference_dataset_id
  3434. elif reference_dataset_name is not None:
  3435. reference_dataset_id = self.read_dataset(
  3436. dataset_name=reference_dataset_name
  3437. ).id
  3438. params["reference_dataset"] = reference_dataset_id
  3439. if reference_free is not None:
  3440. params["reference_free"] = reference_free
  3441. if include_stats is not None:
  3442. params["include_stats"] = include_stats
  3443. if dataset_version is not None:
  3444. params["dataset_version"] = dataset_version
  3445. if metadata is not None:
  3446. params["metadata"] = json.dumps(metadata)
  3447. for i, project in enumerate(
  3448. self._get_paginated_list("/sessions", params=params)
  3449. ):
  3450. yield ls_schemas.TracerSessionResult(**project, _host_url=self._host_url)
  3451. if limit is not None and i + 1 >= limit:
  3452. break
  3453. @ls_utils.xor_args(("project_name", "project_id"))
  3454. def delete_project(
  3455. self, *, project_name: Optional[str] = None, project_id: Optional[str] = None
  3456. ) -> None:
  3457. """Delete a project from LangSmith.
  3458. Args:
  3459. project_name (Optional[str]):
  3460. The name of the project to delete.
  3461. project_id (Optional[str]):
  3462. The ID of the project to delete.
  3463. Returns:
  3464. None
  3465. Raises:
  3466. ValueError: If neither project_name or project_id is provided.
  3467. """
  3468. if project_name is not None:
  3469. project_id = str(self.read_project(project_name=project_name).id)
  3470. elif project_id is None:
  3471. raise ValueError("Must provide project_name or project_id")
  3472. response = self.request_with_retries(
  3473. "DELETE",
  3474. f"/sessions/{_as_uuid(project_id, 'project_id')}",
  3475. headers=self._headers,
  3476. )
  3477. ls_utils.raise_for_status_with_text(response)
  3478. def create_dataset(
  3479. self,
  3480. dataset_name: str,
  3481. *,
  3482. description: Optional[str] = None,
  3483. data_type: ls_schemas.DataType = ls_schemas.DataType.kv,
  3484. inputs_schema: Optional[dict[str, Any]] = None,
  3485. outputs_schema: Optional[dict[str, Any]] = None,
  3486. transformations: Optional[list[ls_schemas.DatasetTransformation]] = None,
  3487. metadata: Optional[dict] = None,
  3488. ) -> ls_schemas.Dataset:
  3489. """Create a dataset in the LangSmith API.
  3490. Args:
  3491. dataset_name (str):
  3492. The name of the dataset.
  3493. description (Optional[str]):
  3494. The description of the dataset.
  3495. data_type (DataType, default=DataType.kv):
  3496. The data type of the dataset.
  3497. inputs_schema (Optional[Dict[str, Any]]):
  3498. The schema definition for the inputs of the dataset.
  3499. outputs_schema (Optional[Dict[str, Any]]):
  3500. The schema definition for the outputs of the dataset.
  3501. transformations (Optional[List[DatasetTransformation]]):
  3502. A list of transformations to apply to the dataset.
  3503. metadata (Optional[dict]):
  3504. Additional metadata to associate with the dataset.
  3505. Returns:
  3506. Dataset: The created dataset.
  3507. Raises:
  3508. requests.HTTPError: If the request to create the dataset fails.
  3509. """
  3510. metadata = {"runtime": ls_env.get_runtime_environment(), **(metadata or {})}
  3511. dataset: dict[str, Any] = {
  3512. "name": dataset_name,
  3513. "data_type": data_type.value,
  3514. "transformations": transformations,
  3515. "extra": {
  3516. "metadata": {
  3517. "runtime": ls_env.get_runtime_environment(),
  3518. **(metadata or {}),
  3519. }
  3520. },
  3521. }
  3522. if description is not None:
  3523. dataset["description"] = description
  3524. if inputs_schema is not None:
  3525. dataset["inputs_schema_definition"] = inputs_schema
  3526. if outputs_schema is not None:
  3527. dataset["outputs_schema_definition"] = outputs_schema
  3528. response = self.request_with_retries(
  3529. "POST",
  3530. "/datasets",
  3531. headers={**self._headers, "Content-Type": "application/json"},
  3532. data=_orjson.dumps(dataset),
  3533. )
  3534. ls_utils.raise_for_status_with_text(response)
  3535. json_response = response.json()
  3536. json_response["metadata"] = json_response.get("metadata") or metadata
  3537. return ls_schemas.Dataset(
  3538. **json_response,
  3539. _host_url=self._host_url,
  3540. _tenant_id=self._get_optional_tenant_id(),
  3541. )
  3542. def has_dataset(
  3543. self,
  3544. *,
  3545. dataset_name: Optional[str] = None,
  3546. dataset_id: Optional[ID_TYPE] = None,
  3547. ) -> bool:
  3548. """Check whether a dataset exists in your tenant.
  3549. Args:
  3550. dataset_name (Optional[str]):
  3551. The name of the dataset to check.
  3552. dataset_id (Optional[Union[UUID, str]]):
  3553. The ID of the dataset to check.
  3554. Returns:
  3555. bool: Whether the dataset exists.
  3556. """
  3557. try:
  3558. self.read_dataset(dataset_name=dataset_name, dataset_id=dataset_id)
  3559. return True
  3560. except ls_utils.LangSmithNotFoundError:
  3561. return False
  3562. @ls_utils.xor_args(("dataset_name", "dataset_id"))
  3563. def read_dataset(
  3564. self,
  3565. *,
  3566. dataset_name: Optional[str] = None,
  3567. dataset_id: Optional[ID_TYPE] = None,
  3568. ) -> ls_schemas.Dataset:
  3569. """Read a dataset from the LangSmith API.
  3570. Args:
  3571. dataset_name (Optional[str]):
  3572. The name of the dataset to read.
  3573. dataset_id (Optional[Union[UUID, str]]):
  3574. The ID of the dataset to read.
  3575. Returns:
  3576. Dataset: The dataset.
  3577. """
  3578. path = "/datasets"
  3579. params: dict[str, Any] = {"limit": 1}
  3580. if dataset_id is not None:
  3581. path += f"/{_as_uuid(dataset_id, 'dataset_id')}"
  3582. elif dataset_name is not None:
  3583. params["name"] = dataset_name
  3584. else:
  3585. raise ValueError("Must provide dataset_name or dataset_id")
  3586. response = self.request_with_retries(
  3587. "GET",
  3588. path,
  3589. params=params,
  3590. )
  3591. result = response.json()
  3592. if isinstance(result, list):
  3593. if len(result) == 0:
  3594. raise ls_utils.LangSmithNotFoundError(
  3595. f"Dataset {dataset_name} not found"
  3596. )
  3597. return ls_schemas.Dataset(
  3598. **result[0],
  3599. _host_url=self._host_url,
  3600. _tenant_id=self._get_optional_tenant_id(),
  3601. )
  3602. return ls_schemas.Dataset(
  3603. **result,
  3604. _host_url=self._host_url,
  3605. _tenant_id=self._get_optional_tenant_id(),
  3606. )
  3607. def diff_dataset_versions(
  3608. self,
  3609. dataset_id: Optional[ID_TYPE] = None,
  3610. *,
  3611. dataset_name: Optional[str] = None,
  3612. from_version: Union[str, datetime.datetime],
  3613. to_version: Union[str, datetime.datetime],
  3614. ) -> ls_schemas.DatasetDiffInfo:
  3615. """Get the difference between two versions of a dataset.
  3616. Args:
  3617. dataset_id (Optional[Union[UUID, str]]):
  3618. The ID of the dataset.
  3619. dataset_name (Optional[str]):
  3620. The name of the dataset.
  3621. from_version (Union[str, datetime.datetime]):
  3622. The starting version for the diff.
  3623. to_version (Union[str, datetime.datetime]):
  3624. The ending version for the diff.
  3625. Returns:
  3626. DatasetDiffInfo: The difference between the two versions of the dataset.
  3627. Examples:
  3628. ```python
  3629. # Get the difference between two tagged versions of a dataset
  3630. from_version = "prod"
  3631. to_version = "dev"
  3632. diff = client.diff_dataset_versions(
  3633. dataset_name="my-dataset",
  3634. from_version=from_version,
  3635. to_version=to_version,
  3636. )
  3637. # Get the difference between two timestamped versions of a dataset
  3638. from_version = datetime.datetime(2024, 1, 1)
  3639. to_version = datetime.datetime(2024, 2, 1)
  3640. diff = client.diff_dataset_versions(
  3641. dataset_name="my-dataset",
  3642. from_version=from_version,
  3643. to_version=to_version,
  3644. )
  3645. ```
  3646. """
  3647. if dataset_id is None:
  3648. if dataset_name is None:
  3649. raise ValueError("Must provide either dataset name or ID")
  3650. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  3651. dsid = _as_uuid(dataset_id, "dataset_id")
  3652. response = self.request_with_retries(
  3653. "GET",
  3654. f"/datasets/{dsid}/versions/diff",
  3655. headers=self._headers,
  3656. params={
  3657. "from_version": (
  3658. from_version.isoformat()
  3659. if isinstance(from_version, datetime.datetime)
  3660. else from_version
  3661. ),
  3662. "to_version": (
  3663. to_version.isoformat()
  3664. if isinstance(to_version, datetime.datetime)
  3665. else to_version
  3666. ),
  3667. },
  3668. )
  3669. ls_utils.raise_for_status_with_text(response)
  3670. return ls_schemas.DatasetDiffInfo(**response.json())
  3671. def read_dataset_openai_finetuning(
  3672. self,
  3673. dataset_id: Optional[ID_TYPE] = None,
  3674. *,
  3675. dataset_name: Optional[str] = None,
  3676. ) -> list:
  3677. """Download a dataset in OpenAI Jsonl format and load it as a list of dicts.
  3678. Args:
  3679. dataset_id (Optional[Union[UUID, str]]):
  3680. The ID of the dataset to download.
  3681. dataset_name (Optional[str]):
  3682. The name of the dataset to download.
  3683. Returns:
  3684. list[dict]: The dataset loaded as a list of dicts.
  3685. Raises:
  3686. ValueError: If neither dataset_id nor dataset_name is provided.
  3687. """
  3688. path = "/datasets"
  3689. if dataset_id is not None:
  3690. pass
  3691. elif dataset_name is not None:
  3692. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  3693. else:
  3694. raise ValueError("Must provide dataset_name or dataset_id")
  3695. response = self.request_with_retries(
  3696. "GET",
  3697. f"{path}/{_as_uuid(dataset_id, 'dataset_id')}/openai_ft",
  3698. )
  3699. dataset = [json.loads(line) for line in response.text.strip().split("\n")]
  3700. return dataset
  3701. def list_datasets(
  3702. self,
  3703. *,
  3704. dataset_ids: Optional[list[ID_TYPE]] = None,
  3705. data_type: Optional[str] = None,
  3706. dataset_name: Optional[str] = None,
  3707. dataset_name_contains: Optional[str] = None,
  3708. metadata: Optional[dict[str, Any]] = None,
  3709. limit: Optional[int] = None,
  3710. ) -> Iterator[ls_schemas.Dataset]:
  3711. """List the datasets on the LangSmith API.
  3712. Args:
  3713. dataset_ids (Optional[List[Union[UUID, str]]]):
  3714. A list of dataset IDs to filter the results by.
  3715. data_type (Optional[str]):
  3716. The data type of the datasets to filter the results by.
  3717. dataset_name (Optional[str]):
  3718. The name of the dataset to filter the results by.
  3719. dataset_name_contains (Optional[str]):
  3720. A substring to search for in the dataset names.
  3721. metadata (Optional[Dict[str, Any]]):
  3722. A dictionary of metadata to filter the results by.
  3723. limit (Optional[int]):
  3724. The maximum number of datasets to return.
  3725. Yields:
  3726. The datasets.
  3727. """
  3728. params: dict[str, Any] = {
  3729. "limit": min(limit, 100) if limit is not None else 100
  3730. }
  3731. if dataset_ids is not None:
  3732. params["id"] = dataset_ids
  3733. if data_type is not None:
  3734. params["data_type"] = data_type
  3735. if dataset_name is not None:
  3736. params["name"] = dataset_name
  3737. if dataset_name_contains is not None:
  3738. params["name_contains"] = dataset_name_contains
  3739. if metadata is not None:
  3740. params["metadata"] = json.dumps(metadata)
  3741. for i, dataset in enumerate(
  3742. self._get_paginated_list("/datasets", params=params)
  3743. ):
  3744. yield ls_schemas.Dataset(
  3745. **dataset,
  3746. _host_url=self._host_url,
  3747. _tenant_id=self._get_optional_tenant_id(),
  3748. )
  3749. if limit is not None and i + 1 >= limit:
  3750. break
  3751. @ls_utils.xor_args(("dataset_id", "dataset_name"))
  3752. def delete_dataset(
  3753. self,
  3754. *,
  3755. dataset_id: Optional[ID_TYPE] = None,
  3756. dataset_name: Optional[str] = None,
  3757. ) -> None:
  3758. """Delete a dataset from the LangSmith API.
  3759. Args:
  3760. dataset_id (Optional[Union[UUID, str]]):
  3761. The ID of the dataset to delete.
  3762. dataset_name (Optional[str]):
  3763. The name of the dataset to delete.
  3764. Returns:
  3765. None
  3766. """
  3767. if dataset_name is not None:
  3768. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  3769. if dataset_id is None:
  3770. raise ValueError("Must provide either dataset name or ID")
  3771. response = self.request_with_retries(
  3772. "DELETE",
  3773. f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}",
  3774. headers=self._headers,
  3775. )
  3776. ls_utils.raise_for_status_with_text(response)
  3777. def update_dataset_tag(
  3778. self,
  3779. *,
  3780. dataset_id: Optional[ID_TYPE] = None,
  3781. dataset_name: Optional[str] = None,
  3782. as_of: datetime.datetime,
  3783. tag: str,
  3784. ) -> None:
  3785. """Update the tags of a dataset.
  3786. If the tag is already assigned to a different version of this dataset,
  3787. the tag will be moved to the new version. The as_of parameter is used to
  3788. determine which version of the dataset to apply the new tags to.
  3789. It must be an exact version of the dataset to succeed. You can
  3790. use the read_dataset_version method to find the exact version
  3791. to apply the tags to.
  3792. Args:
  3793. dataset_id (Optional[Union[UUID, str]]):
  3794. The ID of the dataset to update.
  3795. dataset_name (Optional[str]):
  3796. The name of the dataset to update.
  3797. as_of (datetime.datetime):
  3798. The timestamp of the dataset to apply the new tags to.
  3799. tag (str):
  3800. The new tag to apply to the dataset.
  3801. Returns:
  3802. None
  3803. Examples:
  3804. ```python
  3805. dataset_name = "my-dataset"
  3806. # Get the version of a dataset <= a given timestamp
  3807. dataset_version = client.read_dataset_version(
  3808. dataset_name=dataset_name, as_of=datetime.datetime(2024, 1, 1)
  3809. )
  3810. # Assign that version a new tag
  3811. client.update_dataset_tags(
  3812. dataset_name="my-dataset",
  3813. as_of=dataset_version.as_of,
  3814. tag="prod",
  3815. )
  3816. ```
  3817. """
  3818. if dataset_name is not None:
  3819. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  3820. if dataset_id is None:
  3821. raise ValueError("Must provide either dataset name or ID")
  3822. response = self.request_with_retries(
  3823. "PUT",
  3824. f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/tags",
  3825. headers=self._headers,
  3826. json={
  3827. "as_of": as_of.isoformat(),
  3828. "tag": tag,
  3829. },
  3830. )
  3831. ls_utils.raise_for_status_with_text(response)
  3832. def list_dataset_versions(
  3833. self,
  3834. *,
  3835. dataset_id: Optional[ID_TYPE] = None,
  3836. dataset_name: Optional[str] = None,
  3837. search: Optional[str] = None,
  3838. limit: Optional[int] = None,
  3839. ) -> Iterator[ls_schemas.DatasetVersion]:
  3840. """List dataset versions.
  3841. Args:
  3842. dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
  3843. dataset_name (Optional[str]): The name of the dataset.
  3844. search (Optional[str]): The search query.
  3845. limit (Optional[int]): The maximum number of versions to return.
  3846. Yields:
  3847. The dataset versions.
  3848. """
  3849. if dataset_id is None:
  3850. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  3851. params = {
  3852. "search": search,
  3853. "limit": min(limit, 100) if limit is not None else 100,
  3854. }
  3855. for i, version in enumerate(
  3856. self._get_paginated_list(
  3857. f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/versions",
  3858. params=params,
  3859. )
  3860. ):
  3861. yield ls_schemas.DatasetVersion(**version)
  3862. if limit is not None and i + 1 >= limit:
  3863. break
  3864. def read_dataset_version(
  3865. self,
  3866. *,
  3867. dataset_id: Optional[ID_TYPE] = None,
  3868. dataset_name: Optional[str] = None,
  3869. as_of: Optional[datetime.datetime] = None,
  3870. tag: Optional[str] = None,
  3871. ) -> ls_schemas.DatasetVersion:
  3872. """Get dataset version by `as_of` or exact tag.
  3873. Ues this to resolve the nearest version to a given timestamp or for a given tag.
  3874. Args:
  3875. dataset_id (Optional[ID_TYPE]): The ID of the dataset.
  3876. dataset_name (Optional[str]): The name of the dataset.
  3877. as_of (Optional[datetime.datetime]): The timestamp of the dataset
  3878. to retrieve.
  3879. tag (Optional[str]): The tag of the dataset to retrieve.
  3880. Returns:
  3881. DatasetVersion: The dataset version.
  3882. Examples:
  3883. ```python
  3884. # Get the latest version of a dataset
  3885. client.read_dataset_version(dataset_name="my-dataset", tag="latest")
  3886. # Get the version of a dataset <= a given timestamp
  3887. client.read_dataset_version(
  3888. dataset_name="my-dataset",
  3889. as_of=datetime.datetime(2024, 1, 1),
  3890. )
  3891. # Get the version of a dataset with a specific tag
  3892. client.read_dataset_version(dataset_name="my-dataset", tag="prod")
  3893. ```
  3894. """
  3895. if dataset_id is None:
  3896. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  3897. if (as_of and tag) or (as_of is None and tag is None):
  3898. raise ValueError("Exactly one of as_of and tag must be specified.")
  3899. response = self.request_with_retries(
  3900. "GET",
  3901. f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/version",
  3902. params={"as_of": as_of, "tag": tag},
  3903. )
  3904. return ls_schemas.DatasetVersion(**response.json())
    def clone_public_dataset(
        self,
        token_or_url: str,
        *,
        source_api_url: Optional[str] = None,
        dataset_name: Optional[str] = None,
    ) -> ls_schemas.Dataset:
        """Clone a public dataset to your own langsmith tenant.

        This operation is idempotent. If you already have a dataset with the given name,
        this function will do nothing.

        Args:
            token_or_url (str): The token of the public dataset to clone.
            source_api_url (Optional[str]): The URL of the langsmith server where the data is hosted.
                Defaults to the API URL of your current client.
            dataset_name (Optional[str]): The name of the dataset to create in your tenant.
                Defaults to the name of the public dataset.

        Returns:
            Dataset: The cloned dataset.
        """
        source_api_url = source_api_url or self.api_url
        # Resolve the share token (and possibly a different source host).
        source_api_url, token_uuid = _parse_token_or_url(token_or_url, source_api_url)
        # A second client is used to read from the *source* tenant.
        source_client = Client(
            # Placeholder API key not needed anymore in most cases, but
            # some private deployments may have API key-based rate limiting
            # that would cause this to fail if we provide no value.
            api_url=source_api_url,
            api_key="placeholder",
        )
        ds = source_client.read_shared_dataset(token_uuid)
        dataset_name = dataset_name or ds.name
        # Idempotency: if the dataset already exists locally, return it as-is.
        try:
            ds = self.read_dataset(dataset_name=dataset_name)
            logger.info(
                f"Dataset {dataset_name} already exists in your tenant. Skipping."
            )
            return ds
        except ls_utils.LangSmithNotFoundError:
            pass
        try:
            # Fetch examples first
            examples = list(source_client.list_shared_examples(token_uuid))
            dataset = self.create_dataset(
                dataset_name=dataset_name,
                description=ds.description,
                data_type=ds.data_type or ls_schemas.DataType.kv,
                inputs_schema=ds.inputs_schema,
                outputs_schema=ds.outputs_schema,
                transformations=ds.transformations,
            )
            try:
                self.create_examples(
                    inputs=[e.inputs for e in examples],
                    outputs=[e.outputs for e in examples],
                    dataset_id=dataset.id,
                )
            except BaseException as e:
                # Let's not do automatic clean up for now in case there might be
                # some other reasons why create_examples fails (i.e., not network issue
                # or keyboard interrupt).
                # The risk is that this is an existing dataset that has valid examples
                # populated from another source so we don't want to delete it.
                logger.error(
                    f"An error occurred while creating dataset {dataset_name}. "
                    "You should delete it manually."
                )
                raise e
        finally:
            # Drop the source client reference regardless of outcome.
            del source_client
        return dataset
  3974. def _get_data_type(self, dataset_id: ID_TYPE) -> ls_schemas.DataType:
  3975. dataset = self.read_dataset(dataset_id=dataset_id)
  3976. return dataset.data_type
  3977. @ls_utils.xor_args(("dataset_id", "dataset_name"))
  3978. def create_llm_example(
  3979. self,
  3980. prompt: str,
  3981. generation: Optional[str] = None,
  3982. dataset_id: Optional[ID_TYPE] = None,
  3983. dataset_name: Optional[str] = None,
  3984. created_at: Optional[datetime.datetime] = None,
  3985. ) -> ls_schemas.Example:
  3986. """Add an example (row) to an LLM-type dataset.
  3987. Args:
  3988. prompt (str):
  3989. The input prompt for the example.
  3990. generation (Optional[str]):
  3991. The output generation for the example.
  3992. dataset_id (Optional[Union[UUID, str]]):
  3993. The ID of the dataset.
  3994. dataset_name (Optional[str]):
  3995. The name of the dataset.
  3996. created_at (Optional[datetime.datetime]):
  3997. The creation timestamp of the example.
  3998. Returns:
  3999. Example: The created example
  4000. """
  4001. return self.create_example(
  4002. inputs={"input": prompt},
  4003. outputs={"output": generation},
  4004. dataset_id=dataset_id,
  4005. dataset_name=dataset_name,
  4006. created_at=created_at,
  4007. )
  4008. @ls_utils.xor_args(("dataset_id", "dataset_name"))
  4009. def create_chat_example(
  4010. self,
  4011. messages: list[Union[Mapping[str, Any], ls_schemas.BaseMessageLike]],
  4012. generations: Optional[
  4013. Union[Mapping[str, Any], ls_schemas.BaseMessageLike]
  4014. ] = None,
  4015. dataset_id: Optional[ID_TYPE] = None,
  4016. dataset_name: Optional[str] = None,
  4017. created_at: Optional[datetime.datetime] = None,
  4018. ) -> ls_schemas.Example:
  4019. """Add an example (row) to a Chat-type dataset.
  4020. Args:
  4021. messages (List[Union[Mapping[str, Any], BaseMessageLike]]):
  4022. The input messages for the example.
  4023. generations (Optional[Union[Mapping[str, Any], BaseMessageLike]]):
  4024. The output messages for the example.
  4025. dataset_id (Optional[Union[UUID, str]]):
  4026. The ID of the dataset.
  4027. dataset_name (Optional[str]):
  4028. The name of the dataset.
  4029. created_at (Optional[datetime.datetime]):
  4030. The creation timestamp of the example.
  4031. Returns:
  4032. Example: The created example
  4033. """
  4034. final_input = []
  4035. for message in messages:
  4036. if ls_utils.is_base_message_like(message):
  4037. final_input.append(
  4038. ls_utils.convert_langchain_message(
  4039. cast(ls_schemas.BaseMessageLike, message)
  4040. )
  4041. )
  4042. else:
  4043. final_input.append(cast(dict, message))
  4044. final_generations = None
  4045. if generations is not None:
  4046. if ls_utils.is_base_message_like(generations):
  4047. final_generations = ls_utils.convert_langchain_message(
  4048. cast(ls_schemas.BaseMessageLike, generations)
  4049. )
  4050. else:
  4051. final_generations = cast(dict, generations)
  4052. return self.create_example(
  4053. inputs={"input": final_input},
  4054. outputs=(
  4055. {"output": final_generations} if final_generations is not None else None
  4056. ),
  4057. dataset_id=dataset_id,
  4058. dataset_name=dataset_name,
  4059. created_at=created_at,
  4060. )
  4061. def create_example_from_run(
  4062. self,
  4063. run: ls_schemas.Run,
  4064. dataset_id: Optional[ID_TYPE] = None,
  4065. dataset_name: Optional[str] = None,
  4066. created_at: Optional[datetime.datetime] = None,
  4067. ) -> ls_schemas.Example:
  4068. """Add an example (row) to a dataset from a run.
  4069. Args:
  4070. run (Run): The run to create an example from.
  4071. dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
  4072. dataset_name (Optional[str]): The name of the dataset.
  4073. created_at (Optional[datetime.datetime]): The creation timestamp of the example.
  4074. Returns:
  4075. Example: The created example
  4076. """
  4077. if dataset_id is None:
  4078. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  4079. dataset_name = None # Nested call expects only 1 defined
  4080. dataset_type = self._get_data_type_cached(dataset_id)
  4081. if dataset_type == ls_schemas.DataType.llm:
  4082. if run.run_type != "llm":
  4083. raise ValueError(
  4084. f"Run type {run.run_type} is not supported"
  4085. " for dataset of type 'LLM'"
  4086. )
  4087. try:
  4088. prompt = ls_utils.get_prompt_from_inputs(run.inputs)
  4089. except ValueError:
  4090. raise ValueError(
  4091. "Error converting LLM run inputs to prompt for run"
  4092. f" {run.id} with inputs {run.inputs}"
  4093. )
  4094. inputs: dict[str, Any] = {"input": prompt}
  4095. if not run.outputs:
  4096. outputs: Optional[dict[str, Any]] = None
  4097. else:
  4098. try:
  4099. generation = ls_utils.get_llm_generation_from_outputs(run.outputs)
  4100. except ValueError:
  4101. raise ValueError(
  4102. "Error converting LLM run outputs to generation for run"
  4103. f" {run.id} with outputs {run.outputs}"
  4104. )
  4105. outputs = {"output": generation}
  4106. elif dataset_type == ls_schemas.DataType.chat:
  4107. if run.run_type != "llm":
  4108. raise ValueError(
  4109. f"Run type {run.run_type} is not supported"
  4110. " for dataset of type 'chat'"
  4111. )
  4112. try:
  4113. inputs = {"input": ls_utils.get_messages_from_inputs(run.inputs)}
  4114. except ValueError:
  4115. raise ValueError(
  4116. "Error converting LLM run inputs to chat messages for run"
  4117. f" {run.id} with inputs {run.inputs}"
  4118. )
  4119. if not run.outputs:
  4120. outputs = None
  4121. else:
  4122. try:
  4123. outputs = {
  4124. "output": ls_utils.get_message_generation_from_outputs(
  4125. run.outputs
  4126. )
  4127. }
  4128. except ValueError:
  4129. raise ValueError(
  4130. "Error converting LLM run outputs to chat generations"
  4131. f" for run {run.id} with outputs {run.outputs}"
  4132. )
  4133. elif dataset_type == ls_schemas.DataType.kv:
  4134. # Anything goes
  4135. inputs = run.inputs
  4136. outputs = run.outputs
  4137. else:
  4138. raise ValueError(f"Dataset type {dataset_type} not recognized.")
  4139. return self.create_example(
  4140. inputs=inputs,
  4141. outputs=outputs,
  4142. dataset_id=dataset_id,
  4143. dataset_name=dataset_name,
  4144. created_at=created_at,
  4145. )
    def _prepare_multipart_data(
        self,
        examples: Union[
            list[ls_schemas.ExampleCreate]
            | list[ls_schemas.ExampleUpsertWithAttachments]
            | list[ls_schemas.ExampleUpdate],
        ],
        include_dataset_id: bool = False,
        dangerously_allow_filesystem: bool = False,
    ) -> tuple[Any, bytes, dict[str, io.BufferedReader]]:
        """Serialize examples into multipart form parts for the examples endpoints.

        Each example contributes several named parts: a JSON body part keyed by
        the example id, optional ``{id}.inputs`` / ``{id}.outputs`` JSON parts,
        one part per attachment, and (for updates only) an
        ``{id}.attachments_operations`` part.

        Args:
            examples: Example create/upsert/update payloads to serialize.
            include_dataset_id: If True, embed each example's ``dataset_id`` in
                its body part. Only valid for ``ExampleUpsertWithAttachments``.
            dangerously_allow_filesystem: Permit ``Path`` attachment values to
                be opened and streamed from the local filesystem.

        Returns:
            A ``(encoder, data, opened_files)`` tuple. ``data`` is the fully
            serialized bytes for payloads up to ~20 MB, otherwise the encoder
            itself so the request can stream. ``opened_files`` maps unique keys
            to open file handles that the CALLER must close after the request.

        Raises:
            ValueError: On an unsupported example type, or a ``Path`` attachment
                without ``dangerously_allow_filesystem``.
        """
        parts: list[MultipartPart] = []
        # Handles opened for Path attachments; returned so callers can close
        # them once the HTTP request has been sent.
        # NOTE(review): if this function raises partway through the loop,
        # already-opened handles are not closed here — confirm callers tolerate
        # that (they only close what is returned).
        opened_files_dict: dict[str, io.BufferedReader] = {}
        if include_dataset_id:
            if not isinstance(examples[0], ls_schemas.ExampleUpsertWithAttachments):
                raise ValueError(
                    "The examples must be of type ExampleUpsertWithAttachments"
                    " if include_dataset_id is True"
                )
            # All examples share the first example's dataset_id in their body.
            dataset_id = examples[0].dataset_id
        for example in examples:
            if (
                not isinstance(example, ls_schemas.ExampleCreate)
                and not isinstance(example, ls_schemas.ExampleUpsertWithAttachments)
                and not isinstance(example, ls_schemas.ExampleUpdate)
            ):
                raise ValueError(
                    "The examples must be of type ExampleCreate"
                    " or ExampleUpsertWithAttachments"
                    " or ExampleUpdate"
                )
            # Every part for this example is namespaced by its (possibly
            # freshly generated) id.
            if example.id is not None:
                example_id = str(example.id)
            else:
                example_id = str(uuid.uuid4())
            if isinstance(example, ls_schemas.ExampleUpdate):
                # Updates never override the stored creation timestamp.
                created_at = None
            else:
                created_at = example.created_at
            # Source-run linkage fields only exist on ExampleCreate.
            if isinstance(example, ls_schemas.ExampleCreate):
                use_source_run_io = example.use_source_run_io
                use_source_run_attachments = example.use_source_run_attachments
                source_run_id = example.source_run_id
            else:
                use_source_run_io, use_source_run_attachments, source_run_id = (
                    None,
                    None,
                    None,
                )
            # Body part: only truthy/provided fields are included.
            example_body = {
                **({"dataset_id": dataset_id} if include_dataset_id else {}),
                **({"created_at": created_at} if created_at is not None else {}),
                **(
                    {"use_source_run_io": use_source_run_io}
                    if use_source_run_io
                    else {}
                ),
                **(
                    {"use_source_run_attachments": use_source_run_attachments}
                    if use_source_run_attachments
                    else {}
                ),
                **({"source_run_id": source_run_id} if source_run_id else {}),
            }
            if example.metadata is not None:
                example_body["metadata"] = example.metadata
            if example.split is not None:
                example_body["split"] = example.split
            valb = _dumps_json(example_body)
            parts.append(
                (
                    f"{example_id}",
                    (
                        None,
                        valb,
                        "application/json",
                        {},
                    ),
                )
            )
            if example.inputs is not None:
                inputsb = _dumps_json(example.inputs)
                parts.append(
                    (
                        f"{example_id}.inputs",
                        (
                            None,
                            inputsb,
                            "application/json",
                            {},
                        ),
                    )
                )
            if example.outputs is not None:
                outputsb = _dumps_json(example.outputs)
                parts.append(
                    (
                        f"{example_id}.outputs",
                        (
                            None,
                            outputsb,
                            "application/json",
                            {},
                        ),
                    )
                )
            if example.attachments:
                for name, attachment in example.attachments.items():
                    # Attachments come as either {"mime_type", "data"} dicts or
                    # (mime_type, data) tuples.
                    if isinstance(attachment, dict):
                        mime_type = attachment["mime_type"]
                        attachment_data = attachment["data"]
                    else:
                        mime_type, attachment_data = attachment
                    if isinstance(attachment_data, Path):
                        if dangerously_allow_filesystem:
                            try:
                                file_size = os.path.getsize(attachment_data)
                                file = open(attachment_data, "rb")
                            except FileNotFoundError:
                                # Best-effort: skip missing files rather than
                                # failing the whole batch.
                                logger.warning(
                                    "Attachment file not found for example %s: %s",
                                    example_id,
                                    attachment_data,
                                )
                                continue
                            # Key includes a UUID so identical paths used by
                            # multiple examples don't collide.
                            opened_files_dict[
                                str(attachment_data) + str(uuid.uuid4())
                            ] = file
                            parts.append(
                                (
                                    f"{example_id}.attachment.{name}",
                                    (
                                        None,
                                        file,  # type: ignore[arg-type]
                                        f"{mime_type}; length={file_size}",
                                        {},
                                    ),
                                )
                            )
                        else:
                            raise ValueError(
                                "dangerously_allow_filesystem must be True to upload files from the filesystem"
                            )
                    else:
                        # In-memory attachment data (supports len()).
                        parts.append(
                            (
                                f"{example_id}.attachment.{name}",
                                (
                                    None,
                                    attachment_data,
                                    f"{mime_type}; length={len(attachment_data)}",
                                    {},
                                ),
                            )
                        )
            if (
                isinstance(example, ls_schemas.ExampleUpdate)
                and example.attachments_operations
            ):
                attachments_operationsb = _dumps_json(example.attachments_operations)
                parts.append(
                    (
                        f"{example_id}.attachments_operations",
                        (
                            None,
                            attachments_operationsb,
                            "application/json",
                            {},
                        ),
                    )
                )
        encoder = rqtb_multipart.MultipartEncoder(parts, boundary=_BOUNDARY)
        # Small payloads are fully serialized up front; larger ones return the
        # encoder itself so the HTTP layer can stream it.
        if encoder.len <= 20_000_000:  # ~20 MB
            data = encoder.to_string()
        else:
            data = encoder
        return encoder, data, opened_files_dict
  4322. def update_examples_multipart(
  4323. self,
  4324. *,
  4325. dataset_id: ID_TYPE,
  4326. updates: Optional[list[ls_schemas.ExampleUpdate]] = None,
  4327. dangerously_allow_filesystem: bool = False,
  4328. ) -> ls_schemas.UpsertExamplesResponse:
  4329. """Update examples using multipart.
  4330. .. deprecated:: 0.3.9
  4331. Use Client.update_examples instead. Will be removed in 0.4.0.
  4332. """
  4333. return self._update_examples_multipart(
  4334. dataset_id=dataset_id,
  4335. updates=updates,
  4336. dangerously_allow_filesystem=dangerously_allow_filesystem,
  4337. )
    def _update_examples_multipart(
        self,
        *,
        dataset_id: ID_TYPE,
        updates: Optional[list[ls_schemas.ExampleUpdate]] = None,
        dangerously_allow_filesystem: bool = False,
    ) -> ls_schemas.UpsertExamplesResponse:
        """Update examples using multipart.

        Serializes the updates (including any attachments) into a multipart
        payload and PATCHes it to the dataset's examples endpoint.

        Args:
            dataset_id (Union[UUID, str]): The ID of the dataset to update.
            updates (Optional[List[ExampleUpdate]]): The updates to apply to the examples.
            dangerously_allow_filesystem (bool): Whether to allow uploading
                files from the filesystem as attachments.

        Returns:
            The parsed JSON response from the server.

        Raises:
            ValueError: If the multipart examples endpoint is not enabled.
        """
        # The multipart examples endpoint requires explicit server support,
        # advertised via an instance flag.
        if not (self.info.instance_flags or {}).get(
            "dataset_examples_multipart_enabled", False
        ):
            raise ValueError(
                "Your LangSmith deployment does not allow using the latest examples "
                "endpoints, please upgrade your deployment to the latest version or downgrade your SDK "
                "to langsmith<0.3.9."
            )
        if updates is None:
            updates = []
        encoder, data, opened_files_dict = self._prepare_multipart_data(
            updates,
            include_dataset_id=False,
            dangerously_allow_filesystem=dangerously_allow_filesystem,
        )
        try:
            response = self.request_with_retries(
                "PATCH",
                _dataset_examples_path(self.api_url, dataset_id),
                request_kwargs={
                    "data": data,
                    "headers": {
                        **self._headers,
                        "Content-Type": encoder.content_type,
                    },
                },
            )
            ls_utils.raise_for_status_with_text(response)
        finally:
            # Always close any filesystem attachments opened during
            # serialization, even if the request failed.
            _close_files(list(opened_files_dict.values()))
        return response.json()
  4383. def upload_examples_multipart(
  4384. self,
  4385. *,
  4386. dataset_id: ID_TYPE,
  4387. uploads: Optional[list[ls_schemas.ExampleCreate]] = None,
  4388. dangerously_allow_filesystem: bool = False,
  4389. ) -> ls_schemas.UpsertExamplesResponse:
  4390. """Upload examples using multipart.
  4391. .. deprecated:: 0.3.9
  4392. Use Client.create_examples instead. Will be removed in 0.4.0.
  4393. """
  4394. return self._upload_examples_multipart(
  4395. dataset_id=dataset_id,
  4396. uploads=uploads,
  4397. dangerously_allow_filesystem=dangerously_allow_filesystem,
  4398. )
  4399. def _estimate_example_size(self, example: ls_schemas.ExampleCreate) -> int:
  4400. """Estimate the size of an example in bytes for batching purposes."""
  4401. size = 1000 # Base overhead for JSON structure and boundaries
  4402. if example.inputs:
  4403. size += len(_dumps_json(example.inputs))
  4404. if example.outputs:
  4405. size += len(_dumps_json(example.outputs))
  4406. if example.metadata:
  4407. size += len(_dumps_json(example.metadata))
  4408. # Estimate attachments
  4409. if example.attachments:
  4410. for _, attachment in example.attachments.items():
  4411. if isinstance(attachment, dict):
  4412. attachment_data = attachment["data"]
  4413. else:
  4414. _, attachment_data = attachment
  4415. if isinstance(attachment_data, Path):
  4416. try:
  4417. size += os.path.getsize(attachment_data)
  4418. except (FileNotFoundError, OSError):
  4419. size += 1_000_000 # 1MB fallback estimate
  4420. else:
  4421. size += len(attachment_data)
  4422. size += 200 # Multipart headers overhead per attachment
  4423. return size
  4424. def _batch_examples_by_size(
  4425. self,
  4426. examples: list[ls_schemas.ExampleCreate],
  4427. max_batch_size_bytes: int = 20_000_000, # 20MB limit per batch
  4428. ) -> list[list[ls_schemas.ExampleCreate]]:
  4429. """Batch examples by size limits."""
  4430. batches = []
  4431. current_batch: list[ls_schemas.ExampleCreate] = []
  4432. current_size = 0
  4433. for example in examples:
  4434. example_size = self._estimate_example_size(example)
  4435. # Handle oversized single examples
  4436. if example_size > max_batch_size_bytes:
  4437. # Flush current batch first
  4438. if current_batch:
  4439. batches.append(current_batch)
  4440. current_batch = []
  4441. current_size = 0
  4442. # oversized example
  4443. batches.append([example])
  4444. continue
  4445. size_exceeded = current_size + example_size > max_batch_size_bytes
  4446. # new batch
  4447. if current_batch and size_exceeded:
  4448. batches.append(current_batch)
  4449. current_batch = [example]
  4450. current_size = example_size
  4451. else:
  4452. current_batch.append(example)
  4453. current_size += example_size
  4454. # final batch
  4455. if current_batch:
  4456. batches.append(current_batch)
  4457. return batches
    def _upload_examples_multipart(
        self,
        *,
        dataset_id: ID_TYPE,
        uploads: Optional[list[ls_schemas.ExampleCreate]] = None,
        dangerously_allow_filesystem: bool = False,
    ) -> ls_schemas.UpsertExamplesResponse:
        """Upload examples using multipart.

        Serializes the examples (including any attachments) into a multipart
        payload and POSTs it to the dataset's examples endpoint.

        Args:
            dataset_id (Union[UUID, str]): The ID of the dataset to upload to.
            uploads (Optional[List[ExampleCreate]]): The examples to upload.
            dangerously_allow_filesystem (bool): Whether to allow uploading files from the filesystem.

        Returns:
            ls_schemas.UpsertExamplesResponse: The count and ids of the successfully uploaded examples.

        Raises:
            ValueError: If the multipart examples endpoint is not enabled.
        """
        # The multipart examples endpoint requires explicit server support,
        # advertised via an instance flag.
        if not (self.info.instance_flags or {}).get(
            "dataset_examples_multipart_enabled", False
        ):
            raise ValueError(
                "Your LangSmith deployment does not allow using the multipart examples endpoint, please upgrade your deployment to the latest version."
            )
        if uploads is None:
            uploads = []
        encoder, data, opened_files_dict = self._prepare_multipart_data(
            uploads,
            include_dataset_id=False,
            dangerously_allow_filesystem=dangerously_allow_filesystem,
        )
        try:
            response = self.request_with_retries(
                "POST",
                _dataset_examples_path(self.api_url, dataset_id),
                request_kwargs={
                    "data": data,
                    "headers": {
                        **self._headers,
                        "Content-Type": encoder.content_type,
                    },
                },
            )
            ls_utils.raise_for_status_with_text(response)
        finally:
            # Always close any filesystem attachments opened during
            # serialization, even if the request failed.
            _close_files(list(opened_files_dict.values()))
        return response.json()
  4504. def upsert_examples_multipart(
  4505. self,
  4506. *,
  4507. upserts: Optional[list[ls_schemas.ExampleUpsertWithAttachments]] = None,
  4508. dangerously_allow_filesystem: bool = False,
  4509. ) -> ls_schemas.UpsertExamplesResponse:
  4510. """Upsert examples.
  4511. .. deprecated:: 0.3.9
  4512. Use Client.create_examples and Client.update_examples instead. Will be
  4513. removed in 0.4.0.
  4514. """
  4515. if not (self.info.instance_flags or {}).get(
  4516. "examples_multipart_enabled", False
  4517. ):
  4518. raise ValueError(
  4519. "Your LangSmith deployment does not allow using the multipart examples endpoint, please upgrade your deployment to the latest version."
  4520. )
  4521. if upserts is None:
  4522. upserts = []
  4523. encoder, data, opened_files_dict = self._prepare_multipart_data(
  4524. upserts,
  4525. include_dataset_id=True,
  4526. dangerously_allow_filesystem=dangerously_allow_filesystem,
  4527. )
  4528. try:
  4529. response = self.request_with_retries(
  4530. "POST",
  4531. (
  4532. "/v1/platform/examples/multipart"
  4533. if self.api_url[-3:] != "/v1" and self.api_url[-4:] != "/v1/"
  4534. else "/platform/examples/multipart"
  4535. ),
  4536. request_kwargs={
  4537. "data": data,
  4538. "headers": {
  4539. **self._headers,
  4540. "Content-Type": encoder.content_type,
  4541. },
  4542. },
  4543. )
  4544. ls_utils.raise_for_status_with_text(response)
  4545. finally:
  4546. _close_files(list(opened_files_dict.values()))
  4547. return response.json()
    @ls_utils.xor_args(("dataset_id", "dataset_name"))
    def create_examples(
        self,
        *,
        dataset_name: Optional[str] = None,
        dataset_id: Optional[ID_TYPE] = None,
        examples: Optional[Sequence[ls_schemas.ExampleCreate | dict]] = None,
        dangerously_allow_filesystem: bool = False,
        max_concurrency: Annotated[int, Field(ge=1, le=3)] = 1,
        **kwargs: Any,
    ) -> ls_schemas.UpsertExamplesResponse | dict[str, Any]:
        """Create examples in a dataset.

        Args:
            dataset_name (str | None):
                The name of the dataset to create the examples in. Must specify exactly
                one of dataset_name or dataset_id.
            dataset_id (UUID | str | None):
                The ID of the dataset to create the examples in. Must specify exactly
                one of dataset_name or dataset_id
            examples (Sequence[ExampleCreate | dict]):
                The examples to create.
            dangerously_allow_filesystem (bool):
                Whether to allow uploading files from the filesystem.
            max_concurrency (int):
                Number of threads used to upload size-limited batches in
                parallel. Must be between 1 and 3.
            **kwargs (Any): Legacy keyword args. Should not be specified if 'examples' is specified.

                - inputs (Sequence[Mapping[str, Any]]): The input values for the examples.
                - outputs (Optional[Sequence[Optional[Mapping[str, Any]]]]): The output values for the examples.
                - metadata (Optional[Sequence[Optional[Mapping[str, Any]]]]): The metadata for the examples.
                - splits (Optional[Sequence[Optional[str | List[str]]]]): The splits for the examples, which are divisions of your dataset such as 'train', 'test', or 'validation'.
                - source_run_ids (Optional[Sequence[Optional[Union[UUID, str]]]]): The IDs of the source runs associated with the examples.
                - ids (Optional[Sequence[Union[UUID, str]]]): The IDs of the examples.

        Raises:
            ValueError: If 'examples' and legacy args are both provided, if an
                unsupported legacy kwarg is passed, if max_concurrency is out of
                range, or if neither 'examples' nor 'inputs' is given.

        Returns:
            The LangSmith JSON response. Includes 'count' and 'example_ids'.

        !!! warning "Behavior changed in `langsmith` 0.3.11"

            Updated to take argument 'examples', a single list where each
            element is the full example to create. This should be used instead of the
            legacy 'inputs', 'outputs', etc. arguments which split each examples
            attributes across arguments.

            Updated to support creating examples with attachments.

        Example:
            ```python
            from langsmith import Client

            client = Client()

            dataset = client.create_dataset("agent-qa")

            examples = [
                {
                    "inputs": {"question": "what's an agent"},
                    "outputs": {"answer": "an agent is..."},
                    "metadata": {"difficulty": "easy"},
                },
                {
                    "inputs": {
                        "question": "can you explain the agent architecture in this diagram?"
                    },
                    "outputs": {"answer": "this diagram shows..."},
                    "attachments": {"diagram": {"mime_type": "image/png", "data": b"..."}},
                    "metadata": {"difficulty": "medium"},
                },
                # more examples...
            ]

            response = client.create_examples(dataset_name="agent-qa", examples=examples)
            # -> {"example_ids": [...
            ```
        """  # noqa: E501
        # Enforced at runtime as well as via the Annotated/Field hint.
        if not 1 <= max_concurrency <= 3:
            raise ValueError("max_concurrency must be between 1 and 3")
        # The new-style 'examples' arg and the legacy per-attribute kwargs are
        # mutually exclusive.
        if kwargs and examples:
            kwarg_keys = ", ".join([f"'{k}'" for k in kwargs])
            raise ValueError(
                f"Cannot specify {kwarg_keys} when 'examples' is specified."
            )
        supported_kwargs = {
            "inputs",
            "outputs",
            "metadata",
            "splits",
            "ids",
            "source_run_ids",
        }
        if kwargs and (unsupported := set(kwargs).difference(supported_kwargs)):
            raise ValueError(
                f"Received unsupported keyword arguments: {tuple(unsupported)}."
            )
        if not (dataset_id or dataset_name):
            raise ValueError("Either dataset_id or dataset_name must be provided.")
        elif not dataset_id:
            dataset_id = self.read_dataset(dataset_name=dataset_name).id
        if examples:
            # Coerce plain dicts into ExampleCreate models.
            uploads = [
                ls_schemas.ExampleCreate(**x) if isinstance(x, dict) else x
                for x in examples
            ]
        # For backwards compatibility
        else:
            inputs = kwargs.get("inputs")
            if not inputs:
                raise ValueError("Must specify either 'examples' or 'inputs.'")
            # Since inputs are required, we will check against them
            input_len = len(inputs)
            for arg_name, arg_value in kwargs.items():
                if arg_value is not None and len(arg_value) != input_len:
                    raise ValueError(
                        f"Length of {arg_name} ({len(arg_value)}) does not match"
                        f" length of inputs ({input_len})"
                    )
            # Zip the parallel legacy sequences into one ExampleCreate per row,
            # defaulting absent sequences to all-None.
            uploads = [
                ls_schemas.ExampleCreate(
                    **{
                        "inputs": in_,
                        "outputs": out_,
                        "metadata": metadata_,
                        "split": split_,
                        "id": id_ or str(uuid.uuid4()),
                        "source_run_id": source_run_id_,
                    }
                )
                for in_, out_, metadata_, split_, id_, source_run_id_ in zip(
                    inputs,
                    kwargs.get("outputs") or (None for _ in range(input_len)),
                    kwargs.get("metadata") or (None for _ in range(input_len)),
                    kwargs.get("splits") or (None for _ in range(input_len)),
                    kwargs.get("ids") or (None for _ in range(input_len)),
                    kwargs.get("source_run_ids") or (None for _ in range(input_len)),
                )
            ]
        if not uploads:
            return ls_schemas.UpsertExamplesResponse(example_ids=[], count=0)
        # Use size-aware batching to prevent payload limit errors
        batches = self._batch_examples_by_size(uploads)
        return self._upload_examples_batches_parallel(
            batches, dataset_id, dangerously_allow_filesystem, max_concurrency
        )
    def _upload_examples_batches_parallel(
        self,
        batches: list[list[ls_schemas.ExampleCreate]],
        dataset_id: ID_TYPE,
        dangerously_allow_filesystem: bool,
        max_concurrency: int,
    ) -> ls_schemas.UpsertExamplesResponse:
        """Upload multiple example batches concurrently and merge the responses.

        Args:
            batches: Lists of examples, each sized to fit in a single request.
            dataset_id: The dataset to upload into.
            dangerously_allow_filesystem: Whether filesystem attachments may be read.
            max_concurrency: Number of worker threads used for the uploads.

        Returns:
            UpsertExamplesResponse: Combined example ids and total count across
            all batches. Note: results are merged in completion order, so the
            id ordering may differ from the input ordering.
        """
        all_examples_ids = []
        total_count = 0
        # NOTE(review): imported locally rather than at module level —
        # presumably to defer the import; confirm whether this avoids a cycle.
        from langsmith.utils import ContextThreadPoolExecutor

        with ContextThreadPoolExecutor(max_workers=max_concurrency) as executor:
            # submit all batch uploads to thread pool
            futures = [
                executor.submit(
                    self._upload_single_batch,
                    batch,
                    dataset_id,
                    dangerously_allow_filesystem,
                )
                for batch in batches
            ]
            # collect results as they complete
            for future in cf.as_completed(futures):
                response = future.result()
                all_examples_ids.extend(response.get("example_ids", []))
                total_count += response.get("count", 0)
        return ls_schemas.UpsertExamplesResponse(
            example_ids=all_examples_ids, count=total_count
        )
  4706. def _upload_single_batch(self, batch, dataset_id, dangerously_allow_filesystem):
  4707. """Upload a single batch of examples (used by both sequential and parallel)."""
  4708. if (self.info.instance_flags or {}).get(
  4709. "dataset_examples_multipart_enabled", False
  4710. ):
  4711. response = self._upload_examples_multipart(
  4712. dataset_id=cast(uuid.UUID, dataset_id),
  4713. uploads=batch, # batch is a list of ExampleCreate objects
  4714. dangerously_allow_filesystem=dangerously_allow_filesystem,
  4715. )
  4716. return {
  4717. "example_ids": response.get("example_ids", []),
  4718. "count": response.get("count", 0),
  4719. }
  4720. else:
  4721. # Strip attachments for legacy endpoint
  4722. for upload in batch:
  4723. if getattr(upload, "attachments") is not None:
  4724. upload.attachments = None
  4725. warnings.warn(
  4726. "Must upgrade your LangSmith version to use attachments."
  4727. )
  4728. response = self.request_with_retries(
  4729. "POST",
  4730. "/examples/bulk",
  4731. headers={**self._headers, "Content-Type": "application/json"},
  4732. data=_dumps_json(
  4733. [
  4734. {**dump_model(upload), "dataset_id": str(dataset_id)}
  4735. for upload in batch
  4736. ]
  4737. ),
  4738. )
  4739. ls_utils.raise_for_status_with_text(response)
  4740. response_data = response.json()
  4741. return {
  4742. "example_ids": [data["id"] for data in response_data],
  4743. "count": len(response_data),
  4744. }
  4745. @ls_utils.xor_args(("dataset_id", "dataset_name"))
  4746. def create_example(
  4747. self,
  4748. inputs: Optional[Mapping[str, Any]] = None,
  4749. dataset_id: Optional[ID_TYPE] = None,
  4750. dataset_name: Optional[str] = None,
  4751. created_at: Optional[datetime.datetime] = None,
  4752. outputs: Optional[Mapping[str, Any]] = None,
  4753. metadata: Optional[Mapping[str, Any]] = None,
  4754. split: Optional[str | list[str]] = None,
  4755. example_id: Optional[ID_TYPE] = None,
  4756. source_run_id: Optional[ID_TYPE] = None,
  4757. use_source_run_io: bool = False,
  4758. use_source_run_attachments: Optional[list[str]] = None,
  4759. attachments: Optional[ls_schemas.Attachments] = None,
  4760. ) -> ls_schemas.Example:
  4761. """Create a dataset example in the LangSmith API.
  4762. Examples are rows in a dataset, containing the inputs
  4763. and expected outputs (or other reference information)
  4764. for a model or chain.
  4765. Args:
  4766. inputs (Mapping[str, Any]):
  4767. The input values for the example.
  4768. dataset_id (Optional[Union[UUID, str]]):
  4769. The ID of the dataset to create the example in.
  4770. dataset_name (Optional[str]):
  4771. The name of the dataset to create the example in.
  4772. created_at (Optional[datetime.datetime]):
  4773. The creation timestamp of the example.
  4774. outputs (Optional[Mapping[str, Any]]):
  4775. The output values for the example.
  4776. metadata (Optional[Mapping[str, Any]]):
  4777. The metadata for the example.
  4778. split (Optional[str | List[str]]):
  4779. The splits for the example, which are divisions
  4780. of your dataset such as 'train', 'test', or 'validation'.
  4781. example_id (Optional[Union[UUID, str]]):
  4782. The ID of the example to create. If not provided, a new
  4783. example will be created.
  4784. source_run_id (Optional[Union[UUID, str]]):
  4785. The ID of the source run associated with this example.
  4786. use_source_run_io (bool):
  4787. Whether to use the inputs, outputs, and attachments from the source run.
  4788. use_source_run_attachments (Optional[List[str]]):
  4789. Which attachments to use from the source run. If use_source_run_io
  4790. is True, all attachments will be used regardless of this param.
  4791. attachments (Optional[Attachments]):
  4792. The attachments for the example.
  4793. Returns:
  4794. Example: The created example.
  4795. """
  4796. if inputs is None and not use_source_run_io:
  4797. raise ValueError("Must provide either inputs or use_source_run_io")
  4798. if dataset_id is None:
  4799. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  4800. data = ls_schemas.ExampleCreate(
  4801. **{
  4802. "inputs": inputs,
  4803. "outputs": outputs,
  4804. "metadata": metadata,
  4805. "split": split,
  4806. "source_run_id": source_run_id,
  4807. "use_source_run_io": use_source_run_io,
  4808. "use_source_run_attachments": use_source_run_attachments,
  4809. "attachments": attachments,
  4810. }
  4811. )
  4812. if created_at:
  4813. data.created_at = created_at
  4814. data.id = (
  4815. (uuid.UUID(example_id) if isinstance(example_id, str) else example_id)
  4816. if example_id
  4817. else uuid.uuid4()
  4818. )
  4819. if (self.info.instance_flags or {}).get(
  4820. "dataset_examples_multipart_enabled", False
  4821. ):
  4822. self._upload_examples_multipart(dataset_id=dataset_id, uploads=[data])
  4823. return self.read_example(example_id=data.id)
  4824. else:
  4825. # fallback to old method
  4826. if getattr(data, "attachments") is not None:
  4827. data.attachments = None
  4828. warnings.warn("Must upgrade your LangSmith version to use attachments")
  4829. response = self.request_with_retries(
  4830. "POST",
  4831. "/examples",
  4832. headers={**self._headers, "Content-Type": "application/json"},
  4833. data=_dumps_json(
  4834. {
  4835. **{k: v for k, v in dump_model(data).items() if v is not None},
  4836. "dataset_id": str(dataset_id),
  4837. }
  4838. ),
  4839. )
  4840. ls_utils.raise_for_status_with_text(response)
  4841. result = response.json()
  4842. return ls_schemas.Example(
  4843. **result,
  4844. _host_url=self._host_url,
  4845. _tenant_id=self._get_optional_tenant_id(),
  4846. )
  4847. def read_example(
  4848. self, example_id: ID_TYPE, *, as_of: Optional[datetime.datetime] = None
  4849. ) -> ls_schemas.Example:
  4850. """Read an example from the LangSmith API.
  4851. Args:
  4852. example_id (Union[UUID, str]): The ID of the example to read.
  4853. as_of (Optional[datetime.datetime]): The dataset version tag OR
  4854. timestamp to retrieve the example as of.
  4855. Response examples will only be those that were present at the time
  4856. of the tagged (or timestamped) version.
  4857. Returns:
  4858. Example: The example.
  4859. """
  4860. response = self.request_with_retries(
  4861. "GET",
  4862. f"/examples/{_as_uuid(example_id, 'example_id')}",
  4863. params={
  4864. "as_of": as_of.isoformat() if as_of else None,
  4865. },
  4866. )
  4867. example = response.json()
  4868. attachments = _convert_stored_attachments_to_attachments_dict(
  4869. example, attachments_key="attachment_urls", api_url=self.api_url
  4870. )
  4871. return ls_schemas.Example(
  4872. **{k: v for k, v in example.items() if k != "attachment_urls"},
  4873. attachments=attachments,
  4874. _host_url=self._host_url,
  4875. _tenant_id=self._get_optional_tenant_id(),
  4876. )
  4877. def list_examples(
  4878. self,
  4879. dataset_id: Optional[ID_TYPE] = None,
  4880. dataset_name: Optional[str] = None,
  4881. example_ids: Optional[Sequence[ID_TYPE]] = None,
  4882. as_of: Optional[Union[datetime.datetime, str]] = None,
  4883. splits: Optional[Sequence[str]] = None,
  4884. inline_s3_urls: bool = True,
  4885. *,
  4886. offset: int = 0,
  4887. limit: Optional[int] = None,
  4888. metadata: Optional[dict] = None,
  4889. filter: Optional[str] = None,
  4890. include_attachments: bool = False,
  4891. **kwargs: Any,
  4892. ) -> Iterator[ls_schemas.Example]:
  4893. r"""Retrieve the example rows of the specified dataset.
  4894. Args:
  4895. dataset_id (Optional[Union[UUID, str]]): The ID of the dataset to filter by.
  4896. dataset_name (Optional[str]): The name of the dataset to filter by.
  4897. example_ids (Optional[Sequence[Union[UUID, str]]): The IDs of the examples to filter by.
  4898. as_of (Optional[Union[datetime.datetime, str]]): The dataset version tag OR
  4899. timestamp to retrieve the examples as of.
  4900. Response examples will only be those that were present at the time
  4901. of the tagged (or timestamped) version.
  4902. splits (Optional[Sequence[str]]): A list of dataset splits, which are
  4903. divisions of your dataset such as 'train', 'test', or 'validation'.
  4904. Returns examples only from the specified splits.
  4905. inline_s3_urls (bool, default=True): Whether to inline S3 URLs.
  4906. offset (int, default=0): The offset to start from. Defaults to 0.
  4907. limit (Optional[int]): The maximum number of examples to return.
  4908. metadata (Optional[dict]): A dictionary of metadata to filter by.
  4909. filter (Optional[str]): A structured filter string to apply to
  4910. the examples.
  4911. include_attachments (bool, default=False): Whether to include the
  4912. attachments in the response.
  4913. **kwargs (Any): Additional keyword arguments are ignored.
  4914. Yields:
  4915. The examples.
  4916. Examples:
  4917. List all examples for a dataset:
  4918. ```python
  4919. from langsmith import Client
  4920. client = Client()
  4921. # By Dataset ID
  4922. examples = client.list_examples(
  4923. dataset_id="c9ace0d8-a82c-4b6c-13d2-83401d68e9ab"
  4924. )
  4925. # By Dataset Name
  4926. examples = client.list_examples(dataset_name="My Test Dataset")
  4927. ```
  4928. List examples by id
  4929. ```python
  4930. example_ids = [
  4931. "734fc6a0-c187-4266-9721-90b7a025751a",
  4932. "d6b4c1b9-6160-4d63-9b61-b034c585074f",
  4933. "4d31df4e-f9c3-4a6e-8b6c-65701c2fed13",
  4934. ]
  4935. examples = client.list_examples(example_ids=example_ids)
  4936. ```
  4937. List examples by metadata
  4938. ```python
  4939. examples = client.list_examples(
  4940. dataset_name=dataset_name, metadata={"foo": "bar"}
  4941. )
  4942. ```
  4943. List examples by structured filter
  4944. ```python
  4945. examples = client.list_examples(
  4946. dataset_name=dataset_name,
  4947. filter='and(not(has(metadata, \'{"foo": "bar"}\')), exists(metadata, "tenant_id"))',
  4948. )
  4949. ```
  4950. """
  4951. params: dict[str, Any] = {
  4952. **kwargs,
  4953. "offset": offset,
  4954. "id": example_ids,
  4955. "as_of": (
  4956. as_of.isoformat() if isinstance(as_of, datetime.datetime) else as_of
  4957. ),
  4958. "splits": splits,
  4959. "inline_s3_urls": inline_s3_urls,
  4960. "limit": min(limit, 100) if limit is not None else 100,
  4961. "filter": filter,
  4962. }
  4963. if metadata is not None:
  4964. params["metadata"] = _dumps_json(metadata)
  4965. if dataset_id is not None:
  4966. params["dataset"] = dataset_id
  4967. elif dataset_name is not None:
  4968. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  4969. params["dataset"] = dataset_id
  4970. else:
  4971. pass
  4972. if include_attachments:
  4973. params["select"] = ["attachment_urls", "outputs", "metadata"]
  4974. for i, example in enumerate(
  4975. self._get_paginated_list("/examples", params=params)
  4976. ):
  4977. attachments = _convert_stored_attachments_to_attachments_dict(
  4978. example, attachments_key="attachment_urls", api_url=self.api_url
  4979. )
  4980. yield ls_schemas.Example(
  4981. **{k: v for k, v in example.items() if k != "attachment_urls"},
  4982. attachments=attachments,
  4983. _host_url=self._host_url,
  4984. _tenant_id=self._get_optional_tenant_id(),
  4985. )
  4986. if limit is not None and i + 1 >= limit:
  4987. break
  4988. @warn_beta
  4989. def index_dataset(
  4990. self,
  4991. *,
  4992. dataset_id: ID_TYPE,
  4993. tag: str = "latest",
  4994. **kwargs: Any,
  4995. ) -> None:
  4996. """Enable dataset indexing. Examples are indexed by their inputs.
  4997. This enables searching for similar examples by inputs with
  4998. ``client.similar_examples()``.
  4999. Args:
  5000. dataset_id (Union[UUID, str]): The ID of the dataset to index.
  5001. tag (Optional[str]): The version of the dataset to index. If 'latest'
  5002. then any updates to the dataset (additions, updates, deletions of
  5003. examples) will be reflected in the index.
  5004. **kwargs (Any): Additional keyword arguments to pass as part of request body.
  5005. Returns:
  5006. None
  5007. """ # noqa: E501
  5008. dataset_id = _as_uuid(dataset_id, "dataset_id")
  5009. resp = self.request_with_retries(
  5010. "POST",
  5011. f"/datasets/{dataset_id}/index",
  5012. headers=self._headers,
  5013. data=json.dumps({"tag": tag, **kwargs}),
  5014. )
  5015. ls_utils.raise_for_status_with_text(resp)
  5016. @warn_beta
  5017. def sync_indexed_dataset(
  5018. self,
  5019. *,
  5020. dataset_id: ID_TYPE,
  5021. **kwargs: Any,
  5022. ) -> None:
  5023. """Sync dataset index.
  5024. This already happens automatically every 5 minutes, but you can call this to
  5025. force a sync.
  5026. Args:
  5027. dataset_id (Union[UUID, str]): The ID of the dataset to sync.
  5028. Returns:
  5029. None
  5030. """ # noqa: E501
  5031. dataset_id = _as_uuid(dataset_id, "dataset_id")
  5032. resp = self.request_with_retries(
  5033. "POST",
  5034. f"/datasets/{dataset_id}/index/sync",
  5035. headers=self._headers,
  5036. data=json.dumps({**kwargs}),
  5037. )
  5038. ls_utils.raise_for_status_with_text(resp)
  5039. # NOTE: dataset_name arg explicitly not supported to avoid extra API calls.
  5040. @warn_beta
  5041. def similar_examples(
  5042. self,
  5043. inputs: dict,
  5044. /,
  5045. *,
  5046. limit: int,
  5047. dataset_id: ID_TYPE,
  5048. filter: Optional[str] = None,
  5049. **kwargs: Any,
  5050. ) -> list[ls_schemas.ExampleSearch]:
  5051. r"""Retrieve the dataset examples whose inputs best match the current inputs.
  5052. !!! note
  5053. Must have few-shot indexing enabled for the dataset. See `client.index_dataset()`.
  5054. Args:
  5055. inputs (dict): The inputs to use as a search query. Must match the dataset
  5056. input schema. Must be JSON serializable.
  5057. limit (int): The maximum number of examples to return.
  5058. dataset_id (Union[UUID, str]): The ID of the dataset to search over.
  5059. filter (Optional[str]): A filter string to apply to the search results. Uses
  5060. the same syntax as the `filter` parameter in `list_runs()`. Only a subset
  5061. of operations are supported.
  5062. For example, you can use ``and(eq(metadata.some_tag, 'some_value'), neq(metadata.env, 'dev'))``
  5063. to filter only examples where some_tag has some_value, and the environment is not dev.
  5064. **kwargs: Additional keyword arguments to pass as part of request body.
  5065. Returns:
  5066. list[ExampleSearch]: List of ExampleSearch objects.
  5067. Examples:
  5068. ```python
  5069. from langsmith import Client
  5070. client = Client()
  5071. client.similar_examples(
  5072. {"question": "When would i use the runnable generator"},
  5073. limit=3,
  5074. dataset_id="...",
  5075. )
  5076. ```
  5077. ```python
  5078. [
  5079. ExampleSearch(
  5080. inputs={
  5081. "question": "How do I cache a Chat model? What caches can I use?"
  5082. },
  5083. outputs={
  5084. "answer": "You can use LangChain's caching layer for Chat Models. This can save you money by reducing the number of API calls you make to the LLM provider, if you're often requesting the same completion multiple times, and speed up your application.\n\nfrom langchain.cache import InMemoryCache\nlangchain.llm_cache = InMemoryCache()\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict('Tell me a joke')\n\nYou can also use SQLite Cache which uses a SQLite database:\n\nrm .langchain.db\n\nfrom langchain.cache import SQLiteCache\nlangchain.llm_cache = SQLiteCache(database_path=\".langchain.db\")\n\n# The first time, it is not yet in cache, so it should take longer\nllm.predict('Tell me a joke') \n"
  5085. },
  5086. metadata=None,
  5087. id=UUID("b2ddd1c4-dff6-49ae-8544-f48e39053398"),
  5088. dataset_id=UUID("01b6ce0f-bfb6-4f48-bbb8-f19272135d40"),
  5089. ),
  5090. ExampleSearch(
  5091. inputs={"question": "What's a runnable lambda?"},
  5092. outputs={
  5093. "answer": "A runnable lambda is an object that implements LangChain's `Runnable` interface and runs a callbale (i.e., a function). Note the function must accept a single argument."
  5094. },
  5095. metadata=None,
  5096. id=UUID("f94104a7-2434-4ba7-8293-6a283f4860b4"),
  5097. dataset_id=UUID("01b6ce0f-bfb6-4f48-bbb8-f19272135d40"),
  5098. ),
  5099. ExampleSearch(
  5100. inputs={"question": "Show me how to use RecursiveURLLoader"},
  5101. outputs={
  5102. "answer": 'The RecursiveURLLoader comes from the langchain.document_loaders.recursive_url_loader module. Here\'s an example of how to use it:\n\nfrom langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader\n\n# Create an instance of RecursiveUrlLoader with the URL you want to load\nloader = RecursiveUrlLoader(url="https://example.com")\n\n# Load all child links from the URL page\nchild_links = loader.load()\n\n# Print the child links\nfor link in child_links:\n print(link)\n\nMake sure to replace "https://example.com" with the actual URL you want to load. The load() method returns a list of child links found on the URL page. You can iterate over this list to access each child link.'
  5103. },
  5104. metadata=None,
  5105. id=UUID("0308ea70-a803-4181-a37d-39e95f138f8c"),
  5106. dataset_id=UUID("01b6ce0f-bfb6-4f48-bbb8-f19272135d40"),
  5107. ),
  5108. ]
  5109. ```
  5110. """
  5111. dataset_id = _as_uuid(dataset_id, "dataset_id")
  5112. req = {
  5113. "inputs": inputs,
  5114. "limit": limit,
  5115. **kwargs,
  5116. }
  5117. if filter is not None:
  5118. req["filter"] = filter
  5119. resp = self.request_with_retries(
  5120. "POST",
  5121. f"/datasets/{dataset_id}/search",
  5122. headers=self._headers,
  5123. data=json.dumps(req),
  5124. )
  5125. ls_utils.raise_for_status_with_text(resp)
  5126. examples = []
  5127. for ex in resp.json()["examples"]:
  5128. examples.append(ls_schemas.ExampleSearch(**ex, dataset_id=dataset_id))
  5129. return examples
  5130. def update_example(
  5131. self,
  5132. example_id: ID_TYPE,
  5133. *,
  5134. inputs: Optional[dict[str, Any]] = None,
  5135. outputs: Optional[Mapping[str, Any]] = None,
  5136. metadata: Optional[dict] = None,
  5137. split: Optional[str | list[str]] = None,
  5138. dataset_id: Optional[ID_TYPE] = None,
  5139. attachments_operations: Optional[ls_schemas.AttachmentsOperations] = None,
  5140. attachments: Optional[ls_schemas.Attachments] = None,
  5141. ) -> dict[str, Any]:
  5142. """Update a specific example.
  5143. Args:
  5144. example_id (Union[UUID, str]):
  5145. The ID of the example to update.
  5146. inputs (Optional[Dict[str, Any]]):
  5147. The input values to update.
  5148. outputs (Optional[Mapping[str, Any]]):
  5149. The output values to update.
  5150. metadata (Optional[Dict]):
  5151. The metadata to update.
  5152. split (Optional[str | List[str]]):
  5153. The dataset split to update, such as
  5154. 'train', 'test', or 'validation'.
  5155. dataset_id (Optional[Union[UUID, str]]):
  5156. The ID of the dataset to update.
  5157. attachments_operations (Optional[AttachmentsOperations]):
  5158. The attachments operations to perform.
  5159. attachments (Optional[Attachments]):
  5160. The attachments to add to the example.
  5161. Returns:
  5162. Dict[str, Any]: The updated example.
  5163. """
  5164. if attachments_operations is not None:
  5165. if not (self.info.instance_flags or {}).get(
  5166. "dataset_examples_multipart_enabled", False
  5167. ):
  5168. raise ValueError(
  5169. "Your LangSmith deployment does not allow using the attachment operations, please upgrade your deployment to the latest version."
  5170. )
  5171. example_dict = dict(
  5172. inputs=inputs,
  5173. outputs=outputs,
  5174. id=example_id,
  5175. metadata=metadata,
  5176. split=split,
  5177. attachments_operations=attachments_operations,
  5178. attachments=attachments,
  5179. )
  5180. example = ls_schemas.ExampleUpdate(
  5181. **{k: v for k, v in example_dict.items() if v is not None}
  5182. )
  5183. if dataset_id is None:
  5184. dataset_id = self.read_example(example_id).dataset_id
  5185. if (self.info.instance_flags or {}).get(
  5186. "dataset_examples_multipart_enabled", False
  5187. ):
  5188. return dict(
  5189. self._update_examples_multipart(
  5190. dataset_id=dataset_id, updates=[example]
  5191. )
  5192. )
  5193. else:
  5194. # fallback to old method
  5195. response = self.request_with_retries(
  5196. "PATCH",
  5197. f"/examples/{_as_uuid(example_id, 'example_id')}",
  5198. headers={**self._headers, "Content-Type": "application/json"},
  5199. data=_dumps_json(
  5200. {
  5201. **{
  5202. k: v
  5203. for k, v in dump_model(example).items()
  5204. if v is not None
  5205. },
  5206. "dataset_id": str(dataset_id),
  5207. }
  5208. ),
  5209. )
  5210. ls_utils.raise_for_status_with_text(response)
  5211. return response.json()
    def update_examples(
        self,
        *,
        dataset_name: str | None = None,
        dataset_id: ID_TYPE | None = None,
        updates: Optional[Sequence[ls_schemas.ExampleUpdate | dict]] = None,
        dangerously_allow_filesystem: bool = False,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Update multiple examples.

        Examples are expected to all be part of the same dataset.

        Args:
            dataset_name (str | None):
                The name of the dataset to update. Should specify exactly one of
                'dataset_name' or 'dataset_id'.
            dataset_id (UUID | str | None):
                The ID of the dataset to update. Should specify exactly one of
                'dataset_name' or 'dataset_id'.
            updates (Sequence[ExampleUpdate | dict] | None):
                The example updates. Overwrites any specified fields and does not
                update any unspecified fields.
            dangerously_allow_filesystem (bool):
                Whether to allow using filesystem paths as attachments.
            **kwargs (Any):
                Legacy keyword args. Should not be specified if 'updates' is specified.

                - example_ids (Sequence[UUID | str]): The IDs of the examples to update.
                - inputs (Sequence[dict | None] | None): The input values for the examples.
                - outputs (Sequence[dict | None] | None): The output values for the examples.
                - metadata (Sequence[dict | None] | None): The metadata for the examples.
                - splits (Sequence[str | list[str] | None] | None): The splits for the examples, which are divisions of your dataset such as 'train', 'test', or 'validation'.
                - attachments_operations (Sequence[AttachmentsOperations | None] | None): The operations to perform on the attachments.
                - dataset_ids (Sequence[UUID | str] | None): The IDs of the datasets to move the examples to.

        Returns:
            The LangSmith JSON response. Includes 'message', 'count', and 'example_ids'.

        !!! warning "Behavior changed in `langsmith` 0.3.9"

            Updated to ...

        Example:
            ```python
            from langsmith import Client

            client = Client()
            dataset = client.create_dataset("agent-qa")
            examples = [
                {
                    "inputs": {"question": "what's an agent"},
                    "outputs": {"answer": "an agent is..."},
                    "metadata": {"difficulty": "easy"},
                },
                {
                    "inputs": {
                        "question": "can you explain the agent architecture in this diagram?"
                    },
                    "outputs": {"answer": "this diagram shows..."},
                    "attachments": {"diagram": {"mime_type": "image/png", "data": b"..."}},
                    "metadata": {"difficulty": "medium"},
                },
                # more examples...
            ]
            response = client.create_examples(dataset_name="agent-qa", examples=examples)
            example_ids = response["example_ids"]
            updates = [
                {
                    "id": example_ids[0],
                    "inputs": {"question": "what isn't an agent"},
                    "outputs": {"answer": "an agent is not..."},
                },
                {
                    "id": example_ids[1],
                    "attachments_operations": [
                        {"rename": {"diagram": "agent_diagram"}, "retain": []}
                    ],
                },
            ]
            response = client.update_examples(dataset_name="agent-qa", updates=updates)
            # -> {"example_ids": [...
            ```
        """  # noqa: E501
        # Validate mutually-exclusive input styles: new-style 'updates' vs
        # legacy parallel-sequence kwargs.
        if kwargs and updates:
            raise ValueError(
                f"Must pass in either 'updates' or args {tuple(kwargs)}, not both."
            )
        if not (kwargs or updates):
            raise ValueError("Please pass in a non-empty sequence for arg 'updates'.")
        if dataset_name and dataset_id:
            raise ValueError(
                "Must pass in exactly one of 'dataset_name' or 'dataset_id'."
            )
        elif dataset_name:
            dataset_id = self.read_dataset(dataset_name=dataset_name).id
        if updates:
            # New-style path: coerce dicts into ExampleUpdate models.
            updates_obj = [
                ls_schemas.ExampleUpdate(**x) if isinstance(x, dict) else x
                for x in updates
            ]
            if not dataset_id:
                # Fall back to the dataset_id carried on the first update.
                if updates_obj[0].dataset_id:
                    dataset_id = updates_obj[0].dataset_id
                else:
                    raise ValueError(
                        "Must pass in (exactly) one of 'dataset_name' or 'dataset_id'."
                    )
        # For backwards compatibility
        else:
            example_ids = kwargs.get("example_ids", None)
            if not example_ids:
                raise ValueError(
                    "Must pass in (exactly) one of 'updates' or 'example_ids'."
                )
            if not dataset_id:
                if "dataset_ids" not in kwargs:
                    # Assume all examples belong to same dataset
                    dataset_id = self.read_example(example_ids[0]).dataset_id
                elif len(set(kwargs["dataset_ids"])) > 1:
                    raise ValueError("Dataset IDs must be the same for all examples")
                elif not kwargs["dataset_ids"][0]:
                    raise ValueError("If specified, dataset_ids must be non-null.")
                else:
                    dataset_id = kwargs["dataset_ids"][0]
            multipart_enabled = (self.info.instance_flags or {}).get(
                "dataset_examples_multipart_enabled"
            )
            # Attachment operations require multipart support on the server.
            if (
                not multipart_enabled
                and (kwargs.get("attachments_operations") or kwargs.get("attachments"))
                is not None
            ):
                raise ValueError(
                    "Your LangSmith deployment does not allow using the attachment "
                    "operations, please upgrade your deployment to the latest version."
                )
            # Since ids are required, we will check against them
            examples_len = len(example_ids)
            for arg_name, arg_value in kwargs.items():
                if arg_value is not None and len(arg_value) != examples_len:
                    raise ValueError(
                        f"Length of {arg_name} ({len(arg_value)}) does not match"
                        f" length of examples ({examples_len})"
                    )
            # Zip the legacy parallel sequences into ExampleUpdate models,
            # defaulting any missing sequence to all-None.
            updates_obj = [
                ls_schemas.ExampleUpdate(
                    **{
                        "id": id_,
                        "inputs": in_,
                        "outputs": out_,
                        "dataset_id": dataset_id_,
                        "metadata": metadata_,
                        "split": split_,
                        "attachments": attachments_,
                        "attachments_operations": attachments_operations_,
                    }
                )
                for id_, in_, out_, metadata_, split_, dataset_id_, attachments_, attachments_operations_ in zip(
                    example_ids,
                    kwargs.get("inputs", (None for _ in range(examples_len))),
                    kwargs.get("outputs", (None for _ in range(examples_len))),
                    kwargs.get("metadata", (None for _ in range(examples_len))),
                    kwargs.get("splits", (None for _ in range(examples_len))),
                    kwargs.get("dataset_ids", (None for _ in range(examples_len))),
                    kwargs.get("attachments", (None for _ in range(examples_len))),
                    kwargs.get(
                        "attachments_operations", (None for _ in range(examples_len))
                    ),
                )
            ]
        response: Any = None
        if (self.info.instance_flags or {}).get(
            "dataset_examples_multipart_enabled", False
        ):
            response = self._update_examples_multipart(
                dataset_id=cast(uuid.UUID, dataset_id),
                updates=updates_obj,
                dangerously_allow_filesystem=dangerously_allow_filesystem,
            )
            return {
                "message": f"{response.get('count', 0)} examples updated",
                **response,
            }
        else:
            # fallback to old method
            response = self.request_with_retries(
                "PATCH",
                "/examples/bulk",
                headers={**self._headers, "Content-Type": "application/json"},
                data=(
                    _dumps_json(
                        [
                            {
                                k: v
                                for k, v in dump_model(example).items()
                                if v is not None
                            }
                            for example in updates_obj
                        ]
                    )
                ),
            )
            ls_utils.raise_for_status_with_text(response)
            return response.json()
  5409. def delete_example(self, example_id: ID_TYPE) -> None:
  5410. """Delete an example by ID.
  5411. Args:
  5412. example_id (Union[UUID, str]):
  5413. The ID of the example to delete.
  5414. Returns:
  5415. None
  5416. """
  5417. response = self.request_with_retries(
  5418. "DELETE",
  5419. f"/examples/{_as_uuid(example_id, 'example_id')}",
  5420. headers=self._headers,
  5421. )
  5422. ls_utils.raise_for_status_with_text(response)
  5423. def delete_examples(
  5424. self, example_ids: Sequence[ID_TYPE], *, hard_delete: bool = False
  5425. ) -> None:
  5426. """Delete multiple examples by ID.
  5427. Parameters
  5428. ----------
  5429. example_ids : Sequence[ID_TYPE]
  5430. The IDs of the examples to delete.
  5431. hard_delete : bool, default=False
  5432. If True, permanently delete the examples. If False, soft delete them.
  5433. """
  5434. if hard_delete:
  5435. # Hard delete uses POST to a different endpoint
  5436. # The platform endpoint is at /v1/platform/... instead of /api/v1/...
  5437. # So we need to use a different base URL
  5438. body = {
  5439. "example_ids": [
  5440. str(_as_uuid(id_, f"example_ids[{i}]"))
  5441. for i, id_ in enumerate(example_ids)
  5442. ],
  5443. "hard_delete": True,
  5444. }
  5445. # Use platform path helper for consistent URL construction
  5446. path = _platform_path(self.api_url, "datasets/examples/delete")
  5447. full_url = _construct_url(self.api_url, path)
  5448. response = self.session.request(
  5449. "POST",
  5450. full_url,
  5451. headers={**self._headers, "Content-Type": "application/json"},
  5452. data=_dumps_json(body),
  5453. timeout=self._timeout,
  5454. )
  5455. else:
  5456. # Soft delete uses DELETE with query params
  5457. params: dict[str, Any] = {
  5458. "example_ids": [
  5459. str(_as_uuid(id_, f"example_ids[{i}]"))
  5460. for i, id_ in enumerate(example_ids)
  5461. ]
  5462. }
  5463. response = self.request_with_retries(
  5464. "DELETE",
  5465. "/examples",
  5466. headers={**self._headers, "Content-Type": "application/json"},
  5467. params=params,
  5468. )
  5469. ls_utils.raise_for_status_with_text(response)
  5470. def list_dataset_splits(
  5471. self,
  5472. *,
  5473. dataset_id: Optional[ID_TYPE] = None,
  5474. dataset_name: Optional[str] = None,
  5475. as_of: Optional[Union[str, datetime.datetime]] = None,
  5476. ) -> list[str]:
  5477. """Get the splits for a dataset.
  5478. Args:
  5479. dataset_id (Optional[Union[UUID, str]]): The ID of the dataset.
  5480. dataset_name (Optional[str]): The name of the dataset.
  5481. as_of (Optional[Union[str, datetime.datetime]]): The version
  5482. of the dataset to retrieve splits for. Can be a timestamp or a
  5483. string tag. Defaults to "latest".
  5484. Returns:
  5485. List[str]: The names of this dataset's splits.
  5486. """
  5487. if dataset_id is None:
  5488. if dataset_name is None:
  5489. raise ValueError("Must provide dataset name or ID")
  5490. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  5491. params = {}
  5492. if as_of is not None:
  5493. params["as_of"] = (
  5494. as_of.isoformat() if isinstance(as_of, datetime.datetime) else as_of
  5495. )
  5496. response = self.request_with_retries(
  5497. "GET",
  5498. f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/splits",
  5499. params=params,
  5500. )
  5501. ls_utils.raise_for_status_with_text(response)
  5502. return response.json()
  5503. def update_dataset_splits(
  5504. self,
  5505. *,
  5506. dataset_id: Optional[ID_TYPE] = None,
  5507. dataset_name: Optional[str] = None,
  5508. split_name: str,
  5509. example_ids: list[ID_TYPE],
  5510. remove: bool = False,
  5511. ) -> None:
  5512. """Update the splits for a dataset.
  5513. Args:
  5514. dataset_id (Optional[Union[UUID, str]]): The ID of the dataset to update.
  5515. dataset_name (Optional[str]): The name of the dataset to update.
  5516. split_name (str): The name of the split to update.
  5517. example_ids (List[Union[UUID, str]]): The IDs of the examples to add to or
  5518. remove from the split.
  5519. remove (Optional[bool]): If True, remove the examples from the split.
  5520. If False, add the examples to the split.
  5521. Returns:
  5522. None
  5523. """
  5524. if dataset_id is None:
  5525. if dataset_name is None:
  5526. raise ValueError("Must provide dataset name or ID")
  5527. dataset_id = self.read_dataset(dataset_name=dataset_name).id
  5528. data = {
  5529. "split_name": split_name,
  5530. "examples": [
  5531. str(_as_uuid(id_, f"example_ids[{i}]"))
  5532. for i, id_ in enumerate(example_ids)
  5533. ],
  5534. "remove": remove,
  5535. }
  5536. response = self.request_with_retries(
  5537. "PUT", f"/datasets/{_as_uuid(dataset_id, 'dataset_id')}/splits", json=data
  5538. )
  5539. ls_utils.raise_for_status_with_text(response)
  5540. def _resolve_run_id(
  5541. self,
  5542. run: Union[ls_schemas.Run, ls_schemas.RunBase, str, uuid.UUID],
  5543. load_child_runs: bool,
  5544. ) -> ls_schemas.Run:
  5545. """Resolve the run ID.
  5546. Args:
  5547. run (Union[Run, RunBase, str, UUID]):
  5548. The run to resolve.
  5549. load_child_runs (bool):
  5550. Whether to load child runs.
  5551. Returns:
  5552. Run: The resolved run.
  5553. Raises:
  5554. TypeError: If the run type is invalid.
  5555. """
  5556. if isinstance(run, (str, uuid.UUID)):
  5557. run_ = self.read_run(run, load_child_runs=load_child_runs)
  5558. else:
  5559. run_ = cast(ls_schemas.Run, run)
  5560. return run_
  5561. def _resolve_example_id(
  5562. self,
  5563. example: Union[ls_schemas.Example, str, uuid.UUID, dict, None],
  5564. run: ls_schemas.Run,
  5565. ) -> Optional[ls_schemas.Example]:
  5566. """Resolve the example ID.
  5567. Args:
  5568. example (Optional[Union[Example, str, UUID, dict]]):
  5569. The example to resolve.
  5570. run (Run):
  5571. The run associated with the example.
  5572. Returns:
  5573. Optional[Example]: The resolved example.
  5574. """
  5575. if isinstance(example, (str, uuid.UUID)):
  5576. reference_example_ = self.read_example(example)
  5577. elif isinstance(example, ls_schemas.Example):
  5578. reference_example_ = example
  5579. elif isinstance(example, dict):
  5580. reference_example_ = ls_schemas.Example(
  5581. **example,
  5582. _host_url=self._host_url,
  5583. _tenant_id=self._get_optional_tenant_id(),
  5584. )
  5585. elif run.reference_example_id is not None:
  5586. reference_example_ = self.read_example(run.reference_example_id)
  5587. else:
  5588. reference_example_ = None
  5589. return reference_example_
  5590. def _select_eval_results(
  5591. self,
  5592. results: Union[
  5593. ls_evaluator.EvaluationResult, ls_evaluator.EvaluationResults, dict
  5594. ],
  5595. *,
  5596. fn_name: Optional[str] = None,
  5597. ) -> list[ls_evaluator.EvaluationResult]:
  5598. from langsmith.evaluation import evaluator as ls_evaluator # noqa: F811
  5599. def _cast_result(
  5600. single_result: Union[ls_evaluator.EvaluationResult, dict],
  5601. ) -> ls_evaluator.EvaluationResult:
  5602. if isinstance(single_result, dict):
  5603. return ls_evaluator.EvaluationResult(
  5604. **{
  5605. "key": fn_name,
  5606. "comment": single_result.get("reasoning"),
  5607. **single_result,
  5608. }
  5609. )
  5610. return single_result
  5611. def _is_eval_results(results: Any) -> TypeGuard[ls_evaluator.EvaluationResults]:
  5612. return isinstance(results, dict) and "results" in results
  5613. if isinstance(results, ls_evaluator.EvaluationResult):
  5614. results_ = [results]
  5615. elif _is_eval_results(results):
  5616. results_ = [_cast_result(r) for r in results["results"]]
  5617. elif isinstance(results, dict):
  5618. results_ = [_cast_result(cast(dict, results))]
  5619. else:
  5620. raise ValueError(
  5621. f"Invalid evaluation results type: {type(results)}."
  5622. " Must be EvaluationResult, EvaluationResults."
  5623. )
  5624. return results_
  5625. def evaluate_run(
  5626. self,
  5627. run: Union[ls_schemas.Run, ls_schemas.RunBase, str, uuid.UUID],
  5628. evaluator: ls_evaluator.RunEvaluator,
  5629. *,
  5630. source_info: Optional[dict[str, Any]] = None,
  5631. reference_example: Optional[
  5632. Union[ls_schemas.Example, str, dict, uuid.UUID]
  5633. ] = None,
  5634. load_child_runs: bool = False,
  5635. ) -> ls_evaluator.EvaluationResult:
  5636. """Evaluate a run.
  5637. Args:
  5638. run (Union[Run, RunBase, str, UUID]):
  5639. The run to evaluate.
  5640. evaluator (RunEvaluator):
  5641. The evaluator to use.
  5642. source_info (Optional[Dict[str, Any]]):
  5643. Additional information about the source of the evaluation to log
  5644. as feedback metadata.
  5645. reference_example (Optional[Union[Example, str, dict, UUID]]):
  5646. The example to use as a reference for the evaluation.
  5647. If not provided, the run's reference example will be used.
  5648. load_child_runs (bool, default=False):
  5649. Whether to load child runs when resolving the run ID.
  5650. Returns:
  5651. Feedback: The feedback object created by the evaluation.
  5652. """
  5653. run_ = self._resolve_run_id(run, load_child_runs=load_child_runs)
  5654. reference_example_ = self._resolve_example_id(reference_example, run_)
  5655. evaluator_response = evaluator.evaluate_run(
  5656. run_,
  5657. example=reference_example_,
  5658. )
  5659. results = self._log_evaluation_feedback(
  5660. evaluator_response,
  5661. run_,
  5662. source_info=source_info,
  5663. )
  5664. # TODO: Return all results
  5665. return results[0]
  5666. def _log_evaluation_feedback(
  5667. self,
  5668. evaluator_response: Union[
  5669. ls_evaluator.EvaluationResult, ls_evaluator.EvaluationResults, dict
  5670. ],
  5671. run: Optional[ls_schemas.Run] = None,
  5672. source_info: Optional[dict[str, Any]] = None,
  5673. project_id: Optional[ID_TYPE] = None,
  5674. *,
  5675. _executor: Optional[cf.ThreadPoolExecutor] = None,
  5676. ) -> list[ls_evaluator.EvaluationResult]:
  5677. results = self._select_eval_results(evaluator_response)
  5678. def _submit_feedback(**kwargs):
  5679. if _executor:
  5680. _executor.submit(self.create_feedback, **kwargs)
  5681. else:
  5682. self.create_feedback(**kwargs)
  5683. for res in results:
  5684. source_info_ = source_info or {}
  5685. if res.evaluator_info:
  5686. source_info_ = {**res.evaluator_info, **source_info_}
  5687. run_id_ = None
  5688. if res.target_run_id:
  5689. run_id_ = res.target_run_id
  5690. elif run is not None:
  5691. run_id_ = run.id
  5692. error = res.extra.get("error", None) if res.extra is not None else None
  5693. _submit_feedback(
  5694. run_id=run_id_,
  5695. key=res.key,
  5696. score=res.score,
  5697. value=res.value,
  5698. comment=res.comment,
  5699. correction=res.correction,
  5700. source_info=source_info_,
  5701. source_run_id=res.source_run_id,
  5702. feedback_config=cast(
  5703. Optional[ls_schemas.FeedbackConfig], res.feedback_config
  5704. ),
  5705. feedback_source_type=ls_schemas.FeedbackSourceType.MODEL,
  5706. project_id=project_id,
  5707. extra=res.extra,
  5708. trace_id=run.trace_id if run else None,
  5709. error=error,
  5710. )
  5711. return results
  5712. async def aevaluate_run(
  5713. self,
  5714. run: Union[ls_schemas.Run, str, uuid.UUID],
  5715. evaluator: ls_evaluator.RunEvaluator,
  5716. *,
  5717. source_info: Optional[dict[str, Any]] = None,
  5718. reference_example: Optional[
  5719. Union[ls_schemas.Example, str, dict, uuid.UUID]
  5720. ] = None,
  5721. load_child_runs: bool = False,
  5722. ) -> ls_evaluator.EvaluationResult:
  5723. """Evaluate a run asynchronously.
  5724. Args:
  5725. run (Union[Run, str, UUID]):
  5726. The run to evaluate.
  5727. evaluator (RunEvaluator):
  5728. The evaluator to use.
  5729. source_info (Optional[Dict[str, Any]]):
  5730. Additional information about the source of the evaluation to log
  5731. as feedback metadata.
  5732. reference_example (Optional[Union[Example, str, dict, UUID]]):
  5733. The example to use as a reference for the evaluation.
  5734. If not provided, the run's reference example will be used.
  5735. load_child_runs (bool, default=False):
  5736. Whether to load child runs when resolving the run ID.
  5737. Returns:
  5738. EvaluationResult: The evaluation result object created by the evaluation.
  5739. """
  5740. run_ = self._resolve_run_id(run, load_child_runs=load_child_runs)
  5741. reference_example_ = self._resolve_example_id(reference_example, run_)
  5742. evaluator_response = await evaluator.aevaluate_run(
  5743. run_,
  5744. example=reference_example_,
  5745. )
  5746. # TODO: Return all results and use async API
  5747. results = self._log_evaluation_feedback(
  5748. evaluator_response,
  5749. run_,
  5750. source_info=source_info,
  5751. )
  5752. return results[0]
  5753. def create_feedback(
  5754. self,
  5755. # TODO: make run_id a kwarg and drop default value for 'key' in breaking release.
  5756. run_id: Optional[ID_TYPE] = None,
  5757. key: str = "unnamed",
  5758. *,
  5759. score: Union[float, int, bool, None] = None,
  5760. value: Union[str, dict, None] = None,
  5761. trace_id: Optional[ID_TYPE] = None,
  5762. correction: Union[dict, None] = None,
  5763. comment: Union[str, None] = None,
  5764. source_info: Optional[dict[str, Any]] = None,
  5765. feedback_source_type: Union[
  5766. ls_schemas.FeedbackSourceType, str
  5767. ] = ls_schemas.FeedbackSourceType.API,
  5768. source_run_id: Optional[ID_TYPE] = None,
  5769. feedback_id: Optional[ID_TYPE] = None,
  5770. feedback_config: Optional[ls_schemas.FeedbackConfig] = None,
  5771. stop_after_attempt: int = 10,
  5772. project_id: Optional[ID_TYPE] = None,
  5773. comparative_experiment_id: Optional[ID_TYPE] = None,
  5774. feedback_group_id: Optional[ID_TYPE] = None,
  5775. extra: Optional[dict] = None,
  5776. error: Optional[bool] = None,
  5777. **kwargs: Any,
  5778. ) -> ls_schemas.Feedback:
  5779. """Create feedback for a run.
  5780. !!! note
  5781. To enable feedback to be batch uploaded in the background you must
  5782. specify `trace_id`. *We highly encourage this for latency-sensitive environments.*
  5783. Args:
  5784. key (str):
  5785. The name of the feedback metric.
  5786. score (Optional[Union[float, int, bool]]):
  5787. The score to rate this run on the metric or aspect.
  5788. value (Optional[Union[float, int, bool, str, dict]]):
  5789. The display value or non-numeric value for this feedback.
  5790. run_id (Optional[Union[UUID, str]]):
  5791. The ID of the run to provide feedback for. At least one of run_id,
  5792. trace_id, or project_id must be specified.
  5793. trace_id (Optional[Union[UUID, str]]):
  5794. The ID of the trace (i.e. root parent run) of the run to provide
  5795. feedback for (specified by run_id). If run_id and trace_id are the
  5796. same, only trace_id needs to be specified. **NOTE**: trace_id is
  5797. required feedback ingestion to be batched and backgrounded.
  5798. correction (Optional[dict]):
  5799. The proper ground truth for this run.
  5800. comment (Optional[str]):
  5801. A comment about this feedback, such as a justification for the score or
  5802. chain-of-thought trajectory for an LLM judge.
  5803. source_info (Optional[Dict[str, Any]]):
  5804. Information about the source of this feedback.
  5805. feedback_source_type (Union[FeedbackSourceType, str]):
  5806. The type of feedback source, such as model (for model-generated feedback)
  5807. or API.
  5808. source_run_id (Optional[Union[UUID, str]]):
  5809. The ID of the run that generated this feedback, if a "model" type.
  5810. feedback_id (Optional[Union[UUID, str]]):
  5811. The ID of the feedback to create. If not provided, a random UUID will be
  5812. generated.
  5813. feedback_config (Optional[FeedbackConfig]):
  5814. The configuration specifying how to interpret feedback with this key.
  5815. Examples include continuous (with min/max bounds), categorical,
  5816. or freeform.
  5817. stop_after_attempt (int, default=10):
  5818. The number of times to retry the request before giving up.
  5819. project_id (Optional[Union[UUID, str]]):
  5820. The ID of the project (or experiment) to provide feedback on. This is
  5821. used for creating summary metrics for experiments. Cannot specify
  5822. run_id or trace_id if project_id is specified, and vice versa.
  5823. comparative_experiment_id (Optional[Union[UUID, str]]):
  5824. If this feedback was logged as a part of a comparative experiment, this
  5825. associates the feedback with that experiment.
  5826. feedback_group_id (Optional[Union[UUID, str]]):
  5827. When logging preferences, ranking runs, or other comparative feedback,
  5828. this is used to group feedback together.
  5829. extra (Optional[Dict]):
  5830. Metadata for the feedback.
  5831. **kwargs (Any):
  5832. Additional keyword arguments.
  5833. Returns:
  5834. Feedback: The created feedback object.
  5835. Example:
  5836. ```python
  5837. from langsmith import trace, traceable, Client
  5838. @traceable
  5839. def foo(x):
  5840. return {"y": x * 2}
  5841. @traceable
  5842. def bar(y):
  5843. return {"z": y - 1}
  5844. client = Client()
  5845. inputs = {"x": 1}
  5846. with trace(name="foobar", inputs=inputs) as root_run:
  5847. result = foo(**inputs)
  5848. result = bar(**result)
  5849. root_run.outputs = result
  5850. trace_id = root_run.id
  5851. child_runs = root_run.child_runs
  5852. # Provide feedback for a trace (a.k.a. a root run)
  5853. client.create_feedback(
  5854. key="user_feedback",
  5855. score=1,
  5856. trace_id=trace_id,
  5857. )
  5858. # Provide feedback for a child run
  5859. foo_run_id = [run for run in child_runs if run.name == "foo"][0].id
  5860. client.create_feedback(
  5861. key="correctness",
  5862. score=0,
  5863. run_id=foo_run_id,
  5864. # trace_id= is optional but recommended to enable batched and backgrounded
  5865. # feedback ingestion.
  5866. trace_id=trace_id,
  5867. )
  5868. ```
  5869. """
  5870. run_id = run_id or trace_id
  5871. if run_id is None and project_id is None:
  5872. raise ValueError("One of run_id, trace_id, or project_id must be provided")
  5873. if run_id is not None and project_id is not None:
  5874. raise ValueError(
  5875. "project_id cannot be provided if run_id or trace_id is provided"
  5876. )
  5877. if kwargs:
  5878. warnings.warn(
  5879. "The following arguments are no longer used in the create_feedback"
  5880. f" endpoint: {sorted(kwargs)}",
  5881. DeprecationWarning,
  5882. )
  5883. try:
  5884. if not isinstance(feedback_source_type, ls_schemas.FeedbackSourceType):
  5885. feedback_source_type = ls_schemas.FeedbackSourceType(
  5886. feedback_source_type
  5887. )
  5888. if feedback_source_type == ls_schemas.FeedbackSourceType.API:
  5889. feedback_source: ls_schemas.FeedbackSourceBase = (
  5890. ls_schemas.APIFeedbackSource(metadata=source_info)
  5891. )
  5892. elif feedback_source_type == ls_schemas.FeedbackSourceType.MODEL:
  5893. feedback_source = ls_schemas.ModelFeedbackSource(metadata=source_info)
  5894. else:
  5895. raise ValueError(f"Unknown feedback source type {feedback_source_type}")
  5896. feedback_source.metadata = (
  5897. feedback_source.metadata if feedback_source.metadata is not None else {}
  5898. )
  5899. if source_run_id is not None and "__run" not in feedback_source.metadata:
  5900. feedback_source.metadata["__run"] = {"run_id": str(source_run_id)}
  5901. if feedback_source.metadata and "__run" in feedback_source.metadata:
  5902. # Validate that the linked run ID is a valid UUID
  5903. # Run info may be a base model or dict.
  5904. _run_meta: Union[dict, Any] = feedback_source.metadata["__run"]
  5905. if hasattr(_run_meta, "dict") and callable(_run_meta):
  5906. _run_meta = _run_meta.dict()
  5907. if "run_id" in _run_meta:
  5908. _run_meta["run_id"] = str(
  5909. _as_uuid(
  5910. feedback_source.metadata["__run"]["run_id"],
  5911. "feedback_source.metadata['__run']['run_id']",
  5912. )
  5913. )
  5914. feedback_source.metadata["__run"] = _run_meta
  5915. feedback = ls_schemas.FeedbackCreate(
  5916. id=_ensure_uuid(feedback_id),
  5917. # If run_id is None, this is interpreted as session-level
  5918. # feedback.
  5919. run_id=_ensure_uuid(run_id, accept_null=True),
  5920. trace_id=_ensure_uuid(trace_id, accept_null=True),
  5921. key=key,
  5922. score=_format_feedback_score(score),
  5923. value=value,
  5924. correction=correction,
  5925. comment=comment,
  5926. feedback_source=feedback_source,
  5927. created_at=datetime.datetime.now(datetime.timezone.utc),
  5928. modified_at=datetime.datetime.now(datetime.timezone.utc),
  5929. feedback_config=feedback_config,
  5930. session_id=_ensure_uuid(project_id, accept_null=True),
  5931. comparative_experiment_id=_ensure_uuid(
  5932. comparative_experiment_id, accept_null=True
  5933. ),
  5934. feedback_group_id=_ensure_uuid(feedback_group_id, accept_null=True),
  5935. extra=extra,
  5936. error=error,
  5937. )
  5938. use_multipart = (self.info.batch_ingest_config or {}).get(
  5939. "use_multipart_endpoint", False
  5940. )
  5941. if (
  5942. use_multipart
  5943. and self.info.version # TODO: Remove version check once versions have updated
  5944. and ls_utils.is_version_greater_or_equal(self.info.version, "0.8.10")
  5945. and (
  5946. self.tracing_queue is not None or self.compressed_traces is not None
  5947. )
  5948. and feedback.trace_id is not None
  5949. and self.otel_exporter is None
  5950. ):
  5951. serialized_op = serialize_feedback_dict(feedback)
  5952. if self.compressed_traces is not None:
  5953. multipart_form = (
  5954. serialized_feedback_operation_to_multipart_parts_and_context(
  5955. serialized_op
  5956. )
  5957. )
  5958. with self.compressed_traces.lock:
  5959. enqueued = compress_multipart_parts_and_context(
  5960. multipart_form,
  5961. self.compressed_traces,
  5962. _BOUNDARY,
  5963. )
  5964. if enqueued:
  5965. self.compressed_traces.trace_count += 1
  5966. if self._data_available_event:
  5967. self._data_available_event.set()
  5968. elif self.tracing_queue is not None:
  5969. self.tracing_queue.put(
  5970. TracingQueueItem(str(feedback.id), serialized_op)
  5971. )
  5972. else:
  5973. feedback_block = _dumps_json(feedback.dict(exclude_none=True))
  5974. self.request_with_retries(
  5975. "POST",
  5976. "/feedback",
  5977. request_kwargs={
  5978. "data": feedback_block,
  5979. },
  5980. stop_after_attempt=stop_after_attempt,
  5981. retry_on=(ls_utils.LangSmithNotFoundError,),
  5982. )
  5983. return ls_schemas.Feedback(**feedback.dict())
  5984. except Exception as e:
  5985. logger.error("Error creating feedback", exc_info=True)
  5986. raise e
  5987. def update_feedback(
  5988. self,
  5989. feedback_id: ID_TYPE,
  5990. *,
  5991. score: Union[float, int, bool, None] = None,
  5992. value: Union[float, int, bool, str, dict, None] = None,
  5993. correction: Union[dict, None] = None,
  5994. comment: Union[str, None] = None,
  5995. ) -> None:
  5996. """Update a feedback in the LangSmith API.
  5997. Args:
  5998. feedback_id (Union[UUID, str]):
  5999. The ID of the feedback to update.
  6000. score (Optional[Union[float, int, bool]]):
  6001. The score to update the feedback with.
  6002. value (Optional[Union[float, int, bool, str, dict]]):
  6003. The value to update the feedback with.
  6004. correction (Optional[dict]):
  6005. The correction to update the feedback with.
  6006. comment (Optional[str]):
  6007. The comment to update the feedback with.
  6008. Returns:
  6009. None
  6010. """
  6011. feedback_update: dict[str, Any] = {}
  6012. if score is not None:
  6013. feedback_update["score"] = _format_feedback_score(score)
  6014. if value is not None:
  6015. feedback_update["value"] = value
  6016. if correction is not None:
  6017. feedback_update["correction"] = correction
  6018. if comment is not None:
  6019. feedback_update["comment"] = comment
  6020. response = self.request_with_retries(
  6021. "PATCH",
  6022. f"/feedback/{_as_uuid(feedback_id, 'feedback_id')}",
  6023. headers={**self._headers, "Content-Type": "application/json"},
  6024. data=_dumps_json(feedback_update),
  6025. )
  6026. ls_utils.raise_for_status_with_text(response)
  6027. def read_feedback(self, feedback_id: ID_TYPE) -> ls_schemas.Feedback:
  6028. """Read a feedback from the LangSmith API.
  6029. Args:
  6030. feedback_id (Union[UUID, str]):
  6031. The ID of the feedback to read.
  6032. Returns:
  6033. Feedback: The feedback.
  6034. """
  6035. response = self.request_with_retries(
  6036. "GET",
  6037. f"/feedback/{_as_uuid(feedback_id, 'feedback_id')}",
  6038. )
  6039. return ls_schemas.Feedback(**response.json())
  6040. def list_feedback(
  6041. self,
  6042. *,
  6043. run_ids: Optional[Sequence[ID_TYPE]] = None,
  6044. feedback_key: Optional[Sequence[str]] = None,
  6045. feedback_source_type: Optional[Sequence[ls_schemas.FeedbackSourceType]] = None,
  6046. limit: Optional[int] = None,
  6047. **kwargs: Any,
  6048. ) -> Iterator[ls_schemas.Feedback]:
  6049. """List the feedback objects on the LangSmith API.
  6050. Args:
  6051. run_ids (Optional[Sequence[Union[UUID, str]]]):
  6052. The IDs of the runs to filter by.
  6053. feedback_key (Optional[Sequence[str]]):
  6054. The feedback key(s) to filter by. Examples: 'correctness'
  6055. The query performs a union of all feedback keys.
  6056. feedback_source_type (Optional[Sequence[FeedbackSourceType]]):
  6057. The type of feedback source, such as model or API.
  6058. limit (Optional[int]):
  6059. The maximum number of feedback to return.
  6060. **kwargs (Any):
  6061. Additional keyword arguments.
  6062. Yields:
  6063. The feedback objects.
  6064. """
  6065. params: dict = {
  6066. "run": run_ids,
  6067. "limit": min(limit, 100) if limit is not None else 100,
  6068. **kwargs,
  6069. }
  6070. if feedback_key is not None:
  6071. params["key"] = feedback_key
  6072. if feedback_source_type is not None:
  6073. params["source"] = feedback_source_type
  6074. for i, feedback in enumerate(
  6075. self._get_paginated_list("/feedback", params=params)
  6076. ):
  6077. yield ls_schemas.Feedback(**feedback)
  6078. if limit is not None and i + 1 >= limit:
  6079. break
  6080. def delete_feedback(self, feedback_id: ID_TYPE) -> None:
  6081. """Delete a feedback by ID.
  6082. Args:
  6083. feedback_id (Union[UUID, str]):
  6084. The ID of the feedback to delete.
  6085. Returns:
  6086. None
  6087. """
  6088. response = self.request_with_retries(
  6089. "DELETE",
  6090. f"/feedback/{_as_uuid(feedback_id, 'feedback_id')}",
  6091. headers=self._headers,
  6092. )
  6093. ls_utils.raise_for_status_with_text(response)
  6094. def create_feedback_from_token(
  6095. self,
  6096. token_or_url: Union[str, uuid.UUID],
  6097. score: Union[float, int, bool, None] = None,
  6098. *,
  6099. value: Union[float, int, bool, str, dict, None] = None,
  6100. correction: Union[dict, None] = None,
  6101. comment: Union[str, None] = None,
  6102. metadata: Optional[dict] = None,
  6103. ) -> None:
  6104. """Create feedback from a presigned token or URL.
  6105. Args:
  6106. token_or_url (Union[str, uuid.UUID]): The token or URL from which to create
  6107. feedback.
  6108. score (Optional[Union[float, int, bool]]): The score of the feedback.
  6109. value (Optional[Union[float, int, bool, str, dict]]): The value of the
  6110. feedback.
  6111. correction (Optional[dict]): The correction of the feedback.
  6112. comment (Optional[str]): The comment of the feedback.
  6113. metadata (Optional[dict]): Additional metadata for the feedback.
  6114. Raises:
  6115. ValueError: If the source API URL is invalid.
  6116. Returns:
  6117. None
  6118. """
  6119. source_api_url, token_uuid = _parse_token_or_url(
  6120. token_or_url, self.api_url, num_parts=1
  6121. )
  6122. if source_api_url != self.api_url:
  6123. raise ValueError(f"Invalid source API URL. {source_api_url}")
  6124. response = self.request_with_retries(
  6125. "POST",
  6126. f"/feedback/tokens/{_as_uuid(token_uuid)}",
  6127. data=_dumps_json(
  6128. {
  6129. "score": score,
  6130. "value": value,
  6131. "correction": correction,
  6132. "comment": comment,
  6133. "metadata": metadata,
  6134. # TODO: Add ID once the API supports it.
  6135. }
  6136. ),
  6137. headers=self._headers,
  6138. )
  6139. ls_utils.raise_for_status_with_text(response)
  6140. def create_presigned_feedback_token(
  6141. self,
  6142. run_id: ID_TYPE,
  6143. feedback_key: str,
  6144. *,
  6145. expiration: Optional[datetime.datetime | datetime.timedelta] = None,
  6146. feedback_config: Optional[ls_schemas.FeedbackConfig] = None,
  6147. feedback_id: Optional[ID_TYPE] = None,
  6148. ) -> ls_schemas.FeedbackIngestToken:
  6149. """Create a pre-signed URL to send feedback data to.
  6150. This is useful for giving browser-based clients a way to upload
  6151. feedback data directly to LangSmith without accessing the
  6152. API key.
  6153. Args:
  6154. run_id (Union[UUID, str]):
  6155. The ID of the run.
  6156. feedback_key (str):
  6157. The key of the feedback to create.
  6158. expiration (Optional[datetime.datetime | datetime.timedelta]): The expiration time of the pre-signed URL.
  6159. Either a datetime or a timedelta offset from now.
  6160. Default to 3 hours.
  6161. feedback_config (Optional[FeedbackConfig]):
  6162. If creating a feedback_key for the first time,
  6163. this defines how the metric should be interpreted,
  6164. such as a continuous score (w/ optional bounds),
  6165. or distribution over categorical values.
  6166. feedback_id (Optional[Union[UUID, str]): The ID of the feedback to create. If not provided, a new
  6167. feedback will be created.
  6168. Returns:
  6169. FeedbackIngestToken: The pre-signed URL for uploading feedback data.
  6170. """
  6171. body: dict[str, Any] = {
  6172. "run_id": run_id,
  6173. "feedback_key": feedback_key,
  6174. "feedback_config": feedback_config,
  6175. "id": feedback_id or str(uuid.uuid4()),
  6176. }
  6177. if expiration is None:
  6178. body["expires_in"] = ls_schemas.TimeDeltaInput(
  6179. days=0,
  6180. hours=3,
  6181. minutes=0,
  6182. )
  6183. elif isinstance(expiration, datetime.datetime):
  6184. body["expires_at"] = expiration.isoformat()
  6185. elif isinstance(expiration, datetime.timedelta):
  6186. body["expires_in"] = ls_schemas.TimeDeltaInput(
  6187. days=expiration.days,
  6188. hours=expiration.seconds // 3600,
  6189. minutes=(expiration.seconds // 60) % 60,
  6190. )
  6191. else:
  6192. raise ValueError(f"Unknown expiration type: {type(expiration)}")
  6193. response = self.request_with_retries(
  6194. "POST",
  6195. "/feedback/tokens",
  6196. data=_dumps_json(body),
  6197. )
  6198. ls_utils.raise_for_status_with_text(response)
  6199. return ls_schemas.FeedbackIngestToken(**response.json())
  6200. def create_presigned_feedback_tokens(
  6201. self,
  6202. run_id: ID_TYPE,
  6203. feedback_keys: Sequence[str],
  6204. *,
  6205. expiration: Optional[datetime.datetime | datetime.timedelta] = None,
  6206. feedback_configs: Optional[
  6207. Sequence[Optional[ls_schemas.FeedbackConfig]]
  6208. ] = None,
  6209. ) -> Sequence[ls_schemas.FeedbackIngestToken]:
  6210. """Create a pre-signed URL to send feedback data to.
  6211. This is useful for giving browser-based clients a way to upload
  6212. feedback data directly to LangSmith without accessing the
  6213. API key.
  6214. Args:
  6215. run_id (Union[UUID, str]):
  6216. The ID of the run.
  6217. feedback_keys (Sequence[str]):
  6218. The key of the feedback to create.
  6219. expiration (Optional[datetime.datetime | datetime.timedelta]): The expiration time of the pre-signed URL.
  6220. Either a datetime or a timedelta offset from now.
  6221. Default to 3 hours.
  6222. feedback_configs (Optional[Sequence[Optional[FeedbackConfig]]]):
  6223. If creating a feedback_key for the first time,
  6224. this defines how the metric should be interpreted,
  6225. such as a continuous score (w/ optional bounds),
  6226. or distribution over categorical values.
  6227. Returns:
  6228. Sequence[FeedbackIngestToken]: The pre-signed URL for uploading feedback data.
  6229. """
  6230. # validate
  6231. if feedback_configs is not None and len(feedback_keys) != len(feedback_configs):
  6232. raise ValueError(
  6233. "The length of feedback_keys and feedback_configs must be the same."
  6234. )
  6235. if not feedback_configs:
  6236. feedback_configs = [None] * len(feedback_keys)
  6237. # build expiry option
  6238. expires_in, expires_at = None, None
  6239. if expiration is None:
  6240. expires_in = ls_schemas.TimeDeltaInput(
  6241. days=0,
  6242. hours=3,
  6243. minutes=0,
  6244. )
  6245. elif isinstance(expiration, datetime.datetime):
  6246. expires_at = expiration.isoformat()
  6247. elif isinstance(expiration, datetime.timedelta):
  6248. expires_in = ls_schemas.TimeDeltaInput(
  6249. days=expiration.days,
  6250. hours=expiration.seconds // 3600,
  6251. minutes=(expiration.seconds // 60) % 60,
  6252. )
  6253. else:
  6254. raise ValueError(f"Unknown expiration type: {type(expiration)}")
  6255. # assemble body, one entry per key
  6256. body = _dumps_json(
  6257. [
  6258. {
  6259. "run_id": run_id,
  6260. "feedback_key": feedback_key,
  6261. "feedback_config": feedback_config,
  6262. "expires_in": expires_in,
  6263. "expires_at": expires_at,
  6264. }
  6265. for feedback_key, feedback_config in zip(
  6266. feedback_keys, feedback_configs
  6267. )
  6268. ]
  6269. )
  6270. def req(api_url: str, api_key: Optional[str]) -> list:
  6271. response = self.request_with_retries(
  6272. "POST",
  6273. f"{api_url}/feedback/tokens",
  6274. request_kwargs={
  6275. "data": body,
  6276. "headers": {
  6277. **self._headers,
  6278. X_API_KEY: api_key or self.api_key,
  6279. },
  6280. },
  6281. )
  6282. ls_utils.raise_for_status_with_text(response)
  6283. return response.json()
  6284. tokens = []
  6285. with cf.ThreadPoolExecutor(max_workers=len(self._write_api_urls)) as executor:
  6286. futs = [
  6287. executor.submit(req, api_url, api_key)
  6288. for api_url, api_key in self._write_api_urls.items()
  6289. ]
  6290. for fut in cf.as_completed(futs):
  6291. response = fut.result()
  6292. tokens.extend(
  6293. [ls_schemas.FeedbackIngestToken(**part) for part in response]
  6294. )
  6295. return tokens
  6296. def list_presigned_feedback_tokens(
  6297. self,
  6298. run_id: ID_TYPE,
  6299. *,
  6300. limit: Optional[int] = None,
  6301. ) -> Iterator[ls_schemas.FeedbackIngestToken]:
  6302. """List the feedback ingest tokens for a run.
  6303. Args:
  6304. run_id (Union[UUID, str]): The ID of the run to filter by.
  6305. limit (Optional[int]): The maximum number of tokens to return.
  6306. Yields:
  6307. The feedback ingest tokens.
  6308. """
  6309. params = {
  6310. "run_id": _as_uuid(run_id, "run_id"),
  6311. "limit": min(limit, 100) if limit is not None else 100,
  6312. }
  6313. for i, token in enumerate(
  6314. self._get_paginated_list("/feedback/tokens", params=params)
  6315. ):
  6316. yield ls_schemas.FeedbackIngestToken(**token)
  6317. if limit is not None and i + 1 >= limit:
  6318. break
  6319. def list_feedback_formulas(
  6320. self,
  6321. *,
  6322. dataset_id: Optional[ID_TYPE] = None,
  6323. session_id: Optional[ID_TYPE] = None,
  6324. limit: Optional[int] = None,
  6325. offset: int = 0,
  6326. ) -> Iterator[ls_schemas.FeedbackFormula]:
  6327. """List feedback formulas.
  6328. Args:
  6329. dataset_id (Optional[Union[UUID, str]]):
  6330. The ID of the dataset to filter by.
  6331. session_id (Optional[Union[UUID, str]]):
  6332. The ID of the session to filter by.
  6333. limit (Optional[int]):
  6334. The maximum number of feedback formulas to return.
  6335. offset (int):
  6336. The starting offset for pagination.
  6337. Yields:
  6338. The feedback formulas.
  6339. """
  6340. params: dict[str, Any] = {
  6341. "dataset_id": (
  6342. _as_uuid(dataset_id, "dataset_id") if dataset_id is not None else None
  6343. ),
  6344. "session_id": (
  6345. _as_uuid(session_id, "session_id") if session_id is not None else None
  6346. ),
  6347. "limit": min(limit, 100) if limit is not None else 100,
  6348. "offset": offset,
  6349. }
  6350. for i, feedback_formula in enumerate(
  6351. self._get_paginated_list("/feedback/formulas", params=params)
  6352. ):
  6353. yield ls_schemas.FeedbackFormula(**feedback_formula)
  6354. if limit is not None and i + 1 >= limit:
  6355. break
  6356. def get_feedback_formula_by_id(
  6357. self, feedback_formula_id: ID_TYPE
  6358. ) -> ls_schemas.FeedbackFormula:
  6359. """Get a feedback formula by ID.
  6360. Args:
  6361. feedback_formula_id (Union[UUID, str]):
  6362. The ID of the feedback formula to retrieve.
  6363. Returns:
  6364. The requested feedback formula.
  6365. """
  6366. response = self.request_with_retries(
  6367. "GET",
  6368. f"/feedback/formulas/{_as_uuid(feedback_formula_id, 'feedback_formula_id')}",
  6369. )
  6370. ls_utils.raise_for_status_with_text(response)
  6371. return ls_schemas.FeedbackFormula(**response.json())
  6372. def create_feedback_formula(
  6373. self,
  6374. *,
  6375. feedback_key: str,
  6376. aggregation_type: Literal["sum", "avg"],
  6377. formula_parts: Sequence[
  6378. Union[ls_schemas.FeedbackFormulaWeightedVariable, dict]
  6379. ],
  6380. dataset_id: Optional[ID_TYPE] = None,
  6381. session_id: Optional[ID_TYPE] = None,
  6382. ) -> ls_schemas.FeedbackFormula:
  6383. """Create a feedback formula.
  6384. Args:
  6385. feedback_key (str):
  6386. The feedback key for the formula.
  6387. aggregation_type (Literal["sum", "avg"]):
  6388. The aggregation type to use when combining parts.
  6389. formula_parts (Sequence[FeedbackFormulaWeightedVariable | dict]):
  6390. The weighted feedback keys included in the formula.
  6391. dataset_id (Optional[Union[UUID, str]]):
  6392. The dataset to scope the formula to.
  6393. session_id (Optional[Union[UUID, str]]):
  6394. The session to scope the formula to.
  6395. Returns:
  6396. The created feedback formula.
  6397. """
  6398. typed_parts: list[ls_schemas.FeedbackFormulaWeightedVariable] = [
  6399. part
  6400. if isinstance(part, ls_schemas.FeedbackFormulaWeightedVariable)
  6401. else ls_schemas.FeedbackFormulaWeightedVariable(**part)
  6402. for part in formula_parts
  6403. ]
  6404. payload = ls_schemas.FeedbackFormulaCreate(
  6405. feedback_key=feedback_key,
  6406. aggregation_type=aggregation_type,
  6407. formula_parts=typed_parts,
  6408. dataset_id=(
  6409. _as_uuid(dataset_id, "dataset_id") if dataset_id is not None else None
  6410. ),
  6411. session_id=(
  6412. _as_uuid(session_id, "session_id") if session_id is not None else None
  6413. ),
  6414. )
  6415. response = self.request_with_retries(
  6416. "POST",
  6417. "/feedback/formulas",
  6418. request_kwargs={
  6419. "data": _dumps_json(payload.dict(exclude_none=True)),
  6420. },
  6421. )
  6422. ls_utils.raise_for_status_with_text(response)
  6423. return ls_schemas.FeedbackFormula(**response.json())
  6424. def update_feedback_formula(
  6425. self,
  6426. feedback_formula_id: ID_TYPE,
  6427. *,
  6428. feedback_key: str,
  6429. aggregation_type: Literal["sum", "avg"],
  6430. formula_parts: Sequence[
  6431. Union[ls_schemas.FeedbackFormulaWeightedVariable, dict]
  6432. ],
  6433. ) -> ls_schemas.FeedbackFormula:
  6434. """Update a feedback formula.
  6435. Args:
  6436. feedback_formula_id (Union[UUID, str]):
  6437. The ID of the feedback formula to update.
  6438. feedback_key (str):
  6439. The feedback key for the formula.
  6440. aggregation_type (Literal["sum", "avg"]):
  6441. The aggregation type to use when combining parts.
  6442. formula_parts (Sequence[FeedbackFormulaWeightedVariable | dict]):
  6443. The weighted feedback keys included in the formula.
  6444. Returns:
  6445. The updated feedback formula.
  6446. """
  6447. typed_parts: list[ls_schemas.FeedbackFormulaWeightedVariable] = [
  6448. part
  6449. if isinstance(part, ls_schemas.FeedbackFormulaWeightedVariable)
  6450. else ls_schemas.FeedbackFormulaWeightedVariable(**part)
  6451. for part in formula_parts
  6452. ]
  6453. payload = ls_schemas.FeedbackFormulaUpdate(
  6454. feedback_key=feedback_key,
  6455. aggregation_type=aggregation_type,
  6456. formula_parts=typed_parts,
  6457. )
  6458. response = self.request_with_retries(
  6459. "PUT",
  6460. f"/feedback/formulas/{_as_uuid(feedback_formula_id, 'feedback_formula_id')}",
  6461. request_kwargs={
  6462. "data": _dumps_json(payload.dict(exclude_none=True)),
  6463. },
  6464. )
  6465. ls_utils.raise_for_status_with_text(response)
  6466. return ls_schemas.FeedbackFormula(**response.json())
  6467. def delete_feedback_formula(self, feedback_formula_id: ID_TYPE) -> None:
  6468. """Delete a feedback formula by ID.
  6469. Args:
  6470. feedback_formula_id (Union[UUID, str]):
  6471. The ID of the feedback formula to delete.
  6472. """
  6473. response = self.request_with_retries(
  6474. "DELETE",
  6475. f"/feedback/formulas/{_as_uuid(feedback_formula_id, 'feedback_formula_id')}",
  6476. )
  6477. ls_utils.raise_for_status_with_text(response)
  6478. # Annotation Queue API
  6479. def list_annotation_queues(
  6480. self,
  6481. *,
  6482. queue_ids: Optional[list[ID_TYPE]] = None,
  6483. name: Optional[str] = None,
  6484. name_contains: Optional[str] = None,
  6485. limit: Optional[int] = None,
  6486. ) -> Iterator[ls_schemas.AnnotationQueue]:
  6487. """List the annotation queues on the LangSmith API.
  6488. Args:
  6489. queue_ids (Optional[List[Union[UUID, str]]]):
  6490. The IDs of the queues to filter by.
  6491. name (Optional[str]):
  6492. The name of the queue to filter by.
  6493. name_contains (Optional[str]):
  6494. The substring that the queue name should contain.
  6495. limit (Optional[int]):
  6496. The maximum number of queues to return.
  6497. Yields:
  6498. The annotation queues.
  6499. """
  6500. params: dict = {
  6501. "ids": (
  6502. [_as_uuid(id_, f"queue_ids[{i}]") for i, id_ in enumerate(queue_ids)]
  6503. if queue_ids is not None
  6504. else None
  6505. ),
  6506. "name": name,
  6507. "name_contains": name_contains,
  6508. "limit": min(limit, 100) if limit is not None else 100,
  6509. }
  6510. for i, queue in enumerate(
  6511. self._get_paginated_list("/annotation-queues", params=params)
  6512. ):
  6513. yield ls_schemas.AnnotationQueue(
  6514. **queue,
  6515. )
  6516. if limit is not None and i + 1 >= limit:
  6517. break
  6518. def create_annotation_queue(
  6519. self,
  6520. *,
  6521. name: str,
  6522. description: Optional[str] = None,
  6523. queue_id: Optional[ID_TYPE] = None,
  6524. rubric_instructions: Optional[str] = None,
  6525. ) -> ls_schemas.AnnotationQueueWithDetails:
  6526. """Create an annotation queue on the LangSmith API.
  6527. Args:
  6528. name (str):
  6529. The name of the annotation queue.
  6530. description (Optional[str]):
  6531. The description of the annotation queue.
  6532. queue_id (Optional[Union[UUID, str]]):
  6533. The ID of the annotation queue.
  6534. rubric_instructions (Optional[str]):
  6535. The rubric instructions for the annotation queue.
  6536. Returns:
  6537. AnnotationQueue: The created annotation queue object.
  6538. """
  6539. body = {
  6540. "name": name,
  6541. "description": description,
  6542. "id": str(queue_id) if queue_id is not None else str(uuid.uuid4()),
  6543. "rubric_instructions": rubric_instructions,
  6544. }
  6545. response = self.request_with_retries(
  6546. "POST",
  6547. "/annotation-queues",
  6548. json={k: v for k, v in body.items() if v is not None},
  6549. )
  6550. ls_utils.raise_for_status_with_text(response)
  6551. return ls_schemas.AnnotationQueueWithDetails(
  6552. **response.json(),
  6553. )
  6554. def read_annotation_queue(self, queue_id: ID_TYPE) -> ls_schemas.AnnotationQueue:
  6555. """Read an annotation queue with the specified `queue_id`.
  6556. Args:
  6557. queue_id (Union[UUID, str]): The ID of the annotation queue to read.
  6558. Returns:
  6559. AnnotationQueue: The annotation queue object.
  6560. """
  6561. base_url = f"/annotation-queues/{_as_uuid(queue_id, 'queue_id')}"
  6562. response = self.request_with_retries(
  6563. "GET",
  6564. f"{base_url}",
  6565. headers=self._headers,
  6566. )
  6567. ls_utils.raise_for_status_with_text(response)
  6568. return ls_schemas.AnnotationQueueWithDetails(**response.json())
  6569. def update_annotation_queue(
  6570. self,
  6571. queue_id: ID_TYPE,
  6572. *,
  6573. name: str,
  6574. description: Optional[str] = None,
  6575. rubric_instructions: Optional[str] = None,
  6576. ) -> None:
  6577. """Update an annotation queue with the specified `queue_id`.
  6578. Args:
  6579. queue_id (Union[UUID, str]): The ID of the annotation queue to update.
  6580. name (str): The new name for the annotation queue.
  6581. description (Optional[str]): The new description for the
  6582. annotation queue.
  6583. rubric_instructions (Optional[str]): The new rubric instructions for the
  6584. annotation queue.
  6585. Returns:
  6586. None
  6587. """
  6588. response = self.request_with_retries(
  6589. "PATCH",
  6590. f"/annotation-queues/{_as_uuid(queue_id, 'queue_id')}",
  6591. json={
  6592. "name": name,
  6593. "description": description,
  6594. "rubric_instructions": rubric_instructions,
  6595. },
  6596. )
  6597. ls_utils.raise_for_status_with_text(response)
  6598. def delete_annotation_queue(self, queue_id: ID_TYPE) -> None:
  6599. """Delete an annotation queue with the specified `queue_id`.
  6600. Args:
  6601. queue_id (Union[UUID, str]): The ID of the annotation queue to delete.
  6602. Returns:
  6603. None
  6604. """
  6605. response = self.request_with_retries(
  6606. "DELETE",
  6607. f"/annotation-queues/{_as_uuid(queue_id, 'queue_id')}",
  6608. headers={"Accept": "application/json", **self._headers},
  6609. )
  6610. ls_utils.raise_for_status_with_text(response)
  6611. def add_runs_to_annotation_queue(
  6612. self, queue_id: ID_TYPE, *, run_ids: list[ID_TYPE]
  6613. ) -> None:
  6614. """Add runs to an annotation queue with the specified `queue_id`.
  6615. Args:
  6616. queue_id (Union[UUID, str]): The ID of the annotation queue.
  6617. run_ids (List[Union[UUID, str]]): The IDs of the runs to be added to the annotation
  6618. queue.
  6619. Returns:
  6620. None
  6621. """
  6622. response = self.request_with_retries(
  6623. "POST",
  6624. f"/annotation-queues/{_as_uuid(queue_id, 'queue_id')}/runs",
  6625. json=[str(_as_uuid(id_, f"run_ids[{i}]")) for i, id_ in enumerate(run_ids)],
  6626. )
  6627. ls_utils.raise_for_status_with_text(response)
def delete_run_from_annotation_queue(
    self, queue_id: ID_TYPE, *, run_id: ID_TYPE
) -> None:
    """Delete a run from an annotation queue with the specified `queue_id` and `run_id`.

    Args:
        queue_id (Union[UUID, str]): The ID of the annotation queue.
        run_id (Union[UUID, str]): The ID of the run to be removed from the
            annotation queue.

    Returns:
        None
    """
    response = self.request_with_retries(
        "DELETE",
        f"/annotation-queues/{_as_uuid(queue_id, 'queue_id')}/runs/{_as_uuid(run_id, 'run_id')}",
    )
    ls_utils.raise_for_status_with_text(response)
  6644. def get_run_from_annotation_queue(
  6645. self, queue_id: ID_TYPE, *, index: int
  6646. ) -> ls_schemas.RunWithAnnotationQueueInfo:
  6647. """Get a run from an annotation queue at the specified index.
  6648. Args:
  6649. queue_id (Union[UUID, str]): The ID of the annotation queue.
  6650. index (int): The index of the run to retrieve.
  6651. Returns:
  6652. RunWithAnnotationQueueInfo: The run at the specified index.
  6653. Raises:
  6654. LangSmithNotFoundError: If the run is not found at the given index.
  6655. LangSmithError: For other API-related errors.
  6656. """
  6657. base_url = f"/annotation-queues/{_as_uuid(queue_id, 'queue_id')}/run"
  6658. response = self.request_with_retries(
  6659. "GET",
  6660. f"{base_url}/{index}",
  6661. headers=self._headers,
  6662. )
  6663. ls_utils.raise_for_status_with_text(response)
  6664. return ls_schemas.RunWithAnnotationQueueInfo(**response.json())
  6665. def create_comparative_experiment(
  6666. self,
  6667. name: str,
  6668. experiments: Sequence[ID_TYPE],
  6669. *,
  6670. reference_dataset: Optional[ID_TYPE] = None,
  6671. description: Optional[str] = None,
  6672. created_at: Optional[datetime.datetime] = None,
  6673. metadata: Optional[dict[str, Any]] = None,
  6674. id: Optional[ID_TYPE] = None,
  6675. ) -> ls_schemas.ComparativeExperiment:
  6676. """Create a comparative experiment on the LangSmith API.
  6677. These experiments compare 2 or more experiment results over a shared dataset.
  6678. Args:
  6679. name (str): The name of the comparative experiment.
  6680. experiments (Sequence[Union[UUID, str]]): The IDs of the experiments to compare.
  6681. reference_dataset (Optional[Union[UUID, str]]): The ID of the dataset these experiments are compared on.
  6682. description (Optional[str]): The description of the comparative experiment.
  6683. created_at (Optional[datetime.datetime]): The creation time of the comparative experiment.
  6684. metadata (Optional[Dict[str, Any]]): Additional metadata for the comparative experiment.
  6685. id (Optional[Union[UUID, str]]): The ID of the comparative experiment.
  6686. Returns:
  6687. ComparativeExperiment: The created comparative experiment object.
  6688. """
  6689. if not experiments:
  6690. raise ValueError("At least one experiment is required.")
  6691. if reference_dataset is None:
  6692. # Get one of the experiments' reference dataset
  6693. reference_dataset = self.read_project(
  6694. project_id=experiments[0]
  6695. ).reference_dataset_id
  6696. if not reference_dataset:
  6697. raise ValueError("A reference dataset is required.")
  6698. body: dict[str, Any] = {
  6699. "id": id or str(uuid.uuid4()),
  6700. "name": name,
  6701. "experiment_ids": experiments,
  6702. "reference_dataset_id": reference_dataset,
  6703. "description": description,
  6704. "created_at": created_at or datetime.datetime.now(datetime.timezone.utc),
  6705. "extra": {},
  6706. }
  6707. if metadata is not None:
  6708. body["extra"]["metadata"] = metadata
  6709. ser = _dumps_json({k: v for k, v in body.items()}) # if v is not None})
  6710. response = self.request_with_retries(
  6711. "POST",
  6712. "/datasets/comparative",
  6713. request_kwargs={
  6714. "data": ser,
  6715. },
  6716. )
  6717. ls_utils.raise_for_status_with_text(response)
  6718. response_d = response.json()
  6719. return ls_schemas.ComparativeExperiment(**response_d)
  6720. async def arun_on_dataset(
  6721. self,
  6722. dataset_name: str,
  6723. llm_or_chain_factory: Any,
  6724. *,
  6725. evaluation: Optional[Any] = None,
  6726. concurrency_level: int = 5,
  6727. project_name: Optional[str] = None,
  6728. project_metadata: Optional[dict[str, Any]] = None,
  6729. dataset_version: Optional[Union[datetime.datetime, str]] = None,
  6730. verbose: bool = False,
  6731. input_mapper: Optional[Callable[[dict], Any]] = None,
  6732. revision_id: Optional[str] = None,
  6733. **kwargs: Any,
  6734. ) -> dict[str, Any]:
  6735. """Asynchronously run the Chain or language model on a dataset.
  6736. .. deprecated:: 0.1.0
  6737. This method is deprecated. Use :func:`langsmith.aevaluate` instead.
  6738. """ # noqa: E501
  6739. warnings.warn(
  6740. "The `arun_on_dataset` method is deprecated and"
  6741. " will be removed in a future version."
  6742. "Please use the `aevaluate` method instead.",
  6743. DeprecationWarning,
  6744. )
  6745. try:
  6746. from langchain.smith import ( # type: ignore[import-not-found]
  6747. arun_on_dataset as _arun_on_dataset,
  6748. )
  6749. except ImportError:
  6750. raise ImportError(
  6751. "The client.arun_on_dataset function requires the langchain"
  6752. "package to run.\nInstall with pip install langchain"
  6753. )
  6754. return await _arun_on_dataset(
  6755. dataset_name=dataset_name,
  6756. llm_or_chain_factory=llm_or_chain_factory,
  6757. client=self,
  6758. evaluation=evaluation,
  6759. concurrency_level=concurrency_level,
  6760. project_name=project_name,
  6761. project_metadata=project_metadata,
  6762. verbose=verbose,
  6763. input_mapper=input_mapper,
  6764. revision_id=revision_id,
  6765. dataset_version=dataset_version,
  6766. **kwargs,
  6767. )
  6768. def run_on_dataset(
  6769. self,
  6770. dataset_name: str,
  6771. llm_or_chain_factory: Any,
  6772. *,
  6773. evaluation: Optional[Any] = None,
  6774. concurrency_level: int = 5,
  6775. project_name: Optional[str] = None,
  6776. project_metadata: Optional[dict[str, Any]] = None,
  6777. dataset_version: Optional[Union[datetime.datetime, str]] = None,
  6778. verbose: bool = False,
  6779. input_mapper: Optional[Callable[[dict], Any]] = None,
  6780. revision_id: Optional[str] = None,
  6781. **kwargs: Any,
  6782. ) -> dict[str, Any]:
  6783. """Run the Chain or language model on a dataset.
  6784. .. deprecated:: 0.1.0
  6785. This method is deprecated. Use :func:`langsmith.aevaluate` instead.
  6786. """ # noqa: E501 # noqa: E501
  6787. warnings.warn(
  6788. "The `run_on_dataset` method is deprecated and"
  6789. " will be removed in a future version."
  6790. "Please use the `evaluate` method instead.",
  6791. DeprecationWarning,
  6792. )
  6793. try:
  6794. from langchain.smith import (
  6795. run_on_dataset as _run_on_dataset, # type: ignore
  6796. )
  6797. except ImportError:
  6798. raise ImportError(
  6799. "The client.run_on_dataset function requires the langchain"
  6800. "package to run.\nInstall with pip install langchain"
  6801. )
  6802. return _run_on_dataset(
  6803. dataset_name=dataset_name,
  6804. llm_or_chain_factory=llm_or_chain_factory,
  6805. concurrency_level=concurrency_level,
  6806. client=self,
  6807. evaluation=evaluation,
  6808. project_name=project_name,
  6809. project_metadata=project_metadata,
  6810. verbose=verbose,
  6811. input_mapper=input_mapper,
  6812. revision_id=revision_id,
  6813. dataset_version=dataset_version,
  6814. **kwargs,
  6815. )
  6816. def _current_tenant_is_owner(self, owner: str) -> bool:
  6817. """Check if the current workspace has the same handle as owner.
  6818. Args:
  6819. owner (str): The owner to check against.
  6820. Returns:
  6821. bool: True if the current tenant is the owner, False otherwise.
  6822. """
  6823. settings = self._get_settings()
  6824. return owner == "-" or settings.tenant_handle == owner
  6825. def _owner_conflict_error(
  6826. self, action: str, owner: str
  6827. ) -> ls_utils.LangSmithUserError:
  6828. return ls_utils.LangSmithUserError(
  6829. f"Cannot {action} for another tenant.\n"
  6830. f"Current tenant: {self._get_settings().tenant_handle},\n"
  6831. f"Requested tenant: {owner}"
  6832. )
  6833. def _get_latest_commit_hash(
  6834. self, prompt_owner_and_name: str, limit: int = 1, offset: int = 0
  6835. ) -> Optional[str]:
  6836. """Get the latest commit hash for a prompt.
  6837. Args:
  6838. prompt_owner_and_name (str): The owner and name of the prompt.
  6839. limit (int, default=1): The maximum number of commits to fetch. Defaults to 1.
  6840. offset (int, default=0): The number of commits to skip. Defaults to 0.
  6841. Returns:
  6842. Optional[str]: The latest commit hash, or None if no commits are found.
  6843. """
  6844. response = self.request_with_retries(
  6845. "GET",
  6846. f"/commits/{prompt_owner_and_name}/",
  6847. params={"limit": limit, "offset": offset},
  6848. )
  6849. commits = response.json()["commits"]
  6850. return commits[0]["commit_hash"] if commits else None
  6851. def _like_or_unlike_prompt(
  6852. self, prompt_identifier: str, like: bool
  6853. ) -> dict[str, int]:
  6854. """Like or unlike a prompt.
  6855. Args:
  6856. prompt_identifier (str): The identifier of the prompt.
  6857. like (bool): True to like the prompt, False to unlike it.
  6858. Returns:
  6859. A dictionary with the key 'likes' and the count of likes as the value.
  6860. Raises:
  6861. requests.exceptions.HTTPError: If the prompt is not found or
  6862. another error occurs.
  6863. """
  6864. owner, prompt_name, _ = ls_utils.parse_prompt_identifier(prompt_identifier)
  6865. response = self.request_with_retries(
  6866. "POST", f"/likes/{owner}/{prompt_name}", json={"like": like}
  6867. )
  6868. response.raise_for_status()
  6869. return response.json()
  6870. def _get_prompt_url(self, prompt_identifier: str) -> str:
  6871. """Get a URL for a prompt.
  6872. Args:
  6873. prompt_identifier (str): The identifier of the prompt.
  6874. Returns:
  6875. str: The URL for the prompt.
  6876. """
  6877. owner, prompt_name, commit_hash = ls_utils.parse_prompt_identifier(
  6878. prompt_identifier
  6879. )
  6880. if not self._current_tenant_is_owner(owner):
  6881. return f"{self._host_url}/hub/{owner}/{prompt_name}:{commit_hash[:8]}"
  6882. settings = self._get_settings()
  6883. return (
  6884. f"{self._host_url}/prompts/{prompt_name}/{commit_hash[:8]}"
  6885. f"?organizationId={settings.id}"
  6886. )
  6887. def _prompt_exists(self, prompt_identifier: str) -> bool:
  6888. """Check if a prompt exists.
  6889. Args:
  6890. prompt_identifier (str): The identifier of the prompt.
  6891. Returns:
  6892. bool: True if the prompt exists, False otherwise.
  6893. """
  6894. prompt = self.get_prompt(prompt_identifier)
  6895. return True if prompt else False
def like_prompt(self, prompt_identifier: str) -> dict[str, int]:
    """Like a prompt.

    Delegates to the shared like/unlike helper with ``like=True``.

    Args:
        prompt_identifier (str): The identifier of the prompt.

    Returns:
        Dict[str, int]: A dictionary with the key 'likes' and the count of
            likes as the value.
    """
    return self._like_or_unlike_prompt(prompt_identifier, like=True)
def unlike_prompt(self, prompt_identifier: str) -> dict[str, int]:
    """Unlike a prompt.

    Delegates to the shared like/unlike helper with ``like=False``.

    Args:
        prompt_identifier (str): The identifier of the prompt.

    Returns:
        Dict[str, int]: A dictionary with the key 'likes' and the count of
            likes as the value.
    """
    return self._like_or_unlike_prompt(prompt_identifier, like=False)
  6912. def list_prompts(
  6913. self,
  6914. *,
  6915. limit: int = 100,
  6916. offset: int = 0,
  6917. is_public: Optional[bool] = None,
  6918. is_archived: Optional[bool] = False,
  6919. sort_field: ls_schemas.PromptSortField = ls_schemas.PromptSortField.updated_at,
  6920. sort_direction: Literal["desc", "asc"] = "desc",
  6921. query: Optional[str] = None,
  6922. ) -> ls_schemas.ListPromptsResponse:
  6923. """List prompts with pagination.
  6924. Args:
  6925. limit (int, default=100): The maximum number of prompts to return. Defaults to 100.
  6926. offset (int, default=0): The number of prompts to skip. Defaults to 0.
  6927. is_public (Optional[bool]): Filter prompts by if they are public.
  6928. is_archived (Optional[bool]): Filter prompts by if they are archived.
  6929. sort_field (PromptSortField): The field to sort by.
  6930. Defaults to "updated_at".
  6931. sort_direction (Literal["desc", "asc"], default="desc"): The order to sort by.
  6932. Defaults to "desc".
  6933. query (Optional[str]): Filter prompts by a search query.
  6934. Returns:
  6935. ListPromptsResponse: A response object containing
  6936. the list of prompts.
  6937. """
  6938. params = {
  6939. "limit": limit,
  6940. "offset": offset,
  6941. "is_public": (
  6942. "true" if is_public else "false" if is_public is not None else None
  6943. ),
  6944. "is_archived": "true" if is_archived else "false",
  6945. "sort_field": sort_field,
  6946. "sort_direction": sort_direction,
  6947. "query": query,
  6948. "match_prefix": "true" if query else None,
  6949. }
  6950. response = self.request_with_retries("GET", "/repos/", params=params)
  6951. return ls_schemas.ListPromptsResponse(**response.json())
  6952. def get_prompt(self, prompt_identifier: str) -> Optional[ls_schemas.Prompt]:
  6953. """Get a specific prompt by its identifier.
  6954. Args:
  6955. prompt_identifier (str): The identifier of the prompt.
  6956. The identifier should be in the format "prompt_name" or "owner/prompt_name".
  6957. Returns:
  6958. Optional[Prompt]: The prompt object.
  6959. Raises:
  6960. requests.exceptions.HTTPError: If the prompt is not found or
  6961. another error occurs.
  6962. """
  6963. owner, prompt_name, _ = ls_utils.parse_prompt_identifier(prompt_identifier)
  6964. try:
  6965. response = self.request_with_retries("GET", f"/repos/{owner}/{prompt_name}")
  6966. return ls_schemas.Prompt(**response.json()["repo"])
  6967. except ls_utils.LangSmithNotFoundError:
  6968. return None
  6969. def create_prompt(
  6970. self,
  6971. prompt_identifier: str,
  6972. *,
  6973. description: Optional[str] = None,
  6974. readme: Optional[str] = None,
  6975. tags: Optional[Sequence[str]] = None,
  6976. is_public: bool = False,
  6977. ) -> ls_schemas.Prompt:
  6978. """Create a new prompt.
  6979. Does not attach prompt object, just creates an empty prompt.
  6980. Args:
  6981. prompt_identifier (str): The identifier of the prompt.
  6982. The identifier should be in the formatof owner/name:hash, name:hash, owner/name, or name
  6983. description (Optional[str]): A description of the prompt.
  6984. readme (Optional[str]): A readme for the prompt.
  6985. tags (Optional[Sequence[str]]): A list of tags for the prompt.
  6986. is_public (bool): Whether the prompt should be public.
  6987. Returns:
  6988. Prompt: The created prompt object.
  6989. Raises:
  6990. ValueError: If the current tenant is not the owner.
  6991. HTTPError: If the server request fails.
  6992. """
  6993. settings = self._get_settings()
  6994. if is_public and not settings.tenant_handle:
  6995. raise ls_utils.LangSmithUserError(
  6996. "Cannot create a public prompt without first\n"
  6997. "creating a LangChain Hub handle. "
  6998. "You can add a handle by creating a public prompt at:\n"
  6999. "https://smith.langchain.com/prompts"
  7000. )
  7001. owner, prompt_name, _ = ls_utils.parse_prompt_identifier(prompt_identifier)
  7002. if not self._current_tenant_is_owner(owner=owner):
  7003. raise self._owner_conflict_error("create a prompt", owner)
  7004. json: dict[str, Union[str, bool, Sequence[str]]] = {
  7005. "repo_handle": prompt_name,
  7006. "description": description or "",
  7007. "readme": readme or "",
  7008. "tags": tags or [],
  7009. "is_public": is_public,
  7010. }
  7011. response = self.request_with_retries("POST", "/repos/", json=json)
  7012. response.raise_for_status()
  7013. return ls_schemas.Prompt(**response.json()["repo"])
  7014. def create_commit(
  7015. self,
  7016. prompt_identifier: str,
  7017. object: Any,
  7018. *,
  7019. parent_commit_hash: Optional[str] = None,
  7020. ) -> str:
  7021. """Create a commit for an existing prompt.
  7022. Args:
  7023. prompt_identifier (str): The identifier of the prompt.
  7024. object (Any): The LangChain object to commit.
  7025. parent_commit_hash (Optional[str]): The hash of the parent commit.
  7026. Defaults to latest commit.
  7027. Returns:
  7028. str: The url of the prompt commit.
  7029. Raises:
  7030. HTTPError: If the server request fails.
  7031. ValueError: If the prompt does not exist.
  7032. """
  7033. if not self._prompt_exists(prompt_identifier):
  7034. raise ls_utils.LangSmithNotFoundError(
  7035. "Prompt does not exist, you must create it first."
  7036. )
  7037. try:
  7038. from langchain_core.load import dumps
  7039. except ImportError:
  7040. raise ImportError(
  7041. "The client.create_commit function requires the langchain-core"
  7042. "package to run.\nInstall with `pip install langchain-core`"
  7043. )
  7044. json_object = dumps(prep_obj_for_push(object))
  7045. manifest_dict = json.loads(json_object)
  7046. owner, prompt_name, _ = ls_utils.parse_prompt_identifier(prompt_identifier)
  7047. prompt_owner_and_name = f"{owner}/{prompt_name}"
  7048. if parent_commit_hash == "latest" or parent_commit_hash is None:
  7049. parent_commit_hash = self._get_latest_commit_hash(prompt_owner_and_name)
  7050. request_dict = {"parent_commit": parent_commit_hash, "manifest": manifest_dict}
  7051. response = self.request_with_retries(
  7052. "POST", f"/commits/{prompt_owner_and_name}", json=request_dict
  7053. )
  7054. commit_hash = response.json()["commit"]["commit_hash"]
  7055. return self._get_prompt_url(f"{prompt_owner_and_name}:{commit_hash}")
  7056. def update_prompt(
  7057. self,
  7058. prompt_identifier: str,
  7059. *,
  7060. description: Optional[str] = None,
  7061. readme: Optional[str] = None,
  7062. tags: Optional[Sequence[str]] = None,
  7063. is_public: Optional[bool] = None,
  7064. is_archived: Optional[bool] = None,
  7065. ) -> dict[str, Any]:
  7066. """Update a prompt's metadata.
  7067. To update the content of a prompt, use push_prompt or create_commit instead.
  7068. Args:
  7069. prompt_identifier (str): The identifier of the prompt to update.
  7070. description (Optional[str]): New description for the prompt.
  7071. readme (Optional[str]): New readme for the prompt.
  7072. tags (Optional[Sequence[str]]): New list of tags for the prompt.
  7073. is_public (Optional[bool]): New public status for the prompt.
  7074. is_archived (Optional[bool]): New archived status for the prompt.
  7075. Returns:
  7076. Dict[str, Any]: The updated prompt data as returned by the server.
  7077. Raises:
  7078. ValueError: If the prompt_identifier is empty.
  7079. HTTPError: If the server request fails.
  7080. """
  7081. settings = self._get_settings()
  7082. if is_public and not settings.tenant_handle:
  7083. raise ValueError(
  7084. "Cannot create a public prompt without first\n"
  7085. "creating a LangChain Hub handle. "
  7086. "You can add a handle by creating a public prompt at:\n"
  7087. "https://smith.langchain.com/prompts"
  7088. )
  7089. json: dict[str, Union[str, bool, Sequence[str]]] = {}
  7090. if description is not None:
  7091. json["description"] = description
  7092. if readme is not None:
  7093. json["readme"] = readme
  7094. if is_public is not None:
  7095. json["is_public"] = is_public
  7096. if is_archived is not None:
  7097. json["is_archived"] = is_archived
  7098. if tags is not None:
  7099. json["tags"] = tags
  7100. owner, prompt_name, _ = ls_utils.parse_prompt_identifier(prompt_identifier)
  7101. response = self.request_with_retries(
  7102. "PATCH", f"/repos/{owner}/{prompt_name}", json=json
  7103. )
  7104. response.raise_for_status()
  7105. return response.json()
def delete_prompt(self, prompt_identifier: str) -> None:
    """Delete a prompt.

    Args:
        prompt_identifier (str): The identifier of the prompt to delete.

    Returns:
        None

    Raises:
        ValueError: If the current tenant is not the owner of the prompt.
    """
    owner, prompt_name, _ = ls_utils.parse_prompt_identifier(prompt_identifier)
    if not self._current_tenant_is_owner(owner):
        raise self._owner_conflict_error("delete a prompt", owner)
    response = self.request_with_retries("DELETE", f"/repos/{owner}/{prompt_name}")
    response.raise_for_status()
  7120. def pull_prompt_commit(
  7121. self,
  7122. prompt_identifier: str,
  7123. *,
  7124. include_model: Optional[bool] = False,
  7125. ) -> ls_schemas.PromptCommit:
  7126. """Pull a prompt object from the LangSmith API.
  7127. Args:
  7128. prompt_identifier (str): The identifier of the prompt.
  7129. Returns:
  7130. PromptCommit: The prompt object.
  7131. Raises:
  7132. ValueError: If no commits are found for the prompt.
  7133. """
  7134. owner, prompt_name, commit_hash = ls_utils.parse_prompt_identifier(
  7135. prompt_identifier
  7136. )
  7137. response = self.request_with_retries(
  7138. "GET",
  7139. (
  7140. f"/commits/{owner}/{prompt_name}/{commit_hash}"
  7141. f"{'?include_model=true' if include_model else ''}"
  7142. ),
  7143. )
  7144. return ls_schemas.PromptCommit(
  7145. **{"owner": owner, "repo": prompt_name, **response.json()}
  7146. )
def list_prompt_commits(
    self,
    prompt_identifier: str,
    *,
    limit: Optional[int] = None,
    offset: int = 0,
    include_model: bool = False,
) -> Iterator[ls_schemas.ListedPromptCommit]:
    """List commits for a given prompt.

    Args:
        prompt_identifier (str): The identifier of the prompt in the format 'owner/repo_name'.
        limit (Optional[int]): The maximum number of commits to return. If None, returns all commits.
        offset (int, default=0): The number of commits to skip before starting to return results.
        include_model (bool, default=False): Whether to include the model information in the commit data.

    Yields:
        A ListedPromptCommit object for each commit.

    !!! note
        This method uses pagination to retrieve commits. It will make multiple API calls if necessary to retrieve all commits
        or up to the specified limit.
    """
    owner, prompt_name, _ = ls_utils.parse_prompt_identifier(prompt_identifier)
    # Page size is capped at 100 per request; larger limits are satisfied by
    # fetching additional pages in the loop below.
    params = {
        "limit": min(100, limit) if limit is not None else limit,
        "offset": offset,
        "include_model": include_model,
    }
    # i counts commits yielded so far, across all pages.
    i = 0
    while True:
        params["offset"] = offset
        response = self.request_with_retries(
            "GET",
            f"/commits/{owner}/{prompt_name}/",
            params=params,
        )
        val = response.json()
        items = val["commits"]
        total = val["total"]
        if not items:
            break
        for it in items:
            if limit is not None and i >= limit:
                return  # Stop iteration if we've reached the limit
            # The API response lacks owner/repo; merge them in for the schema.
            yield ls_schemas.ListedPromptCommit(
                **{"owner": owner, "repo": prompt_name, **it}
            )
            i += 1
        # Advance by the page actually received and stop once the reported
        # total has been consumed.
        offset += len(items)
        if offset >= total:
            break
def pull_prompt(
    self, prompt_identifier: str, *, include_model: Optional[bool] = False
) -> Any:
    """Pull a prompt and return it as a LangChain `PromptTemplate`.

    This method requires [`langchain-core`](https://pypi.org/project/langchain-core).

    Args:
        prompt_identifier: The identifier of the prompt.
        include_model: Whether to include the model information in the prompt data.

    Returns:
        Any: The prompt object in the specified format.
    """
    try:
        from langchain_core.language_models.base import BaseLanguageModel
        from langchain_core.load.load import loads
        from langchain_core.output_parsers import BaseOutputParser
        from langchain_core.prompts import BasePromptTemplate
        from langchain_core.prompts.structured import StructuredPrompt
        from langchain_core.runnables.base import RunnableBinding, RunnableSequence
    except ImportError:
        # NOTE(review): the adjacent literals concatenate to
        # "langchain-corepackage" — message is missing a space; confirm and fix.
        raise ImportError(
            "The client.pull_prompt function requires the langchain-core"
            "package to run.\nInstall with `pip install langchain-core`"
        )
    try:
        from langchain_core._api import suppress_langchain_beta_warning
    except ImportError:
        # Older langchain-core versions lack this helper; substitute a no-op
        # context manager so the `with` below still works.
        @contextlib.contextmanager
        def suppress_langchain_beta_warning():
            yield
    prompt_object = self.pull_prompt_commit(
        prompt_identifier, include_model=include_model
    )
    with suppress_langchain_beta_warning():
        prompt = loads(json.dumps(prompt_object.manifest))
    # Stamp hub provenance metadata onto the underlying prompt template,
    # whether the manifest is a bare template or a sequence starting with one.
    if (
        isinstance(prompt, BasePromptTemplate)
        or isinstance(prompt, RunnableSequence)
        and isinstance(prompt.first, BasePromptTemplate)
    ):
        prompt_template = (
            prompt
            if isinstance(prompt, BasePromptTemplate)
            else (
                prompt.first
                if isinstance(prompt, RunnableSequence)
                and isinstance(prompt.first, BasePromptTemplate)
                else None
            )
        )
        if prompt_template is None:
            raise ls_utils.LangSmithError(
                "Prompt object is not a valid prompt template."
            )
        if prompt_template.metadata is None:
            prompt_template.metadata = {}
        prompt_template.metadata.update(
            {
                "lc_hub_owner": prompt_object.owner,
                "lc_hub_repo": prompt_object.repo,
                "lc_hub_commit_hash": prompt_object.commit_hash,
            }
        )
    # Transform 2-step RunnableSequence to 3-step for structured prompts
    # See create_commit for the reverse transformation
    if (
        include_model
        and isinstance(prompt, RunnableSequence)
        and isinstance(prompt.first, StructuredPrompt)
        # Make forward-compatible in case we let update the response type
        and (
            len(prompt.steps) == 2 and not isinstance(prompt.last, BaseOutputParser)
        )
    ):
        if isinstance(prompt.last, RunnableBinding) and isinstance(
            prompt.last.bound, BaseLanguageModel
        ):
            # Re-pipe the structured prompt onto the *unbound* model so the
            # pipeline can insert its output parser as a third step.
            seq = cast(RunnableSequence, prompt.first | prompt.last.bound)
            if len(seq.steps) == 3:  # prompt | bound llm | output parser
                rebound_llm = seq.steps[1]
                # Re-apply the original binding kwargs to the rebuilt llm.
                prompt = RunnableSequence(
                    prompt.first,
                    rebound_llm.bind(**{**prompt.last.kwargs}),
                    seq.last,
                )
            else:
                prompt = seq  # Not sure
        elif isinstance(prompt.last, BaseLanguageModel):
            prompt: RunnableSequence = prompt.first | prompt.last  # type: ignore[no-redef, assignment]
        else:
            pass
    return prompt
  7287. def push_prompt(
  7288. self,
  7289. prompt_identifier: str,
  7290. *,
  7291. object: Optional[Any] = None,
  7292. parent_commit_hash: str = "latest",
  7293. is_public: Optional[bool] = None,
  7294. description: Optional[str] = None,
  7295. readme: Optional[str] = None,
  7296. tags: Optional[Sequence[str]] = None,
  7297. ) -> str:
  7298. """Push a prompt to the LangSmith API.
  7299. Can be used to update prompt metadata or prompt content.
  7300. If the prompt does not exist, it will be created.
  7301. If the prompt exists, it will be updated.
  7302. Args:
  7303. prompt_identifier (str): The identifier of the prompt.
  7304. object (Optional[Any]): The LangChain object to push.
  7305. parent_commit_hash (str): The parent commit hash.
  7306. Defaults to "latest".
  7307. is_public (Optional[bool]): Whether the prompt should be public.
  7308. If None (default), the current visibility status is maintained for existing prompts.
  7309. For new prompts, None defaults to private.
  7310. Set to True to make public, or False to make private.
  7311. description (Optional[str]): A description of the prompt.
  7312. Defaults to an empty string.
  7313. readme (Optional[str]): A readme for the prompt.
  7314. Defaults to an empty string.
  7315. tags (Optional[Sequence[str]]): A list of tags for the prompt.
  7316. Defaults to an empty list.
  7317. Returns:
  7318. str: The URL of the prompt.
  7319. """
  7320. # Create or update prompt metadata
  7321. if self._prompt_exists(prompt_identifier):
  7322. if any(
  7323. param is not None for param in [is_public, description, readme, tags]
  7324. ):
  7325. self.update_prompt(
  7326. prompt_identifier,
  7327. description=description,
  7328. readme=readme,
  7329. tags=tags,
  7330. is_public=is_public,
  7331. )
  7332. else:
  7333. self.create_prompt(
  7334. prompt_identifier,
  7335. is_public=is_public if is_public is not None else False,
  7336. description=description,
  7337. readme=readme,
  7338. tags=tags,
  7339. )
  7340. if object is None:
  7341. return self._get_prompt_url(prompt_identifier=prompt_identifier)
  7342. # Create a commit with the new manifest
  7343. url = self.create_commit(
  7344. prompt_identifier,
  7345. object,
  7346. parent_commit_hash=parent_commit_hash,
  7347. )
  7348. return url
  7349. def cleanup(self) -> None:
  7350. """Manually trigger cleanup of the background thread."""
  7351. self._manual_cleanup = True
# Overload: a single target (callable, Runnable, or an existing experiment)
# produces an ExperimentResults.
@overload
def evaluate(
    self,
    target: Union[TARGET_T, Runnable, EXPERIMENT_T],
    /,
    data: Optional[DATA_T] = None,
    evaluators: Optional[Sequence[EVALUATOR_T]] = None,
    summary_evaluators: Optional[Sequence[SUMMARY_EVALUATOR_T]] = None,
    metadata: Optional[dict] = None,
    experiment_prefix: Optional[str] = None,
    description: Optional[str] = None,
    max_concurrency: Optional[int] = 0,
    num_repetitions: int = 1,
    blocking: bool = True,
    experiment: Optional[EXPERIMENT_T] = None,
    upload_results: bool = True,
    **kwargs: Any,
) -> ExperimentResults: ...
# Overload: a two-tuple of existing experiments triggers a pairwise
# (comparative) evaluation and produces a ComparativeExperimentResults.
@overload
def evaluate(
    self,
    target: Union[tuple[EXPERIMENT_T, EXPERIMENT_T]],
    /,
    data: Optional[DATA_T] = None,
    evaluators: Optional[Sequence[COMPARATIVE_EVALUATOR_T]] = None,
    summary_evaluators: Optional[Sequence[SUMMARY_EVALUATOR_T]] = None,
    metadata: Optional[dict] = None,
    experiment_prefix: Optional[str] = None,
    description: Optional[str] = None,
    max_concurrency: Optional[int] = 0,
    num_repetitions: int = 1,
    blocking: bool = True,
    experiment: Optional[EXPERIMENT_T] = None,
    upload_results: bool = True,
    **kwargs: Any,
) -> ComparativeExperimentResults: ...
def evaluate(
    self,
    target: Union[
        TARGET_T, Runnable, EXPERIMENT_T, tuple[EXPERIMENT_T, EXPERIMENT_T]
    ],
    /,
    data: Optional[DATA_T] = None,
    evaluators: Optional[
        Union[Sequence[EVALUATOR_T], Sequence[COMPARATIVE_EVALUATOR_T]]
    ] = None,
    summary_evaluators: Optional[Sequence[SUMMARY_EVALUATOR_T]] = None,
    metadata: Optional[dict] = None,
    experiment_prefix: Optional[str] = None,
    description: Optional[str] = None,
    max_concurrency: Optional[int] = 0,
    num_repetitions: int = 1,
    blocking: bool = True,
    experiment: Optional[EXPERIMENT_T] = None,
    upload_results: bool = True,
    error_handling: Literal["log", "ignore"] = "log",
    **kwargs: Any,
) -> Union[ExperimentResults, ComparativeExperimentResults]:
    r"""Evaluate a target system on a given dataset.

    Args:
        target (Union[TARGET_T, Runnable, EXPERIMENT_T, Tuple[EXPERIMENT_T, EXPERIMENT_T]]):
            The target system or experiment(s) to evaluate.
            Can be a function that takes a `dict` and returns a `dict`, a langchain `Runnable`, an
            existing experiment ID, or a two-tuple of experiment IDs.
        data (DATA_T): The dataset to evaluate on.
            Can be a dataset name, a list of examples, or a generator of examples.
        evaluators (Optional[Union[Sequence[EVALUATOR_T], Sequence[COMPARATIVE_EVALUATOR_T]]]):
            A list of evaluators to run on each example. The evaluator signature
            depends on the target type. Default to None.
        summary_evaluators (Optional[Sequence[SUMMARY_EVALUATOR_T]]): A list of summary
            evaluators to run on the entire dataset. Should not be specified if
            comparing two existing experiments.
        metadata (Optional[dict]): Metadata to attach to the experiment.
        experiment_prefix (Optional[str]): A prefix to provide for your experiment name.
        description (Optional[str]): A free-form text description for the experiment.
        max_concurrency (Optional[int], default=0): The maximum number of concurrent
            evaluations to run.
            If `None` then no limit is set. If `0` then no concurrency.
        blocking (bool, default=True): Whether to block until the evaluation is complete.
        num_repetitions (int, default=1): The number of times to run the evaluation.
            Each item in the dataset will be run and evaluated this many times.
            Defaults to 1.
        experiment (Optional[EXPERIMENT_T]): An existing experiment to
            extend.
            If provided, `experiment_prefix` is ignored.
            For advanced usage only. Should not be specified if target is an existing experiment or
            two-tuple of experiments.
        upload_results (bool, default=True): Whether to upload the results to LangSmith.
        error_handling (str, default="log"): How to handle individual run errors.
            `'log'` will trace the runs with the error message as part of the
            experiment, `'ignore'` will not count the run as part of the experiment at
            all.
        **kwargs (Any): Additional keyword arguments to pass to the evaluator.

    Returns:
        ExperimentResults: If target is a function, Runnable, or existing experiment.
        ComparativeExperimentResults: If target is a two-tuple of existing experiments.

    Examples:
        Prepare the dataset:

        ```python
        from langsmith import Client

        client = Client()
        dataset = client.clone_public_dataset(
            "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
        )
        dataset_name = "Evaluate Examples"
        ```

        Basic usage:

        ```python
        def accuracy(outputs: dict, reference_outputs: dict) -> dict:
            # Row-level evaluator for accuracy.
            pred = outputs["response"]
            expected = reference_outputs["answer"]
            return {"score": expected.lower() == pred.lower()}
        ```

        ```python
        def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict:
            # Experiment-level evaluator for precision.
            # TP / (TP + FP)
            predictions = [out["response"].lower() for out in outputs]
            expected = [ref["answer"].lower() for ref in reference_outputs]
            # yes and no are the only possible answers
            tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
            fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
            return {"score": tp / (tp + fp)}


        def predict(inputs: dict) -> dict:
            # This can be any function or just an API call to your app.
            return {"response": "Yes"}


        results = client.evaluate(
            predict,
            data=dataset_name,
            evaluators=[accuracy],
            summary_evaluators=[precision],
            experiment_prefix="My Experiment",
            description="Evaluating the accuracy of a simple prediction model.",
            metadata={
                "my-prompt-version": "abcd-1234",
            },
        )
        ```

        Evaluating over only a subset of the examples

        ```python
        experiment_name = results.experiment_name
        examples = client.list_examples(dataset_name=dataset_name, limit=5)
        results = client.evaluate(
            predict,
            data=examples,
            evaluators=[accuracy],
            summary_evaluators=[precision],
            experiment_prefix="My Experiment",
            description="Just testing a subset synchronously.",
        )
        ```

        Streaming each prediction to more easily + eagerly debug.

        ```python
        results = client.evaluate(
            predict,
            data=dataset_name,
            evaluators=[accuracy],
            summary_evaluators=[precision],
            description="I don't even have to block!",
            blocking=False,
        )
        for i, result in enumerate(results):  # doctest: +ELLIPSIS
            pass
        ```

        Using the `evaluate` API with an off-the-shelf LangChain evaluator:

        ```python
        from langsmith.evaluation import LangChainStringEvaluator
        from langchain.chat_models import init_chat_model


        def prepare_criteria_data(run: Run, example: Example):
            return {
                "prediction": run.outputs["output"],
                "reference": example.outputs["answer"],
                "input": str(example.inputs),
            }


        results = client.evaluate(
            predict,
            data=dataset_name,
            evaluators=[
                accuracy,
                LangChainStringEvaluator("embedding_distance"),
                LangChainStringEvaluator(
                    "labeled_criteria",
                    config={
                        "criteria": {
                            "usefulness": "The prediction is useful if it is correct"
                            " and/or asks a useful followup question."
                        },
                        "llm": init_chat_model("gpt-4o"),
                    },
                    prepare_data=prepare_criteria_data,
                ),
            ],
            description="Evaluating with off-the-shelf LangChain evaluators.",
            summary_evaluators=[precision],
        )
        ```

        View the evaluation results for experiment:...

        Evaluating a LangChain object:

        ```python
        from langchain_core.runnables import chain as as_runnable


        @as_runnable
        def nested_predict(inputs):
            return {"response": "Yes"}


        @as_runnable
        def lc_predict(inputs):
            return nested_predict.invoke(inputs)


        results = client.evaluate(
            lc_predict,
            data=dataset_name,
            evaluators=[accuracy],
            description="This time we're evaluating a LangChain object.",
            summary_evaluators=[precision],
        )
        ```

        Comparative evaluation:

        ```python
        results = client.evaluate(
            # The target is a tuple of the experiment IDs to compare
            target=(
                "12345678-1234-1234-1234-123456789012",
                "98765432-1234-1234-1234-123456789012",
            ),
            evaluators=[accuracy],
            summary_evaluators=[precision],
        )
        ```

        Evaluate an existing experiment:

        ```python
        results = client.evaluate(
            # The target is the ID of the experiment we are evaluating
            target="12345678-1234-1234-1234-123456789012",
            evaluators=[accuracy],
            summary_evaluators=[precision],
        )
        ```

    !!! version-added "Added in `langsmith` 0.2.0"
    """  # noqa: E501
    from langsmith.evaluation._runner import evaluate as evaluate_

    # Need to ignore because it fails when there are too many union types +
    # overloads.
    return evaluate_(  # type: ignore[misc]
        target,  # type: ignore[arg-type]
        data=data,
        evaluators=evaluators,  # type: ignore[arg-type]
        summary_evaluators=summary_evaluators,
        metadata=metadata,
        experiment_prefix=experiment_prefix,
        description=description,
        max_concurrency=max_concurrency,
        num_repetitions=num_repetitions,
        client=self,
        blocking=blocking,
        experiment=experiment,
        upload_results=upload_results,
        error_handling=error_handling,
        **kwargs,
    )
async def aevaluate(
    self,
    target: Union[
        ATARGET_T,
        AsyncIterable[dict],
        Runnable,
        str,
        uuid.UUID,
        schemas.TracerSession,
    ],
    /,
    data: Union[
        DATA_T, AsyncIterable[schemas.Example], Iterable[schemas.Example], None
    ] = None,
    evaluators: Optional[Sequence[Union[EVALUATOR_T, AEVALUATOR_T]]] = None,
    summary_evaluators: Optional[Sequence[SUMMARY_EVALUATOR_T]] = None,
    metadata: Optional[dict] = None,
    experiment_prefix: Optional[str] = None,
    description: Optional[str] = None,
    max_concurrency: Optional[int] = 0,
    num_repetitions: int = 1,
    blocking: bool = True,
    experiment: Optional[Union[schemas.TracerSession, str, uuid.UUID]] = None,
    upload_results: bool = True,
    error_handling: Literal["log", "ignore"] = "log",
    **kwargs: Any,
) -> AsyncExperimentResults:
    r"""Evaluate an async target system on a given dataset.

    Args:
        target (Union[ATARGET_T, AsyncIterable[dict], Runnable, str, uuid.UUID, TracerSession]):
            The target system or experiment(s) to evaluate.
            Can be an async function that takes a `dict` and returns a `dict`, a langchain `Runnable`, an
            existing experiment ID, or a two-tuple of experiment IDs.
        data (Union[DATA_T, AsyncIterable[Example]]): The dataset to evaluate on.
            Can be a dataset name, a list of examples, an async generator of examples, or an async iterable of examples.
        evaluators (Optional[Sequence[EVALUATOR_T]]): A list of evaluators to run
            on each example.
        summary_evaluators (Optional[Sequence[SUMMARY_EVALUATOR_T]]): A list of summary
            evaluators to run on the entire dataset.
        metadata (Optional[dict]): Metadata to attach to the experiment.
        experiment_prefix (Optional[str]): A prefix to provide for your experiment name.
        description (Optional[str]): A description of the experiment.
        max_concurrency (Optional[int], default=0): The maximum number of concurrent
            evaluations to run.
            If `None` then no limit is set. If `0` then no concurrency.
        num_repetitions (int, default=1): The number of times to run the evaluation.
            Each item in the dataset will be run and evaluated this many times.
            Defaults to 1.
        blocking (bool, default=True): Whether to block until the evaluation is complete.
        experiment (Optional[TracerSession]): An existing experiment to
            extend.
            If provided, `experiment_prefix` is ignored.
            For advanced usage only.
        upload_results (bool, default=True): Whether to upload the results to LangSmith.
        error_handling (str, default="log"): How to handle individual run errors.
            `'log'` will trace the runs with the error message as part of the
            experiment, `'ignore'` will not count the run as part of the experiment at
            all.
        **kwargs (Any): Additional keyword arguments to pass to the evaluator.

    Returns:
        An async iterator over the experiment results.

    Environment:
        - `LANGSMITH_TEST_CACHE`: If set, API calls will be cached to disk to save time and
            cost during testing.
            Recommended to commit the cache files to your repository for faster CI/CD runs.
            Requires the `'langsmith[vcr]'` package to be installed.

    Examples:
        Prepare the dataset:

        ```python
        import asyncio
        from langsmith import Client

        client = Client()
        dataset = client.clone_public_dataset(
            "https://smith.langchain.com/public/419dcab2-1d66-4b94-8901-0357ead390df/d"
        )
        dataset_name = "Evaluate Examples"
        ```

        Basic usage:

        ```python
        def accuracy(outputs: dict, reference_outputs: dict) -> dict:
            # Row-level evaluator for accuracy.
            pred = outputs["response"]
            expected = reference_outputs["answer"]
            return {"score": expected.lower() == pred.lower()}


        def precision(outputs: list[dict], reference_outputs: list[dict]) -> dict:
            # Experiment-level evaluator for precision.
            # TP / (TP + FP)
            predictions = [out["response"].lower() for out in outputs]
            expected = [ref["answer"].lower() for ref in reference_outputs]
            # yes and no are the only possible answers
            tp = sum([p == e for p, e in zip(predictions, expected) if p == "yes"])
            fp = sum([p == "yes" and e == "no" for p, e in zip(predictions, expected)])
            return {"score": tp / (tp + fp)}


        async def apredict(inputs: dict) -> dict:
            # This can be any async function or just an API call to your app.
            await asyncio.sleep(0.1)
            return {"response": "Yes"}


        results = asyncio.run(
            client.aevaluate(
                apredict,
                data=dataset_name,
                evaluators=[accuracy],
                summary_evaluators=[precision],
                experiment_prefix="My Experiment",
                description="Evaluate the accuracy of the model asynchronously.",
                metadata={
                    "my-prompt-version": "abcd-1234",
                },
            )
        )
        ```

        Evaluating over only a subset of the examples using an async generator:

        ```python
        async def example_generator():
            examples = client.list_examples(dataset_name=dataset_name, limit=5)
            for example in examples:
                yield example


        results = asyncio.run(
            client.aevaluate(
                apredict,
                data=example_generator(),
                evaluators=[accuracy],
                summary_evaluators=[precision],
                experiment_prefix="My Subset Experiment",
                description="Evaluate a subset of examples asynchronously.",
            )
        )
        ```

        Streaming each prediction to more easily + eagerly debug.

        ```python
        results = asyncio.run(
            client.aevaluate(
                apredict,
                data=dataset_name,
                evaluators=[accuracy],
                summary_evaluators=[precision],
                experiment_prefix="My Streaming Experiment",
                description="Streaming predictions for debugging.",
                blocking=False,
            )
        )


        async def aenumerate(iterable):
            async for elem in iterable:
                print(elem)


        asyncio.run(aenumerate(results))
        ```

        Running without concurrency:

        ```python
        results = asyncio.run(
            client.aevaluate(
                apredict,
                data=dataset_name,
                evaluators=[accuracy],
                summary_evaluators=[precision],
                experiment_prefix="My Experiment Without Concurrency",
                description="This was run without concurrency.",
                max_concurrency=0,
            )
        )
        ```

        Using Async evaluators:

        ```python
        async def helpfulness(outputs: dict) -> dict:
            # Row-level evaluator for helpfulness.
            await asyncio.sleep(5)  # Replace with your LLM API call
            return {"score": outputs["output"] == "Yes"}


        results = asyncio.run(
            client.aevaluate(
                apredict,
                data=dataset_name,
                evaluators=[helpfulness],
                summary_evaluators=[precision],
                experiment_prefix="My Helpful Experiment",
                description="Applying async evaluators example.",
            )
        )
        ```

        Evaluate an existing experiment:

        ```python
        results = asyncio.run(
            client.aevaluate(
                # The target is the ID of the experiment we are evaluating
                target="419dcab2-1d66-4b94-8901-0357ead390df",
                evaluators=[accuracy, helpfulness],
                summary_evaluators=[precision],
            )
        )
        ```

    !!! version-added "Added in `langsmith` 0.2.0"
    """  # noqa: E501
    from langsmith.evaluation._arunner import aevaluate as aevaluate_

    return await aevaluate_(
        target,
        data=data,
        evaluators=evaluators,
        summary_evaluators=summary_evaluators,
        metadata=metadata,
        experiment_prefix=experiment_prefix,
        description=description,
        max_concurrency=max_concurrency,
        num_repetitions=num_repetitions,
        client=self,
        blocking=blocking,
        experiment=experiment,
        upload_results=upload_results,
        error_handling=error_handling,
        **kwargs,
    )
  7818. def _paginate_examples_with_runs(
  7819. self,
  7820. dataset_id: ID_TYPE,
  7821. session_id: uuid.UUID,
  7822. preview: bool = False,
  7823. comparative_experiment_id: Optional[uuid.UUID] = None,
  7824. filters: dict[uuid.UUID, list[str]] | None = None,
  7825. limit: Optional[int] = None,
  7826. ) -> Iterator[list[ExampleWithRuns]]:
  7827. """Paginate through examples with runs and yield batches.
  7828. Args:
  7829. dataset_id: Dataset UUID to fetch examples with runs
  7830. session_id: Session UUID to filter runs by, same as project_id
  7831. preview: Whether to return preview data only
  7832. comparative_experiment_id: Optional comparative experiment UUID
  7833. filters: Optional filters to apply
  7834. limit: Maximum total number of results to return
  7835. Yields:
  7836. Batches of run results as lists of ExampleWithRuns instances
  7837. """
  7838. offset = 0
  7839. results_count = 0
  7840. while True:
  7841. remaining = (limit - results_count) if limit else None
  7842. batch_limit = min(100, remaining) if remaining else 100
  7843. body = {
  7844. "session_ids": [session_id],
  7845. "offset": offset,
  7846. "limit": batch_limit,
  7847. "preview": preview,
  7848. "comparative_experiment_id": comparative_experiment_id,
  7849. "filters": filters,
  7850. }
  7851. response = self.request_with_retries(
  7852. "POST",
  7853. f"/datasets/{dataset_id}/runs",
  7854. request_kwargs={"data": _dumps_json(body)},
  7855. )
  7856. batch = response.json()
  7857. if not batch:
  7858. break
  7859. # Transform raw dictionaries to ExampleWithRuns instances
  7860. examples_batch = [ls_schemas.ExampleWithRuns(**result) for result in batch]
  7861. yield examples_batch
  7862. results_count += len(batch)
  7863. if len(batch) < batch_limit or (limit and results_count >= limit):
  7864. break
  7865. offset += len(batch)
  7866. def get_experiment_results(
  7867. self,
  7868. name: Optional[str] = None,
  7869. project_id: Optional[uuid.UUID] = None,
  7870. preview: bool = False,
  7871. comparative_experiment_id: Optional[uuid.UUID] = None,
  7872. filters: dict[uuid.UUID, list[str]] | None = None,
  7873. limit: Optional[int] = None,
  7874. ) -> ls_schemas.ExperimentResults:
  7875. """Get results for an experiment, including experiment session aggregated stats and experiment runs for each dataset example.
  7876. Experiment results may not be available immediately after the experiment is created.
  7877. Args:
  7878. name: The experiment name.
  7879. project_id: Experiment's tracing project id, also called session_id, can be found in the url of the LS experiment page
  7880. preview: Whether to return lightweight preview data only. When True,
  7881. fetches inputs_preview/outputs_preview summaries instead of full inputs/outputs from S3 storage.
  7882. Faster and less bandwidth.
  7883. comparative_experiment_id: Optional comparative experiment UUID for pairwise comparison experiment results.
  7884. filters: Optional filters to apply to results
  7885. limit: Maximum number of results to return
  7886. Returns:
  7887. ExperimentResults with:
  7888. - feedback_stats: Combined feedback statistics including session-level feedback
  7889. - run_stats: Aggregated run statistics (latency, tokens, cost, etc.)
  7890. - examples_with_runs: Iterator of ExampleWithRuns
  7891. Raises:
  7892. ValueError: If project not found for the given session_id
  7893. Example:
  7894. ```python
  7895. client = Client()
  7896. results = client.get_experiment_results(
  7897. project_id="037ae90f-f297-4926-b93c-37d8abf6899f",
  7898. )
  7899. for example_with_runs in results["examples_with_runs"]:
  7900. print(example_with_runs.dict())
  7901. # Access aggregated experiment statistics
  7902. print(f"Total runs: {results['run_stats']['run_count']}")
  7903. print(f"Total cost: {results['run_stats']['total_cost']}")
  7904. print(f"P50 latency: {results['run_stats']['latency_p50']}")
  7905. # Access feedback statistics
  7906. print(f"Feedback stats: {results['feedback_stats']}")
  7907. ```
  7908. """
  7909. project = self.read_project(
  7910. project_name=name, project_id=project_id, include_stats=True
  7911. )
  7912. if not project:
  7913. raise ValueError(f"No experiment found with project_id: '{project_id}'")
  7914. def _get_examples_with_runs_iterator():
  7915. """Yield examples with corresponding experiment runs."""
  7916. for batch in self._paginate_examples_with_runs(
  7917. dataset_id=project.reference_dataset_id,
  7918. session_id=project.id,
  7919. preview=preview,
  7920. comparative_experiment_id=comparative_experiment_id,
  7921. filters=filters,
  7922. limit=limit,
  7923. ):
  7924. yield from batch
  7925. run_stats: ls_schemas.ExperimentRunStats = {
  7926. "run_count": project.run_count,
  7927. "latency_p50": project.latency_p50,
  7928. "latency_p99": project.latency_p99,
  7929. "total_tokens": project.total_tokens,
  7930. "prompt_tokens": project.prompt_tokens,
  7931. "completion_tokens": project.completion_tokens,
  7932. "last_run_start_time": project.last_run_start_time,
  7933. "run_facets": project.run_facets,
  7934. "total_cost": project.total_cost,
  7935. "prompt_cost": project.prompt_cost,
  7936. "completion_cost": project.completion_cost,
  7937. "first_token_p50": project.first_token_p50,
  7938. "first_token_p99": project.first_token_p99,
  7939. "error_rate": project.error_rate,
  7940. }
  7941. feedback_stats = {
  7942. **(project.feedback_stats or {}),
  7943. **(project.session_feedback_stats or {}),
  7944. }
  7945. return ls_schemas.ExperimentResults(
  7946. feedback_stats=feedback_stats,
  7947. run_stats=run_stats,
  7948. examples_with_runs=_get_examples_with_runs_iterator(),
  7949. )
@warn_beta
def generate_insights(
    self,
    *,
    chat_histories: list[list[dict]],
    instructions: str = DEFAULT_INSTRUCTIONS,
    name: str | None = None,
    model: Literal["openai", "anthropic"] | None = None,
    openai_api_key: str | None = None,
    anthropic_api_key: str | None = None,
) -> ls_schemas.InsightsReport:
    """Generate Insights over your agent chat histories.

    !!! note
        - Only available to Plus and higher tier LangSmith users.
        - Insights Agent uses user's model API key. The cost of the report
          grows linearly with the number of chat histories you upload and the
          size of each history. For more see [insights](https://docs.langchain.com/langsmith/insights).
        - This method will upload your chat histories as traces to LangSmith.
        - If you pass in a model API key this will be set as a workspace secret
          meaning it will be used for evaluators and the playground.

    Args:
        chat_histories: A list of chat histories. Each chat history should be a
            list of messages. We recommend formatting these as OpenAI messages with
            a "role" and "content" key. Max length 1000 items.
        instructions: Instructions for the Insights agent. Should focus on what
            your agent does and what types of insights you
            want to generate.
        name: Name for the generated Insights report.
        model: Whether to use OpenAI or Anthropic models. This will impact the
            cost of generating the Insights Report.
        openai_api_key: OpenAI API key to use. Only needed if you have not already
            stored this in LangSmith as a workspace secret.
        anthropic_api_key: Anthropic API key to use. Only needed if you have not
            already stored this in LangSmith as a workspace secret.

    Example:
        ```python
        import os
        from langsmith import Client

        client = Client()

        chat_histories = [
            [
                {"role": "user", "content": "how are you"},
                {"role": "assistant", "content": "good!"},
            ],
            [
                {"role": "user", "content": "do you like art"},
                {"role": "assistant", "content": "only Tarkovsky"},
            ],
        ]

        report = client.generate_insights(
            chat_histories=chat_histories,
            name="Conversation Topics",
            instructions="What are the high-level topics of conversations users are having with the assistant?",
            openai_api_key=os.environ["OPENAI_API_KEY"],
        )
        # client.poll_insights(report=report)
        ```
    """
    # Resolves which provider to use and ensures its key exists as a
    # workspace secret (uploading the passed-in key if needed).
    model = self._ensure_insights_api_key(
        openai_api_key=openai_api_key,
        anthropic_api_key=anthropic_api_key,
        model=model,
    )
    # Chat histories are uploaded as traces into a dedicated project.
    project = self._ingest_insights_runs(chat_histories, name)
    config = {
        "name": name,
        "user_context": {
            "How are your agent traces structured?": "The run.outputs.messages field contains a chat history between the user and the agent. This is all the context you need.",
            "What would you like to learn about your agent?": instructions,
        },
        # Only the just-ingested runs should be in scope for the agent.
        "last_n_hours": 1,
        "model": model,
    }
    response = self.request_with_retries(
        "POST", f"/sessions/{project.id}/insights", json=config
    )
    ls_utils.raise_for_status_with_text(response)
    res = response.json()
    report = ls_schemas.InsightsReport(
        **res,
        project_id=project.id,
        tenant_id=self._get_tenant_id(),
        host_url=self._host_url,
    )
    print(  # noqa: T201
        "The Insights Agent is running! This can take up to 30 minutes to complete."
        " Once the report is completed, you'll be able to see results here: "
        f"{report.link}"
    )
    return report
  8040. @warn_beta
  8041. def poll_insights(
  8042. self,
  8043. *,
  8044. report: ls_schemas.InsightsReport | None = None,
  8045. id: str | uuid.UUID | None = None,
  8046. project_id: str | uuid.UUID | None = None,
  8047. rate: int = 30,
  8048. timeout: int = 30 * 60,
  8049. verbose: bool = False,
  8050. ) -> ls_schemas.InsightsReport:
  8051. """Poll the status of an Insights report.
  8052. Args:
  8053. report: THe InsightsReport.
  8054. id: The Insights report ID. Should only specify if 'report' is not specified.
  8055. project_id: The Tracing project ID. Should only specify if 'report' is not specified.
  8056. """
  8057. if not ((id and project_id) or report):
  8058. raise ValueError("Must specify ('id' and 'project_id') or 'report'.")
  8059. elif (id or project_id) and report:
  8060. raise ValueError(
  8061. "Must specify exactly one of ('id' and 'project_id') or 'report'."
  8062. )
  8063. elif report:
  8064. id = report.id
  8065. project_id = report.project_id
  8066. max_tries = max(1, timeout // rate)
  8067. for i in range(max_tries):
  8068. response = self.request_with_retries(
  8069. "GET", f"/sessions/{project_id}/insights/{id}"
  8070. )
  8071. ls_utils.raise_for_status_with_text(response)
  8072. resp_json = response.json()
  8073. if resp_json["status"] == "success":
  8074. job = ls_schemas.InsightsReport(
  8075. **resp_json,
  8076. project_id=project_id, # type: ignore[arg-type]
  8077. tenant_id=self._get_tenant_id(),
  8078. host_url=self._host_url,
  8079. )
  8080. print( # noqa: T201
  8081. "Insights report completed! View the results at %s",
  8082. job.link,
  8083. )
  8084. return job
  8085. elif resp_json["status"] == "error":
  8086. raise ValueError(f"Failed to generate insights: {resp_json['error']}")
  8087. elif verbose:
  8088. print(f"Polling time: {i * rate}") # noqa: T201
  8089. time.sleep(rate)
  8090. raise TimeoutError("Insights still pending")
  8091. def _ensure_insights_api_key(
  8092. self,
  8093. *,
  8094. openai_api_key: str | None = None,
  8095. anthropic_api_key: str | None = None,
  8096. model: Literal["openai", "anthropic"] | None = None,
  8097. ) -> Literal["openai", "anthropic"]:
  8098. response = self.request_with_retries("GET", "/workspaces/current/secrets")
  8099. ls_utils.raise_for_status_with_text(response)
  8100. workspace_keys = {s.get("key") for s in response.json()}
  8101. target_keys = set()
  8102. if model in (None, "openai"):
  8103. target_keys.add(_OPENAI_API_KEY)
  8104. if model in (None, "anthropic"):
  8105. target_keys.add(_ANTHROPIC_API_KEY)
  8106. if existing_keys := workspace_keys.intersection(target_keys):
  8107. return "openai" if _OPENAI_API_KEY in existing_keys else "anthropic"
  8108. elif model == "openai":
  8109. api_key = openai_api_key
  8110. api_var = _OPENAI_API_KEY
  8111. elif model == "anthropic":
  8112. api_key = anthropic_api_key
  8113. api_var = _ANTHROPIC_API_KEY
  8114. elif openai_api_key or anthropic_api_key:
  8115. api_key = openai_api_key or anthropic_api_key
  8116. api_var = _OPENAI_API_KEY if openai_api_key else _ANTHROPIC_API_KEY
  8117. else:
  8118. raise ValueError("Must specify openai_api_key or anthropic_api_key.")
  8119. response = self.request_with_retries(
  8120. "POST",
  8121. "/workspaces/current/secrets",
  8122. json=[{"key": api_var, "value": api_key}],
  8123. )
  8124. ls_utils.raise_for_status_with_text(response)
  8125. return "openai" if api_var == _OPENAI_API_KEY else "anthropic"
  8126. def _ingest_insights_runs(self, data: list, name: str | None):
  8127. if len(data) > 1000:
  8128. warnings.warn(
  8129. "Can only generate insights over 1000 data. Truncating to first 1000."
  8130. )
  8131. data = data[:1000]
  8132. now = datetime.datetime.now(datetime.timezone.utc)
  8133. project = self.create_project(
  8134. name
  8135. or ("insights " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
  8136. )
  8137. run_ids = [str(uuid.uuid4()) for _ in range(len(data))]
  8138. runs = [
  8139. {
  8140. "inputs": {"messages": x[:1]},
  8141. "outputs": {"messages": x},
  8142. "id": run_id,
  8143. "trace_id": run_id,
  8144. "dotted_order": f"{now.strftime('%Y%m%dT%H%M%S%fZ')}{str(run_id)}",
  8145. "start_time": now - datetime.timedelta(seconds=1),
  8146. "end_time": now,
  8147. "run_type": "chain",
  8148. "session_id": project.id,
  8149. "name": "trace",
  8150. }
  8151. for run_id, x in zip(run_ids, data)
  8152. ]
  8153. self.batch_ingest_runs(create=runs)
  8154. self.flush()
  8155. return project
  8156. def convert_prompt_to_openai_format(
  8157. messages: Any,
  8158. model_kwargs: Optional[dict[str, Any]] = None,
  8159. ) -> dict:
  8160. """Convert a prompt to OpenAI format.
  8161. Requires the `langchain_openai` package to be installed.
  8162. Args:
  8163. messages (Any): The messages to convert.
  8164. model_kwargs (Optional[Dict[str, Any]]): Model configuration arguments including
  8165. `stop` and any other required arguments.
  8166. Returns:
  8167. dict: The prompt in OpenAI format.
  8168. Raises:
  8169. ImportError: If the `langchain_openai` package is not installed.
  8170. ls_utils.LangSmithError: If there is an error during the conversion process.
  8171. """
  8172. try:
  8173. from langchain_openai import ChatOpenAI # type: ignore
  8174. except ImportError:
  8175. raise ImportError(
  8176. "The convert_prompt_to_openai_format function requires the langchain_openai"
  8177. "package to run.\nInstall with `pip install langchain_openai`"
  8178. )
  8179. openai = ChatOpenAI()
  8180. model_kwargs = model_kwargs or {}
  8181. stop = model_kwargs.pop("stop", None)
  8182. try:
  8183. return openai._get_request_payload(messages, stop=stop, **model_kwargs)
  8184. except Exception as e:
  8185. raise ls_utils.LangSmithError(f"Error converting to OpenAI format: {e}")
  8186. def convert_prompt_to_anthropic_format(
  8187. messages: Any,
  8188. model_kwargs: Optional[dict[str, Any]] = None,
  8189. ) -> dict:
  8190. """Convert a prompt to Anthropic format.
  8191. Requires the `langchain_anthropic` package to be installed.
  8192. Args:
  8193. messages (Any): The messages to convert.
  8194. model_kwargs (Optional[Dict[str, Any]]):
  8195. Model configuration arguments including `model_name` and `stop`.
  8196. Returns:
  8197. dict: The prompt in Anthropic format.
  8198. """
  8199. try:
  8200. from langchain_anthropic import ChatAnthropic # type: ignore
  8201. except ImportError:
  8202. raise ImportError(
  8203. "The convert_prompt_to_anthropic_format function requires the "
  8204. "langchain_anthropic package to run.\n"
  8205. "Install with `pip install langchain_anthropic`"
  8206. )
  8207. model_kwargs = model_kwargs or {}
  8208. model_name = model_kwargs.pop("model_name", "claude-3-haiku-20240307")
  8209. stop = model_kwargs.pop("stop", None)
  8210. timeout = model_kwargs.pop("timeout", None)
  8211. anthropic = ChatAnthropic(
  8212. model_name=model_name, timeout=timeout, stop=stop, **model_kwargs
  8213. )
  8214. try:
  8215. return anthropic._get_request_payload(messages, stop=stop)
  8216. except Exception as e:
  8217. raise ls_utils.LangSmithError(f"Error converting to Anthropic format: {e}")
  8218. class _FailedAttachmentReader(io.BytesIO):
  8219. """BytesIO that raises an error when read, for failed attachment downloads."""
  8220. def __init__(self, error: Exception):
  8221. super().__init__()
  8222. self._error = error
  8223. def read(self, size: Optional[int] = -1) -> bytes:
  8224. raise ls_utils.LangSmithError(
  8225. f"Failed to download attachment: {self._error}"
  8226. ) from self._error
  8227. def _convert_stored_attachments_to_attachments_dict(
  8228. data: dict, *, attachments_key: str, api_url: Optional[str] = None
  8229. ) -> dict[str, AttachmentInfo]:
  8230. """Convert attachments from the backend database format to the user facing format."""
  8231. attachments_dict = {}
  8232. if attachments_key in data and data[attachments_key]:
  8233. for key, value in data[attachments_key].items():
  8234. if not key.startswith("attachment."):
  8235. continue
  8236. if api_url is not None:
  8237. full_url = _construct_url(api_url, value["presigned_url"])
  8238. else:
  8239. full_url = value["presigned_url"]
  8240. try:
  8241. response = requests.get(full_url, stream=True)
  8242. response.raise_for_status()
  8243. reader = io.BytesIO(response.content)
  8244. except Exception as e:
  8245. logger.warning(f"Error downloading attachment {key}: {e}")
  8246. reader = _FailedAttachmentReader(e)
  8247. attachments_dict[key.removeprefix("attachment.")] = AttachmentInfo(
  8248. **{
  8249. "presigned_url": value["presigned_url"],
  8250. "reader": reader,
  8251. "mime_type": value.get("mime_type"),
  8252. }
  8253. )
  8254. return attachments_dict
  8255. def _close_files(files: list[io.BufferedReader]) -> None:
  8256. """Close all opened files used in multipart requests."""
  8257. for file in files:
  8258. try:
  8259. file.close()
  8260. except Exception:
  8261. logger.debug("Could not close file: %s", file.name)
  8262. pass
  8263. def _dataset_examples_path(api_url: str, dataset_id: ID_TYPE) -> str:
  8264. if api_url.rstrip("/").endswith("/v1"):
  8265. return f"/platform/datasets/{dataset_id}/examples"
  8266. else:
  8267. return f"/v1/platform/datasets/{dataset_id}/examples"
  8268. def _platform_path(api_url: str, path: str) -> str:
  8269. """Construct a platform API path based on the API URL structure."""
  8270. if api_url.rstrip("/").endswith("/v1"):
  8271. return f"/platform/{path}"
  8272. else:
  8273. return f"/v1/platform/{path}"
  8274. def _construct_url(api_url: str, pathname: str) -> str:
  8275. if pathname.startswith("http"):
  8276. return pathname
  8277. if api_url.startswith("https://"):
  8278. http = "https://"
  8279. api_url = api_url[len("https://") :]
  8280. elif api_url.startswith("http://"):
  8281. http = "http://"
  8282. api_url = api_url[len("http://") :]
  8283. else:
  8284. raise ValueError(
  8285. f"api_url must start with 'http://' or 'https://'. Received {api_url=}"
  8286. )
  8287. api_parts = api_url.rstrip("/").split("/")
  8288. path_parts = pathname.lstrip("/").split("/")
  8289. if not api_parts:
  8290. raise ValueError(
  8291. "Must specify non-empty api_url or pathname must be a full url. "
  8292. f"Received {api_url=}, {pathname=}"
  8293. )
  8294. if not path_parts:
  8295. return api_url
  8296. if path_parts[0] == "api":
  8297. if api_parts[-1] == "api":
  8298. api_parts = api_parts[:-1]
  8299. elif api_parts[-2:] == ["api", "v1"]:
  8300. api_parts = api_parts[:-2]
  8301. parts = api_parts + path_parts
  8302. return http + "/".join(p for p in parts if p)
  8303. def dump_model(model) -> dict[str, Any]:
  8304. """Dump model depending on pydantic version."""
  8305. if hasattr(model, "model_dump"):
  8306. return model.model_dump()
  8307. elif hasattr(model, "dict"):
  8308. return model.dict()
  8309. else:
  8310. raise TypeError("Unsupported model type")
  8311. def prep_obj_for_push(obj: Any) -> Any:
  8312. """Format the object so its Prompt Hub compatible."""
  8313. try:
  8314. from langchain_core.prompts import ChatPromptTemplate
  8315. from langchain_core.prompts.structured import StructuredPrompt
  8316. from langchain_core.runnables import RunnableBinding, RunnableSequence
  8317. except ImportError:
  8318. raise ImportError(
  8319. "The client.create_commit function requires the langchain-core"
  8320. "package to run.\nInstall with `pip install langchain-core`"
  8321. )
  8322. # Transform 3-step RunnableSequence back to 2-step for structured prompts
  8323. # See pull_prompt for the forward transformation
  8324. chain_to_push = obj
  8325. if (
  8326. isinstance(obj, RunnableSequence)
  8327. and isinstance(obj.first, ChatPromptTemplate)
  8328. and isinstance(obj.steps[1], RunnableBinding)
  8329. and 2 <= len(obj.steps) <= 3
  8330. ):
  8331. prompt = obj.first
  8332. bound_model = obj.steps[1]
  8333. model = bound_model.bound
  8334. model_kwargs = bound_model.kwargs
  8335. # have a sequence like:
  8336. # ChatPromptTemplate | ChatModel.with_structured_output()
  8337. if (
  8338. not isinstance(prompt, StructuredPrompt)
  8339. and "ls_structured_output_format" in bound_model.kwargs
  8340. ):
  8341. output_format = bound_model.kwargs["ls_structured_output_format"]
  8342. prompt = StructuredPrompt(messages=prompt.messages, **output_format)
  8343. # have a sequence like: StructuredPrompt | RunnableBinding(bound=ChatModel)
  8344. if isinstance(prompt, StructuredPrompt):
  8345. structured_kwargs = (prompt | model).steps[1].kwargs # type: ignore[attr-defined]
  8346. # remove the kwargs that are bound by with_structured_output()
  8347. bound_model.kwargs = {
  8348. k: v for k, v in model_kwargs.items() if k not in structured_kwargs
  8349. }
  8350. # Can't pipe with | syntax bc StructuredPrompt defines special piping
  8351. # behavior that'll cause bound_model.with_structured_output to be
  8352. # called.
  8353. chain_to_push = RunnableSequence(prompt, bound_model)
  8354. return chain_to_push