backend_cffi.py 149 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
7427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412441344144415441644174418441944204421442244234424442544264427442844294430443144324433443444354436443744384439444044414442444344444445444644474448444944504451445244534454445544564457445844594460446144624463446444654466446744684469447044714472447344744475447644774478
  1. # Copyright (c) 2016-present, Gregory Szorc
  2. # All rights reserved.
  3. #
  4. # This software may be modified and distributed under the terms
  5. # of the BSD license. See the LICENSE file for details.
  6. """Python interface to the Zstandard (zstd) compression library."""
  7. from __future__ import absolute_import, unicode_literals
  8. # This should match what the C extension exports.
  9. __all__ = [
  10. "BufferSegment",
  11. "BufferSegments",
  12. "BufferWithSegments",
  13. "BufferWithSegmentsCollection",
  14. "ZstdCompressionChunker",
  15. "ZstdCompressionDict",
  16. "ZstdCompressionObj",
  17. "ZstdCompressionParameters",
  18. "ZstdCompressionReader",
  19. "ZstdCompressionWriter",
  20. "ZstdCompressor",
  21. "ZstdDecompressionObj",
  22. "ZstdDecompressionReader",
  23. "ZstdDecompressionWriter",
  24. "ZstdDecompressor",
  25. "ZstdError",
  26. "FrameParameters",
  27. "backend_features",
  28. "estimate_decompression_context_size",
  29. "frame_content_size",
  30. "frame_header_size",
  31. "get_frame_parameters",
  32. "train_dictionary",
  33. # Constants.
  34. "FLUSH_BLOCK",
  35. "FLUSH_FRAME",
  36. "COMPRESSOBJ_FLUSH_FINISH",
  37. "COMPRESSOBJ_FLUSH_BLOCK",
  38. "ZSTD_VERSION",
  39. "FRAME_HEADER",
  40. "CONTENTSIZE_UNKNOWN",
  41. "CONTENTSIZE_ERROR",
  42. "MAX_COMPRESSION_LEVEL",
  43. "COMPRESSION_RECOMMENDED_INPUT_SIZE",
  44. "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
  45. "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
  46. "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
  47. "MAGIC_NUMBER",
  48. "BLOCKSIZELOG_MAX",
  49. "BLOCKSIZE_MAX",
  50. "WINDOWLOG_MIN",
  51. "WINDOWLOG_MAX",
  52. "CHAINLOG_MIN",
  53. "CHAINLOG_MAX",
  54. "HASHLOG_MIN",
  55. "HASHLOG_MAX",
  56. "MINMATCH_MIN",
  57. "MINMATCH_MAX",
  58. "SEARCHLOG_MIN",
  59. "SEARCHLOG_MAX",
  60. "SEARCHLENGTH_MIN",
  61. "SEARCHLENGTH_MAX",
  62. "TARGETLENGTH_MIN",
  63. "TARGETLENGTH_MAX",
  64. "LDM_MINMATCH_MIN",
  65. "LDM_MINMATCH_MAX",
  66. "LDM_BUCKETSIZELOG_MAX",
  67. "STRATEGY_FAST",
  68. "STRATEGY_DFAST",
  69. "STRATEGY_GREEDY",
  70. "STRATEGY_LAZY",
  71. "STRATEGY_LAZY2",
  72. "STRATEGY_BTLAZY2",
  73. "STRATEGY_BTOPT",
  74. "STRATEGY_BTULTRA",
  75. "STRATEGY_BTULTRA2",
  76. "DICT_TYPE_AUTO",
  77. "DICT_TYPE_RAWCONTENT",
  78. "DICT_TYPE_FULLDICT",
  79. "FORMAT_ZSTD1",
  80. "FORMAT_ZSTD1_MAGICLESS",
  81. ]
  82. import io
  83. import os
  84. from ._cffi import ( # type: ignore
  85. ffi,
  86. lib,
  87. )
  88. backend_features = set() # type: ignore
  89. COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
  90. COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
  91. DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
  92. DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
  93. new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
  94. MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
  95. MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
  96. FRAME_HEADER = b"\x28\xb5\x2f\xfd"
  97. CONTENTSIZE_UNKNOWN = lib.ZSTD_CONTENTSIZE_UNKNOWN
  98. CONTENTSIZE_ERROR = lib.ZSTD_CONTENTSIZE_ERROR
  99. ZSTD_VERSION = (
  100. lib.ZSTD_VERSION_MAJOR,
  101. lib.ZSTD_VERSION_MINOR,
  102. lib.ZSTD_VERSION_RELEASE,
  103. )
  104. BLOCKSIZELOG_MAX = lib.ZSTD_BLOCKSIZELOG_MAX
  105. BLOCKSIZE_MAX = lib.ZSTD_BLOCKSIZE_MAX
  106. WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
  107. WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
  108. CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
  109. CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
  110. HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
  111. HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
  112. MINMATCH_MIN = lib.ZSTD_MINMATCH_MIN
  113. MINMATCH_MAX = lib.ZSTD_MINMATCH_MAX
  114. SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
  115. SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
  116. SEARCHLENGTH_MIN = lib.ZSTD_MINMATCH_MIN
  117. SEARCHLENGTH_MAX = lib.ZSTD_MINMATCH_MAX
  118. TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
  119. TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
  120. LDM_MINMATCH_MIN = lib.ZSTD_LDM_MINMATCH_MIN
  121. LDM_MINMATCH_MAX = lib.ZSTD_LDM_MINMATCH_MAX
  122. LDM_BUCKETSIZELOG_MAX = lib.ZSTD_LDM_BUCKETSIZELOG_MAX
  123. STRATEGY_FAST = lib.ZSTD_fast
  124. STRATEGY_DFAST = lib.ZSTD_dfast
  125. STRATEGY_GREEDY = lib.ZSTD_greedy
  126. STRATEGY_LAZY = lib.ZSTD_lazy
  127. STRATEGY_LAZY2 = lib.ZSTD_lazy2
  128. STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
  129. STRATEGY_BTOPT = lib.ZSTD_btopt
  130. STRATEGY_BTULTRA = lib.ZSTD_btultra
  131. STRATEGY_BTULTRA2 = lib.ZSTD_btultra2
  132. DICT_TYPE_AUTO = lib.ZSTD_dct_auto
  133. DICT_TYPE_RAWCONTENT = lib.ZSTD_dct_rawContent
  134. DICT_TYPE_FULLDICT = lib.ZSTD_dct_fullDict
  135. FORMAT_ZSTD1 = lib.ZSTD_f_zstd1
  136. FORMAT_ZSTD1_MAGICLESS = lib.ZSTD_f_zstd1_magicless
  137. FLUSH_BLOCK = 0
  138. FLUSH_FRAME = 1
  139. COMPRESSOBJ_FLUSH_FINISH = 0
  140. COMPRESSOBJ_FLUSH_BLOCK = 1
  141. def _cpu_count():
  142. # os.cpu_count() was introducd in Python 3.4.
  143. try:
  144. return os.cpu_count() or 0
  145. except AttributeError:
  146. pass
  147. # Linux.
  148. try:
  149. return os.sysconf("SC_NPROCESSORS_ONLN")
  150. except (AttributeError, ValueError):
  151. pass
  152. # TODO implement on other platforms.
  153. return 0
  154. class BufferSegment:
  155. """Represents a segment within a ``BufferWithSegments``.
  156. This type is essentially a reference to N bytes within a
  157. ``BufferWithSegments``.
  158. The object conforms to the buffer protocol.
  159. """
  160. @property
  161. def offset(self):
  162. """The byte offset of this segment within its parent buffer."""
  163. raise NotImplementedError()
  164. def __len__(self):
  165. """Obtain the length of the segment, in bytes."""
  166. raise NotImplementedError()
  167. def tobytes(self):
  168. """Obtain bytes copy of this segment."""
  169. raise NotImplementedError()
  170. class BufferSegments:
  171. """Represents an array of ``(offset, length)`` integers.
  172. This type is effectively an index used by :py:class:`BufferWithSegments`.
  173. The array members are 64-bit unsigned integers using host/native bit order.
  174. Instances conform to the buffer protocol.
  175. """
  176. class BufferWithSegments:
  177. """A memory buffer containing N discrete items of known lengths.
  178. This type is essentially a fixed size memory address and an array
  179. of 2-tuples of ``(offset, length)`` 64-bit unsigned native-endian
  180. integers defining the byte offset and length of each segment within
  181. the buffer.
  182. Instances behave like containers.
  183. Instances also conform to the buffer protocol. So a reference to the
  184. backing bytes can be obtained via ``memoryview(o)``. A *copy* of the
  185. backing bytes can be obtained via ``.tobytes()``.
  186. This type exists to facilitate operations against N>1 items without
  187. the overhead of Python object creation and management. Used with
  188. APIs like :py:meth:`ZstdDecompressor.multi_decompress_to_buffer`, it
  189. is possible to decompress many objects in parallel without the GIL
  190. held, leading to even better performance.
  191. """
  192. @property
  193. def size(self):
  194. """Total sizein bytes of the backing buffer."""
  195. raise NotImplementedError()
  196. def __len__(self):
  197. raise NotImplementedError()
  198. def __getitem__(self, i):
  199. """Obtains a segment within the buffer.
  200. The returned object references memory within this buffer.
  201. :param i:
  202. Integer index of segment to retrieve.
  203. :return:
  204. :py:class:`BufferSegment`
  205. """
  206. raise NotImplementedError()
  207. def segments(self):
  208. """Obtain the array of ``(offset, length)`` segments in the buffer.
  209. :return:
  210. :py:class:`BufferSegments`
  211. """
  212. raise NotImplementedError()
  213. def tobytes(self):
  214. """Obtain bytes copy of this instance."""
  215. raise NotImplementedError()
  216. class BufferWithSegmentsCollection:
  217. """A virtual spanning view over multiple BufferWithSegments.
  218. Instances are constructed from 1 or more :py:class:`BufferWithSegments`
  219. instances. The resulting object behaves like an ordered sequence whose
  220. members are the segments within each ``BufferWithSegments``.
  221. If the object is composed of 2 ``BufferWithSegments`` instances with the
  222. first having 2 segments and the second have 3 segments, then ``b[0]``
  223. and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
  224. and ``b[4]`` access segments from the second.
  225. """
  226. def __len__(self):
  227. """The number of segments within all ``BufferWithSegments``."""
  228. raise NotImplementedError()
  229. def __getitem__(self, i):
  230. """Obtain the ``BufferSegment`` at an offset."""
  231. raise NotImplementedError()
  232. class ZstdError(Exception):
  233. pass
  234. def _zstd_error(zresult):
  235. # Resolves to bytes on Python 2 and 3. We use the string for formatting
  236. # into error messages, which will be literal unicode. So convert it to
  237. # unicode.
  238. return ffi.string(lib.ZSTD_getErrorName(zresult)).decode("utf-8")
  239. def _make_cctx_params(params):
  240. res = lib.ZSTD_createCCtxParams()
  241. if res == ffi.NULL:
  242. raise MemoryError()
  243. res = ffi.gc(res, lib.ZSTD_freeCCtxParams)
  244. attrs = [
  245. (lib.ZSTD_c_format, params.format),
  246. (lib.ZSTD_c_compressionLevel, params.compression_level),
  247. (lib.ZSTD_c_windowLog, params.window_log),
  248. (lib.ZSTD_c_hashLog, params.hash_log),
  249. (lib.ZSTD_c_chainLog, params.chain_log),
  250. (lib.ZSTD_c_searchLog, params.search_log),
  251. (lib.ZSTD_c_minMatch, params.min_match),
  252. (lib.ZSTD_c_targetLength, params.target_length),
  253. (lib.ZSTD_c_strategy, params.strategy),
  254. (lib.ZSTD_c_contentSizeFlag, params.write_content_size),
  255. (lib.ZSTD_c_checksumFlag, params.write_checksum),
  256. (lib.ZSTD_c_dictIDFlag, params.write_dict_id),
  257. (lib.ZSTD_c_nbWorkers, params.threads),
  258. (lib.ZSTD_c_jobSize, params.job_size),
  259. (lib.ZSTD_c_overlapLog, params.overlap_log),
  260. (lib.ZSTD_c_forceMaxWindow, params.force_max_window),
  261. (lib.ZSTD_c_enableLongDistanceMatching, params.enable_ldm),
  262. (lib.ZSTD_c_ldmHashLog, params.ldm_hash_log),
  263. (lib.ZSTD_c_ldmMinMatch, params.ldm_min_match),
  264. (lib.ZSTD_c_ldmBucketSizeLog, params.ldm_bucket_size_log),
  265. (lib.ZSTD_c_ldmHashRateLog, params.ldm_hash_rate_log),
  266. ]
  267. for param, value in attrs:
  268. _set_compression_parameter(res, param, value)
  269. return res
  270. class ZstdCompressionParameters(object):
  271. """Low-level zstd compression parameters.
  272. This type represents a collection of parameters to control how zstd
  273. compression is performed.
  274. Instances can be constructed from raw parameters or derived from a
  275. base set of defaults specified from a compression level (recommended)
  276. via :py:meth:`ZstdCompressionParameters.from_level`.
  277. >>> # Derive compression settings for compression level 7.
  278. >>> params = zstandard.ZstdCompressionParameters.from_level(7)
  279. >>> # With an input size of 1MB
  280. >>> params = zstandard.ZstdCompressionParameters.from_level(7, source_size=1048576)
  281. Using ``from_level()``, it is also possible to override individual compression
  282. parameters or to define additional settings that aren't automatically derived.
  283. e.g.:
  284. >>> params = zstandard.ZstdCompressionParameters.from_level(4, window_log=10)
  285. >>> params = zstandard.ZstdCompressionParameters.from_level(5, threads=4)
  286. Or you can define low-level compression settings directly:
  287. >>> params = zstandard.ZstdCompressionParameters(window_log=12, enable_ldm=True)
  288. Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
  289. configure a compressor:
  290. >>> cctx = zstandard.ZstdCompressor(compression_params=params)
  291. Some of these are very low-level settings. It may help to consult the official
  292. zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
  293. in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
  294. """
  295. @staticmethod
  296. def from_level(level, source_size=0, dict_size=0, **kwargs):
  297. """Create compression parameters from a compression level.
  298. :param level:
  299. Integer compression level.
  300. :param source_size:
  301. Integer size in bytes of source to be compressed.
  302. :param dict_size:
  303. Integer size in bytes of compression dictionary to use.
  304. :return:
  305. :py:class:`ZstdCompressionParameters`
  306. """
  307. params = lib.ZSTD_getCParams(level, source_size, dict_size)
  308. args = {
  309. "window_log": "windowLog",
  310. "chain_log": "chainLog",
  311. "hash_log": "hashLog",
  312. "search_log": "searchLog",
  313. "min_match": "minMatch",
  314. "target_length": "targetLength",
  315. "strategy": "strategy",
  316. }
  317. for arg, attr in args.items():
  318. if arg not in kwargs:
  319. kwargs[arg] = getattr(params, attr)
  320. return ZstdCompressionParameters(**kwargs)
    def __init__(
        self,
        format=0,
        compression_level=0,
        window_log=0,
        hash_log=0,
        chain_log=0,
        search_log=0,
        min_match=0,
        target_length=0,
        strategy=-1,
        write_content_size=1,
        write_checksum=0,
        write_dict_id=0,
        job_size=0,
        overlap_log=-1,
        force_max_window=0,
        enable_ldm=0,
        ldm_hash_log=0,
        ldm_min_match=0,
        ldm_bucket_size_log=0,
        ldm_hash_rate_log=-1,
        threads=0,
    ):
        """Construct low-level compression parameters.

        Allocates a native ``ZSTD_CCtx_params`` struct and copies every
        keyword argument into it via ``_set_compression_parameter()``.
        ``strategy``, ``overlap_log``, and ``ldm_hash_rate_log`` use ``-1``
        as a "not specified" sentinel that is mapped to ``0`` before being
        applied; a negative ``threads`` means "use all logical CPUs".

        :raises MemoryError: if the native params struct cannot be allocated.
        """
        params = lib.ZSTD_createCCtxParams()
        if params == ffi.NULL:
            raise MemoryError()
        # Free the native struct when this object is garbage collected.
        params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
        self._params = params
        if threads < 0:
            threads = _cpu_count()
        # We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
        # because setting ZSTD_c_nbWorkers resets the other parameters.
        _set_compression_parameter(params, lib.ZSTD_c_nbWorkers, threads)
        _set_compression_parameter(params, lib.ZSTD_c_format, format)
        _set_compression_parameter(
            params, lib.ZSTD_c_compressionLevel, compression_level
        )
        _set_compression_parameter(params, lib.ZSTD_c_windowLog, window_log)
        _set_compression_parameter(params, lib.ZSTD_c_hashLog, hash_log)
        _set_compression_parameter(params, lib.ZSTD_c_chainLog, chain_log)
        _set_compression_parameter(params, lib.ZSTD_c_searchLog, search_log)
        _set_compression_parameter(params, lib.ZSTD_c_minMatch, min_match)
        _set_compression_parameter(
            params, lib.ZSTD_c_targetLength, target_length
        )
        # -1 means "not specified"; translate to 0 before handing to zstd.
        if strategy == -1:
            strategy = 0
        _set_compression_parameter(params, lib.ZSTD_c_strategy, strategy)
        _set_compression_parameter(
            params, lib.ZSTD_c_contentSizeFlag, write_content_size
        )
        _set_compression_parameter(
            params, lib.ZSTD_c_checksumFlag, write_checksum
        )
        _set_compression_parameter(params, lib.ZSTD_c_dictIDFlag, write_dict_id)
        _set_compression_parameter(params, lib.ZSTD_c_jobSize, job_size)
        # -1 means "not specified"; translate to 0 before handing to zstd.
        if overlap_log == -1:
            overlap_log = 0
        _set_compression_parameter(params, lib.ZSTD_c_overlapLog, overlap_log)
        _set_compression_parameter(
            params, lib.ZSTD_c_forceMaxWindow, force_max_window
        )
        _set_compression_parameter(
            params, lib.ZSTD_c_enableLongDistanceMatching, enable_ldm
        )
        _set_compression_parameter(params, lib.ZSTD_c_ldmHashLog, ldm_hash_log)
        _set_compression_parameter(
            params, lib.ZSTD_c_ldmMinMatch, ldm_min_match
        )
        _set_compression_parameter(
            params, lib.ZSTD_c_ldmBucketSizeLog, ldm_bucket_size_log
        )
        # -1 means "not specified"; translate to 0 before handing to zstd.
        if ldm_hash_rate_log == -1:
            ldm_hash_rate_log = 0
        _set_compression_parameter(
            params, lib.ZSTD_c_ldmHashRateLog, ldm_hash_rate_log
        )
  399. @property
  400. def format(self):
  401. return _get_compression_parameter(self._params, lib.ZSTD_c_format)
  402. @property
  403. def compression_level(self):
  404. return _get_compression_parameter(
  405. self._params, lib.ZSTD_c_compressionLevel
  406. )
  407. @property
  408. def window_log(self):
  409. return _get_compression_parameter(self._params, lib.ZSTD_c_windowLog)
  410. @property
  411. def hash_log(self):
  412. return _get_compression_parameter(self._params, lib.ZSTD_c_hashLog)
  413. @property
  414. def chain_log(self):
  415. return _get_compression_parameter(self._params, lib.ZSTD_c_chainLog)
  416. @property
  417. def search_log(self):
  418. return _get_compression_parameter(self._params, lib.ZSTD_c_searchLog)
  419. @property
  420. def min_match(self):
  421. return _get_compression_parameter(self._params, lib.ZSTD_c_minMatch)
  422. @property
  423. def target_length(self):
  424. return _get_compression_parameter(self._params, lib.ZSTD_c_targetLength)
  425. @property
  426. def strategy(self):
  427. return _get_compression_parameter(self._params, lib.ZSTD_c_strategy)
  428. @property
  429. def write_content_size(self):
  430. return _get_compression_parameter(
  431. self._params, lib.ZSTD_c_contentSizeFlag
  432. )
  433. @property
  434. def write_checksum(self):
  435. return _get_compression_parameter(self._params, lib.ZSTD_c_checksumFlag)
  436. @property
  437. def write_dict_id(self):
  438. return _get_compression_parameter(self._params, lib.ZSTD_c_dictIDFlag)
  439. @property
  440. def job_size(self):
  441. return _get_compression_parameter(self._params, lib.ZSTD_c_jobSize)
  442. @property
  443. def overlap_log(self):
  444. return _get_compression_parameter(self._params, lib.ZSTD_c_overlapLog)
  445. @property
  446. def force_max_window(self):
  447. return _get_compression_parameter(
  448. self._params, lib.ZSTD_c_forceMaxWindow
  449. )
  450. @property
  451. def enable_ldm(self):
  452. return _get_compression_parameter(
  453. self._params, lib.ZSTD_c_enableLongDistanceMatching
  454. )
  455. @property
  456. def ldm_hash_log(self):
  457. return _get_compression_parameter(self._params, lib.ZSTD_c_ldmHashLog)
  458. @property
  459. def ldm_min_match(self):
  460. return _get_compression_parameter(self._params, lib.ZSTD_c_ldmMinMatch)
  461. @property
  462. def ldm_bucket_size_log(self):
  463. return _get_compression_parameter(
  464. self._params, lib.ZSTD_c_ldmBucketSizeLog
  465. )
  466. @property
  467. def ldm_hash_rate_log(self):
  468. return _get_compression_parameter(
  469. self._params, lib.ZSTD_c_ldmHashRateLog
  470. )
  471. @property
  472. def threads(self):
  473. return _get_compression_parameter(self._params, lib.ZSTD_c_nbWorkers)
  474. def estimated_compression_context_size(self):
  475. """Estimated size in bytes needed to compress with these parameters."""
  476. return lib.ZSTD_estimateCCtxSize_usingCCtxParams(self._params)
  477. def estimate_decompression_context_size():
  478. """Estimate the memory size requirements for a decompressor instance.
  479. :return:
  480. Integer number of bytes.
  481. """
  482. return lib.ZSTD_estimateDCtxSize()
  483. def _set_compression_parameter(params, param, value):
  484. zresult = lib.ZSTD_CCtxParams_setParameter(params, param, value)
  485. if lib.ZSTD_isError(zresult):
  486. raise ZstdError(
  487. "unable to set compression context parameter: %s"
  488. % _zstd_error(zresult)
  489. )
  490. def _get_compression_parameter(params, param):
  491. result = ffi.new("int *")
  492. zresult = lib.ZSTD_CCtxParams_getParameter(params, param, result)
  493. if lib.ZSTD_isError(zresult):
  494. raise ZstdError(
  495. "unable to get compression context parameter: %s"
  496. % _zstd_error(zresult)
  497. )
  498. return result[0]
class ZstdCompressionWriter(object):
    """Writable compressing stream wrapper.

    ``ZstdCompressionWriter`` is a write-only stream interface for writing
    compressed data to another stream.

    This type conforms to the ``io.RawIOBase`` interface and should be usable
    by any type that operates against a *file-object* (``typing.BinaryIO``
    in Python type hinting speak). Only methods that involve writing will do
    useful things.

    As data is written to this stream (e.g. via ``write()``), that data
    is sent to the compressor. As compressed data becomes available from
    the compressor, it is sent to the underlying stream by calling its
    ``write()`` method.

    Both ``write()`` and ``flush()`` return the number of bytes written to the
    object's ``write()``. In many cases, small inputs do not accumulate enough
    data to cause a write and ``write()`` will return ``0``.

    Calling ``close()`` will mark the stream as closed and subsequent I/O
    operations will raise ``ValueError`` (per the documented behavior of
    ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
    stream if such a method exists and the instance was constructed with
    ``closefd=True``

    Instances are obtained by calling :py:meth:`ZstdCompressor.stream_writer`.

    Typically usage is as follows:

    >>> cctx = zstandard.ZstdCompressor(level=10)
    >>> compressor = cctx.stream_writer(fh)
    >>> compressor.write(b"chunk 0\\n")
    >>> compressor.write(b"chunk 1\\n")
    >>> compressor.flush()
    >>> # Receiver will be able to decode ``chunk 0\\nchunk 1\\n`` at this point.
    >>> # Receiver is also expecting more data in the zstd *frame*.
    >>>
    >>> compressor.write(b"chunk 2\\n")
    >>> compressor.flush(zstandard.FLUSH_FRAME)
    >>> # Receiver will be able to decode ``chunk 0\\nchunk 1\\nchunk 2``.
    >>> # Receiver is expecting no more data, as the zstd frame is closed.
    >>> # Any future calls to ``write()`` at this point will construct a new
    >>> # zstd frame.

    Instances can be used as context managers. Exiting the context manager is
    the equivalent of calling ``close()``, which is equivalent to calling
    ``flush(zstandard.FLUSH_FRAME)``:

    >>> cctx = zstandard.ZstdCompressor(level=10)
    >>> with cctx.stream_writer(fh) as compressor:
    ...     compressor.write(b'chunk 0')
    ...     compressor.write(b'chunk 1')
    ...     ...

    .. important::

       If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't
       constitute a full zstd *frame* and consumers of this data may complain
       about malformed input. It is recommended to use instances as a context
       manager to ensure *frames* are properly finished.

    If the size of the data being fed to this streaming compressor is known,
    you can declare it before compression begins:

    >>> cctx = zstandard.ZstdCompressor()
    >>> with cctx.stream_writer(fh, size=data_len) as compressor:
    ...     compressor.write(chunk0)
    ...     compressor.write(chunk1)
    ...     ...

    Declaring the size of the source data allows compression parameters to
    be tuned. And if ``write_content_size`` is used, it also results in the
    content size being written into the frame header of the output data.

    The size of chunks being ``write()`` to the destination can be specified:

    >>> cctx = zstandard.ZstdCompressor()
    >>> with cctx.stream_writer(fh, write_size=32768) as compressor:
    ...     ...

    To see how much memory is being used by the streaming compressor:

    >>> cctx = zstandard.ZstdCompressor()
    >>> with cctx.stream_writer(fh) as compressor:
    ...     ...
    ...     byte_size = compressor.memory_size()

    The total number of bytes written so far are exposed via ``tell()``:

    >>> cctx = zstandard.ZstdCompressor()
    >>> with cctx.stream_writer(fh) as compressor:
    ...     ...
    ...     total_written = compressor.tell()

    ``stream_writer()`` accepts a ``write_return_read`` boolean argument to
    control the return value of ``write()``. When ``False`` (the default),
    ``write()`` returns the number of bytes that were ``write()``'en to the
    underlying object. When ``True``, ``write()`` returns the number of bytes
    read from the input that were subsequently written to the compressor.
    ``True`` is the *proper* behavior for ``write()`` as specified by the
    ``io.RawIOBase`` interface and will become the default value in a future
    release.
    """

    def __init__(
        self,
        compressor,
        writer,
        source_size,
        write_size,
        write_return_read,
        closefd=True,
    ):
        self._compressor = compressor
        self._writer = writer
        self._write_size = write_size
        self._write_return_read = bool(write_return_read)
        self._closefd = bool(closefd)
        self._entered = False
        self._closing = False
        self._closed = False
        self._bytes_compressed = 0

        # Reusable output buffer: compressed bytes are staged here before
        # being handed to the inner stream's write().
        self._dst_buffer = ffi.new("char[]", write_size)
        self._out_buffer = ffi.new("ZSTD_outBuffer *")
        self._out_buffer.dst = self._dst_buffer
        self._out_buffer.size = len(self._dst_buffer)
        self._out_buffer.pos = 0

        zresult = lib.ZSTD_CCtx_setPledgedSrcSize(compressor._cctx, source_size)
        if lib.ZSTD_isError(zresult):
            raise ZstdError(
                "error setting source size: %s" % _zstd_error(zresult)
            )

    def __enter__(self):
        if self._closed:
            raise ValueError("stream is closed")

        if self._entered:
            raise ZstdError("cannot __enter__ multiple times")

        self._entered = True

        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self._entered = False
        # close() finishes the current zstd frame (flush(FLUSH_FRAME)).
        self.close()
        self._compressor = None

        return False

    def __iter__(self):
        raise io.UnsupportedOperation()

    def __next__(self):
        raise io.UnsupportedOperation()

    def memory_size(self):
        # Size in bytes of the underlying zstd compression context.
        return lib.ZSTD_sizeof_CCtx(self._compressor._cctx)

    def fileno(self):
        f = getattr(self._writer, "fileno", None)
        if f:
            return f()
        else:
            raise OSError("fileno not available on underlying writer")

    def close(self):
        # Idempotent per io.RawIOBase semantics.
        if self._closed:
            return

        try:
            # _closing suppresses the inner-stream flush() inside
            # self.flush() during teardown.
            self._closing = True
            self.flush(FLUSH_FRAME)
        finally:
            self._closing = False
            self._closed = True

        # Call close() on underlying stream as well.
        f = getattr(self._writer, "close", None)
        if self._closefd and f:
            f()

    @property
    def closed(self):
        return self._closed

    def isatty(self):
        return False

    def readable(self):
        return False

    def readline(self, size=-1):
        raise io.UnsupportedOperation()

    def readlines(self, hint=-1):
        raise io.UnsupportedOperation()

    def seek(self, offset, whence=None):
        raise io.UnsupportedOperation()

    def seekable(self):
        return False

    def truncate(self, size=None):
        raise io.UnsupportedOperation()

    def writable(self):
        return True

    def writelines(self, lines):
        raise NotImplementedError("writelines() is not yet implemented")

    def read(self, size=-1):
        raise io.UnsupportedOperation()

    def readall(self):
        raise io.UnsupportedOperation()

    def readinto(self, b):
        raise io.UnsupportedOperation()

    def write(self, data):
        """Send data to the compressor and possibly to the inner stream."""
        if self._closed:
            raise ValueError("stream is closed")

        total_write = 0

        data_buffer = ffi.from_buffer(data)

        in_buffer = ffi.new("ZSTD_inBuffer *")
        in_buffer.src = data_buffer
        in_buffer.size = len(data_buffer)
        in_buffer.pos = 0

        out_buffer = self._out_buffer
        out_buffer.pos = 0

        # Feed all input to the compressor, draining the staging buffer to
        # the inner stream whenever compressed bytes become available.
        while in_buffer.pos < in_buffer.size:
            zresult = lib.ZSTD_compressStream2(
                self._compressor._cctx,
                out_buffer,
                in_buffer,
                lib.ZSTD_e_continue,
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd compress error: %s" % _zstd_error(zresult)
                )

            if out_buffer.pos:
                self._writer.write(
                    ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
                )
                total_write += out_buffer.pos
                self._bytes_compressed += out_buffer.pos
                out_buffer.pos = 0

        if self._write_return_read:
            return in_buffer.pos
        else:
            return total_write

    def flush(self, flush_mode=FLUSH_BLOCK):
        """Evict data from compressor's internal state and write it to inner stream.

        Calling this method may result in 0 or more ``write()`` calls to the
        inner stream.

        This method will also call ``flush()`` on the inner stream, if such a
        method exists.

        :param flush_mode:
           How to flush the zstd compressor.

           ``zstandard.FLUSH_BLOCK`` will flush data already sent to the
           compressor but not emitted to the inner stream. The stream is still
           writable after calling this. This is the default behavior.

           See documentation for other ``zstandard.FLUSH_*`` constants for more
           flushing options.
        :return:
           Integer number of bytes written to the inner stream.
        """
        if flush_mode == FLUSH_BLOCK:
            flush = lib.ZSTD_e_flush
        elif flush_mode == FLUSH_FRAME:
            flush = lib.ZSTD_e_end
        else:
            raise ValueError("unknown flush_mode: %r" % flush_mode)

        if self._closed:
            raise ValueError("stream is closed")

        total_write = 0

        out_buffer = self._out_buffer
        out_buffer.pos = 0

        # Empty input buffer: we are only draining internal compressor state.
        in_buffer = ffi.new("ZSTD_inBuffer *")
        in_buffer.src = ffi.NULL
        in_buffer.size = 0
        in_buffer.pos = 0

        while True:
            zresult = lib.ZSTD_compressStream2(
                self._compressor._cctx, out_buffer, in_buffer, flush
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd compress error: %s" % _zstd_error(zresult)
                )

            if out_buffer.pos:
                self._writer.write(
                    ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
                )
                total_write += out_buffer.pos
                self._bytes_compressed += out_buffer.pos
                out_buffer.pos = 0

            # A zero return value means the flush/end operation completed.
            if not zresult:
                break

        f = getattr(self._writer, "flush", None)
        # Skip the inner-stream flush while close() is tearing us down.
        if f and not self._closing:
            f()

        return total_write

    def tell(self):
        return self._bytes_compressed
class ZstdCompressionObj(object):
    """A compressor conforming to the API in Python's standard library.

    This type implements an API similar to compression types in Python's
    standard library such as ``zlib.compressobj`` and ``bz2.BZ2Compressor``.
    This enables existing code targeting the standard library API to swap
    in this type to achieve zstd compression.

    .. important::

       The design of this API is not ideal for optimal performance.

       The reason performance is not optimal is because the API is limited to
       returning a single buffer holding compressed data. When compressing
       data, we don't know how much data will be emitted. So in order to
       capture all this data in a single buffer, we need to perform buffer
       reallocations and/or extra memory copies. This can add significant
       overhead depending on the size or nature of the compressed data and how
       much your application calls this type.

    If performance is critical, consider an API like
    :py:meth:`ZstdCompressor.stream_reader`,
    :py:meth:`ZstdCompressor.stream_writer`,
    :py:meth:`ZstdCompressor.chunker`, or
    :py:meth:`ZstdCompressor.read_to_iter`, which result in less overhead
    managing buffers.

    Instances are obtained by calling :py:meth:`ZstdCompressor.compressobj`.

    Here is how this API should be used:

    >>> cctx = zstandard.ZstdCompressor()
    >>> cobj = cctx.compressobj()
    >>> data = cobj.compress(b"raw input 0")
    >>> data = cobj.compress(b"raw input 1")
    >>> data = cobj.flush()

    Or to flush blocks:

    >>> cctx = zstandard.ZstdCompressor()
    >>> cobj = cctx.compressobj()
    >>> data = cobj.compress(b"chunk in first block")
    >>> data = cobj.flush(zstandard.COMPRESSOBJ_FLUSH_BLOCK)
    >>> data = cobj.compress(b"chunk in second block")
    >>> data = cobj.flush()

    For best performance results, keep input chunks under 256KB. This avoids
    extra allocations for a large output object.

    It is possible to declare the input size of the data that will be fed
    into the compressor:

    >>> cctx = zstandard.ZstdCompressor()
    >>> cobj = cctx.compressobj(size=6)
    >>> data = cobj.compress(b"foobar")
    >>> data = cobj.flush()
    """

    def __init__(
        self, compressor, write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE
    ):
        self._compressor = compressor
        # Reusable output buffer for compressed bytes.
        self._out = ffi.new("ZSTD_outBuffer *")
        self._dst_buffer = ffi.new("char[]", write_size)
        self._out.dst = self._dst_buffer
        self._out.size = write_size
        self._out.pos = 0
        self._finished = False

    def compress(self, data):
        """Send data to the compressor.

        This method receives bytes to feed to the compressor and returns
        bytes constituting zstd compressed data.

        The zstd compressor accumulates bytes and the returned bytes may be
        substantially smaller or larger than the size of the input data on
        any given call. The returned value may be the empty byte string
        (``b""``).

        :param data:
           Data to write to the compressor.
        :return:
           Compressed data.
        """
        if self._finished:
            raise ZstdError("cannot call compress() after compressor finished")

        data_buffer = ffi.from_buffer(data)
        source = ffi.new("ZSTD_inBuffer *")
        source.src = data_buffer
        source.size = len(data_buffer)
        source.pos = 0

        chunks = []

        # Loop until the compressor has consumed all input, collecting any
        # output produced along the way.
        while source.pos < len(data):
            zresult = lib.ZSTD_compressStream2(
                self._compressor._cctx, self._out, source, lib.ZSTD_e_continue
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd compress error: %s" % _zstd_error(zresult)
                )

            if self._out.pos:
                chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
                self._out.pos = 0

        return b"".join(chunks)

    def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
        """Emit data accumulated in the compressor that hasn't been outputted yet.

        The ``flush_mode`` argument controls how to end the stream.

        ``zstandard.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the
        compression stream and finishes a zstd frame. Once this type of flush
        is performed, ``compress()`` and ``flush()`` can no longer be called.
        This type of flush **must** be called to end the compression context. If
        not called, the emitted data may be incomplete and may not be readable
        by a decompressor.

        ``zstandard.COMPRESSOBJ_FLUSH_BLOCK`` will flush a zstd block. This
        ensures that all data fed to this instance will have been emitted and
        can be decoded by a decompressor. Flushes of this type can be performed
        multiple times. The next call to ``compress()`` will begin a new zstd
        block.

        :param flush_mode:
           How to flush the zstd compressor.
        :return:
           Compressed data.
        """
        if flush_mode not in (
            COMPRESSOBJ_FLUSH_FINISH,
            COMPRESSOBJ_FLUSH_BLOCK,
        ):
            raise ValueError("flush mode not recognized")

        if self._finished:
            raise ZstdError("compressor object already finished")

        if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
            z_flush_mode = lib.ZSTD_e_flush
        elif flush_mode == COMPRESSOBJ_FLUSH_FINISH:
            z_flush_mode = lib.ZSTD_e_end
            # A FINISH flush permanently ends this compressor object.
            self._finished = True
        else:
            raise ZstdError("unhandled flush mode")

        assert self._out.pos == 0

        # Empty input: only draining internal compressor state.
        in_buffer = ffi.new("ZSTD_inBuffer *")
        in_buffer.src = ffi.NULL
        in_buffer.size = 0
        in_buffer.pos = 0

        chunks = []

        while True:
            zresult = lib.ZSTD_compressStream2(
                self._compressor._cctx, self._out, in_buffer, z_flush_mode
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "error ending compression stream: %s" % _zstd_error(zresult)
                )

            if self._out.pos:
                chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
                self._out.pos = 0

            # Zero return value means the flush/end operation completed.
            if not zresult:
                break

        return b"".join(chunks)
class ZstdCompressionChunker(object):
    """Compress data to uniformly sized chunks.

    This type allows you to iteratively feed chunks of data into a compressor
    and produce output chunks of uniform size.

    ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
    ``bytes`` instances holding compressed data. The iterator may be empty.
    Callers MUST iterate through all elements of the returned iterator before
    performing another operation on the object or else the compressor's
    internal state may become confused. This can result in an exception being
    raised or malformed data being emitted.

    All chunks emitted by ``compress()`` will have a length of the configured
    chunk size.

    ``flush()`` and ``finish()`` may return a final chunk smaller than
    the configured chunk size.

    Instances are obtained by calling :py:meth:`ZstdCompressor.chunker`.

    Here is how the API should be used:

    >>> cctx = zstandard.ZstdCompressor()
    >>> chunker = cctx.chunker(chunk_size=32768)
    >>>
    >>> with open(path, 'rb') as fh:
    ...     while True:
    ...         in_chunk = fh.read(32768)
    ...         if not in_chunk:
    ...             break
    ...
    ...         for out_chunk in chunker.compress(in_chunk):
    ...             # Do something with output chunk of size 32768.
    ...
    ...     for out_chunk in chunker.finish():
    ...         # Do something with output chunks that finalize the zstd frame.

    This compressor type is often a better alternative to
    :py:class:`ZstdCompressor.compressobj` because it has better performance
    properties.

    ``compressobj()`` will emit output data as it is available. This results
    in a *stream* of output chunks of varying sizes. The consistency of the
    output chunk size with ``chunker()`` is more appropriate for many usages,
    such as sending compressed data to a socket.

    ``compressobj()`` may also perform extra memory reallocations in order
    to dynamically adjust the sizes of the output chunks. Since ``chunker()``
    output chunks are all the same size (except for flushed or final chunks),
    there is less memory allocation/copying overhead.
    """

    def __init__(self, compressor, chunk_size):
        self._compressor = compressor
        # Output buffer sized to exactly one chunk; compress() only yields
        # when this buffer is completely full.
        self._out = ffi.new("ZSTD_outBuffer *")
        self._dst_buffer = ffi.new("char[]", chunk_size)
        self._out.dst = self._dst_buffer
        self._out.size = chunk_size
        self._out.pos = 0

        # _in.src doubles as a sentinel: non-NULL means a previous
        # operation's output hasn't been fully consumed yet.
        self._in = ffi.new("ZSTD_inBuffer *")
        self._in.src = ffi.NULL
        self._in.size = 0
        self._in.pos = 0
        self._finished = False

    def compress(self, data):
        """Feed new input data into the compressor.

        :param data:
           Data to feed to compressor.
        :return:
           Iterator of ``bytes`` representing chunks of compressed data.
        """
        if self._finished:
            raise ZstdError("cannot call compress() after compression finished")

        if self._in.src != ffi.NULL:
            raise ZstdError(
                "cannot perform operation before consuming output "
                "from previous operation"
            )

        data_buffer = ffi.from_buffer(data)

        if not len(data_buffer):
            return

        self._in.src = data_buffer
        self._in.size = len(data_buffer)
        self._in.pos = 0

        while self._in.pos < self._in.size:
            zresult = lib.ZSTD_compressStream2(
                self._compressor._cctx, self._out, self._in, lib.ZSTD_e_continue
            )

            # Input fully consumed: reset the sentinel so subsequent
            # operations are permitted.
            if self._in.pos == self._in.size:
                self._in.src = ffi.NULL
                self._in.size = 0
                self._in.pos = 0

            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd compress error: %s" % _zstd_error(zresult)
                )

            # compress() only emits completely full chunks; partial output
            # stays buffered until flush()/finish().
            if self._out.pos == self._out.size:
                yield ffi.buffer(self._out.dst, self._out.pos)[:]
                self._out.pos = 0

    def flush(self):
        """Flushes all data currently in the compressor.

        :return:
           Iterator of ``bytes`` of compressed data.
        """
        if self._finished:
            raise ZstdError("cannot call flush() after compression finished")

        if self._in.src != ffi.NULL:
            raise ZstdError(
                "cannot call flush() before consuming output from "
                "previous operation"
            )

        while True:
            zresult = lib.ZSTD_compressStream2(
                self._compressor._cctx, self._out, self._in, lib.ZSTD_e_flush
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd compress error: %s" % _zstd_error(zresult)
                )

            if self._out.pos:
                yield ffi.buffer(self._out.dst, self._out.pos)[:]
                self._out.pos = 0

            # Zero return value means the flush completed.
            if not zresult:
                return

    def finish(self):
        """Signals the end of input data.

        No new data can be compressed after this method is called.

        This method will flush buffered data and finish the zstd frame.

        :return:
           Iterator of ``bytes`` of compressed data.
        """
        if self._finished:
            raise ZstdError("cannot call finish() after compression finished")

        if self._in.src != ffi.NULL:
            raise ZstdError(
                "cannot call finish() before consuming output from "
                "previous operation"
            )

        while True:
            zresult = lib.ZSTD_compressStream2(
                self._compressor._cctx, self._out, self._in, lib.ZSTD_e_end
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd compress error: %s" % _zstd_error(zresult)
                )

            if self._out.pos:
                yield ffi.buffer(self._out.dst, self._out.pos)[:]
                self._out.pos = 0

            # Zero return value means the frame is fully written.
            if not zresult:
                self._finished = True
                return
  1043. class ZstdCompressionReader(object):
  1044. """Readable compressing stream wrapper.
  1045. ``ZstdCompressionReader`` is a read-only stream interface for obtaining
  1046. compressed data from a source.
  1047. This type conforms to the ``io.RawIOBase`` interface and should be usable
  1048. by any type that operates against a *file-object* (``typing.BinaryIO``
  1049. in Python type hinting speak).
  1050. Instances are neither writable nor seekable (even if the underlying
  1051. source is seekable). ``readline()`` and ``readlines()`` are not implemented
  1052. because they don't make sense for compressed data. ``tell()`` returns the
  1053. number of compressed bytes emitted so far.
  1054. Instances are obtained by calling :py:meth:`ZstdCompressor.stream_reader`.
  1055. In this example, we open a file for reading and then wrap that file
  1056. handle with a stream from which compressed data can be ``read()``.
  1057. >>> with open(path, 'rb') as fh:
  1058. ... cctx = zstandard.ZstdCompressor()
  1059. ... reader = cctx.stream_reader(fh)
  1060. ... while True:
  1061. ... chunk = reader.read(16384)
  1062. ... if not chunk:
  1063. ... break
  1064. ...
  1065. ... # Do something with compressed chunk.
  1066. Instances can also be used as context managers:
  1067. >>> with open(path, 'rb') as fh:
  1068. ... cctx = zstandard.ZstdCompressor()
  1069. ... with cctx.stream_reader(fh) as reader:
  1070. ... while True:
  1071. ... chunk = reader.read(16384)
  1072. ... if not chunk:
  1073. ... break
  1074. ...
  1075. ... # Do something with compressed chunk.
  1076. When the context manager exits or ``close()`` is called, the stream is
  1077. closed, underlying resources are released, and future operations against
  1078. the compression stream will fail.
  1079. ``stream_reader()`` accepts a ``size`` argument specifying how large the
  1080. input stream is. This is used to adjust compression parameters so they are
  1081. tailored to the source size. e.g.
  1082. >>> with open(path, 'rb') as fh:
  1083. ... cctx = zstandard.ZstdCompressor()
  1084. ... with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
  1085. ... ...
  1086. If the ``source`` is a stream, you can specify how large ``read()``
  1087. requests to that stream should be via the ``read_size`` argument.
  1088. It defaults to ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``. e.g.
  1089. >>> with open(path, 'rb') as fh:
  1090. ... cctx = zstandard.ZstdCompressor()
  1091. ... # Will perform fh.read(8192) when obtaining data to feed into the
  1092. ... # compressor.
  1093. ... with cctx.stream_reader(fh, read_size=8192) as reader:
  1094. ... ...
  1095. """
    def __init__(self, compressor, source, read_size, closefd=True):
        # Wraps ``source`` so compressed bytes can be read() from it.
        self._compressor = compressor
        self._source = source
        self._read_size = read_size
        self._closefd = closefd
        self._entered = False
        self._closed = False
        self._bytes_compressed = 0

        # _finished_input: no more bytes will be pulled from the source.
        # _finished_output: the zstd frame has been fully emitted.
        self._finished_input = False
        self._finished_output = False

        self._in_buffer = ffi.new("ZSTD_inBuffer *")
        # Holds a ref so backing bytes in self._in_buffer stay alive.
        self._source_buffer = None
  1109. def __enter__(self):
  1110. if self._entered:
  1111. raise ValueError("cannot __enter__ multiple times")
  1112. if self._closed:
  1113. raise ValueError("stream is closed")
  1114. self._entered = True
  1115. return self
    def __exit__(self, exc_type, exc_value, exc_tb):
        # Close the stream and drop object references on context exit.
        self._entered = False
        self._compressor = None
        self.close()
        # Drop the source only after close() (close() may close it).
        self._source = None

        return False
  1122. def readable(self):
  1123. return True
  1124. def writable(self):
  1125. return False
  1126. def seekable(self):
  1127. return False
  1128. def readline(self):
  1129. raise io.UnsupportedOperation()
  1130. def readlines(self):
  1131. raise io.UnsupportedOperation()
  1132. def write(self, data):
  1133. raise OSError("stream is not writable")
  1134. def writelines(self, ignored):
  1135. raise OSError("stream is not writable")
  1136. def isatty(self):
  1137. return False
  1138. def flush(self):
  1139. return None
    def close(self):
        # Closing is idempotent.
        if self._closed:
            return

        self._closed = True

        # Close the underlying source too, when owned (closefd=True) and it
        # exposes a close() method.
        f = getattr(self._source, "close", None)
        if self._closefd and f:
            f()
  1147. @property
  1148. def closed(self):
  1149. return self._closed
  1150. def tell(self):
  1151. return self._bytes_compressed
  1152. def readall(self):
  1153. chunks = []
  1154. while True:
  1155. chunk = self.read(1048576)
  1156. if not chunk:
  1157. break
  1158. chunks.append(chunk)
  1159. return b"".join(chunks)
def __iter__(self):
    """Iteration is not supported on compression readers."""
    raise io.UnsupportedOperation()
def __next__(self):
    """Iteration is not supported on compression readers."""
    raise io.UnsupportedOperation()

# Python 2 spelling of the iterator protocol method.
next = __next__
def _read_input(self):
    """Populate ``self._in_buffer`` with more data from the source.

    No-op if input was already exhausted. Sets ``self._finished_input``
    when a stream source returns EOF.
    """
    if self._finished_input:
        return
    if hasattr(self._source, "read"):
        data = self._source.read(self._read_size)
        # An empty read() result signals EOF on a stream source.
        if not data:
            self._finished_input = True
            return
        # Keep a Python reference so the memory backing the cffi view
        # stays alive while zstd consumes it.
        self._source_buffer = ffi.from_buffer(data)
        self._in_buffer.src = self._source_buffer
        self._in_buffer.size = len(self._source_buffer)
        self._in_buffer.pos = 0
    else:
        # Buffer-protocol source: hand the entire object to the
        # compressor in a single input buffer.
        self._source_buffer = ffi.from_buffer(self._source)
        self._in_buffer.src = self._source_buffer
        self._in_buffer.size = len(self._source_buffer)
        self._in_buffer.pos = 0
  1182. def _compress_into_buffer(self, out_buffer):
  1183. if self._in_buffer.pos >= self._in_buffer.size:
  1184. return
  1185. old_pos = out_buffer.pos
  1186. zresult = lib.ZSTD_compressStream2(
  1187. self._compressor._cctx,
  1188. out_buffer,
  1189. self._in_buffer,
  1190. lib.ZSTD_e_continue,
  1191. )
  1192. self._bytes_compressed += out_buffer.pos - old_pos
  1193. if self._in_buffer.pos == self._in_buffer.size:
  1194. self._in_buffer.src = ffi.NULL
  1195. self._in_buffer.pos = 0
  1196. self._in_buffer.size = 0
  1197. self._source_buffer = None
  1198. if not hasattr(self._source, "read"):
  1199. self._finished_input = True
  1200. if lib.ZSTD_isError(zresult):
  1201. raise ZstdError("zstd compress error: %s", _zstd_error(zresult))
  1202. return out_buffer.pos and out_buffer.pos == out_buffer.size
  1203. def read(self, size=-1):
  1204. if self._closed:
  1205. raise ValueError("stream is closed")
  1206. if size < -1:
  1207. raise ValueError("cannot read negative amounts less than -1")
  1208. if size == -1:
  1209. return self.readall()
  1210. if self._finished_output or size == 0:
  1211. return b""
  1212. # Need a dedicated ref to dest buffer otherwise it gets collected.
  1213. dst_buffer = ffi.new("char[]", size)
  1214. out_buffer = ffi.new("ZSTD_outBuffer *")
  1215. out_buffer.dst = dst_buffer
  1216. out_buffer.size = size
  1217. out_buffer.pos = 0
  1218. if self._compress_into_buffer(out_buffer):
  1219. return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
  1220. while not self._finished_input:
  1221. self._read_input()
  1222. if self._compress_into_buffer(out_buffer):
  1223. return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
  1224. # EOF
  1225. old_pos = out_buffer.pos
  1226. zresult = lib.ZSTD_compressStream2(
  1227. self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end
  1228. )
  1229. self._bytes_compressed += out_buffer.pos - old_pos
  1230. if lib.ZSTD_isError(zresult):
  1231. raise ZstdError(
  1232. "error ending compression stream: %s", _zstd_error(zresult)
  1233. )
  1234. if zresult == 0:
  1235. self._finished_output = True
  1236. return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
def read1(self, size=-1):
    """Read compressed data, returning as soon as any output is available.

    Unlike read(), does not insist on filling ``size`` bytes before
    returning (subject to the caveat in the comment below).
    """
    if self._closed:
        raise ValueError("stream is closed")
    if size < -1:
        raise ValueError("cannot read negative amounts less than -1")
    if self._finished_output or size == 0:
        return b""
    # -1 returns arbitrary number of bytes.
    if size == -1:
        size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
    dst_buffer = ffi.new("char[]", size)
    out_buffer = ffi.new("ZSTD_outBuffer *")
    out_buffer.dst = dst_buffer
    out_buffer.size = size
    out_buffer.pos = 0
    # read1() dictates that we can perform at most 1 call to the
    # underlying stream to get input. However, we can't satisfy this
    # restriction with compression because not all input generates output.
    # It is possible to perform a block flush in order to ensure output.
    # But this may not be desirable behavior. So we allow multiple read()
    # to the underlying stream. But unlike read(), we stop once we have
    # any output.
    self._compress_into_buffer(out_buffer)
    if out_buffer.pos:
        return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
    while not self._finished_input:
        self._read_input()
        # If we've filled the output buffer, return immediately.
        if self._compress_into_buffer(out_buffer):
            return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
        # If we've populated the output buffer and we're not at EOF,
        # also return, as we've satisfied the read1() limits.
        if out_buffer.pos and not self._finished_input:
            return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
        # Else if we're at EOS and we have room left in the buffer,
        # fall through to below and try to add more data to the output.
    # EOF.
    old_pos = out_buffer.pos
    zresult = lib.ZSTD_compressStream2(
        self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end
    )
    self._bytes_compressed += out_buffer.pos - old_pos
    if lib.ZSTD_isError(zresult):
        raise ZstdError(
            "error ending compression stream: %s" % _zstd_error(zresult)
        )
    if zresult == 0:
        self._finished_output = True
    return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
  1286. def readinto(self, b):
  1287. if self._closed:
  1288. raise ValueError("stream is closed")
  1289. if self._finished_output:
  1290. return 0
  1291. # TODO use writable=True once we require CFFI >= 1.12.
  1292. dest_buffer = ffi.from_buffer(b)
  1293. ffi.memmove(b, b"", 0)
  1294. out_buffer = ffi.new("ZSTD_outBuffer *")
  1295. out_buffer.dst = dest_buffer
  1296. out_buffer.size = len(dest_buffer)
  1297. out_buffer.pos = 0
  1298. if self._compress_into_buffer(out_buffer):
  1299. return out_buffer.pos
  1300. while not self._finished_input:
  1301. self._read_input()
  1302. if self._compress_into_buffer(out_buffer):
  1303. return out_buffer.pos
  1304. # EOF.
  1305. old_pos = out_buffer.pos
  1306. zresult = lib.ZSTD_compressStream2(
  1307. self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end
  1308. )
  1309. self._bytes_compressed += out_buffer.pos - old_pos
  1310. if lib.ZSTD_isError(zresult):
  1311. raise ZstdError(
  1312. "error ending compression stream: %s", _zstd_error(zresult)
  1313. )
  1314. if zresult == 0:
  1315. self._finished_output = True
  1316. return out_buffer.pos
def readinto1(self, b):
    """Compress into the writable buffer ``b``, stopping at first output.

    Like read1(), returns as soon as any output is produced rather than
    trying to fill ``b``. Returns the number of bytes written.
    """
    if self._closed:
        raise ValueError("stream is closed")
    if self._finished_output:
        return 0
    # TODO use writable=True once we require CFFI >= 1.12.
    dest_buffer = ffi.from_buffer(b)
    # Probe that ``b`` is writable (0-byte memmove raises on read-only).
    ffi.memmove(b, b"", 0)
    out_buffer = ffi.new("ZSTD_outBuffer *")
    out_buffer.dst = dest_buffer
    out_buffer.size = len(dest_buffer)
    out_buffer.pos = 0
    self._compress_into_buffer(out_buffer)
    if out_buffer.pos:
        return out_buffer.pos
    while not self._finished_input:
        self._read_input()
        # Output buffer filled: return immediately.
        if self._compress_into_buffer(out_buffer):
            return out_buffer.pos
        # Any output before EOF satisfies the readinto1() contract.
        if out_buffer.pos and not self._finished_input:
            return out_buffer.pos
    # EOF: end the frame and emit what remains.
    old_pos = out_buffer.pos
    zresult = lib.ZSTD_compressStream2(
        self._compressor._cctx, out_buffer, self._in_buffer, lib.ZSTD_e_end
    )
    self._bytes_compressed += out_buffer.pos - old_pos
    if lib.ZSTD_isError(zresult):
        raise ZstdError(
            "error ending compression stream: %s" % _zstd_error(zresult)
        )
    if zresult == 0:
        self._finished_output = True
    return out_buffer.pos
class ZstdCompressor(object):
    """
    Create an object used to perform Zstandard compression.

    Each instance is essentially a wrapper around a ``ZSTD_CCtx`` from
    zstd's C API.

    An instance can compress data various ways. Instances can be used
    multiple times. Each compression operation will use the compression
    parameters defined at construction time.

    .. note::
        When using a compression dictionary and multiple compression
        operations are performed, the ``ZstdCompressionParameters`` derived
        from an integer compression ``level`` and the first compressed data's
        size will be reused for all subsequent operations. This may not be
        desirable if source data sizes vary significantly.

    ``compression_params`` is mutually exclusive with ``level``,
    ``write_checksum``, ``write_content_size``, ``write_dict_id``, and
    ``threads``.

    Assume that each ``ZstdCompressor`` instance can only handle a single
    logical compression operation at the same time. i.e. if you call a method
    like ``stream_reader()`` to obtain multiple objects derived from the same
    ``ZstdCompressor`` instance and attempt to use them simultaneously, errors
    will likely occur.

    If you need to perform multiple logical compression operations and you
    can't guarantee those operations are temporally non-overlapping, you need
    to obtain multiple ``ZstdCompressor`` instances.

    Unless specified otherwise, assume that no two methods of
    ``ZstdCompressor`` instances can be called from multiple Python
    threads simultaneously. In other words, assume instances are not thread safe
    unless stated otherwise.

    :param level:
        Integer compression level. Valid values are all negative integers
        through 22. Lower values generally yield faster operations with lower
        compression ratios. Higher values are generally slower but compress
        better. The default is 3, which is what the ``zstd`` CLI uses. Negative
        levels effectively engage ``--fast`` mode from the ``zstd`` CLI.
    :param dict_data:
        A ``ZstdCompressionDict`` to be used to compress with dictionary
        data.
    :param compression_params:
        A ``ZstdCompressionParameters`` instance defining low-level compression
        parameters. If defined, this will overwrite the ``level`` argument.
    :param write_checksum:
        If True, a 4 byte content checksum will be written with the compressed
        data, allowing the decompressor to perform content verification.
    :param write_content_size:
        If True (the default), the decompressed content size will be included
        in the header of the compressed data. This data will only be written if
        the compressor knows the size of the input data.
    :param write_dict_id:
        Determines whether the dictionary ID will be written into the compressed
        data. Defaults to True. Only adds content to the compressed data if
        a dictionary is being used.
    :param threads:
        Number of threads to use to compress data concurrently. When set,
        compression operations are performed on multiple threads. The default
        value (0) disables multi-threaded compression. A value of ``-1`` means
        to set the number of threads to the number of detected logical CPUs.
    """
  1409. def __init__(
  1410. self,
  1411. level=3,
  1412. dict_data=None,
  1413. compression_params=None,
  1414. write_checksum=None,
  1415. write_content_size=None,
  1416. write_dict_id=None,
  1417. threads=0,
  1418. ):
  1419. if level > lib.ZSTD_maxCLevel():
  1420. raise ValueError(
  1421. "level must be less than %d" % lib.ZSTD_maxCLevel()
  1422. )
  1423. if threads < 0:
  1424. threads = _cpu_count()
  1425. if compression_params and write_checksum is not None:
  1426. raise ValueError(
  1427. "cannot define compression_params and write_checksum"
  1428. )
  1429. if compression_params and write_content_size is not None:
  1430. raise ValueError(
  1431. "cannot define compression_params and write_content_size"
  1432. )
  1433. if compression_params and write_dict_id is not None:
  1434. raise ValueError(
  1435. "cannot define compression_params and write_dict_id"
  1436. )
  1437. if compression_params and threads:
  1438. raise ValueError("cannot define compression_params and threads")
  1439. if compression_params:
  1440. self._params = _make_cctx_params(compression_params)
  1441. else:
  1442. if write_dict_id is None:
  1443. write_dict_id = True
  1444. params = lib.ZSTD_createCCtxParams()
  1445. if params == ffi.NULL:
  1446. raise MemoryError()
  1447. self._params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
  1448. _set_compression_parameter(
  1449. self._params, lib.ZSTD_c_compressionLevel, level
  1450. )
  1451. _set_compression_parameter(
  1452. self._params,
  1453. lib.ZSTD_c_contentSizeFlag,
  1454. write_content_size if write_content_size is not None else 1,
  1455. )
  1456. _set_compression_parameter(
  1457. self._params,
  1458. lib.ZSTD_c_checksumFlag,
  1459. 1 if write_checksum else 0,
  1460. )
  1461. _set_compression_parameter(
  1462. self._params, lib.ZSTD_c_dictIDFlag, 1 if write_dict_id else 0
  1463. )
  1464. if threads:
  1465. _set_compression_parameter(
  1466. self._params, lib.ZSTD_c_nbWorkers, threads
  1467. )
  1468. cctx = lib.ZSTD_createCCtx()
  1469. if cctx == ffi.NULL:
  1470. raise MemoryError()
  1471. self._cctx = cctx
  1472. self._dict_data = dict_data
  1473. # We defer setting up garbage collection until after calling
  1474. # _setup_cctx() to ensure the memory size estimate is more accurate.
  1475. try:
  1476. self._setup_cctx()
  1477. finally:
  1478. self._cctx = ffi.gc(
  1479. cctx, lib.ZSTD_freeCCtx, size=lib.ZSTD_sizeof_CCtx(cctx)
  1480. )
def _setup_cctx(self):
    """Apply stored parameters and any dictionary to the owned ZSTD_CCtx.

    Raises ZstdError if parameters or the dictionary cannot be loaded.
    """
    zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(
        self._cctx, self._params
    )
    if lib.ZSTD_isError(zresult):
        raise ZstdError(
            "could not set compression parameters: %s"
            % _zstd_error(zresult)
        )
    dict_data = self._dict_data
    if dict_data:
        if dict_data._cdict:
            # A pre-digested dictionary exists; reference it directly.
            zresult = lib.ZSTD_CCtx_refCDict(self._cctx, dict_data._cdict)
        else:
            # Load raw dictionary content by reference (no copy).
            zresult = lib.ZSTD_CCtx_loadDictionary_advanced(
                self._cctx,
                dict_data.as_bytes(),
                len(dict_data),
                lib.ZSTD_dlm_byRef,
                dict_data._dict_type,
            )
        if lib.ZSTD_isError(zresult):
            raise ZstdError(
                "could not load compression dictionary: %s"
                % _zstd_error(zresult)
            )
def memory_size(self):
    """Obtain the memory usage of this compressor, in bytes.

    >>> cctx = zstandard.ZstdCompressor()
    >>> memory = cctx.memory_size()
    """
    return lib.ZSTD_sizeof_CCtx(self._cctx)
def compress(self, data):
    """
    Compress data in a single operation.

    This is the simplest mechanism to perform compression: simply pass in a
    value and get a compressed value back. It is almost the most prone to
    abuse.

    The input and output values must fit in memory, so passing in very large
    values can result in excessive memory usage. For this reason, one of the
    streaming based APIs is preferred for larger values.

    :param data:
        Source data to compress
    :return:
        Compressed data

    >>> cctx = zstandard.ZstdCompressor()
    >>> compressed = cctx.compress(b"data to compress")
    """
    lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
    data_buffer = ffi.from_buffer(data)
    # ZSTD_compressBound() gives a worst-case output size, so a single
    # output buffer is guaranteed to hold the whole frame.
    dest_size = lib.ZSTD_compressBound(len(data_buffer))
    out = new_nonzero("char[]", dest_size)
    zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, len(data_buffer))
    if lib.ZSTD_isError(zresult):
        raise ZstdError(
            "error setting source size: %s" % _zstd_error(zresult)
        )
    out_buffer = ffi.new("ZSTD_outBuffer *")
    in_buffer = ffi.new("ZSTD_inBuffer *")
    out_buffer.dst = out
    out_buffer.size = dest_size
    out_buffer.pos = 0
    in_buffer.src = data_buffer
    in_buffer.size = len(data_buffer)
    in_buffer.pos = 0
    zresult = lib.ZSTD_compressStream2(
        self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end
    )
    if lib.ZSTD_isError(zresult):
        raise ZstdError("cannot compress: %s" % _zstd_error(zresult))
    elif zresult:
        # Non-zero means zstd still had data to flush, which cannot
        # happen with a compressBound()-sized buffer.
        raise ZstdError("unexpected partial frame flush")
    return ffi.buffer(out, out_buffer.pos)[:]
  1554. def compressobj(self, size=-1):
  1555. """
  1556. Obtain a compressor exposing the Python standard library compression API.
  1557. See :py:class:`ZstdCompressionObj` for the full documentation.
  1558. :param size:
  1559. Size in bytes of data that will be compressed.
  1560. :return:
  1561. :py:class:`ZstdCompressionObj`
  1562. """
  1563. lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
  1564. if size < 0:
  1565. size = lib.ZSTD_CONTENTSIZE_UNKNOWN
  1566. zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
  1567. if lib.ZSTD_isError(zresult):
  1568. raise ZstdError(
  1569. "error setting source size: %s" % _zstd_error(zresult)
  1570. )
  1571. cobj = ZstdCompressionObj(self, COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
  1572. return cobj
def chunker(self, size=-1, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
    """
    Create an object for iterative compressing to same-sized chunks.

    This API is similar to :py:meth:`ZstdCompressor.compressobj` but has
    better performance properties.

    :param size:
        Size in bytes of data that will be compressed.
    :param chunk_size:
        Size of compressed chunks.
    :return:
        :py:class:`ZstdCompressionChunker`
    """
    lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
    # Unknown size lets zstd pick parameters without a pledged size.
    if size < 0:
        size = lib.ZSTD_CONTENTSIZE_UNKNOWN
    zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
    if lib.ZSTD_isError(zresult):
        raise ZstdError(
            "error setting source size: %s" % _zstd_error(zresult)
        )
    return ZstdCompressionChunker(self, chunk_size=chunk_size)
def copy_stream(
    self,
    ifh,
    ofh,
    size=-1,
    read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
    write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
):
    """
    Copy data between 2 streams while compressing it.

    Data will be read from ``ifh``, compressed, and written to ``ofh``.
    ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a
    ``write(data)``
    method.

    >>> cctx = zstandard.ZstdCompressor()
    >>> with open(input_path, "rb") as ifh, open(output_path, "wb") as ofh:
    ...     cctx.copy_stream(ifh, ofh)

    It is also possible to declare the size of the source stream:

    >>> cctx = zstandard.ZstdCompressor()
    >>> cctx.copy_stream(ifh, ofh, size=len_of_input)

    You can also specify how large the chunks that are ``read()``
    and ``write()`` from and to the streams:

    >>> cctx = zstandard.ZstdCompressor()
    >>> cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)

    The stream copier returns a 2-tuple of bytes read and written:

    >>> cctx = zstandard.ZstdCompressor()
    >>> read_count, write_count = cctx.copy_stream(ifh, ofh)

    :param ifh:
        Source stream to read from
    :param ofh:
        Destination stream to write to
    :param size:
        Size in bytes of the source stream. If defined, compression
        parameters will be tuned for this size.
    :param read_size:
        Chunk sizes that source stream should be ``read()`` from.
    :param write_size:
        Chunk sizes that destination stream should be ``write()`` to.
    :return:
        2-tuple of ints of bytes read and written, respectively.
    """
    if not hasattr(ifh, "read"):
        raise ValueError("first argument must have a read() method")
    if not hasattr(ofh, "write"):
        raise ValueError("second argument must have a write() method")
    lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
    if size < 0:
        size = lib.ZSTD_CONTENTSIZE_UNKNOWN
    zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
    if lib.ZSTD_isError(zresult):
        raise ZstdError(
            "error setting source size: %s" % _zstd_error(zresult)
        )
    in_buffer = ffi.new("ZSTD_inBuffer *")
    out_buffer = ffi.new("ZSTD_outBuffer *")
    # A single reusable output buffer; drained to ofh whenever non-empty.
    dst_buffer = ffi.new("char[]", write_size)
    out_buffer.dst = dst_buffer
    out_buffer.size = write_size
    out_buffer.pos = 0
    total_read, total_write = 0, 0
    while True:
        data = ifh.read(read_size)
        if not data:
            break
        data_buffer = ffi.from_buffer(data)
        total_read += len(data_buffer)
        in_buffer.src = data_buffer
        in_buffer.size = len(data_buffer)
        in_buffer.pos = 0
        # Feed this chunk until the compressor has consumed all of it.
        while in_buffer.pos < in_buffer.size:
            zresult = lib.ZSTD_compressStream2(
                self._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd compress error: %s" % _zstd_error(zresult)
                )
            if out_buffer.pos:
                ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
                total_write += out_buffer.pos
                out_buffer.pos = 0
    # We've finished reading. Flush the compressor.
    while True:
        zresult = lib.ZSTD_compressStream2(
            self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end
        )
        if lib.ZSTD_isError(zresult):
            raise ZstdError(
                "error ending compression stream: %s" % _zstd_error(zresult)
            )
        if out_buffer.pos:
            ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
            total_write += out_buffer.pos
            out_buffer.pos = 0
        # zresult == 0 indicates the frame has been fully flushed.
        if zresult == 0:
            break
    return total_read, total_write
def stream_reader(
    self,
    source,
    size=-1,
    read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
    closefd=True,
):
    """
    Wrap a readable source with a stream that can read compressed data.

    This will produce an object conforming to the ``io.RawIOBase``
    interface which can be ``read()`` from to retrieve compressed data
    from a source.

    The source object can be any object with a ``read(size)`` method
    or an object that conforms to the buffer protocol.

    See :py:class:`ZstdCompressionReader` for type documentation and usage
    examples.

    :param source:
        Object to read source data from
    :param size:
        Size in bytes of source object.
    :param read_size:
        How many bytes to request when ``read()``'ing from the source.
    :param closefd:
        Whether to close the source stream when the returned stream is
        closed.
    :return:
        :py:class:`ZstdCompressionReader`
    """
    lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
    # Prefer an exact size when the source exposes len(); this overrides
    # any caller-supplied ``size``.
    try:
        size = len(source)
    except Exception:
        pass
    if size < 0:
        size = lib.ZSTD_CONTENTSIZE_UNKNOWN
    zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
    if lib.ZSTD_isError(zresult):
        raise ZstdError(
            "error setting source size: %s" % _zstd_error(zresult)
        )
    return ZstdCompressionReader(self, source, read_size, closefd=closefd)
def stream_writer(
    self,
    writer,
    size=-1,
    write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
    write_return_read=True,
    closefd=True,
):
    """
    Create a stream that will write compressed data into another stream.

    The argument to ``stream_writer()`` must have a ``write(data)`` method.
    As compressed data is available, ``write()`` will be called with the
    compressed data as its argument. Many common Python types implement
    ``write()``, including open file handles and ``io.BytesIO``.

    See :py:class:`ZstdCompressionWriter` for more documentation, including
    usage examples.

    :param writer:
        Stream to write compressed data to.
    :param size:
        Size in bytes of data to be compressed. If set, it will be used
        to influence compression parameter tuning and could result in the
        size being written into the header of the compressed data.
    :param write_size:
        How much data to ``write()`` to ``writer`` at a time.
    :param write_return_read:
        Whether ``write()`` should return the number of bytes that were
        consumed from the input.
    :param closefd:
        Whether to ``close`` the ``writer`` when this stream is closed.
    :return:
        :py:class:`ZstdCompressionWriter`
    """
    if not hasattr(writer, "write"):
        raise ValueError("must pass an object with a write() method")
    lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
    if size < 0:
        size = lib.ZSTD_CONTENTSIZE_UNKNOWN
    return ZstdCompressionWriter(
        self, writer, size, write_size, write_return_read, closefd=closefd
    )
def read_to_iter(
    self,
    reader,
    size=-1,
    read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
    write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
):
    """
    Read uncompressed data from a reader and return an iterator

    Returns an iterator of compressed data produced from reading from
    ``reader``.

    This method provides a mechanism to stream compressed data out of a
    source as an iterator of data chunks.

    Uncompressed data will be obtained from ``reader`` by calling the
    ``read(size)`` method of it or by reading a slice (if ``reader``
    conforms to the *buffer protocol*). The source data will be streamed
    into a compressor. As compressed data is available, it will be exposed
    to the iterator.

    Data is read from the source in chunks of ``read_size``. Compressed
    chunks are at most ``write_size`` bytes. Both values default to the
    zstd input and output defaults, respectively.

    If reading from the source via ``read()``, ``read()`` will be called
    until it raises or returns an empty bytes (``b""``). It is perfectly
    valid for the source to deliver fewer bytes than were what requested
    by ``read(size)``.

    The caller is partially in control of how fast data is fed into the
    compressor by how it consumes the returned iterator. The compressor
    will not consume from the reader unless the caller consumes from the
    iterator.

    >>> cctx = zstandard.ZstdCompressor()
    >>> for chunk in cctx.read_to_iter(fh):
    ...     # Do something with emitted data.

    ``read_to_iter()`` accepts a ``size`` argument declaring the size of
    the input stream:

    >>> cctx = zstandard.ZstdCompressor()
    >>> for chunk in cctx.read_to_iter(fh, size=some_int):
    >>>     pass

    You can also control the size that data is ``read()`` from the source
    and the ideal size of output chunks:

    >>> cctx = zstandard.ZstdCompressor()
    >>> for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
    >>>     pass

    ``read_to_iter()`` does not give direct control over the sizes of chunks
    fed into the compressor. Instead, chunk sizes will be whatever the object
    being read from delivers. These will often be of a uniform size.

    :param reader:
        Stream providing data to be compressed.
    :param size:
        Size in bytes of input data.
    :param read_size:
        Controls how many bytes are ``read()`` from the source.
    :param write_size:
        Controls the output size of emitted chunks.
    :return:
        Iterator of ``bytes``.
    """
    if hasattr(reader, "read"):
        have_read = True
    elif hasattr(reader, "__getitem__"):
        # Buffer-protocol source: walk it with explicit slice offsets.
        have_read = False
        buffer_offset = 0
        size = len(reader)
    else:
        raise ValueError(
            "must pass an object with a read() method or "
            "conforms to buffer protocol"
        )
    lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
    if size < 0:
        size = lib.ZSTD_CONTENTSIZE_UNKNOWN
    zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
    if lib.ZSTD_isError(zresult):
        raise ZstdError(
            "error setting source size: %s" % _zstd_error(zresult)
        )
    in_buffer = ffi.new("ZSTD_inBuffer *")
    out_buffer = ffi.new("ZSTD_outBuffer *")
    in_buffer.src = ffi.NULL
    in_buffer.size = 0
    in_buffer.pos = 0
    dst_buffer = ffi.new("char[]", write_size)
    out_buffer.dst = dst_buffer
    out_buffer.size = write_size
    out_buffer.pos = 0
    while True:
        # We should never have output data sitting around after a previous
        # iteration.
        assert out_buffer.pos == 0
        # Collect input data.
        if have_read:
            read_result = reader.read(read_size)
        else:
            remaining = len(reader) - buffer_offset
            slice_size = min(remaining, read_size)
            read_result = reader[buffer_offset : buffer_offset + slice_size]
            buffer_offset += slice_size
        # No new input data. Break out of the read loop.
        if not read_result:
            break
        # Feed all read data into the compressor and emit output until
        # exhausted.
        read_buffer = ffi.from_buffer(read_result)
        in_buffer.src = read_buffer
        in_buffer.size = len(read_buffer)
        in_buffer.pos = 0
        while in_buffer.pos < in_buffer.size:
            zresult = lib.ZSTD_compressStream2(
                self._cctx, out_buffer, in_buffer, lib.ZSTD_e_continue
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd compress error: %s" % _zstd_error(zresult)
                )
            if out_buffer.pos:
                data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
                out_buffer.pos = 0
                yield data
        assert out_buffer.pos == 0
        # And repeat the loop to collect more data.
        continue
    # If we get here, input is exhausted. End the stream and emit what
    # remains.
    while True:
        assert out_buffer.pos == 0
        zresult = lib.ZSTD_compressStream2(
            self._cctx, out_buffer, in_buffer, lib.ZSTD_e_end
        )
        if lib.ZSTD_isError(zresult):
            raise ZstdError(
                "error ending compression stream: %s" % _zstd_error(zresult)
            )
        if out_buffer.pos:
            data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
            out_buffer.pos = 0
            yield data
        # zresult == 0 indicates the frame has been fully flushed.
        if zresult == 0:
            break
def multi_compress_to_buffer(self, data, threads=-1):
    """
    Compress multiple pieces of data as a single function call.

    (Experimental. Not yet supported by CFFI backend.)

    This function is optimized to perform multiple compression operations
    as quickly as possible with as little overhead as possible.

    Data to be compressed can be passed as a ``BufferWithSegmentsCollection``,
    a ``BufferWithSegments``, or a list containing byte like objects. Each
    element of the container will be compressed individually using the
    configured parameters on the ``ZstdCompressor`` instance.

    The ``threads`` argument controls how many threads to use for
    compression. The default is ``0`` which means to use a single thread.
    Negative values use the number of logical CPUs in the machine.

    The function returns a ``BufferWithSegmentsCollection``. This type
    represents N discrete memory allocations, each holding 1 or more
    compressed frames.

    Output data is written to shared memory buffers. This means that unlike
    regular Python objects, a reference to *any* object within the collection
    keeps the shared buffer and therefore memory backing it alive. This can
    have undesirable effects on process memory usage.

    The API and behavior of this function is experimental and will likely
    change. Known deficiencies include:

    * If asked to use multiple threads, it will always spawn that many
      threads, even if the input is too small to use them. It should
      automatically lower the thread count when the extra threads would
      just add overhead.
    * The buffer allocation strategy is fixed. There is room to make it
      dynamic, perhaps even to allow one output buffer per input,
      facilitating a variation of the API to return a list without the
      adverse effects of shared memory buffers.

    :param data:
        Source to read discrete pieces of data to compress.
        Can be a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``,
        or a ``list[bytes]``.
    :return:
        BufferWithSegmentsCollection holding compressed data.
    """
    # Not implemented in the CFFI backend; only the C backend supports it.
    raise NotImplementedError()
  1947. def frame_progression(self):
  1948. """
  1949. Return information on how much work the compressor has done.
  1950. Returns a 3-tuple of (ingested, consumed, produced).
  1951. >>> cctx = zstandard.ZstdCompressor()
  1952. >>> (ingested, consumed, produced) = cctx.frame_progression()
  1953. """
  1954. progression = lib.ZSTD_getFrameProgression(self._cctx)
  1955. return progression.ingested, progression.consumed, progression.produced
  1956. class FrameParameters(object):
  1957. """Information about a zstd frame.
  1958. Instances have the following attributes:
  1959. ``content_size``
  1960. Integer size of original, uncompressed content. This will be ``0`` if the
  1961. original content size isn't written to the frame (controlled with the
  1962. ``write_content_size`` argument to ``ZstdCompressor``) or if the input
  1963. content size was ``0``.
  1964. ``window_size``
  1965. Integer size of maximum back-reference distance in compressed data.
  1966. ``dict_id``
  1967. Integer of dictionary ID used for compression. ``0`` if no dictionary
  1968. ID was used or if the dictionary ID was ``0``.
  1969. ``has_checksum``
  1970. Bool indicating whether a 4 byte content checksum is stored at the end
  1971. of the frame.
  1972. """
  1973. def __init__(self, fparams):
  1974. self.content_size = fparams.frameContentSize
  1975. self.window_size = fparams.windowSize
  1976. self.dict_id = fparams.dictID
  1977. self.has_checksum = bool(fparams.checksumFlag)
  1978. def frame_content_size(data):
  1979. """Obtain the decompressed size of a frame.
  1980. The returned value is usually accurate. But strictly speaking it should
  1981. not be trusted.
  1982. :return:
  1983. ``-1`` if size unknown and a non-negative integer otherwise.
  1984. """
  1985. data_buffer = ffi.from_buffer(data)
  1986. size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer))
  1987. if size == lib.ZSTD_CONTENTSIZE_ERROR:
  1988. raise ZstdError("error when determining content size")
  1989. elif size == lib.ZSTD_CONTENTSIZE_UNKNOWN:
  1990. return -1
  1991. else:
  1992. return size
  1993. def frame_header_size(data):
  1994. """Obtain the size of a frame header.
  1995. :return:
  1996. Integer size in bytes.
  1997. """
  1998. data_buffer = ffi.from_buffer(data)
  1999. zresult = lib.ZSTD_frameHeaderSize(data_buffer, len(data_buffer))
  2000. if lib.ZSTD_isError(zresult):
  2001. raise ZstdError(
  2002. "could not determine frame header size: %s" % _zstd_error(zresult)
  2003. )
  2004. return zresult
  2005. def get_frame_parameters(data, format=FORMAT_ZSTD1):
  2006. """
  2007. Parse a zstd frame header into frame parameters.
  2008. Depending on which fields are present in the frame and their values, the
  2009. length of the frame parameters varies. If insufficient bytes are passed
  2010. in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
  2011. frame parameters can be parsed, pass in at least 18 bytes.
  2012. :param data:
  2013. Data from which to read frame parameters.
  2014. :param format:
  2015. Set the format of data for the decoder.
  2016. :return:
  2017. :py:class:`FrameParameters`
  2018. """
  2019. params = ffi.new("ZSTD_FrameHeader *")
  2020. data_buffer = ffi.from_buffer(data)
  2021. zresult = lib.ZSTD_getFrameHeader_advanced(
  2022. params, data_buffer, len(data_buffer), format
  2023. )
  2024. if lib.ZSTD_isError(zresult):
  2025. raise ZstdError(
  2026. "cannot get frame parameters: %s" % _zstd_error(zresult)
  2027. )
  2028. if zresult:
  2029. raise ZstdError(
  2030. "not enough data for frame parameters; need %d bytes" % zresult
  2031. )
  2032. return FrameParameters(params[0])
  2033. class ZstdCompressionDict(object):
  2034. """Represents a computed compression dictionary.
  2035. Instances are obtained by calling :py:func:`train_dictionary` or by
  2036. passing bytes obtained from another source into the constructor.
  2037. Instances can be constructed from bytes:
  2038. >>> dict_data = zstandard.ZstdCompressionDict(data)
  2039. It is possible to construct a dictionary from *any* data. If the data
  2040. doesn't begin with a magic header, it will be treated as a *prefix*
  2041. dictionary. *Prefix* dictionaries allow compression operations to
  2042. reference raw data within the dictionary.
  2043. It is possible to force the use of *prefix* dictionaries or to require
  2044. a dictionary header:
  2045. >>> dict_data = zstandard.ZstdCompressionDict(data, dict_type=zstandard.DICT_TYPE_RAWCONTENT)
  2046. >>> dict_data = zstandard.ZstdCompressionDict(data, dict_type=zstandard.DICT_TYPE_FULLDICT)
  2047. You can see how many bytes are in the dictionary by calling ``len()``:
  2048. >>> dict_data = zstandard.train_dictionary(size, samples)
  2049. >>> dict_size = len(dict_data) # will not be larger than ``size``
  2050. Once you have a dictionary, you can pass it to the objects performing
  2051. compression and decompression:
  2052. >>> dict_data = zstandard.train_dictionary(131072, samples)
  2053. >>> cctx = zstandard.ZstdCompressor(dict_data=dict_data)
  2054. >>> for source_data in input_data:
  2055. ... compressed = cctx.compress(source_data)
  2056. ... # Do something with compressed data.
  2057. ...
  2058. >>> dctx = zstandard.ZstdDecompressor(dict_data=dict_data)
  2059. >>> for compressed_data in input_data:
  2060. ... buffer = io.BytesIO()
  2061. ... with dctx.stream_writer(buffer) as decompressor:
  2062. ... decompressor.write(compressed_data)
  2063. ... # Do something with raw data in ``buffer``.
  2064. Dictionaries have unique integer IDs. You can retrieve this ID via:
  2065. >>> dict_id = dict_data.dict_id()
  2066. You can obtain the raw data in the dict (useful for persisting and constructing
  2067. a ``ZstdCompressionDict`` later) via ``as_bytes()``:
  2068. >>> dict_data = zstandard.train_dictionary(size, samples)
  2069. >>> raw_data = dict_data.as_bytes()
  2070. By default, when a ``ZstdCompressionDict`` is *attached* to a
  2071. ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
  2072. dictionary for use. This is fine if only 1 compression operation is being
  2073. performed or if the ``ZstdCompressor`` is being reused for multiple operations.
  2074. But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
  2075. this can add overhead.
  2076. It is possible to *precompute* the dictionary so it can readily be consumed
  2077. by multiple ``ZstdCompressor`` instances:
  2078. >>> d = zstandard.ZstdCompressionDict(data)
  2079. >>> # Precompute for compression level 3.
  2080. >>> d.precompute_compress(level=3)
  2081. >>> # Precompute with specific compression parameters.
  2082. >>> params = zstandard.ZstdCompressionParameters(...)
  2083. >>> d.precompute_compress(compression_params=params)
  2084. .. note::
  2085. When a dictionary is precomputed, the compression parameters used to
  2086. precompute the dictionary overwrite some of the compression parameters
  2087. specified to ``ZstdCompressor``.
  2088. :param data:
  2089. Dictionary data.
  2090. :param dict_type:
  2091. Type of dictionary. One of the ``DICT_TYPE_*`` constants.
  2092. """
  2093. def __init__(self, data, dict_type=DICT_TYPE_AUTO, k=0, d=0):
  2094. assert isinstance(data, bytes)
  2095. self._data = data
  2096. self.k = k
  2097. self.d = d
  2098. if dict_type not in (
  2099. DICT_TYPE_AUTO,
  2100. DICT_TYPE_RAWCONTENT,
  2101. DICT_TYPE_FULLDICT,
  2102. ):
  2103. raise ValueError(
  2104. "invalid dictionary load mode: %d; must use "
  2105. "DICT_TYPE_* constants"
  2106. )
  2107. self._dict_type = dict_type
  2108. self._cdict = None
  2109. def __len__(self):
  2110. return len(self._data)
  2111. def dict_id(self):
  2112. """Obtain the integer ID of the dictionary."""
  2113. return int(lib.ZDICT_getDictID(self._data, len(self._data)))
  2114. def as_bytes(self):
  2115. """Obtain the ``bytes`` representation of the dictionary."""
  2116. return self._data
  2117. def precompute_compress(self, level=0, compression_params=None):
  2118. """Precompute a dictionary os it can be used by multiple compressors.
  2119. Calling this method on an instance that will be used by multiple
  2120. :py:class:`ZstdCompressor` instances will improve performance.
  2121. """
  2122. if level and compression_params:
  2123. raise ValueError(
  2124. "must only specify one of level or compression_params"
  2125. )
  2126. if not level and not compression_params:
  2127. raise ValueError("must specify one of level or compression_params")
  2128. if level:
  2129. cparams = lib.ZSTD_getCParams(level, 0, len(self._data))
  2130. else:
  2131. cparams = ffi.new("ZSTD_compressionParameters")
  2132. cparams.chainLog = compression_params.chain_log
  2133. cparams.hashLog = compression_params.hash_log
  2134. cparams.minMatch = compression_params.min_match
  2135. cparams.searchLog = compression_params.search_log
  2136. cparams.strategy = compression_params.strategy
  2137. cparams.targetLength = compression_params.target_length
  2138. cparams.windowLog = compression_params.window_log
  2139. cdict = lib.ZSTD_createCDict_advanced(
  2140. self._data,
  2141. len(self._data),
  2142. lib.ZSTD_dlm_byRef,
  2143. self._dict_type,
  2144. cparams,
  2145. lib.ZSTD_defaultCMem,
  2146. )
  2147. if cdict == ffi.NULL:
  2148. raise ZstdError("unable to precompute dictionary")
  2149. self._cdict = ffi.gc(
  2150. cdict, lib.ZSTD_freeCDict, size=lib.ZSTD_sizeof_CDict(cdict)
  2151. )
  2152. @property
  2153. def _ddict(self):
  2154. ddict = lib.ZSTD_createDDict_advanced(
  2155. self._data,
  2156. len(self._data),
  2157. lib.ZSTD_dlm_byRef,
  2158. self._dict_type,
  2159. lib.ZSTD_defaultCMem,
  2160. )
  2161. if ddict == ffi.NULL:
  2162. raise ZstdError("could not create decompression dict")
  2163. ddict = ffi.gc(
  2164. ddict, lib.ZSTD_freeDDict, size=lib.ZSTD_sizeof_DDict(ddict)
  2165. )
  2166. self.__dict__["_ddict"] = ddict
  2167. return ddict
  2168. def train_dictionary(
  2169. dict_size,
  2170. samples,
  2171. k=0,
  2172. d=0,
  2173. f=0,
  2174. split_point=0.0,
  2175. accel=0,
  2176. notifications=0,
  2177. dict_id=0,
  2178. level=0,
  2179. steps=0,
  2180. threads=0,
  2181. ):
  2182. """Train a dictionary from sample data using the COVER algorithm.
  2183. A compression dictionary of size ``dict_size`` will be created from the
  2184. iterable of ``samples``. The raw dictionary bytes will be returned.
  2185. The dictionary training mechanism is known as *cover*. More details about it
  2186. are available in the paper *Effective Construction of Relative Lempel-Ziv
  2187. Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
  2188. The cover algorithm takes parameters ``k`` and ``d``. These are the
  2189. *segment size* and *dmer size*, respectively. The returned dictionary
  2190. instance created by this function has ``k`` and ``d`` attributes
  2191. containing the values for these parameters. If a ``ZstdCompressionDict``
  2192. is constructed from raw bytes data (a content-only dictionary), the
  2193. ``k`` and ``d`` attributes will be ``0``.
  2194. The segment and dmer size parameters to the cover algorithm can either be
  2195. specified manually or ``train_dictionary()`` can try multiple values
  2196. and pick the best one, where *best* means the smallest compressed data size.
  2197. This later mode is called *optimization* mode.
  2198. Under the hood, this function always calls
  2199. ``ZDICT_optimizeTrainFromBuffer_fastCover()``. See the corresponding C library
  2200. documentation for more.
  2201. If neither ``steps`` nor ``threads`` is defined, defaults for ``d``, ``steps``,
  2202. and ``level`` will be used that are equivalent with what
  2203. ``ZDICT_trainFromBuffer()`` would use.
  2204. :param dict_size:
  2205. Target size in bytes of the dictionary to generate.
  2206. :param samples:
  2207. A list of bytes holding samples the dictionary will be trained from.
  2208. :param k:
  2209. Segment size : constraint: 0 < k : Reasonable range [16, 2048+]
  2210. :param d:
  2211. dmer size : constraint: 0 < d <= k : Reasonable range [6, 16]
  2212. :param f:
  2213. log of size of frequency array : constraint: 0 < f <= 31 : 1 means
  2214. default(20)
  2215. :param split_point:
  2216. Percentage of samples used for training: Only used for optimization.
  2217. The first # samples * ``split_point`` samples will be used to training.
  2218. The last # samples * (1 - split_point) samples will be used for testing.
  2219. 0 means default (0.75), 1.0 when all samples are used for both training
  2220. and testing.
  2221. :param accel:
  2222. Acceleration level: constraint: 0 < accel <= 10. Higher means faster
  2223. and less accurate, 0 means default(1).
  2224. :param dict_id:
  2225. Integer dictionary ID for the produced dictionary. Default is 0, which uses
  2226. a random value.
  2227. :param steps:
  2228. Number of steps through ``k`` values to perform when trying parameter
  2229. variations.
  2230. :param threads:
  2231. Number of threads to use when trying parameter variations. Default is 0,
  2232. which means to use a single thread. A negative value can be specified to
  2233. use as many threads as there are detected logical CPUs.
  2234. :param level:
  2235. Integer target compression level when trying parameter variations.
  2236. :param notifications:
  2237. Controls writing of informational messages to ``stderr``. ``0`` (the
  2238. default) means to write nothing. ``1`` writes errors. ``2`` writes
  2239. progression info. ``3`` writes more details. And ``4`` writes all info.
  2240. """
  2241. if not isinstance(samples, list):
  2242. raise TypeError("samples must be a list")
  2243. if threads < 0:
  2244. threads = _cpu_count()
  2245. if not steps and not threads:
  2246. d = d or 8
  2247. steps = steps or 4
  2248. level = level or 3
  2249. total_size = sum(map(len, samples))
  2250. samples_buffer = new_nonzero("char[]", total_size)
  2251. sample_sizes = new_nonzero("size_t[]", len(samples))
  2252. offset = 0
  2253. for i, sample in enumerate(samples):
  2254. if not isinstance(sample, bytes):
  2255. raise ValueError("samples must be bytes")
  2256. sample_len = len(sample)
  2257. ffi.memmove(samples_buffer + offset, sample, sample_len)
  2258. offset += sample_len
  2259. sample_sizes[i] = sample_len
  2260. dict_data = new_nonzero("char[]", dict_size)
  2261. dparams = ffi.new("ZDICT_fastCover_params_t *")[0]
  2262. dparams.k = k
  2263. dparams.d = d
  2264. dparams.f = f
  2265. dparams.steps = steps
  2266. dparams.nbThreads = threads
  2267. dparams.splitPoint = split_point
  2268. dparams.accel = accel
  2269. dparams.zParams.notificationLevel = notifications
  2270. dparams.zParams.dictID = dict_id
  2271. dparams.zParams.compressionLevel = level
  2272. zresult = lib.ZDICT_optimizeTrainFromBuffer_fastCover(
  2273. ffi.addressof(dict_data),
  2274. dict_size,
  2275. ffi.addressof(samples_buffer),
  2276. ffi.addressof(sample_sizes, 0),
  2277. len(samples),
  2278. ffi.addressof(dparams),
  2279. )
  2280. if lib.ZDICT_isError(zresult):
  2281. msg = ffi.string(lib.ZDICT_getErrorName(zresult)).decode("utf-8")
  2282. raise ZstdError("cannot train dict: %s" % msg)
  2283. return ZstdCompressionDict(
  2284. ffi.buffer(dict_data, zresult)[:],
  2285. dict_type=DICT_TYPE_FULLDICT,
  2286. k=dparams.k,
  2287. d=dparams.d,
  2288. )
class ZstdDecompressionObj(object):
    """A standard library API compatible decompressor.

    This type implements a decompressor that conforms to the API of other
    decompressors in Python's standard library. e.g. ``zlib.decompressobj``
    or ``bz2.BZ2Decompressor``. This allows callers to use zstd compression
    while conforming to a similar API.

    Compressed data chunks are fed into ``decompress(data)`` and
    uncompressed output (or an empty bytes) is returned. Output from
    subsequent calls needs to be concatenated to reassemble the full
    decompressed byte sequence.

    If ``read_across_frames=False``, each instance is single use: once an
    input frame is decoded, ``decompress()`` will raise an exception. If
    ``read_across_frames=True``, instances can decode multiple frames.

    >>> dctx = zstandard.ZstdDecompressor()
    >>> dobj = dctx.decompressobj()
    >>> data = dobj.decompress(compressed_chunk_0)
    >>> data = dobj.decompress(compressed_chunk_1)

    By default, calls to ``decompress()`` write output data in chunks of size
    ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
    before being returned to the caller. It is possible to define the size of
    these temporary chunks by passing ``write_size`` to ``decompressobj()``:

    >>> dctx = zstandard.ZstdDecompressor()
    >>> dobj = dctx.decompressobj(write_size=1048576)

    .. note::

       Because calls to ``decompress()`` may need to perform multiple
       memory (re)allocations, this streaming decompression API isn't as
       efficient as other APIs.
    """

    def __init__(self, decompressor, write_size, read_across_frames):
        # Parent ZstdDecompressor providing the native decompression context.
        self._decompressor = decompressor
        # Size of each temporary output chunk allocated in decompress().
        self._write_size = write_size
        # Set once a frame is fully decoded in single-frame mode.
        self._finished = False
        # Whether decompress() may continue past the end of a frame.
        self._read_across_frames = read_across_frames
        # Input bytes past the end of the frame (single-frame mode only).
        self._unused_input = b""

    def decompress(self, data):
        """Send compressed data to the decompressor and obtain decompressed data.

        :param data:
           Data to feed into the decompressor.
        :return:
           Decompressed bytes.
        :raises ZstdError:
           If the instance already finished a frame in single-frame mode or
           if the native decompressor reports an error.
        """
        if self._finished:
            raise ZstdError("cannot use a decompressobj multiple times")

        in_buffer = ffi.new("ZSTD_inBuffer *")
        out_buffer = ffi.new("ZSTD_outBuffer *")

        # Keep a reference alive for the duration of the call: in_buffer.src
        # points into this buffer.
        data_buffer = ffi.from_buffer(data)

        if len(data_buffer) == 0:
            return b""

        in_buffer.src = data_buffer
        in_buffer.size = len(data_buffer)
        in_buffer.pos = 0

        dst_buffer = ffi.new("char[]", self._write_size)
        out_buffer.dst = dst_buffer
        out_buffer.size = len(dst_buffer)
        out_buffer.pos = 0

        chunks = []

        while True:
            zresult = lib.ZSTD_decompressStream(
                self._decompressor._dctx, out_buffer, in_buffer
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd decompressor error: %s" % _zstd_error(zresult)
                )

            # Always record any output from decompressor.
            if out_buffer.pos:
                chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])

            # 0 is only seen when a frame is fully decoded *and* fully flushed.
            # Behavior depends on whether we're in single or multiple frame
            # mode.
            if zresult == 0 and not self._read_across_frames:
                # Mark the instance as done and make any unconsumed input available
                # for retrieval.
                self._finished = True
                self._decompressor = None
                self._unused_input = data[in_buffer.pos : in_buffer.size]
                break
            elif zresult == 0 and self._read_across_frames:
                # We're at the end of a fully flushed frame and we can read more.
                # Try to read more if there's any more input.
                if in_buffer.pos == in_buffer.size:
                    break
                else:
                    # Reset the output buffer so the next frame's output
                    # doesn't duplicate what was already collected.
                    out_buffer.pos = 0

            # We're not at the end of the frame *or* we're not fully flushed.

            # The decompressor will write out all the bytes it can to the output
            # buffer. So if the output buffer is partially filled and the input
            # is exhausted, there's nothing more to write. So we've done all we
            # can.
            elif (
                in_buffer.pos == in_buffer.size
                and out_buffer.pos < out_buffer.size
            ):
                break
            else:
                out_buffer.pos = 0

        return b"".join(chunks)

    def flush(self, length=0):
        """Effectively a no-op.

        Implemented for compatibility with the standard library APIs.

        Safe to call at any time.

        :return:
           Empty bytes.
        """
        return b""

    @property
    def unused_data(self):
        """Bytes past the end of compressed data.

        If ``decompress()`` is fed additional data beyond the end of a zstd
        frame, this value will be non-empty once ``decompress()`` fully decodes
        the input frame.
        """
        return self._unused_input

    @property
    def unconsumed_tail(self):
        """Data that has not yet been fed into the decompressor."""
        return b""

    @property
    def eof(self):
        """Whether the end of the compressed data stream has been reached."""
        return self._finished
  2410. class ZstdDecompressionReader(object):
  2411. """Read only decompressor that pull uncompressed data from another stream.
  2412. This type provides a read-only stream interface for performing transparent
  2413. decompression from another stream or data source. It conforms to the
  2414. ``io.RawIOBase`` interface. Only methods relevant to reading are
  2415. implemented.
  2416. >>> with open(path, 'rb') as fh:
  2417. >>> dctx = zstandard.ZstdDecompressor()
  2418. >>> reader = dctx.stream_reader(fh)
  2419. >>> while True:
  2420. ... chunk = reader.read(16384)
  2421. ... if not chunk:
  2422. ... break
  2423. ... # Do something with decompressed chunk.
  2424. The stream can also be used as a context manager:
  2425. >>> with open(path, 'rb') as fh:
  2426. ... dctx = zstandard.ZstdDecompressor()
  2427. ... with dctx.stream_reader(fh) as reader:
  2428. ... ...
  2429. When used as a context manager, the stream is closed and the underlying
  2430. resources are released when the context manager exits. Future operations
  2431. against the stream will fail.
  2432. The ``source`` argument to ``stream_reader()`` can be any object with a
  2433. ``read(size)`` method or any object implementing the *buffer protocol*.
  2434. If the ``source`` is a stream, you can specify how large ``read()`` requests
  2435. to that stream should be via the ``read_size`` argument. It defaults to
  2436. ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.:
  2437. >>> with open(path, 'rb') as fh:
  2438. ... dctx = zstandard.ZstdDecompressor()
  2439. ... # Will perform fh.read(8192) when obtaining data for the decompressor.
  2440. ... with dctx.stream_reader(fh, read_size=8192) as reader:
  2441. ... ...
  2442. Instances are *partially* seekable. Absolute and relative positions
  2443. (``SEEK_SET`` and ``SEEK_CUR``) forward of the current position are
  2444. allowed. Offsets behind the current read position and offsets relative
  2445. to the end of stream are not allowed and will raise ``ValueError``
  2446. if attempted.
  2447. ``tell()`` returns the number of decompressed bytes read so far.
  2448. Not all I/O methods are implemented. Notably missing is support for
  2449. ``readline()``, ``readlines()``, and linewise iteration support. This is
  2450. because streams operate on binary data - not text data. If you want to
  2451. convert decompressed output to text, you can chain an ``io.TextIOWrapper``
  2452. to the stream:
  2453. >>> with open(path, 'rb') as fh:
  2454. ... dctx = zstandard.ZstdDecompressor()
  2455. ... stream_reader = dctx.stream_reader(fh)
  2456. ... text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
  2457. ... for line in text_stream:
  2458. ... ...
  2459. """
    def __init__(
        self,
        decompressor,
        source,
        read_size,
        read_across_frames,
        closefd=True,
    ):
        """Initialize the reader state.

        :param decompressor:
           Parent ZstdDecompressor providing the native context (``_dctx``).
        :param source:
           Object with a ``read(size)`` method or a buffer-protocol object.
        :param read_size:
           Size of ``read()`` requests issued to a stream ``source``.
        :param read_across_frames:
           Whether reads may continue into subsequent zstd frames.
        :param closefd:
           Whether ``close()`` also closes ``source``.
        """
        self._decompressor = decompressor
        self._source = source
        self._read_size = read_size
        self._read_across_frames = bool(read_across_frames)
        self._closefd = bool(closefd)
        # Context-manager / lifecycle state.
        self._entered = False
        self._closed = False
        # Total decompressed bytes returned so far (reported by tell()).
        self._bytes_decompressed = 0
        # Input exhausted / no more output available flags.
        self._finished_input = False
        self._finished_output = False
        self._in_buffer = ffi.new("ZSTD_inBuffer *")
        # Holds a ref to self._in_buffer.src.
        self._source_buffer = None
    def __enter__(self):
        """Enter a context; the stream must be open and not already entered."""
        if self._entered:
            raise ValueError("cannot __enter__ multiple times")

        if self._closed:
            raise ValueError("stream is closed")

        self._entered = True
        return self
    def __exit__(self, exc_type, exc_value, exc_tb):
        """Close the stream and release references on context exit."""
        self._entered = False
        # Drop the decompressor reference before closing so the native
        # context is not retained past the context manager's lifetime.
        self._decompressor = None
        self.close()
        self._source = None

        # Never suppress exceptions raised inside the with-block.
        return False
    def readable(self):
        """Always ``True``: this is a read-only stream."""
        return True
    def writable(self):
        """Always ``False``: writing is not supported."""
        return False
    def seekable(self):
        """``False``: only limited forward seeking is supported via seek()."""
        return False
    def readline(self, size=-1):
        # Line-oriented reads are unsupported: the stream yields binary data.
        raise io.UnsupportedOperation()
    def readlines(self, hint=-1):
        # Line-oriented reads are unsupported: the stream yields binary data.
        raise io.UnsupportedOperation()
    def write(self, data):
        # This is a read-only stream.
        raise io.UnsupportedOperation()
    def writelines(self, lines):
        # This is a read-only stream.
        raise io.UnsupportedOperation()
    def isatty(self):
        """Never a terminal."""
        return False
    def flush(self):
        # No-op: reading performs no buffered writes that need flushing.
        return None
  2512. def close(self):
  2513. if self._closed:
  2514. return None
  2515. self._closed = True
  2516. f = getattr(self._source, "close", None)
  2517. if self._closefd and f:
  2518. f()
    @property
    def closed(self):
        """Whether close() has been called."""
        return self._closed
    def tell(self):
        """Number of decompressed bytes read so far."""
        return self._bytes_decompressed
  2524. def readall(self):
  2525. chunks = []
  2526. while True:
  2527. chunk = self.read(1048576)
  2528. if not chunk:
  2529. break
  2530. chunks.append(chunk)
  2531. return b"".join(chunks)
    def __iter__(self):
        # Linewise iteration makes no sense for binary data.
        raise io.UnsupportedOperation()
    def __next__(self):
        # Linewise iteration makes no sense for binary data.
        raise io.UnsupportedOperation()

    # Python 2 style alias.
    next = __next__
  2537. def _read_input(self):
  2538. # We have data left over in the input buffer. Use it.
  2539. if self._in_buffer.pos < self._in_buffer.size:
  2540. return
  2541. # All input data exhausted. Nothing to do.
  2542. if self._finished_input:
  2543. return
  2544. # Else populate the input buffer from our source.
  2545. if hasattr(self._source, "read"):
  2546. data = self._source.read(self._read_size)
  2547. if not data:
  2548. self._finished_input = True
  2549. return
  2550. self._source_buffer = ffi.from_buffer(data)
  2551. self._in_buffer.src = self._source_buffer
  2552. self._in_buffer.size = len(self._source_buffer)
  2553. self._in_buffer.pos = 0
  2554. else:
  2555. self._source_buffer = ffi.from_buffer(self._source)
  2556. self._in_buffer.src = self._source_buffer
  2557. self._in_buffer.size = len(self._source_buffer)
  2558. self._in_buffer.pos = 0
    def _decompress_into_buffer(self, out_buffer):
        """Decompress available input into an output buffer.

        Returns True if data in output buffer should be emitted.
        """
        zresult = lib.ZSTD_decompressStream(
            self._decompressor._dctx, out_buffer, self._in_buffer
        )

        # Release the source buffer as soon as all input is consumed — even
        # before the error check below, so no stale reference is retained.
        if self._in_buffer.pos == self._in_buffer.size:
            self._in_buffer.src = ffi.NULL
            self._in_buffer.pos = 0
            self._in_buffer.size = 0
            self._source_buffer = None

            # A buffer-protocol source is a single buffer: consuming it all
            # means the input is finished. Stream sources may yield more.
            if not hasattr(self._source, "read"):
                self._finished_input = True

        if lib.ZSTD_isError(zresult):
            raise ZstdError("zstd decompress error: %s" % _zstd_error(zresult))

        # Emit data if there is data AND either:
        # a) output buffer is full (read amount is satisfied)
        # b) we're at end of a frame and not in frame spanning mode
        return out_buffer.pos and (
            out_buffer.pos == out_buffer.size
            or zresult == 0
            and not self._read_across_frames
        )
  2583. def read(self, size=-1):
  2584. if self._closed:
  2585. raise ValueError("stream is closed")
  2586. if size < -1:
  2587. raise ValueError("cannot read negative amounts less than -1")
  2588. if size == -1:
  2589. # This is recursive. But it gets the job done.
  2590. return self.readall()
  2591. if self._finished_output or size == 0:
  2592. return b""
  2593. # We /could/ call into readinto() here. But that introduces more
  2594. # overhead.
  2595. dst_buffer = ffi.new("char[]", size)
  2596. out_buffer = ffi.new("ZSTD_outBuffer *")
  2597. out_buffer.dst = dst_buffer
  2598. out_buffer.size = size
  2599. out_buffer.pos = 0
  2600. self._read_input()
  2601. if self._decompress_into_buffer(out_buffer):
  2602. self._bytes_decompressed += out_buffer.pos
  2603. return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
  2604. while not self._finished_input:
  2605. self._read_input()
  2606. if self._decompress_into_buffer(out_buffer):
  2607. self._bytes_decompressed += out_buffer.pos
  2608. return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
  2609. self._bytes_decompressed += out_buffer.pos
  2610. return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
  2611. def readinto(self, b):
  2612. if self._closed:
  2613. raise ValueError("stream is closed")
  2614. if self._finished_output:
  2615. return 0
  2616. # TODO use writable=True once we require CFFI >= 1.12.
  2617. dest_buffer = ffi.from_buffer(b)
  2618. ffi.memmove(b, b"", 0)
  2619. out_buffer = ffi.new("ZSTD_outBuffer *")
  2620. out_buffer.dst = dest_buffer
  2621. out_buffer.size = len(dest_buffer)
  2622. out_buffer.pos = 0
  2623. self._read_input()
  2624. if self._decompress_into_buffer(out_buffer):
  2625. self._bytes_decompressed += out_buffer.pos
  2626. return out_buffer.pos
  2627. while not self._finished_input:
  2628. self._read_input()
  2629. if self._decompress_into_buffer(out_buffer):
  2630. self._bytes_decompressed += out_buffer.pos
  2631. return out_buffer.pos
  2632. self._bytes_decompressed += out_buffer.pos
  2633. return out_buffer.pos
  2634. def read1(self, size=-1):
  2635. if self._closed:
  2636. raise ValueError("stream is closed")
  2637. if size < -1:
  2638. raise ValueError("cannot read negative amounts less than -1")
  2639. if self._finished_output or size == 0:
  2640. return b""
  2641. # -1 returns arbitrary number of bytes.
  2642. if size == -1:
  2643. size = DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
  2644. dst_buffer = ffi.new("char[]", size)
  2645. out_buffer = ffi.new("ZSTD_outBuffer *")
  2646. out_buffer.dst = dst_buffer
  2647. out_buffer.size = size
  2648. out_buffer.pos = 0
  2649. # read1() dictates that we can perform at most 1 call to underlying
  2650. # stream to get input. However, we can't satisfy this restriction with
  2651. # decompression because not all input generates output. So we allow
  2652. # multiple read(). But unlike read(), we stop once we have any output.
  2653. while not self._finished_input:
  2654. self._read_input()
  2655. self._decompress_into_buffer(out_buffer)
  2656. if out_buffer.pos:
  2657. break
  2658. self._bytes_decompressed += out_buffer.pos
  2659. return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
  2660. def readinto1(self, b):
  2661. if self._closed:
  2662. raise ValueError("stream is closed")
  2663. if self._finished_output:
  2664. return 0
  2665. # TODO use writable=True once we require CFFI >= 1.12.
  2666. dest_buffer = ffi.from_buffer(b)
  2667. ffi.memmove(b, b"", 0)
  2668. out_buffer = ffi.new("ZSTD_outBuffer *")
  2669. out_buffer.dst = dest_buffer
  2670. out_buffer.size = len(dest_buffer)
  2671. out_buffer.pos = 0
  2672. while not self._finished_input and not self._finished_output:
  2673. self._read_input()
  2674. self._decompress_into_buffer(out_buffer)
  2675. if out_buffer.pos:
  2676. break
  2677. self._bytes_decompressed += out_buffer.pos
  2678. return out_buffer.pos
  2679. def seek(self, pos, whence=os.SEEK_SET):
  2680. if self._closed:
  2681. raise ValueError("stream is closed")
  2682. read_amount = 0
  2683. if whence == os.SEEK_SET:
  2684. if pos < 0:
  2685. raise OSError("cannot seek to negative position with SEEK_SET")
  2686. if pos < self._bytes_decompressed:
  2687. raise OSError("cannot seek zstd decompression stream backwards")
  2688. read_amount = pos - self._bytes_decompressed
  2689. elif whence == os.SEEK_CUR:
  2690. if pos < 0:
  2691. raise OSError("cannot seek zstd decompression stream backwards")
  2692. read_amount = pos
  2693. elif whence == os.SEEK_END:
  2694. raise OSError(
  2695. "zstd decompression streams cannot be seeked with SEEK_END"
  2696. )
  2697. while read_amount:
  2698. result = self.read(
  2699. min(read_amount, DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
  2700. )
  2701. if not result:
  2702. break
  2703. read_amount -= len(result)
  2704. return self._bytes_decompressed
class ZstdDecompressionWriter(object):
    """
    Write-only stream wrapper that performs decompression.

    This type provides a writable stream that performs decompression and writes
    decompressed data to another stream.

    This type implements the ``io.RawIOBase`` interface. Only methods that
    involve writing will do useful things.

    Behavior is similar to :py:meth:`ZstdCompressor.stream_writer`: compressed
    data is sent to the decompressor by calling ``write(data)`` and decompressed
    output is written to the inner stream by calling its ``write(data)``
    method:

    >>> dctx = zstandard.ZstdDecompressor()
    >>> decompressor = dctx.stream_writer(fh)
    >>> # Will call fh.write() with uncompressed data.
    >>> decompressor.write(compressed_data)

    Instances can be used as context managers. However, context managers add no
    extra special behavior other than automatically calling ``close()`` when
    they exit.

    Calling ``close()`` will mark the stream as closed and subsequent I/O
    operations will raise ``ValueError`` (per the documented behavior of
    ``io.RawIOBase``). ``close()`` will also call ``close()`` on the
    underlying stream if such a method exists and the instance was created with
    ``closefd=True``.

    The size of chunks to ``write()`` to the destination can be specified:

    >>> dctx = zstandard.ZstdDecompressor()
    >>> with dctx.stream_writer(fh, write_size=16384) as decompressor:
    >>> pass

    You can see how much memory is being used by the decompressor:

    >>> dctx = zstandard.ZstdDecompressor()
    >>> with dctx.stream_writer(fh) as decompressor:
    >>> byte_size = decompressor.memory_size()

    ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
    the return value of ``write()``. When ``True`` (the default), ``write()``
    returns the number of bytes that were read from the input. When ``False``,
    ``write()`` returns the number of bytes that were ``write()`` to the inner
    stream.
    """

    def __init__(
        self,
        decompressor,
        writer,
        write_size,
        write_return_read,
        closefd=True,
    ):
        # Eagerly initialize the DCtx so the first write() can stream directly.
        decompressor._ensure_dctx()

        self._decompressor = decompressor
        self._writer = writer
        self._write_size = write_size
        self._write_return_read = bool(write_return_read)
        self._closefd = bool(closefd)
        self._entered = False
        # _closing is True only while close() runs; flush() uses it to avoid
        # flushing the inner stream mid-close.
        self._closing = False
        self._closed = False

    def __enter__(self):
        if self._closed:
            raise ValueError("stream is closed")

        # Re-entering the same instance is almost certainly a caller bug.
        if self._entered:
            raise ZstdError("cannot __enter__ multiple times")

        self._entered = True

        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self._entered = False
        self.close()
        return False

    def __iter__(self):
        # Write-only stream: iteration is not supported.
        raise io.UnsupportedOperation()

    def __next__(self):
        raise io.UnsupportedOperation()

    def memory_size(self):
        """Size in bytes of the underlying ``ZSTD_DCtx``."""
        return lib.ZSTD_sizeof_DCtx(self._decompressor._dctx)

    def close(self):
        """Flush and mark the stream closed; idempotent.

        Also closes the inner stream when created with ``closefd=True`` and
        the inner stream exposes ``close()``.
        """
        if self._closed:
            return

        try:
            self._closing = True
            self.flush()
        finally:
            self._closing = False
            self._closed = True

        f = getattr(self._writer, "close", None)
        if self._closefd and f:
            f()

    @property
    def closed(self):
        return self._closed

    def fileno(self):
        # Delegate to the inner stream when possible.
        f = getattr(self._writer, "fileno", None)
        if f:
            return f()
        else:
            raise OSError("fileno not available on underlying writer")

    def flush(self):
        """Flush the inner stream (no-op while close() is in progress)."""
        if self._closed:
            raise ValueError("stream is closed")

        f = getattr(self._writer, "flush", None)
        if f and not self._closing:
            return f()

    def isatty(self):
        return False

    def readable(self):
        return False

    def readline(self, size=-1):
        raise io.UnsupportedOperation()

    def readlines(self, hint=-1):
        raise io.UnsupportedOperation()

    def seek(self, offset, whence=None):
        raise io.UnsupportedOperation()

    def seekable(self):
        return False

    def tell(self):
        raise io.UnsupportedOperation()

    def truncate(self, size=None):
        raise io.UnsupportedOperation()

    def writable(self):
        return True

    def writelines(self, lines):
        raise io.UnsupportedOperation()

    def read(self, size=-1):
        raise io.UnsupportedOperation()

    def readall(self):
        raise io.UnsupportedOperation()

    def readinto(self, b):
        raise io.UnsupportedOperation()

    def write(self, data):
        """Feed compressed ``data`` to the decompressor, writing decompressed
        output to the inner stream.

        Returns bytes of input consumed when ``write_return_read`` is True,
        else bytes written to the inner stream.
        """
        if self._closed:
            raise ValueError("stream is closed")

        total_write = 0

        in_buffer = ffi.new("ZSTD_inBuffer *")
        out_buffer = ffi.new("ZSTD_outBuffer *")

        data_buffer = ffi.from_buffer(data)
        in_buffer.src = data_buffer
        in_buffer.size = len(data_buffer)
        in_buffer.pos = 0

        dst_buffer = ffi.new("char[]", self._write_size)
        out_buffer.dst = dst_buffer
        out_buffer.size = len(dst_buffer)
        out_buffer.pos = 0

        dctx = self._decompressor._dctx

        # Drain the entire input, flushing the scratch buffer to the inner
        # stream each time it receives output.
        while in_buffer.pos < in_buffer.size:
            zresult = lib.ZSTD_decompressStream(dctx, out_buffer, in_buffer)
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "zstd decompress error: %s" % _zstd_error(zresult)
                )

            if out_buffer.pos:
                self._writer.write(
                    ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
                )
                total_write += out_buffer.pos
                out_buffer.pos = 0

        if self._write_return_read:
            return in_buffer.pos
        else:
            return total_write
  2860. class ZstdDecompressor(object):
  2861. """
  2862. Context for performing zstandard decompression.
  2863. Each instance is essentially a wrapper around a ``ZSTD_DCtx`` from zstd's
  2864. C API.
2865. An instance can decompress data various ways. Instances can be used multiple
  2866. times.
  2867. The interface of this class is very similar to
  2868. :py:class:`zstandard.ZstdCompressor` (by design).
  2869. Assume that each ``ZstdDecompressor`` instance can only handle a single
  2870. logical compression operation at the same time. i.e. if you call a method
  2871. like ``decompressobj()`` to obtain multiple objects derived from the same
  2872. ``ZstdDecompressor`` instance and attempt to use them simultaneously, errors
  2873. will likely occur.
  2874. If you need to perform multiple logical decompression operations and you
  2875. can't guarantee those operations are temporally non-overlapping, you need
  2876. to obtain multiple ``ZstdDecompressor`` instances.
  2877. Unless specified otherwise, assume that no two methods of
  2878. ``ZstdDecompressor`` instances can be called from multiple Python
  2879. threads simultaneously. In other words, assume instances are not thread safe
  2880. unless stated otherwise.
  2881. :param dict_data:
  2882. Compression dictionary to use.
  2883. :param max_window_size:
  2884. Sets an upper limit on the window size for decompression operations in
  2885. kibibytes. This setting can be used to prevent large memory allocations
  2886. for inputs using large compression windows.
  2887. :param format:
  2888. Set the format of data for the decoder.
  2889. By default this is ``zstandard.FORMAT_ZSTD1``. It can be set to
  2890. ``zstandard.FORMAT_ZSTD1_MAGICLESS`` to allow decoding frames without
  2891. the 4 byte magic header. Not all decompression APIs support this mode.
  2892. """
  2893. def __init__(self, dict_data=None, max_window_size=0, format=FORMAT_ZSTD1):
  2894. self._dict_data = dict_data
  2895. self._max_window_size = max_window_size
  2896. self._format = format
  2897. dctx = lib.ZSTD_createDCtx()
  2898. if dctx == ffi.NULL:
  2899. raise MemoryError()
  2900. self._dctx = dctx
  2901. # Defer setting up garbage collection until full state is loaded so
  2902. # the memory size is more accurate.
  2903. try:
  2904. self._ensure_dctx()
  2905. finally:
  2906. self._dctx = ffi.gc(
  2907. dctx, lib.ZSTD_freeDCtx, size=lib.ZSTD_sizeof_DCtx(dctx)
  2908. )
  2909. def memory_size(self):
  2910. """Size of decompression context, in bytes.
  2911. >>> dctx = zstandard.ZstdDecompressor()
  2912. >>> size = dctx.memory_size()
  2913. """
  2914. return lib.ZSTD_sizeof_DCtx(self._dctx)
  2915. def decompress(
  2916. self,
  2917. data,
  2918. max_output_size=0,
  2919. read_across_frames=False,
  2920. allow_extra_data=True,
  2921. ):
  2922. """
  2923. Decompress data in a single operation.
  2924. This method will decompress the input data in a single operation and
  2925. return the decompressed data.
  2926. The input bytes are expected to contain at least 1 full Zstandard frame
  2927. (something compressed with :py:meth:`ZstdCompressor.compress` or
  2928. similar). If the input does not contain a full frame, an exception will
  2929. be raised.
  2930. ``read_across_frames`` controls whether to read multiple zstandard
  2931. frames in the input. When False, decompression stops after reading the
  2932. first frame. This feature is not yet implemented but the argument is
  2933. provided for forward API compatibility when the default is changed to
  2934. True in a future release. For now, if you need to decompress multiple
  2935. frames, use an API like :py:meth:`ZstdCompressor.stream_reader` with
  2936. ``read_across_frames=True``.
  2937. ``allow_extra_data`` controls how to handle extra input data after a
  2938. fully decoded frame. If False, any extra data (which could be a valid
  2939. zstd frame) will result in ``ZstdError`` being raised. If True, extra
  2940. data is silently ignored. The default will likely change to False in a
  2941. future release when ``read_across_frames`` defaults to True.
  2942. If the input contains extra data after a full frame, that extra input
  2943. data is silently ignored. This behavior is undesirable in many scenarios
  2944. and will likely be changed or controllable in a future release (see
  2945. #181).
  2946. If the frame header of the compressed data does not contain the content
  2947. size, ``max_output_size`` must be specified or ``ZstdError`` will be
  2948. raised. An allocation of size ``max_output_size`` will be performed and an
  2949. attempt will be made to perform decompression into that buffer. If the
  2950. buffer is too small or cannot be allocated, ``ZstdError`` will be
  2951. raised. The buffer will be resized if it is too large.
  2952. Uncompressed data could be much larger than compressed data. As a result,
  2953. calling this function could result in a very large memory allocation
  2954. being performed to hold the uncompressed data. This could potentially
  2955. result in ``MemoryError`` or system memory swapping. If you don't need
  2956. the full output data in a single contiguous array in memory, consider
  2957. using streaming decompression for more resilient memory behavior.
  2958. Usage:
  2959. >>> dctx = zstandard.ZstdDecompressor()
  2960. >>> decompressed = dctx.decompress(data)
  2961. If the compressed data doesn't have its content size embedded within it,
  2962. decompression can be attempted by specifying the ``max_output_size``
  2963. argument:
  2964. >>> dctx = zstandard.ZstdDecompressor()
  2965. >>> uncompressed = dctx.decompress(data, max_output_size=1048576)
  2966. Ideally, ``max_output_size`` will be identical to the decompressed
  2967. output size.
  2968. .. important::
  2969. If the exact size of decompressed data is unknown (not passed in
  2970. explicitly and not stored in the zstd frame), for performance
  2971. reasons it is encouraged to use a streaming API.
  2972. :param data:
  2973. Compressed data to decompress.
  2974. :param max_output_size:
  2975. Integer max size of response.
  2976. If ``0``, there is no limit and we can attempt to allocate an output
  2977. buffer of infinite size.
  2978. :return:
  2979. ``bytes`` representing decompressed output.
  2980. """
  2981. if read_across_frames:
  2982. raise ZstdError(
  2983. "ZstdDecompressor.read_across_frames=True is not yet implemented"
  2984. )
  2985. self._ensure_dctx()
  2986. data_buffer = ffi.from_buffer(data)
  2987. params = ffi.new("ZSTD_FrameHeader *")
  2988. zresult = lib.ZSTD_getFrameHeader_advanced(
  2989. params, data_buffer, len(data_buffer), self._format
  2990. )
  2991. if zresult != 0:
  2992. raise ZstdError("error determining content size from frame header")
  2993. output_size = params.frameContentSize
  2994. if output_size == 0:
  2995. return b""
  2996. elif output_size == lib.ZSTD_CONTENTSIZE_UNKNOWN:
  2997. if not max_output_size:
  2998. raise ZstdError(
  2999. "could not determine content size in frame header"
  3000. )
  3001. result_buffer = ffi.new("char[]", max_output_size)
  3002. result_size = max_output_size
  3003. output_size = 0
  3004. else:
  3005. result_buffer = ffi.new("char[]", output_size)
  3006. result_size = output_size
  3007. out_buffer = ffi.new("ZSTD_outBuffer *")
  3008. out_buffer.dst = result_buffer
  3009. out_buffer.size = result_size
  3010. out_buffer.pos = 0
  3011. in_buffer = ffi.new("ZSTD_inBuffer *")
  3012. in_buffer.src = data_buffer
  3013. in_buffer.size = len(data_buffer)
  3014. in_buffer.pos = 0
  3015. zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
  3016. if lib.ZSTD_isError(zresult):
  3017. raise ZstdError("decompression error: %s" % _zstd_error(zresult))
  3018. elif zresult:
  3019. raise ZstdError(
  3020. "decompression error: did not decompress full frame"
  3021. )
  3022. elif output_size and out_buffer.pos != output_size:
  3023. raise ZstdError(
  3024. "decompression error: decompressed %d bytes; expected %d"
  3025. % (zresult, output_size)
  3026. )
  3027. elif not allow_extra_data and in_buffer.pos < in_buffer.size:
  3028. count = in_buffer.size - in_buffer.pos
  3029. raise ZstdError(
  3030. "compressed input contains %d bytes of unused data, which is disallowed"
  3031. % count
  3032. )
  3033. return ffi.buffer(result_buffer, out_buffer.pos)[:]
  3034. def stream_reader(
  3035. self,
  3036. source,
  3037. read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
  3038. read_across_frames=False,
  3039. closefd=True,
  3040. ):
  3041. """
  3042. Read-only stream wrapper that performs decompression.
  3043. This method obtains an object that conforms to the ``io.RawIOBase``
  3044. interface and performs transparent decompression via ``read()``
  3045. operations. Source data is obtained by calling ``read()`` on a
  3046. source stream or object implementing the buffer protocol.
  3047. See :py:class:`zstandard.ZstdDecompressionReader` for more documentation
  3048. and usage examples.
  3049. :param source:
  3050. Source of compressed data to decompress. Can be any object
  3051. with a ``read(size)`` method or that conforms to the buffer protocol.
  3052. :param read_size:
  3053. Integer number of bytes to read from the source and feed into the
  3054. compressor at a time.
  3055. :param read_across_frames:
  3056. Whether to read data across multiple zstd frames. If False,
  3057. decompression is stopped at frame boundaries.
  3058. :param closefd:
  3059. Whether to close the source stream when this instance is closed.
  3060. :return:
  3061. :py:class:`zstandard.ZstdDecompressionReader`.
  3062. """
  3063. self._ensure_dctx()
  3064. return ZstdDecompressionReader(
  3065. self, source, read_size, read_across_frames, closefd=closefd
  3066. )
  3067. def decompressobj(
  3068. self,
  3069. write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
  3070. read_across_frames=False,
  3071. ):
  3072. """Obtain a standard library compatible incremental decompressor.
  3073. See :py:class:`ZstdDecompressionObj` for more documentation
  3074. and usage examples.
  3075. :param write_size: size of internal output buffer to collect decompressed
  3076. chunks in.
  3077. :param read_across_frames: whether to read across multiple zstd frames.
  3078. If False, reading stops after 1 frame and subsequent decompress
  3079. attempts will raise an exception.
  3080. :return:
  3081. :py:class:`zstandard.ZstdDecompressionObj`
  3082. """
  3083. if write_size < 1:
  3084. raise ValueError("write_size must be positive")
  3085. self._ensure_dctx()
  3086. return ZstdDecompressionObj(
  3087. self, write_size=write_size, read_across_frames=read_across_frames
  3088. )
    def read_to_iter(
        self,
        reader,
        read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
        write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
        skip_bytes=0,
    ):
        """Read compressed data to an iterator of uncompressed chunks.

        This method will read data from ``reader``, feed it to a decompressor,
        and emit ``bytes`` chunks representing the decompressed result.

        >>> dctx = zstandard.ZstdDecompressor()
        >>> for chunk in dctx.read_to_iter(fh):
        ...     # Do something with original data.

        ``read_to_iter()`` accepts an object with a ``read(size)`` method that
        will return compressed bytes or an object conforming to the buffer
        protocol.

        ``read_to_iter()`` returns an iterator whose elements are chunks of the
        decompressed data.

        The size of requested ``read()`` from the source can be specified:

        >>> dctx = zstandard.ZstdDecompressor()
        >>> for chunk in dctx.read_to_iter(fh, read_size=16384):
        ...     pass

        It is also possible to skip leading bytes in the input data:

        >>> dctx = zstandard.ZstdDecompressor()
        >>> for chunk in dctx.read_to_iter(fh, skip_bytes=1):
        ...     pass

        .. tip::

           Skipping leading bytes is useful if the source data contains extra
           *header* data. Traditionally, you would need to create a slice or
           ``memoryview`` of the data you want to decompress. This would create
           overhead. It is more efficient to pass the offset into this API.

        Similarly to :py:meth:`ZstdCompressor.read_to_iter`, the consumer of the
        iterator controls when data is decompressed. If the iterator isn't consumed,
        decompression is put on hold.

        When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
        the behavior may seem similar to what occurs when the simple decompression
        API is used. However, this API works when the decompressed size is unknown.
        Furthermore, if feeding large inputs, the decompressor will work in chunks
        instead of performing a single operation.

        :param reader:
            Source of compressed data. Can be any object with a
            ``read(size)`` method or any object conforming to the buffer
            protocol.
        :param read_size:
            Integer size of data chunks to read from ``reader`` and feed into
            the decompressor.
        :param write_size:
            Integer size of data chunks to emit from iterator.
        :param skip_bytes:
            Integer number of bytes to skip over before sending data into
            the decompressor.
        :return:
            Iterator of ``bytes`` representing uncompressed data.
        """
        if skip_bytes >= read_size:
            raise ValueError("skip_bytes must be smaller than read_size")

        # Two input modes: a read()-able stream, or a sliceable object
        # conforming to the buffer protocol.
        if hasattr(reader, "read"):
            have_read = True
        elif hasattr(reader, "__getitem__"):
            have_read = False
            buffer_offset = 0
            size = len(reader)
        else:
            raise ValueError(
                "must pass an object with a read() method or "
                "conforms to buffer protocol"
            )

        if skip_bytes:
            if have_read:
                # NOTE(review): a short read here would skip fewer bytes than
                # requested — assumes reader.read(n) returns n bytes; confirm.
                reader.read(skip_bytes)
            else:
                if skip_bytes > size:
                    raise ValueError("skip_bytes larger than first input chunk")

                buffer_offset = skip_bytes

        self._ensure_dctx()

        in_buffer = ffi.new("ZSTD_inBuffer *")
        out_buffer = ffi.new("ZSTD_outBuffer *")

        # Single reusable output scratch buffer; out_buffer.pos is reset to 0
        # after every yield, which the asserts below rely on.
        dst_buffer = ffi.new("char[]", write_size)
        out_buffer.dst = dst_buffer
        out_buffer.size = len(dst_buffer)
        out_buffer.pos = 0

        while True:
            assert out_buffer.pos == 0

            if have_read:
                read_result = reader.read(read_size)
            else:
                remaining = size - buffer_offset
                slice_size = min(remaining, read_size)
                read_result = reader[buffer_offset : buffer_offset + slice_size]
                buffer_offset += slice_size

            # No new input. Break out of read loop.
            if not read_result:
                break

            # Feed all read data into decompressor and emit output until
            # exhausted.
            read_buffer = ffi.from_buffer(read_result)
            in_buffer.src = read_buffer
            in_buffer.size = len(read_buffer)
            in_buffer.pos = 0

            while in_buffer.pos < in_buffer.size:
                assert out_buffer.pos == 0

                zresult = lib.ZSTD_decompressStream(
                    self._dctx, out_buffer, in_buffer
                )
                if lib.ZSTD_isError(zresult):
                    raise ZstdError(
                        "zstd decompress error: %s" % _zstd_error(zresult)
                    )

                if out_buffer.pos:
                    data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
                    out_buffer.pos = 0
                    yield data

                # Return value 0 means an entire frame was decoded: stop.
                if zresult == 0:
                    return

            # Repeat loop to collect more input data.
            continue

        # If we get here, input is exhausted.
  3206. def stream_writer(
  3207. self,
  3208. writer,
  3209. write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
  3210. write_return_read=True,
  3211. closefd=True,
  3212. ):
  3213. """
  3214. Push-based stream wrapper that performs decompression.
  3215. This method constructs a stream wrapper that conforms to the
  3216. ``io.RawIOBase`` interface and performs transparent decompression
  3217. when writing to a wrapper stream.
  3218. See :py:class:`zstandard.ZstdDecompressionWriter` for more documentation
  3219. and usage examples.
  3220. :param writer:
  3221. Destination for decompressed output. Can be any object with a
  3222. ``write(data)``.
  3223. :param write_size:
  3224. Integer size of chunks to ``write()`` to ``writer``.
  3225. :param write_return_read:
  3226. Whether ``write()`` should return the number of bytes of input
  3227. consumed. If False, ``write()`` returns the number of bytes sent
  3228. to the inner stream.
  3229. :param closefd:
  3230. Whether to ``close()`` the inner stream when this stream is closed.
  3231. :return:
  3232. :py:class:`zstandard.ZstdDecompressionWriter`
  3233. """
  3234. if not hasattr(writer, "write"):
  3235. raise ValueError("must pass an object with a write() method")
  3236. return ZstdDecompressionWriter(
  3237. self,
  3238. writer,
  3239. write_size,
  3240. write_return_read,
  3241. closefd=closefd,
  3242. )
  3243. def copy_stream(
  3244. self,
  3245. ifh,
  3246. ofh,
  3247. read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
  3248. write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
  3249. ):
  3250. """
  3251. Copy data between streams, decompressing in the process.
  3252. Compressed data will be read from ``ifh``, decompressed, and written
  3253. to ``ofh``.
  3254. >>> dctx = zstandard.ZstdDecompressor()
  3255. >>> dctx.copy_stream(ifh, ofh)
  3256. e.g. to decompress a file to another file:
  3257. >>> dctx = zstandard.ZstdDecompressor()
  3258. >>> with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
  3259. ... dctx.copy_stream(ifh, ofh)
  3260. The size of chunks being ``read()`` and ``write()`` from and to the
  3261. streams can be specified:
  3262. >>> dctx = zstandard.ZstdDecompressor()
  3263. >>> dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
  3264. :param ifh:
  3265. Source stream to read compressed data from.
  3266. Must have a ``read()`` method.
  3267. :param ofh:
  3268. Destination stream to write uncompressed data to.
  3269. Must have a ``write()`` method.
  3270. :param read_size:
  3271. The number of bytes to ``read()`` from the source in a single
  3272. operation.
  3273. :param write_size:
  3274. The number of bytes to ``write()`` to the destination in a single
  3275. operation.
  3276. :return:
  3277. 2-tuple of integers representing the number of bytes read and
  3278. written, respectively.
  3279. """
  3280. if not hasattr(ifh, "read"):
  3281. raise ValueError("first argument must have a read() method")
  3282. if not hasattr(ofh, "write"):
  3283. raise ValueError("second argument must have a write() method")
  3284. self._ensure_dctx()
  3285. in_buffer = ffi.new("ZSTD_inBuffer *")
  3286. out_buffer = ffi.new("ZSTD_outBuffer *")
  3287. dst_buffer = ffi.new("char[]", write_size)
  3288. out_buffer.dst = dst_buffer
  3289. out_buffer.size = write_size
  3290. out_buffer.pos = 0
  3291. total_read, total_write = 0, 0
  3292. # Read all available input.
  3293. while True:
  3294. data = ifh.read(read_size)
  3295. if not data:
  3296. break
  3297. data_buffer = ffi.from_buffer(data)
  3298. total_read += len(data_buffer)
  3299. in_buffer.src = data_buffer
  3300. in_buffer.size = len(data_buffer)
  3301. in_buffer.pos = 0
  3302. # Flush all read data to output.
  3303. while in_buffer.pos < in_buffer.size:
  3304. zresult = lib.ZSTD_decompressStream(
  3305. self._dctx, out_buffer, in_buffer
  3306. )
  3307. if lib.ZSTD_isError(zresult):
  3308. raise ZstdError(
  3309. "zstd decompressor error: %s" % _zstd_error(zresult)
  3310. )
  3311. if out_buffer.pos:
  3312. ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
  3313. total_write += out_buffer.pos
  3314. out_buffer.pos = 0
  3315. # Continue loop to keep reading.
  3316. return total_read, total_write
  3317. def decompress_content_dict_chain(self, frames):
  3318. """
  3319. Decompress a series of frames using the content dictionary chaining technique.
  3320. Such a list of frames is produced by compressing discrete inputs where
  3321. each non-initial input is compressed with a *prefix* dictionary consisting
  3322. of the content of the previous input.
  3323. For example, say you have the following inputs:
  3324. >>> inputs = [b"input 1", b"input 2", b"input 3"]
  3325. The zstd frame chain consists of:
  3326. 1. ``b"input 1"`` compressed in standalone/discrete mode
  3327. 2. ``b"input 2"`` compressed using ``b"input 1"`` as a *prefix* dictionary
  3328. 3. ``b"input 3"`` compressed using ``b"input 2"`` as a *prefix* dictionary
  3329. Each zstd frame **must** have the content size written.
  3330. The following Python code can be used to produce a *prefix dictionary chain*:
  3331. >>> def make_chain(inputs):
  3332. ... frames = []
  3333. ...
  3334. ... # First frame is compressed in standalone/discrete mode.
  3335. ... zctx = zstandard.ZstdCompressor()
  3336. ... frames.append(zctx.compress(inputs[0]))
  3337. ...
  3338. ... # Subsequent frames use the previous fulltext as a prefix dictionary
  3339. ... for i, raw in enumerate(inputs[1:]):
  3340. ... dict_data = zstandard.ZstdCompressionDict(
  3341. ... inputs[i], dict_type=zstandard.DICT_TYPE_RAWCONTENT)
  3342. ... zctx = zstandard.ZstdCompressor(dict_data=dict_data)
  3343. ... frames.append(zctx.compress(raw))
  3344. ...
  3345. ... return frames
  3346. ``decompress_content_dict_chain()`` returns the uncompressed data of the last
  3347. element in the input chain.
  3348. .. note::
  3349. It is possible to implement *prefix dictionary chain* decompression
  3350. on top of other APIs. However, this function will likely be faster -
  3351. especially for long input chains - as it avoids the overhead of
  3352. instantiating and passing around intermediate objects between
  3353. multiple functions.
  3354. :param frames:
  3355. List of ``bytes`` holding compressed zstd frames.
  3356. :return:
  3357. """
  3358. if not isinstance(frames, list):
  3359. raise TypeError("argument must be a list")
  3360. if not frames:
  3361. raise ValueError("empty input chain")
  3362. # First chunk should not be using a dictionary. We handle it specially.
  3363. chunk = frames[0]
  3364. if not isinstance(chunk, bytes):
  3365. raise ValueError("chunk 0 must be bytes")
  3366. # All chunks should be zstd frames and should have content size set.
  3367. chunk_buffer = ffi.from_buffer(chunk)
  3368. params = ffi.new("ZSTD_FrameHeader *")
  3369. zresult = lib.ZSTD_getFrameHeader(
  3370. params, chunk_buffer, len(chunk_buffer)
  3371. )
  3372. if lib.ZSTD_isError(zresult):
  3373. raise ValueError("chunk 0 is not a valid zstd frame")
  3374. elif zresult:
  3375. raise ValueError("chunk 0 is too small to contain a zstd frame")
  3376. if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN:
  3377. raise ValueError("chunk 0 missing content size in frame")
  3378. self._ensure_dctx(load_dict=False)
  3379. last_buffer = ffi.new("char[]", params.frameContentSize)
  3380. out_buffer = ffi.new("ZSTD_outBuffer *")
  3381. out_buffer.dst = last_buffer
  3382. out_buffer.size = len(last_buffer)
  3383. out_buffer.pos = 0
  3384. in_buffer = ffi.new("ZSTD_inBuffer *")
  3385. in_buffer.src = chunk_buffer
  3386. in_buffer.size = len(chunk_buffer)
  3387. in_buffer.pos = 0
  3388. zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
  3389. if lib.ZSTD_isError(zresult):
  3390. raise ZstdError(
  3391. "could not decompress chunk 0: %s" % _zstd_error(zresult)
  3392. )
  3393. elif zresult:
  3394. raise ZstdError("chunk 0 did not decompress full frame")
  3395. # Special case of chain length of 1
  3396. if len(frames) == 1:
  3397. return ffi.buffer(last_buffer, len(last_buffer))[:]
  3398. i = 1
  3399. while i < len(frames):
  3400. chunk = frames[i]
  3401. if not isinstance(chunk, bytes):
  3402. raise ValueError("chunk %d must be bytes" % i)
  3403. chunk_buffer = ffi.from_buffer(chunk)
  3404. zresult = lib.ZSTD_getFrameHeader(
  3405. params, chunk_buffer, len(chunk_buffer)
  3406. )
  3407. if lib.ZSTD_isError(zresult):
  3408. raise ValueError("chunk %d is not a valid zstd frame" % i)
  3409. elif zresult:
  3410. raise ValueError(
  3411. "chunk %d is too small to contain a zstd frame" % i
  3412. )
  3413. if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN:
  3414. raise ValueError("chunk %d missing content size in frame" % i)
  3415. dest_buffer = ffi.new("char[]", params.frameContentSize)
  3416. out_buffer.dst = dest_buffer
  3417. out_buffer.size = len(dest_buffer)
  3418. out_buffer.pos = 0
  3419. in_buffer.src = chunk_buffer
  3420. in_buffer.size = len(chunk_buffer)
  3421. in_buffer.pos = 0
  3422. zresult = lib.ZSTD_decompressStream(
  3423. self._dctx, out_buffer, in_buffer
  3424. )
  3425. if lib.ZSTD_isError(zresult):
  3426. raise ZstdError(
  3427. "could not decompress chunk %d: %s" % _zstd_error(zresult)
  3428. )
  3429. elif zresult:
  3430. raise ZstdError("chunk %d did not decompress full frame" % i)
  3431. last_buffer = dest_buffer
  3432. i += 1
  3433. return ffi.buffer(last_buffer, len(last_buffer))[:]
  3434. def multi_decompress_to_buffer(
  3435. self, frames, decompressed_sizes=None, threads=0
  3436. ):
  3437. """
  3438. Decompress multiple zstd frames to output buffers as a single operation.
  3439. (Experimental. Not available in CFFI backend.)
  3440. Compressed frames can be passed to the function as a
  3441. ``BufferWithSegments``, a ``BufferWithSegmentsCollection``, or as a
  3442. list containing objects that conform to the buffer protocol. For best
  3443. performance, pass a ``BufferWithSegmentsCollection`` or a
  3444. ``BufferWithSegments``, as minimal input validation will be done for
  3445. that type. If calling from Python (as opposed to C), constructing one
  3446. of these instances may add overhead cancelling out the performance
  3447. overhead of validation for list inputs.
  3448. Returns a ``BufferWithSegmentsCollection`` containing the decompressed
  3449. data. All decompressed data is allocated in a single memory buffer. The
  3450. ``BufferWithSegments`` instance tracks which objects are at which offsets
  3451. and their respective lengths.
  3452. >>> dctx = zstandard.ZstdDecompressor()
  3453. >>> results = dctx.multi_decompress_to_buffer([b'...', b'...'])
  3454. The decompressed size of each frame MUST be discoverable. It can either be
  3455. embedded within the zstd frame or passed in via the ``decompressed_sizes``
  3456. argument.
  3457. The ``decompressed_sizes`` argument is an object conforming to the buffer
  3458. protocol which holds an array of 64-bit unsigned integers in the machine's
  3459. native format defining the decompressed sizes of each frame. If this argument
  3460. is passed, it avoids having to scan each frame for its decompressed size.
  3461. This frame scanning can add noticeable overhead in some scenarios.
  3462. >>> frames = [...]
  3463. >>> sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
  3464. >>>
  3465. >>> dctx = zstandard.ZstdDecompressor()
  3466. >>> results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
  3467. .. note::
  3468. It is possible to pass a ``mmap.mmap()`` instance into this function by
  3469. wrapping it with a ``BufferWithSegments`` instance (which will define the
  3470. offsets of frames within the memory mapped region).
  3471. This function is logically equivalent to performing
  3472. :py:meth:`ZstdCompressor.decompress` on each input frame and returning the
  3473. result.
  3474. This function exists to perform decompression on multiple frames as fast
  3475. as possible by having as little overhead as possible. Since decompression is
  3476. performed as a single operation and since the decompressed output is stored in
  3477. a single buffer, extra memory allocations, Python objects, and Python function
  3478. calls are avoided. This is ideal for scenarios where callers know up front that
  3479. they need to access data for multiple frames, such as when *delta chains* are
  3480. being used.
  3481. Currently, the implementation always spawns multiple threads when requested,
  3482. even if the amount of work to do is small. In the future, it will be smarter
  3483. about avoiding threads and their associated overhead when the amount of
  3484. work to do is small.
  3485. :param frames:
  3486. Source defining zstd frames to decompress.
  3487. :param decompressed_sizes:
  3488. Array of integers representing sizes of decompressed zstd frames.
  3489. :param threads:
  3490. How many threads to use for decompression operations.
  3491. Negative values will use the same number of threads as logical CPUs
  3492. on the machine. Values ``0`` or ``1`` use a single thread.
  3493. :return:
  3494. ``BufferWithSegmentsCollection``
  3495. """
  3496. raise NotImplementedError()
    def _ensure_dctx(self, load_dict=True):
        """Reset the decompression context and re-apply per-instance settings.

        Called before starting a new decompression operation so that state
        from a previous frame does not leak into the next one.

        :param load_dict:
            When ``True`` (the default) and a dictionary was supplied to the
            instance, re-reference the prepared digested dictionary on the
            context. Callers that must not use the dictionary for the first
            frame (e.g. content dict chains) pass ``False``.
        :raises ZstdError:
            If any of the underlying libzstd context-setup calls fail.
        """
        # Reset only the session (in-progress frame state); parameters set
        # below are re-applied explicitly each time.
        lib.ZSTD_DCtx_reset(self._dctx, lib.ZSTD_reset_session_only)

        # Optional guard against decompressing frames requiring excessive
        # window memory. Zero/None means "use libzstd's default limit".
        if self._max_window_size:
            zresult = lib.ZSTD_DCtx_setMaxWindowSize(
                self._dctx, self._max_window_size
            )
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "unable to set max window size: %s" % _zstd_error(zresult)
                )

        # Frame format (e.g. standard vs. magicless) chosen at construction.
        zresult = lib.ZSTD_DCtx_setParameter(
            self._dctx, lib.ZSTD_d_format, self._format
        )
        if lib.ZSTD_isError(zresult):
            raise ZstdError(
                "unable to set decoding format: %s" % _zstd_error(zresult)
            )

        # Attach the pre-digested dictionary, if one exists and is wanted.
        if self._dict_data and load_dict:
            zresult = lib.ZSTD_DCtx_refDDict(self._dctx, self._dict_data._ddict)
            if lib.ZSTD_isError(zresult):
                raise ZstdError(
                    "unable to reference prepared dictionary: %s"
                    % _zstd_error(zresult)
                )