__init__.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. # Copyright (c) 2017-present, Gregory Szorc
  2. # All rights reserved.
  3. #
  4. # This software may be modified and distributed under the terms
  5. # of the BSD license. See the LICENSE file for details.
  6. # ruff: noqa: F403, F405
  7. """Python interface to the Zstandard (zstd) compression library."""
  8. from __future__ import absolute_import, unicode_literals
  9. # This module serves 2 roles:
  10. #
  11. # 1) Export the C or CFFI "backend" through a central module.
  12. # 2) Implement additional functionality built on top of C or CFFI backend.
  13. import builtins
  14. import io
  15. import os
  16. import platform
  17. import sys
  18. if sys.version_info >= (3, 12):
  19. from collections.abc import Buffer
  20. else:
  21. from typing import ByteString as Buffer
  22. # Some Python implementations don't support C extensions. That's why we have
  23. # a CFFI implementation in the first place. The code here import one of our
  24. # "backends" then re-exports the symbols from this module. For convenience,
  25. # we support falling back to the CFFI backend if the C extension can't be
  26. # imported. But for performance reasons, we only do this on unknown Python
  27. # implementation. Notably, for CPython we require the C extension by default.
  28. # Because someone will inevitably want special behavior, the behavior is
  29. # configurable via an environment variable. A potentially better way to handle
  30. # this is to import a special ``__importpolicy__`` module or something
  31. # defining a variable and `setup.py` could write the file with whatever
  32. # policy was specified at build time. Until someone needs it, we go with
  33. # the hacky but simple environment variable approach.
  34. _module_policy = os.environ.get(
  35. "PYTHON_ZSTANDARD_IMPORT_POLICY", "default"
  36. ).strip()
  37. if _module_policy == "default":
  38. if platform.python_implementation() in ("CPython",):
  39. from .backend_c import * # type: ignore
  40. backend = "cext"
  41. elif platform.python_implementation() in ("PyPy",):
  42. from .backend_cffi import * # type: ignore
  43. backend = "cffi"
  44. else:
  45. try:
  46. from .backend_c import *
  47. backend = "cext"
  48. except ImportError:
  49. from .backend_cffi import *
  50. backend = "cffi"
  51. elif _module_policy == "cffi_fallback":
  52. try:
  53. from .backend_c import *
  54. backend = "cext"
  55. except ImportError:
  56. from .backend_cffi import *
  57. backend = "cffi"
  58. elif _module_policy == "rust":
  59. from .backend_rust import * # type: ignore
  60. backend = "rust"
  61. elif _module_policy == "cext":
  62. from .backend_c import *
  63. backend = "cext"
  64. elif _module_policy == "cffi":
  65. from .backend_cffi import *
  66. backend = "cffi"
  67. else:
  68. raise ImportError(
  69. "unknown module import policy: %s; use default, cffi_fallback, "
  70. "cext, or cffi" % _module_policy
  71. )
  72. # Keep this in sync with python-zstandard.h, rust-ext/src/lib.rs, and debian/changelog.
  73. __version__ = "0.25.0"
  74. _MODE_CLOSED = 0
  75. _MODE_READ = 1
  76. _MODE_WRITE = 2
  77. def open(
  78. filename,
  79. mode="rb",
  80. cctx=None,
  81. dctx=None,
  82. encoding=None,
  83. errors=None,
  84. newline=None,
  85. closefd=None,
  86. ):
  87. """Create a file object with zstd (de)compression.
  88. The object returned from this function will be a
  89. :py:class:`ZstdDecompressionReader` if opened for reading in binary mode,
  90. a :py:class:`ZstdCompressionWriter` if opened for writing in binary mode,
  91. or an ``io.TextIOWrapper`` if opened for reading or writing in text mode.
  92. :param filename:
  93. ``bytes``, ``str``, or ``os.PathLike`` defining a file to open or a
  94. file object (with a ``read()`` or ``write()`` method).
  95. :param mode:
  96. ``str`` File open mode. Accepts any of the open modes recognized by
  97. ``open()``.
  98. :param cctx:
  99. ``ZstdCompressor`` to use for compression. If not specified and file
  100. is opened for writing, the default ``ZstdCompressor`` will be used.
  101. :param dctx:
  102. ``ZstdDecompressor`` to use for decompression. If not specified and file
  103. is opened for reading, the default ``ZstdDecompressor`` will be used.
  104. :param encoding:
  105. ``str`` that defines text encoding to use when file is opened in text
  106. mode.
  107. :param errors:
  108. ``str`` defining text encoding error handling mode.
  109. :param newline:
  110. ``str`` defining newline to use in text mode.
  111. :param closefd:
  112. ``bool`` whether to close the file when the returned object is closed.
  113. Only used if a file object is passed. If a filename is specified, the
  114. opened file is always closed when the returned object is closed.
  115. """
  116. normalized_mode = mode.replace("t", "")
  117. if normalized_mode in ("r", "rb"):
  118. dctx = dctx or ZstdDecompressor()
  119. open_mode = "r"
  120. raw_open_mode = "rb"
  121. elif normalized_mode in ("w", "wb", "a", "ab", "x", "xb"):
  122. cctx = cctx or ZstdCompressor()
  123. open_mode = "w"
  124. raw_open_mode = normalized_mode
  125. if not raw_open_mode.endswith("b"):
  126. raw_open_mode = raw_open_mode + "b"
  127. else:
  128. raise ValueError("Invalid mode: {!r}".format(mode))
  129. if hasattr(os, "PathLike"):
  130. types = (str, bytes, os.PathLike)
  131. else:
  132. types = (str, bytes)
  133. if isinstance(filename, types): # type: ignore
  134. inner_fh = builtins.open(filename, raw_open_mode)
  135. closefd = True
  136. elif hasattr(filename, "read") or hasattr(filename, "write"):
  137. inner_fh = filename
  138. closefd = bool(closefd)
  139. else:
  140. raise TypeError(
  141. "filename must be a str, bytes, file or PathLike object"
  142. )
  143. if open_mode == "r":
  144. fh = dctx.stream_reader(inner_fh, closefd=closefd)
  145. elif open_mode == "w":
  146. fh = cctx.stream_writer(inner_fh, closefd=closefd)
  147. else:
  148. raise RuntimeError("logic error in zstandard.open() handling open mode")
  149. if "b" not in normalized_mode:
  150. return io.TextIOWrapper(
  151. fh, encoding=encoding, errors=errors, newline=newline
  152. )
  153. else:
  154. return fh
  155. def compress(data: Buffer, level: int = 3) -> bytes:
  156. """Compress source data using the zstd compression format.
  157. This performs one-shot compression using basic/default compression
  158. settings.
  159. This method is provided for convenience and is equivalent to calling
  160. ``ZstdCompressor(level=level).compress(data)``.
  161. If you find yourself calling this function in a tight loop,
  162. performance will be greater if you construct a single ``ZstdCompressor``
  163. and repeatedly call ``compress()`` on it.
  164. """
  165. cctx = ZstdCompressor(level=level)
  166. return cctx.compress(data)
  167. def decompress(data: Buffer, max_output_size: int = 0) -> bytes:
  168. """Decompress a zstd frame into its original data.
  169. This performs one-shot decompression using basic/default compression
  170. settings.
  171. This method is provided for convenience and is equivalent to calling
  172. ``ZstdDecompressor().decompress(data, max_output_size=max_output_size)``.
  173. If you find yourself calling this function in a tight loop, performance
  174. will be greater if you construct a single ``ZstdDecompressor`` and
  175. repeatedly call ``decompress()`` on it.
  176. """
  177. dctx = ZstdDecompressor()
  178. return dctx.decompress(data, max_output_size=max_output_size)