_collections.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. from __future__ import annotations
  2. import typing
  3. from collections import OrderedDict
  4. from enum import Enum, auto
  5. from threading import RLock
  6. if typing.TYPE_CHECKING:
  7. # We can only import Protocol if TYPE_CHECKING because it's a development
  8. # dependency, and is not available at runtime.
  9. from typing import Protocol
  10. from typing_extensions import Self
  11. class HasGettableStringKeys(Protocol):
  12. def keys(self) -> typing.Iterator[str]: ...
  13. def __getitem__(self, key: str) -> str: ...
  14. __all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
  15. # Key type
  16. _KT = typing.TypeVar("_KT")
  17. # Value type
  18. _VT = typing.TypeVar("_VT")
  19. # Default type
  20. _DT = typing.TypeVar("_DT")
  21. ValidHTTPHeaderSource = typing.Union[
  22. "HTTPHeaderDict",
  23. typing.Mapping[str, str],
  24. typing.Iterable[tuple[str, str]],
  25. "HasGettableStringKeys",
  26. ]
  27. class _Sentinel(Enum):
  28. not_passed = auto()
  29. def ensure_can_construct_http_header_dict(
  30. potential: object,
  31. ) -> ValidHTTPHeaderSource | None:
  32. if isinstance(potential, HTTPHeaderDict):
  33. return potential
  34. elif isinstance(potential, typing.Mapping):
  35. # Full runtime checking of the contents of a Mapping is expensive, so for the
  36. # purposes of typechecking, we assume that any Mapping is the right shape.
  37. return typing.cast(typing.Mapping[str, str], potential)
  38. elif isinstance(potential, typing.Iterable):
  39. # Similarly to Mapping, full runtime checking of the contents of an Iterable is
  40. # expensive, so for the purposes of typechecking, we assume that any Iterable
  41. # is the right shape.
  42. return typing.cast(typing.Iterable[tuple[str, str]], potential)
  43. elif hasattr(potential, "keys") and hasattr(potential, "__getitem__"):
  44. return typing.cast("HasGettableStringKeys", potential)
  45. else:
  46. return None
  47. class RecentlyUsedContainer(typing.Generic[_KT, _VT], typing.MutableMapping[_KT, _VT]):
  48. """
  49. Provides a thread-safe dict-like container which maintains up to
  50. ``maxsize`` keys while throwing away the least-recently-used keys beyond
  51. ``maxsize``.
  52. :param maxsize:
  53. Maximum number of recent elements to retain.
  54. :param dispose_func:
  55. Every time an item is evicted from the container,
  56. ``dispose_func(value)`` is called. Callback which will get called
  57. """
  58. _container: typing.OrderedDict[_KT, _VT]
  59. _maxsize: int
  60. dispose_func: typing.Callable[[_VT], None] | None
  61. lock: RLock
  62. def __init__(
  63. self,
  64. maxsize: int = 10,
  65. dispose_func: typing.Callable[[_VT], None] | None = None,
  66. ) -> None:
  67. super().__init__()
  68. self._maxsize = maxsize
  69. self.dispose_func = dispose_func
  70. self._container = OrderedDict()
  71. self.lock = RLock()
  72. def __getitem__(self, key: _KT) -> _VT:
  73. # Re-insert the item, moving it to the end of the eviction line.
  74. with self.lock:
  75. item = self._container.pop(key)
  76. self._container[key] = item
  77. return item
  78. def __setitem__(self, key: _KT, value: _VT) -> None:
  79. evicted_item = None
  80. with self.lock:
  81. # Possibly evict the existing value of 'key'
  82. try:
  83. # If the key exists, we'll overwrite it, which won't change the
  84. # size of the pool. Because accessing a key should move it to
  85. # the end of the eviction line, we pop it out first.
  86. evicted_item = key, self._container.pop(key)
  87. self._container[key] = value
  88. except KeyError:
  89. # When the key does not exist, we insert the value first so that
  90. # evicting works in all cases, including when self._maxsize is 0
  91. self._container[key] = value
  92. if len(self._container) > self._maxsize:
  93. # If we didn't evict an existing value, and we've hit our maximum
  94. # size, then we have to evict the least recently used item from
  95. # the beginning of the container.
  96. evicted_item = self._container.popitem(last=False)
  97. # After releasing the lock on the pool, dispose of any evicted value.
  98. if evicted_item is not None and self.dispose_func:
  99. _, evicted_value = evicted_item
  100. self.dispose_func(evicted_value)
  101. def __delitem__(self, key: _KT) -> None:
  102. with self.lock:
  103. value = self._container.pop(key)
  104. if self.dispose_func:
  105. self.dispose_func(value)
  106. def __len__(self) -> int:
  107. with self.lock:
  108. return len(self._container)
  109. def __iter__(self) -> typing.NoReturn:
  110. raise NotImplementedError(
  111. "Iteration over this class is unlikely to be threadsafe."
  112. )
  113. def clear(self) -> None:
  114. with self.lock:
  115. # Copy pointers to all values, then wipe the mapping
  116. values = list(self._container.values())
  117. self._container.clear()
  118. if self.dispose_func:
  119. for value in values:
  120. self.dispose_func(value)
  121. def keys(self) -> set[_KT]: # type: ignore[override]
  122. with self.lock:
  123. return set(self._container.keys())
  124. class HTTPHeaderDictItemView(set[tuple[str, str]]):
  125. """
  126. HTTPHeaderDict is unusual for a Mapping[str, str] in that it has two modes of
  127. address.
  128. If we directly try to get an item with a particular name, we will get a string
  129. back that is the concatenated version of all the values:
  130. >>> d['X-Header-Name']
  131. 'Value1, Value2, Value3'
  132. However, if we iterate over an HTTPHeaderDict's items, we will optionally combine
  133. these values based on whether combine=True was called when building up the dictionary
  134. >>> d = HTTPHeaderDict({"A": "1", "B": "foo"})
  135. >>> d.add("A", "2", combine=True)
  136. >>> d.add("B", "bar")
  137. >>> list(d.items())
  138. [
  139. ('A', '1, 2'),
  140. ('B', 'foo'),
  141. ('B', 'bar'),
  142. ]
  143. This class conforms to the interface required by the MutableMapping ABC while
  144. also giving us the nonstandard iteration behavior we want; items with duplicate
  145. keys, ordered by time of first insertion.
  146. """
  147. _headers: HTTPHeaderDict
  148. def __init__(self, headers: HTTPHeaderDict) -> None:
  149. self._headers = headers
  150. def __len__(self) -> int:
  151. return len(list(self._headers.iteritems()))
  152. def __iter__(self) -> typing.Iterator[tuple[str, str]]:
  153. return self._headers.iteritems()
  154. def __contains__(self, item: object) -> bool:
  155. if isinstance(item, tuple) and len(item) == 2:
  156. passed_key, passed_val = item
  157. if isinstance(passed_key, str) and isinstance(passed_val, str):
  158. return self._headers._has_value_for_header(passed_key, passed_val)
  159. return False
  160. class HTTPHeaderDict(typing.MutableMapping[str, str]):
  161. """
  162. :param headers:
  163. An iterable of field-value pairs. Must not contain multiple field names
  164. when compared case-insensitively.
  165. :param kwargs:
  166. Additional field-value pairs to pass in to ``dict.update``.
  167. A ``dict`` like container for storing HTTP Headers.
  168. Field names are stored and compared case-insensitively in compliance with
  169. RFC 7230. Iteration provides the first case-sensitive key seen for each
  170. case-insensitive pair.
  171. Using ``__setitem__`` syntax overwrites fields that compare equal
  172. case-insensitively in order to maintain ``dict``'s api. For fields that
  173. compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
  174. in a loop.
  175. If multiple fields that are equal case-insensitively are passed to the
  176. constructor or ``.update``, the behavior is undefined and some will be
  177. lost.
  178. >>> headers = HTTPHeaderDict()
  179. >>> headers.add('Set-Cookie', 'foo=bar')
  180. >>> headers.add('set-cookie', 'baz=quxx')
  181. >>> headers['content-length'] = '7'
  182. >>> headers['SET-cookie']
  183. 'foo=bar, baz=quxx'
  184. >>> headers['Content-Length']
  185. '7'
  186. """
  187. _container: typing.MutableMapping[str, list[str]]
  188. def __init__(self, headers: ValidHTTPHeaderSource | None = None, **kwargs: str):
  189. super().__init__()
  190. self._container = {} # 'dict' is insert-ordered
  191. if headers is not None:
  192. if isinstance(headers, HTTPHeaderDict):
  193. self._copy_from(headers)
  194. else:
  195. self.extend(headers)
  196. if kwargs:
  197. self.extend(kwargs)
  198. def __setitem__(self, key: str, val: str) -> None:
  199. # avoid a bytes/str comparison by decoding before httplib
  200. if isinstance(key, bytes):
  201. key = key.decode("latin-1")
  202. self._container[key.lower()] = [key, val]
  203. def __getitem__(self, key: str) -> str:
  204. if isinstance(key, bytes):
  205. key = key.decode("latin-1")
  206. val = self._container[key.lower()]
  207. return ", ".join(val[1:])
  208. def __delitem__(self, key: str) -> None:
  209. if isinstance(key, bytes):
  210. key = key.decode("latin-1")
  211. del self._container[key.lower()]
  212. def __contains__(self, key: object) -> bool:
  213. if isinstance(key, bytes):
  214. key = key.decode("latin-1")
  215. if isinstance(key, str):
  216. return key.lower() in self._container
  217. return False
  218. def setdefault(self, key: str, default: str = "") -> str:
  219. return super().setdefault(key, default)
  220. def __eq__(self, other: object) -> bool:
  221. maybe_constructable = ensure_can_construct_http_header_dict(other)
  222. if maybe_constructable is None:
  223. return False
  224. else:
  225. other_as_http_header_dict = type(self)(maybe_constructable)
  226. return {k.lower(): v for k, v in self.itermerged()} == {
  227. k.lower(): v for k, v in other_as_http_header_dict.itermerged()
  228. }
  229. def __ne__(self, other: object) -> bool:
  230. return not self.__eq__(other)
  231. def __len__(self) -> int:
  232. return len(self._container)
  233. def __iter__(self) -> typing.Iterator[str]:
  234. # Only provide the originally cased names
  235. for vals in self._container.values():
  236. yield vals[0]
  237. def discard(self, key: str) -> None:
  238. try:
  239. del self[key]
  240. except KeyError:
  241. pass
  242. def add(self, key: str, val: str, *, combine: bool = False) -> None:
  243. """Adds a (name, value) pair, doesn't overwrite the value if it already
  244. exists.
  245. If this is called with combine=True, instead of adding a new header value
  246. as a distinct item during iteration, this will instead append the value to
  247. any existing header value with a comma. If no existing header value exists
  248. for the key, then the value will simply be added, ignoring the combine parameter.
  249. >>> headers = HTTPHeaderDict(foo='bar')
  250. >>> headers.add('Foo', 'baz')
  251. >>> headers['foo']
  252. 'bar, baz'
  253. >>> list(headers.items())
  254. [('foo', 'bar'), ('foo', 'baz')]
  255. >>> headers.add('foo', 'quz', combine=True)
  256. >>> list(headers.items())
  257. [('foo', 'bar, baz, quz')]
  258. """
  259. # avoid a bytes/str comparison by decoding before httplib
  260. if isinstance(key, bytes):
  261. key = key.decode("latin-1")
  262. key_lower = key.lower()
  263. new_vals = [key, val]
  264. # Keep the common case aka no item present as fast as possible
  265. vals = self._container.setdefault(key_lower, new_vals)
  266. if new_vals is not vals:
  267. # if there are values here, then there is at least the initial
  268. # key/value pair
  269. assert len(vals) >= 2
  270. if combine:
  271. vals[-1] = vals[-1] + ", " + val
  272. else:
  273. vals.append(val)
  274. def extend(self, *args: ValidHTTPHeaderSource, **kwargs: str) -> None:
  275. """Generic import function for any type of header-like object.
  276. Adapted version of MutableMapping.update in order to insert items
  277. with self.add instead of self.__setitem__
  278. """
  279. if len(args) > 1:
  280. raise TypeError(
  281. f"extend() takes at most 1 positional arguments ({len(args)} given)"
  282. )
  283. other = args[0] if len(args) >= 1 else ()
  284. if isinstance(other, HTTPHeaderDict):
  285. for key, val in other.iteritems():
  286. self.add(key, val)
  287. elif isinstance(other, typing.Mapping):
  288. for key, val in other.items():
  289. self.add(key, val)
  290. elif isinstance(other, typing.Iterable):
  291. other = typing.cast(typing.Iterable[tuple[str, str]], other)
  292. for key, value in other:
  293. self.add(key, value)
  294. elif hasattr(other, "keys") and hasattr(other, "__getitem__"):
  295. # THIS IS NOT A TYPESAFE BRANCH
  296. # In this branch, the object has a `keys` attr but is not a Mapping or any of
  297. # the other types indicated in the method signature. We do some stuff with
  298. # it as though it partially implements the Mapping interface, but we're not
  299. # doing that stuff safely AT ALL.
  300. for key in other.keys():
  301. self.add(key, other[key])
  302. for key, value in kwargs.items():
  303. self.add(key, value)
  304. @typing.overload
  305. def getlist(self, key: str) -> list[str]: ...
  306. @typing.overload
  307. def getlist(self, key: str, default: _DT) -> list[str] | _DT: ...
  308. def getlist(
  309. self, key: str, default: _Sentinel | _DT = _Sentinel.not_passed
  310. ) -> list[str] | _DT:
  311. """Returns a list of all the values for the named field. Returns an
  312. empty list if the key doesn't exist."""
  313. if isinstance(key, bytes):
  314. key = key.decode("latin-1")
  315. try:
  316. vals = self._container[key.lower()]
  317. except KeyError:
  318. if default is _Sentinel.not_passed:
  319. # _DT is unbound; empty list is instance of List[str]
  320. return []
  321. # _DT is bound; default is instance of _DT
  322. return default
  323. else:
  324. # _DT may or may not be bound; vals[1:] is instance of List[str], which
  325. # meets our external interface requirement of `Union[List[str], _DT]`.
  326. return vals[1:]
  327. def _prepare_for_method_change(self) -> Self:
  328. """
  329. Remove content-specific header fields before changing the request
  330. method to GET or HEAD according to RFC 9110, Section 15.4.
  331. """
  332. content_specific_headers = [
  333. "Content-Encoding",
  334. "Content-Language",
  335. "Content-Location",
  336. "Content-Type",
  337. "Content-Length",
  338. "Digest",
  339. "Last-Modified",
  340. ]
  341. for header in content_specific_headers:
  342. self.discard(header)
  343. return self
  344. # Backwards compatibility for httplib
  345. getheaders = getlist
  346. getallmatchingheaders = getlist
  347. iget = getlist
  348. # Backwards compatibility for http.cookiejar
  349. get_all = getlist
  350. def __repr__(self) -> str:
  351. return f"{type(self).__name__}({dict(self.itermerged())})"
  352. def _copy_from(self, other: HTTPHeaderDict) -> None:
  353. for key in other:
  354. val = other.getlist(key)
  355. self._container[key.lower()] = [key, *val]
  356. def copy(self) -> Self:
  357. clone = type(self)()
  358. clone._copy_from(self)
  359. return clone
  360. def iteritems(self) -> typing.Iterator[tuple[str, str]]:
  361. """Iterate over all header lines, including duplicate ones."""
  362. for key in self:
  363. vals = self._container[key.lower()]
  364. for val in vals[1:]:
  365. yield vals[0], val
  366. def itermerged(self) -> typing.Iterator[tuple[str, str]]:
  367. """Iterate over all headers, merging duplicate ones together."""
  368. for key in self:
  369. val = self._container[key.lower()]
  370. yield val[0], ", ".join(val[1:])
  371. def items(self) -> HTTPHeaderDictItemView: # type: ignore[override]
  372. return HTTPHeaderDictItemView(self)
  373. def _has_value_for_header(self, header_name: str, potential_value: str) -> bool:
  374. if header_name in self:
  375. return potential_value in self._container[header_name.lower()][1:]
  376. return False
  377. def __ior__(self, other: object) -> HTTPHeaderDict:
  378. # Supports extending a header dict in-place using operator |=
  379. # combining items with add instead of __setitem__
  380. maybe_constructable = ensure_can_construct_http_header_dict(other)
  381. if maybe_constructable is None:
  382. return NotImplemented
  383. self.extend(maybe_constructable)
  384. return self
  385. def __or__(self, other: object) -> Self:
  386. # Supports merging header dicts using operator |
  387. # combining items with add instead of __setitem__
  388. maybe_constructable = ensure_can_construct_http_header_dict(other)
  389. if maybe_constructable is None:
  390. return NotImplemented
  391. result = self.copy()
  392. result.extend(maybe_constructable)
  393. return result
  394. def __ror__(self, other: object) -> Self:
  395. # Supports merging header dicts using operator | when other is on left side
  396. # combining items with add instead of __setitem__
  397. maybe_constructable = ensure_can_construct_http_header_dict(other)
  398. if maybe_constructable is None:
  399. return NotImplemented
  400. result = type(self)(maybe_constructable)
  401. result.extend(self)
  402. return result