_elffile.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
"""
ELF file parser.

This provides a class ``ELFFile`` that parses an ELF executable and exposes it
through an interface similar to that of ``ZipFile``. Only the read interface is
implemented.

Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
"""
  8. from __future__ import annotations
  9. import enum
  10. import os
  11. import struct
  12. from typing import IO
  13. class ELFInvalid(ValueError):
  14. pass
  15. class EIClass(enum.IntEnum):
  16. C32 = 1
  17. C64 = 2
  18. class EIData(enum.IntEnum):
  19. Lsb = 1
  20. Msb = 2
  21. class EMachine(enum.IntEnum):
  22. I386 = 3
  23. S390 = 22
  24. Arm = 40
  25. X8664 = 62
  26. AArc64 = 183
  27. class ELFFile:
  28. """
  29. Representation of an ELF executable.
  30. """
  31. def __init__(self, f: IO[bytes]) -> None:
  32. self._f = f
  33. try:
  34. ident = self._read("16B")
  35. except struct.error as e:
  36. raise ELFInvalid("unable to parse identification") from e
  37. magic = bytes(ident[:4])
  38. if magic != b"\x7fELF":
  39. raise ELFInvalid(f"invalid magic: {magic!r}")
  40. self.capacity = ident[4] # Format for program header (bitness).
  41. self.encoding = ident[5] # Data structure encoding (endianness).
  42. try:
  43. # e_fmt: Format for program header.
  44. # p_fmt: Format for section header.
  45. # p_idx: Indexes to find p_type, p_offset, and p_filesz.
  46. e_fmt, self._p_fmt, self._p_idx = {
  47. (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB.
  48. (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB.
  49. (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB.
  50. (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB.
  51. }[(self.capacity, self.encoding)]
  52. except KeyError as e:
  53. raise ELFInvalid(
  54. f"unrecognized capacity ({self.capacity}) or encoding ({self.encoding})"
  55. ) from e
  56. try:
  57. (
  58. _,
  59. self.machine, # Architecture type.
  60. _,
  61. _,
  62. self._e_phoff, # Offset of program header.
  63. _,
  64. self.flags, # Processor-specific flags.
  65. _,
  66. self._e_phentsize, # Size of section.
  67. self._e_phnum, # Number of sections.
  68. ) = self._read(e_fmt)
  69. except struct.error as e:
  70. raise ELFInvalid("unable to parse machine and section information") from e
  71. def _read(self, fmt: str) -> tuple[int, ...]:
  72. return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
  73. @property
  74. def interpreter(self) -> str | None:
  75. """
  76. The path recorded in the ``PT_INTERP`` section header.
  77. """
  78. for index in range(self._e_phnum):
  79. self._f.seek(self._e_phoff + self._e_phentsize * index)
  80. try:
  81. data = self._read(self._p_fmt)
  82. except struct.error:
  83. continue
  84. if data[self._p_idx[0]] != 3: # Not PT_INTERP.
  85. continue
  86. self._f.seek(data[self._p_idx[1]])
  87. return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
  88. return None