# Copyright Spack Project Developers. See COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import bisect
import re
import struct
from struct import calcsize, unpack, unpack_from
from typing import BinaryIO, Callable, Dict, List, NamedTuple, Optional, Pattern, Tuple
[docs]
class ELF_CONSTANTS:
    """Namespace for the numeric ELF constants used by this module."""

    # ELF identification: magic bytes, word size and byte order
    MAGIC = b"\x7fELF"
    CLASS32 = 1
    CLASS64 = 2
    DATA2LSB = 1
    DATA2MSB = 2

    # Object file types
    ET_EXEC = 2
    ET_DYN = 3

    # Program header types
    PT_LOAD = 1
    PT_DYNAMIC = 2
    PT_INTERP = 3

    # Dynamic array tags
    DT_NULL = 0
    DT_NEEDED = 1
    DT_STRTAB = 5
    DT_SONAME = 14
    DT_RPATH = 15
    DT_RUNPATH = 29

    # Section header types
    SHT_STRTAB = 3
[docs]
class ElfFile:
    """Container for the data extracted from an ELF file.

    ``__init__`` only sets the ``has_*`` flags (all ``False``) and the two list attributes
    ``pt_load`` and ``dt_needed_strtab_offsets`` (both empty). Every other slot stays unassigned
    until a parsing routine fills it in, so check the matching ``has_*`` flag before reading it.
    """

    # General properties of the file
    is_64_bit: bool
    is_little_endian: bool
    byte_order: str  # prefix prepended to ``struct`` format strings
    elf_hdr: ElfHeader
    pt_load: List[Tuple[int, int]]  # (p_offset, p_vaddr) pairs

    # PT_INTERP program header and the interpreter string it refers to
    has_pt_interp: bool
    pt_interp_p_offset: int
    pt_interp_p_filesz: int
    pt_interp_str: bytes

    # PT_DYNAMIC program header
    has_pt_dynamic: bool
    pt_dynamic_p_offset: int
    pt_dynamic_p_filesz: int
    pt_dynamic_strtab_offset: int  # file offset of the dynamic section's string table

    # DT_RPATH / DT_RUNPATH
    has_rpath: bool
    dt_rpath_offset: int  # file offset of the dynamic array entry itself
    dt_rpath_str: bytes
    rpath_strtab_offset: int  # offset of the string inside the string table
    is_runpath: bool

    # DT_NEEDED
    has_needed: bool
    dt_needed_strtab_offsets: List[int]
    dt_needed_strs: List[bytes]

    # DT_SONAME
    has_soname: bool
    dt_soname_strtab_offset: int
    dt_soname_str: bytes

    __slots__ = [
        "is_64_bit",
        "is_little_endian",
        "byte_order",
        "elf_hdr",
        "pt_load",
        # pt_interp
        "has_pt_interp",
        "pt_interp_p_offset",
        "pt_interp_p_filesz",
        "pt_interp_str",
        # pt_dynamic
        "has_pt_dynamic",
        "pt_dynamic_p_offset",
        "pt_dynamic_p_filesz",
        "pt_dynamic_strtab_offset",  # string table for dynamic section
        # rpath
        "has_rpath",
        "dt_rpath_offset",
        "dt_rpath_str",
        "rpath_strtab_offset",
        "is_runpath",
        # dt needed
        "has_needed",
        "dt_needed_strtab_offsets",
        "dt_needed_strs",
        # dt soname
        "has_soname",
        "dt_soname_strtab_offset",
        "dt_soname_str",
    ]

    def __init__(self):
        # Containers that parsing appends to
        self.pt_load = []
        self.dt_needed_strtab_offsets = []

        # Nothing has been found yet
        self.has_pt_interp = False
        self.has_pt_dynamic = False
        self.has_rpath = False
        self.has_needed = False
        self.has_soname = False
[docs]
def parse_c_string(byte_string: bytes, start: int = 0) -> bytes:
    """
    Extract the null-terminated string that begins at ``start``.

    Arguments:
        byte_string: buffer that contains the C-string
        start: offset into the buffer where the C-string begins

    Returns:
        bytes: a copy of the C-string, without the terminating null byte

    Raises:
        ElfParsingError: if no null byte is found at or after ``start``
    """
    terminator = byte_string.find(b"\0", start)
    if terminator < 0:
        raise ElfParsingError("C-string is not null terminated")
    return byte_string[start:terminator]
[docs]
def read_exactly(f: BinaryIO, num_bytes: int, msg: str) -> bytes:
    """
    Read ``num_bytes`` bytes from the current position of ``f``, or fail.

    Arguments:
        f: file handle
        num_bytes: number of bytes to read
        msg: message of the error raised when fewer bytes are available

    Returns:
        bytes: the ``num_bytes`` bytes that were read.

    Raises:
        ElfParsingError: if the read returns a different number of bytes
    """
    chunk = f.read(num_bytes)
    if len(chunk) == num_bytes:
        return chunk
    raise ElfParsingError(msg)
[docs]
def parse_pt_interp(f: BinaryIO, elf: ElfFile) -> None:
    """
    Read the interpreter string (the path to the dynamic linker) and store it in
    ``elf.pt_interp_str``.

    Arguments:
        f: file handle
        elf: ELF file parser data; ``pt_interp_p_offset`` and ``pt_interp_p_filesz`` must be set

    Raises:
        ElfParsingError: if seeking or reading fails, or the string is not null terminated
    """
    try:
        f.seek(elf.pt_interp_p_offset)
    except OSError:
        raise ElfParsingError("Could not seek to PT_INTERP entry")

    elf.pt_interp_str = parse_c_string(
        read_exactly(f, elf.pt_interp_p_filesz, "Malformed PT_INTERP entry")
    )
[docs]
def find_strtab_size_at_offset(f: BinaryIO, elf: ElfFile, offset: int) -> int:
    """
    Look up the size of the string table section that starts at a known file offset.

    The section header table is scanned for an entry of type SHT_STRTAB whose ``sh_offset``
    equals ``offset``.

    Arguments:
        f: file handle
        elf: ELF file parser data
        offset: offset of the section in the file (i.e. ``sh_offset``)

    Returns:
        int: the size of the string table in bytes

    Raises:
        ElfParsingError: if the section headers cannot be read or no matching section exists
    """
    field_layout = "LLQQQQLLQQ" if elf.is_64_bit else "LLLLLLLLLL"
    section_hdr_fmt = elf.byte_order + field_layout
    entry_size = calcsize(section_hdr_fmt)

    try:
        f.seek(elf.elf_hdr.e_shoff)
    except OSError:
        raise ElfParsingError("Could not seek to section header table")

    for _ in range(elf.elf_hdr.e_shnum):
        raw = read_exactly(f, entry_size, "Malformed section header")
        section = SectionHeader(*unpack(section_hdr_fmt, raw))
        if section.sh_type != ELF_CONSTANTS.SHT_STRTAB or section.sh_offset != offset:
            continue
        return section.sh_size

    raise ElfParsingError("Could not determine strtab size")
[docs]
def retrieve_strtab(f: BinaryIO, elf: ElfFile, offset: int) -> bytes:
    """
    Read the complete string table located at the given file offset. Its size is
    obtained by looking the table up in the section headers.

    Arguments:
        f: file handle
        elf: ELF file parser data
        offset: file offset at which the string table starts

    Returns:
        bytes: the raw contents of the string table

    Raises:
        ElfParsingError: if the size cannot be determined or the table cannot be read
    """
    table_size = find_strtab_size_at_offset(f, elf, offset)

    try:
        f.seek(offset)
    except OSError:
        raise ElfParsingError("Could not seek to string table")

    table = read_exactly(f, table_size, "Could not read string table")
    return table
[docs]
def vaddr_to_offset(elf: ElfFile, vaddr: int) -> int:
    """
    Given a virtual address, find the corresponding offset in the ELF file itself.

    The address is attributed to the last entry of ``elf.pt_load`` whose ``p_vaddr`` is not
    greater than ``vaddr``. The binary search assumes ``elf.pt_load`` is ordered by ascending
    ``p_vaddr``.

    Arguments:
        elf: ELF file parser data
        vaddr: virtual address

    Returns:
        int: the file offset that corresponds to ``vaddr``

    Raises:
        ElfParsingError: if ``vaddr`` lies below every PT_LOAD segment (or there are none)
    """
    segment_vaddrs = [p_vaddr for (_, p_vaddr) in elf.pt_load]
    idx = bisect.bisect_right(segment_vaddrs, vaddr) - 1

    # idx == -1 means no segment starts at or below ``vaddr``. Indexing with -1 would silently
    # wrap around to the *last* segment and produce a bogus file offset, so reject it instead.
    if idx < 0:
        raise ElfParsingError("Virtual address is not covered by any PT_LOAD segment")

    p_offset, p_vaddr = elf.pt_load[idx]
    return p_offset - p_vaddr + vaddr
[docs]
def parse_pt_dynamic(f: BinaryIO, elf: ElfFile) -> None:
    """
    Walk the dynamic array of an ELF file and record its rpath/runpath, soname and
    needed-library entries on ``elf``.

    Arguments:
        f: file handle
        elf: ELF file parse data

    Raises:
        ElfParsingError: if the dynamic array cannot be read, if it holds more than one
            DT_RPATH/DT_RUNPATH entry in total, if it does not hold exactly one DT_STRTAB entry,
            or if the string table or one of the strings cannot be retrieved
    """
    entry_fmt = elf.byte_order + ("qQ" if elf.is_64_bit else "lL")
    entry_size = calcsize(entry_fmt)

    entry_offset = elf.pt_dynamic_p_offset  # file offset of the entry being inspected
    num_rpath_entries = 0  # DT_RPATH and DT_RUNPATH together
    num_strtab_entries = 0

    try:
        f.seek(elf.pt_dynamic_p_offset)
    except OSError:
        raise ElfParsingError("Could not seek to PT_DYNAMIC entry")

    # A broken ELF file may lack the terminating DT_NULL; never read past the advertised size.
    for _ in range(elf.pt_dynamic_p_filesz // entry_size):
        raw = read_exactly(f, entry_size, "Malformed dynamic array entry")
        tag, val = unpack(entry_fmt, raw)

        if tag == ELF_CONSTANTS.DT_NULL:
            break

        if tag in (ELF_CONSTANTS.DT_RPATH, ELF_CONSTANTS.DT_RUNPATH):
            num_rpath_entries += 1
            elf.rpath_strtab_offset = val
            elf.dt_rpath_offset = entry_offset
            elf.is_runpath = tag == ELF_CONSTANTS.DT_RUNPATH
            elf.has_rpath = True
        elif tag == ELF_CONSTANTS.DT_STRTAB:
            num_strtab_entries += 1
            strtab_vaddr = val
        elif tag == ELF_CONSTANTS.DT_NEEDED:
            elf.has_needed = True
            elf.dt_needed_strtab_offsets.append(val)
        elif tag == ELF_CONSTANTS.DT_SONAME:
            elf.has_soname = True
            elf.dt_soname_strtab_offset = val

        entry_offset += entry_size

    if num_rpath_entries == 0:
        # No rpath/runpath, that happens.
        elf.has_rpath = False
    elif num_rpath_entries != 1:
        raise ElfParsingError("Could not find a unique rpath/runpath.")

    if num_strtab_entries != 1:
        raise ElfParsingError("Could not find a unique strtab of for the dynamic section strings")

    # Without any string to look up there is no reason to load the string table.
    if not elf.has_rpath and not elf.has_soname and not elf.has_needed:
        return

    elf.pt_dynamic_strtab_offset = vaddr_to_offset(elf, strtab_vaddr)
    strings = retrieve_strtab(f, elf, elf.pt_dynamic_strtab_offset)

    if elf.has_needed:
        elf.dt_needed_strs = [
            parse_c_string(strings, offset) for offset in elf.dt_needed_strtab_offsets
        ]

    if elf.has_soname:
        elf.dt_soname_str = parse_c_string(strings, elf.dt_soname_strtab_offset)

    if elf.has_rpath:
        elf.dt_rpath_str = parse_c_string(strings, elf.rpath_strtab_offset)
def _do_parse_elf(
    f: BinaryIO, interpreter: bool = True, dynamic_section: bool = True, only_header: bool = False
) -> ElfFile:
    """Parse the ELF file behind ``f``, which must be positioned at offset zero.

    Arguments:
        f: file handle
        interpreter: also parse the PT_INTERP entry when the file has one
        dynamic_section: also parse the PT_DYNAMIC entry when the file has one
        only_header: return right after the ELF header has been parsed

    Returns:
        ElfFile: the parsed data

    Raises:
        ElfParsingError: if ``f`` is not at offset zero, or the file is neither ET_EXEC nor ET_DYN
    """
    # Offsets stored in the ELF file are used as absolute file offsets, so parsing an ELF
    # file embedded at a nonzero offset is not supported (yet?).
    if f.tell() != 0:
        raise ElfParsingError("Cannot parse at a nonzero offset")

    elf = ElfFile()
    parse_header(f, elf)

    if only_header:
        return elf

    # Only executables and shared libraries are handled.
    supported_types = (ELF_CONSTANTS.ET_EXEC, ELF_CONSTANTS.ET_DYN)
    if elf.elf_hdr.e_type not in supported_types:
        raise ElfParsingError("Not an ET_DYN or ET_EXEC type")

    parse_program_headers(f, elf)

    # PT_INTERP
    if interpreter and elf.has_pt_interp:
        parse_pt_interp(f, elf)

    # PT_DYNAMIC; skipped when there are no PT_LOAD entries
    if dynamic_section and elf.has_pt_dynamic and len(elf.pt_load) > 0:
        parse_pt_dynamic(f, elf)

    return elf
[docs]
def parse_elf(
    f: BinaryIO,
    interpreter: bool = False,
    dynamic_section: bool = False,
    only_header: bool = False,
) -> ElfFile:
    """Given a file handle ``f`` for an ELF file opened in binary mode, return an ElfFile
    object that stores the data parsed from it.

    Arguments:
        f: file handle, positioned at offset zero
        interpreter: also parse the PT_INTERP entry
        dynamic_section: also parse the PT_DYNAMIC entry (rpath, soname, needed libraries)
        only_header: stop after the ELF header

    Raises:
        ElfParsingError: if the file cannot be parsed as ELF
    """
    try:
        elf = _do_parse_elf(
            f, interpreter=interpreter, dynamic_section=dynamic_section, only_header=only_header
        )
    except (DeprecationWarning, struct.error):
        # According to the docs old versions of Python can throw DeprecationWarning
        # instead of struct.error.
        raise ElfParsingError("Malformed ELF file")
    return elf
[docs]
def get_rpaths(path: str) -> Optional[List[str]]:
    """Returns list of rpaths of the given file as UTF-8 strings, or None if not set.

    None is also returned when the file cannot be parsed as ELF."""
    try:
        with open(path, "rb") as f:
            elf = parse_elf(f, interpreter=False, dynamic_section=True)
            if not elf.has_rpath:
                return None
            return elf.dt_rpath_str.decode("utf-8").split(":")
    except ElfParsingError:
        return None
[docs]
def get_interpreter(path: str) -> Optional[str]:
    """Returns the interpreter of the given file as UTF-8 string, or None if not set.

    None is also returned when the file cannot be parsed as ELF."""
    try:
        with open(path, "rb") as f:
            elf = parse_elf(f, interpreter=True, dynamic_section=False)
            if not elf.has_pt_interp:
                return None
            return elf.pt_interp_str.decode("utf-8")
    except ElfParsingError:
        return None
def _delete_dynamic_array_entry(
    f: BinaryIO, elf: ElfFile, should_delete: Callable[[int, int], bool]
) -> None:
    """Remove entries from the dynamic array in place by compacting it.

    Entries for which ``should_delete(tag, val)`` is true are dropped; every entry that is kept
    is moved towards the start of the array to close the gap. The terminating DT_NULL entry is
    always kept and ends the walk.

    Arguments:
        f: file handle opened for reading and writing
        elf: ELF file parser data
        should_delete: predicate called with the tag and value of each non-DT_NULL entry

    Raises:
        ElfParsingError: if the dynamic array cannot be reached or read
    """
    try:
        f.seek(elf.pt_dynamic_p_offset)
    except OSError:
        raise ElfParsingError("Could not seek to PT_DYNAMIC entry")

    entry_fmt = elf.byte_order + ("qQ" if elf.is_64_bit else "lL")
    entry_size = calcsize(entry_fmt)

    write_pos = elf.pt_dynamic_p_offset  # where the next kept entry goes
    read_pos = elf.pt_dynamic_p_offset  # where the entry being inspected came from

    for _ in range(elf.pt_dynamic_p_filesz // entry_size):
        raw = read_exactly(f, entry_size, "Malformed dynamic array entry")
        tag, val = unpack(entry_fmt, raw)
        is_terminator = tag == ELF_CONSTANTS.DT_NULL

        if is_terminator or not should_delete(tag, val):
            # Only rewrite once an earlier entry was dropped; until then the entry is already
            # in its final place.
            if write_pos != read_pos:
                f.seek(write_pos)
                f.write(raw)
                # Go back to the entry that follows the one just read.
                f.seek(read_pos + entry_size)
            write_pos += entry_size

        if is_terminator:
            break

        read_pos += entry_size
[docs]
def delete_rpath(path: str) -> None:
    """Modifies a binary to remove the rpath. It zeros out the rpath string and also drops the
    DT_R(UN)PATH entry from the dynamic section, so it doesn't show up in 'readelf -d file', nor
    in 'strings file'. Files without an rpath are left unchanged."""

    def is_rpath_entry(tag, _):
        return tag in (ELF_CONSTANTS.DT_RPATH, ELF_CONSTANTS.DT_RUNPATH)

    with open(path, "rb+") as f:
        elf = parse_elf(f, interpreter=False, dynamic_section=True)

        if elf.has_rpath:
            # Overwrite the rpath *string* in the string table with null bytes
            f.seek(elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset)
            f.write(bytes(len(elf.dt_rpath_str)))

            # Drop the DT_RPATH / DT_RUNPATH entries from the dynamic section
            _delete_dynamic_array_entry(f, elf, is_rpath_entry)
[docs]
def delete_needed_from_elf(f: BinaryIO, elf: ElfFile, needed: bytes) -> None:
    """Delete a needed library from the dynamic section of an ELF file.

    Nothing happens when ``needed`` is not among the parsed DT_NEEDED strings.

    Arguments:
        f: file handle opened for reading and writing
        elf: ELF file parser data, parsed with the dynamic section
        needed: name of the library to drop, as stored in the string table
    """
    if not elf.has_needed:
        return

    try:
        position = elf.dt_needed_strs.index(needed)
    except ValueError:
        return

    # A DT_NEEDED entry is identified by the string table offset it refers to.
    strtab_offset = elf.dt_needed_strtab_offsets[position]

    def is_target(tag, val):
        return tag == ELF_CONSTANTS.DT_NEEDED and val == strtab_offset

    _delete_dynamic_array_entry(f, elf, is_target)
[docs]
class CStringType:
    """Identifiers for the kinds of C-string this module can update."""

    PT_INTERP = 1
    RPATH = 2
[docs]
class UpdateCStringAction:
    """Pending replacement of a C-string stored at a known offset in a file.

    Attributes are plain instance attributes:
        old_value: the string currently in the file, without its terminating null byte
        new_value: the string that should replace it
        offset: file offset at which the string starts
    """

    def __init__(self, old_value: bytes, new_value: bytes, offset: int):
        self.old_value = old_value
        self.new_value = new_value
        self.offset = offset

    @property
    def inplace(self) -> bool:
        """Whether the new string fits in the space occupied by the old one."""
        return len(self.new_value) <= len(self.old_value)

    def apply(self, f: BinaryIO) -> None:
        """Overwrite the old string with the new one in ``f``.

        Arguments:
            f: file handle opened for writing

        Raises:
            AssertionError: if the new string is longer than the old one
        """
        # An explicit check instead of ``assert``: assert statements are removed under
        # ``python -O``, and writing a longer string would clobber whatever follows the old one.
        if not self.inplace:
            raise AssertionError("new value does not fit in the space of the old value")

        f.seek(self.offset)
        f.write(self.new_value)

        # We zero out the bits we shortened because (a) it should be a
        # C-string and (b) it's nice not to have spurious parts of old
        # paths in the output of `strings file`. Note that we're all
        # good when pad == 0; the original terminating null is used.
        f.write(b"\x00" * (len(self.old_value) - len(self.new_value)))
def _get_rpath_substitution(
    elf: ElfFile, regex: Pattern, substitutions: Dict[bytes, bytes]
) -> Optional[UpdateCStringAction]:
    """Compute the rpath update that results from applying the prefix substitutions.

    Arguments:
        elf: ELF file parser data, parsed with the dynamic section
        regex: pattern matched at the start of every rpath entry
        substitutions: maps each matched prefix to its replacement

    Returns:
        The action that rewrites the rpath string, or None when the file has no rpath, the rpath
        has no non-empty entries, or no entry matches.
    """
    # Without an rpath there is nothing to replace.
    if not elf.has_rpath:
        return None

    # Keep only the non-empty entries. Sometimes a run of trailing colons ::::: is used as
    # padding; dropping it makes it more likely that the new string does not grow.
    rpaths = [entry for entry in elf.dt_rpath_str.split(b":") if len(entry)]
    if not rpaths:
        return None

    changed = False
    for i, old_rpath in enumerate(rpaths):
        match = regex.match(old_rpath)
        if not match:
            continue
        changed = True
        rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :]

    # Nothing to replace!
    if not changed:
        return None

    # The rpath lives at a given offset in the string table used by the dynamic section.
    rpath_file_offset = elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset
    return UpdateCStringAction(
        old_value=elf.dt_rpath_str, new_value=b":".join(rpaths), offset=rpath_file_offset
    )
def _get_pt_interp_substitution(
    elf: ElfFile, regex: Pattern, substitutions: Dict[bytes, bytes]
) -> Optional[UpdateCStringAction]:
    """Compute the interpreter update that results from applying the prefix substitutions.

    Arguments:
        elf: ELF file parser data, parsed with the interpreter
        regex: pattern matched at the start of the interpreter path
        substitutions: maps each matched prefix to its replacement

    Returns:
        The action that rewrites the interpreter string, or None when the file has no
        interpreter or the interpreter does not match.
    """
    if not elf.has_pt_interp:
        return None

    current = elf.pt_interp_str
    match = regex.match(current)
    if not match:
        return None

    replacement = substitutions[match.group()] + current[match.end() :]
    return UpdateCStringAction(
        old_value=current, new_value=replacement, offset=elf.pt_interp_p_offset
    )
[docs]
def substitute_rpath_and_pt_interp_in_place_or_raise(
    path: str, substitutions: Dict[bytes, bytes]
) -> bool:
    """Returns true if the rpath and interpreter were modified, false if there was nothing to do.
    Raises ElfCStringUpdatesFailed if the ELF file cannot be updated in-place. This exception
    contains a list of actions to perform with other tools. The file is left untouched in this
    case.

    Arguments:
        path: path of the file to update
        substitutions: maps each old prefix to the prefix that replaces it; when empty, false is
            returned without opening the file
    """
    # With no prefixes the joined pattern would be the empty regex, which matches at the start
    # of every string; the lookup ``substitutions[b""]`` would then raise KeyError. There is
    # nothing to substitute, so report that nothing was done.
    if not substitutions:
        return False

    regex = re.compile(b"|".join(re.escape(p) for p in substitutions))

    try:
        with open(path, "rb+") as f:
            elf = parse_elf(f, interpreter=True, dynamic_section=True)

            # Get the actions to perform.
            rpath = _get_rpath_substitution(elf, regex, substitutions)
            pt_interp = _get_pt_interp_substitution(elf, regex, substitutions)

            # Nothing to do.
            if not rpath and not pt_interp:
                return False

            # If we can't update in-place, leave it to other tools, don't do partial updates.
            if (rpath and not rpath.inplace) or (pt_interp and not pt_interp.inplace):
                raise ElfCStringUpdatesFailed(rpath, pt_interp)

            # Otherwise, apply the updates.
            if rpath:
                rpath.apply(f)

            if pt_interp:
                pt_interp.apply(f)

            return True

    except ElfParsingError:
        # This just means the file wasn't an elf file, so there's no point
        # in updating its rpath anyways; ignore this problem.
        return False
[docs]
def pt_interp(path: str) -> Optional[str]:
    """Retrieve the interpreter of an executable at `path`.

    Returns None when the file cannot be opened, cannot be parsed as ELF, or has no
    interpreter."""
    try:
        with open(path, "rb") as f:
            elf = parse_elf(f, interpreter=True)
    except (OSError, ElfParsingError):
        return None

    return elf.pt_interp_str.decode("utf-8") if elf.has_pt_interp else None
[docs]
def get_elf_compat(path):
    """Get a triplet (EI_CLASS, EI_DATA, e_machine) from an ELF file, which can be used to see if
    two ELF files are compatible.

    The first two items are returned as the booleans ``is_64_bit`` and ``is_little_endian``."""
    # Only the ELF header is needed. Comparing these triplets lets callers drop shared
    # libraries that are not compatible with the host.
    with open(path, "rb") as f:
        elf = parse_elf(f, only_header=True)
        word_size, endianness, machine = (
            elf.is_64_bit,
            elf.is_little_endian,
            elf.elf_hdr.e_machine,
        )
        return (word_size, endianness, machine)
[docs]
class ElfCStringUpdatesFailed(Exception):
    """Raised when the rpath and/or interpreter of an ELF file cannot be updated in place.

    The pending actions are kept on the exception so that they can be carried out by other
    means; an action is None when there was nothing to update for that string.
    """

    def __init__(
        self, rpath: Optional[UpdateCStringAction], pt_interp: Optional[UpdateCStringAction]
    ):
        # Pending rpath update, if any
        self.rpath = rpath
        # Pending interpreter update, if any
        self.pt_interp = pt_interp
[docs]
class ElfParsingError(Exception):
    """Raised when a file cannot be parsed as a (supported) ELF file."""