diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fe484c7..2e3ecac 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -49,6 +49,7 @@ jobs: with: name: coverage-${{ matrix.python }} path: .coverage.* + include-hidden-files: true if-no-files-found: ignore ruff: diff --git a/src/elfdeps/__init__.py b/src/elfdeps/__init__.py index eb06ed0..34b9707 100644 --- a/src/elfdeps/__init__.py +++ b/src/elfdeps/__init__.py @@ -5,6 +5,9 @@ "ELFError", "ELFInfo", "SOInfo", + "SymbolBinding", + "SymbolInfo", + "SymbolType", "analyze_dirtree", "analyze_elffile", "analyze_file", @@ -27,6 +30,9 @@ ELFAnalyzeSettings, ELFInfo, SOInfo, + SymbolBinding, + SymbolInfo, + SymbolType, analyze_elffile, analyze_file, ) diff --git a/src/elfdeps/_elfdeps.py b/src/elfdeps/_elfdeps.py index d20c63b..bfd06a5 100644 --- a/src/elfdeps/_elfdeps.py +++ b/src/elfdeps/_elfdeps.py @@ -7,14 +7,21 @@ """ import dataclasses +import enum import os import pathlib import stat +import typing from elftools.elf.constants import VER_FLAGS from elftools.elf.dynamic import DynamicSection from elftools.elf.elffile import ELFFile -from elftools.elf.gnuversions import GNUVerDefSection, GNUVerNeedSection +from elftools.elf.gnuversions import ( + GNUVerDefSection, + GNUVerNeedSection, + GNUVerSymSection, +) +from elftools.elf.sections import SymbolTableSection from ._fileinfo import ( LD_PREFIX, @@ -24,6 +31,65 @@ ) +class SymbolBinding(str, enum.Enum): + """ELF dynamic symbol binding (STB_*)""" + + GLOBAL = "global" # Global symbol + WEAK = "weak" # Weak symbol + + +class SymbolType(str, enum.Enum): + """ELF dynamic symbol type (STT_*)""" + + NOTYPE = "notype" # Symbol type is unspecified + OBJECT = "object" # Symbol is a data object + FUNC = "func" # Symbol is a code object + COMMON = "common" # Symbol is a common data object + TLS = "tls" # Symbol is thread-local data object + GNU_IFUNC = "ifunc" # Symbol is indirect code object + + +_SYMBOL_BINDING_MAP: dict[str, 
SymbolBinding] = { + "STB_GLOBAL": SymbolBinding.GLOBAL, + "STB_WEAK": SymbolBinding.WEAK, +} + +_SYMBOL_TYPE_MAP: dict[str, SymbolType] = { + "STT_NOTYPE": SymbolType.NOTYPE, + "STT_OBJECT": SymbolType.OBJECT, + "STT_FUNC": SymbolType.FUNC, + "STT_COMMON": SymbolType.COMMON, + "STT_TLS": SymbolType.TLS, + # STT_GNU_IFUNC and STT_LOOS constants have the same int value + "STT_GNU_IFUNC": SymbolType.GNU_IFUNC, + "STT_LOOS": SymbolType.GNU_IFUNC, +} + + +@dataclasses.dataclass(frozen=True, slots=True, order=True) +class SymbolInfo: + """Dynamic symbol information + + name: symbol name (e.g. ``printf``) + version: version tag (e.g. ``GLIBC_2.34``) + binding: symbol binding (global or weak) + type: symbol type (func, object, etc.) + """ + + name: str + version: str | None + binding: SymbolBinding = dataclasses.field(compare=False) + type: SymbolType + + def __str__(self) -> str: + if self.version: + return f"{self.name}@{self.version}" + return self.name + + def __repr__(self) -> str: + return str(self) + + @dataclasses.dataclass(frozen=True, order=True) class SOInfo: """Shared object information @@ -83,6 +149,8 @@ class ELFInfo: marker: str = "" # useful extras runpath: list[str] | None = None + exported_symbols: list[SymbolInfo] | None = None + imported_symbols: list[SymbolInfo] | None = None @dataclasses.dataclass(frozen=True) @@ -94,6 +162,7 @@ class ELFAnalyzeSettings: filter_soname: exclude sonames that don't match 'lib*.so*' require_interp: add dependency on ELF interpreter unique: remove duplicates + include_symbols: extract individual dynamic symbols Flag for collections (analyze tree, tarfile, zipfile) @@ -105,6 +174,7 @@ class ELFAnalyzeSettings: filter_soname: bool = False require_interp: bool = False unique: bool = True + include_symbols: bool = False ignore_suffix: set[str] | frozenset[str] = frozenset( {".py", ".md", ".rst", ".sh", ".txt"} ) @@ -170,9 +240,12 @@ def __init__( requires=[], provides=[], is_exec=is_exec, + exported_symbols=[] if 
def process_symbols(self) -> None:
    """Collect dynamic symbols from ``.dynsym`` into ``self.info``

    Symbols whose section index is ``SHN_UNDEF`` are appended to
    ``imported_symbols``; every other symbol goes to ``exported_symbols``.
    Unnamed symbols, symbols without default visibility, and symbols whose
    binding or type has no entry in the module's lookup tables are skipped.
    The version tag is resolved through ``self._version_map`` when a
    ``.gnu.version`` section is present.
    """
    symtab = typing.cast(
        SymbolTableSection | None,
        self.elffile.get_section_by_name(".dynsym"),
    )
    if symtab is None:
        # no dynamic symbol table, nothing to collect
        return
    versym = typing.cast(
        GNUVerSymSection | None,
        self.elffile.get_section_by_name(".gnu.version"),
    )
    assert self.info.exported_symbols is not None
    assert self.info.imported_symbols is not None
    known_versions = self._version_map

    for index, symbol in enumerate(symtab.iter_symbols()):
        symbol_name: str = symbol.name
        if not symbol_name:
            continue
        # only default visibility is kept (not internal, hidden, protected)
        if symbol["st_other"]["visibility"] != "STV_DEFAULT":
            continue
        if (bind := _SYMBOL_BINDING_MAP.get(symbol["st_info"]["bind"])) is None:
            continue
        if (kind := _SYMBOL_TYPE_MAP.get(symbol["st_info"]["type"])) is None:
            continue

        tag: str | None = None
        if versym is not None:
            # .gnu.version entries are looked up with the .dynsym index
            try:
                raw_ndx = versym.get_symbol(index)["ndx"]
            except (IndexError, KeyError):
                # best effort: leave the symbol unversioned
                pass
            else:
                # non-int values are left unversioned
                if isinstance(raw_ndx, int):
                    # keep the low 15 bits; the top bit is presumably the
                    # "hidden" flag of the entry -- TODO confirm
                    tag = known_versions.get(raw_ndx & 0x7FFF)

        entry = SymbolInfo(
            name=symbol_name,
            version=tag,
            binding=bind,
            type=kind,
        )
        if symbol["st_shndx"] == "SHN_UNDEF":
            self.info.imported_symbols.append(entry)
        else:
            self.info.exported_symbols.append(entry)
def test_symbols_default_none() -> None:
    """With default settings both symbol lists stay None."""
    result = elfdeps.analyze_file(pathlib.Path(sys.executable))
    assert result.exported_symbols is None
    assert result.imported_symbols is None


def test_symbols_python() -> None:
    """The Python binary imports symbols when include_symbols is True."""
    interpreter = pathlib.Path(sys.executable)
    result = elfdeps.analyze_file(interpreter, settings=SYMBOLS_SETTINGS)
    imported = result.imported_symbols
    exported = result.exported_symbols
    assert imported is not None
    assert exported is not None
    assert imported
    # every collected symbol carries a non-empty name
    assert all(sym.name for sym in imported + exported)


def test_symbols_libc() -> None:
    """libc exports many symbols, incl. versioned functions and objects."""
    candidates = [
        libdir / "libc.so.6"
        for libdir in (pathlib.Path("/lib64"), pathlib.Path("/lib"))
    ]
    present = [path for path in candidates if path.is_file()]
    if not present:
        pytest.skip("libc not found")

    for libc in present:
        result = elfdeps.analyze_file(libc, settings=SYMBOLS_SETTINGS)
        exported = result.exported_symbols
        assert exported is not None
        assert len(exported) > 100
        # at least one function symbol with a version tag
        assert any(
            sym.type == elfdeps.SymbolType.FUNC and sym.version for sym in exported
        )
        # at least one data object
        assert any(sym.type == elfdeps.SymbolType.OBJECT for sym in exported)


def test_symbolinfo_frozen() -> None:
    """SymbolInfo instances are immutable and slotted."""
    frozen_sym = elfdeps.SymbolInfo(
        name="test",
        version="V1",
        binding=elfdeps.SymbolBinding.GLOBAL,
        type=elfdeps.SymbolType.FUNC,
    )
    assert frozen_sym.__slots__ == ("name", "version", "binding", "type")
    with pytest.raises(dataclasses.FrozenInstanceError):
        frozen_sym.name = "other"  # type: ignore[misc]


def test_symbolinfo_str() -> None:
    """str() yields ``name@version``, or just the name when unversioned."""
    cases = [
        (
            elfdeps.SymbolInfo(
                name="printf",
                version="GLIBC_2.34",
                binding=elfdeps.SymbolBinding.GLOBAL,
                type=elfdeps.SymbolType.FUNC,
            ),
            "printf@GLIBC_2.34",
        ),
        (
            elfdeps.SymbolInfo(
                name="data_start",
                version=None,
                binding=elfdeps.SymbolBinding.WEAK,
                type=elfdeps.SymbolType.NOTYPE,
            ),
            "data_start",
        ),
    ]
    for symbol, expected in cases:
        assert str(symbol) == expected


def test_symbolinfo_ordering() -> None:
    """SymbolInfo sorts by its compared fields; binding does not count."""
    func = elfdeps.SymbolType.FUNC
    strong = elfdeps.SymbolBinding.GLOBAL
    weak = elfdeps.SymbolBinding.WEAK

    first = elfdeps.SymbolInfo("aaa", None, strong, func)
    second = elfdeps.SymbolInfo("bbb", None, weak, func)
    assert first < second
    assert sorted([second, first]) == [first, second]

    # identical name/version/type, different binding: equal, same hash
    global_foo = elfdeps.SymbolInfo("foo", "V1", strong, func)
    weak_foo = elfdeps.SymbolInfo("foo", "V1", weak, func)
    assert global_foo == weak_foo
    assert hash(global_foo) == hash(weak_foo)
    assert {global_foo, weak_foo} == {global_foo}


def test_symbol_binding_enum() -> None:
    """SymbolBinding members carry the expected string values."""
    expected = {
        elfdeps.SymbolBinding.GLOBAL: "global",
        elfdeps.SymbolBinding.WEAK: "weak",
    }
    for member, value in expected.items():
        assert member.value == value
    assert elfdeps.SymbolBinding("global") is elfdeps.SymbolBinding.GLOBAL


def test_symbol_type_enum() -> None:
    """SymbolType members carry the expected string values."""
    expected = {
        elfdeps.SymbolType.FUNC: "func",
        elfdeps.SymbolType.OBJECT: "object",
        elfdeps.SymbolType.NOTYPE: "notype",
        elfdeps.SymbolType.TLS: "tls",
    }
    for member, value in expected.items():
        assert member.value == value
    assert elfdeps.SymbolType("func") is elfdeps.SymbolType.FUNC


def test_symbols_binding_types() -> None:
    """Every extracted symbol uses the enum types for binding and type."""
    interpreter = pathlib.Path(sys.executable)
    result = elfdeps.analyze_file(interpreter, settings=SYMBOLS_SETTINGS)
    exported = result.exported_symbols
    imported = result.imported_symbols
    assert exported is not None
    assert imported is not None
    for symbol in exported + imported:
        assert isinstance(symbol.binding, elfdeps.SymbolBinding)
        assert isinstance(symbol.type, elfdeps.SymbolType)
def test_symbols_libpython() -> None:
    """libpython imports libc allocators and exports stable ABI symbols.

    The shared libpython is analyzed when the interpreter was built with a
    shared library (``Py_ENABLE_SHARED``); otherwise the Python executable
    itself is used. ``INSTSONAME`` alone cannot be used to detect a static
    build, because static POSIX builds set it to the name of the static
    archive instead of leaving it unset. The test is skipped, like the libc
    tests, when the shared library cannot be located on this system.
    """
    libpython = pathlib.Path(sys.executable)
    if sysconfig.get_config_var("Py_ENABLE_SHARED"):
        instsoname = sysconfig.get_config_var("INSTSONAME")
        libdir = sysconfig.get_config_var("LIBDIR")
        if not instsoname or not libdir:
            pytest.skip("location of shared libpython is unknown")
        libpython = pathlib.Path(libdir) / instsoname
        if not libpython.is_file():
            # e.g. relocated installs or a libpython package that is absent
            pytest.skip(f"{libpython} not found")

    info = elfdeps.analyze_file(libpython, settings=SYMBOLS_SETTINGS)
    assert info.exported_symbols is not None
    assert info.imported_symbols is not None
    exported = {s.name: s for s in info.exported_symbols}
    imported = {s.name: s for s in info.imported_symbols}
    # libc allocators are imported as global functions
    for name in ("malloc", "free"):
        sym = imported[name]
        assert sym.binding == elfdeps.SymbolBinding.GLOBAL
        assert sym.type == elfdeps.SymbolType.FUNC
    # stable ABI functions from https://docs.python.org/3/c-api/stable.html
    for name in (
        "PyList_Append",
        "PyTuple_New",
        "PyDict_SetItem",
        "PySet_Add",
        "PyBytes_AsString",
        "PyUnicode_FromString",
        "PyLong_AsLong",
        "PyFloat_FromDouble",
        "PyErr_SetString",
        "PyObject_GetAttr",
        "PyType_Ready",
        "PyGILState_Ensure",
    ):
        sym = exported[name]
        assert sym.binding == elfdeps.SymbolBinding.GLOBAL
        assert sym.type == elfdeps.SymbolType.FUNC
    # stable ABI type objects are exported as global data objects
    for name in (
        "PyList_Type",
        "PyDict_Type",
        "PyFloat_Type",
        "PyExc_ValueError",
    ):
        sym = exported[name]
        assert sym.binding == elfdeps.SymbolBinding.GLOBAL
        assert sym.type == elfdeps.SymbolType.OBJECT