From 375e9dd0c06359d07ef5434f95d5699dc9a1ac70 Mon Sep 17 00:00:00 2001 From: Luca Bruni Date: Thu, 16 Apr 2026 17:32:29 -0400 Subject: [PATCH 1/3] packaging: reserve RPATH pad to avoid patchelf growing .dynstr (#4271) py_packaging's patchelf --add-rpath + --set-rpath pass was extending .dynstr on every shipped ELF, which made patchelf prepend a writable PT_LOAD segment at a non-canonical base address. RHEL 8.10 / EL 4.18 kernels reject that layout in execve() with SIGSEGV, so `pip install rocm[libraries]` failed to compile any HIP program on RHEL 8.10 baremetal. Reserve ~1KB of RPATH string space at link time in every shipped ELF so the packaging rewrite fits in place without resizing .dynstr. The pad is defined once in cmake/therock_subproject_utils.cmake and injected into every subproject's init file. Auto-managed targets get the pad via therock_set_install_rpath; the three NO_INSTALL_RPATH carve-outs pick it up via their pre-hook (amd-llvm, amd-comgr) or a new super-project post-hook (hipify), so no submodule changes are required. py_packaging's _extend_rpath + _normalize_rpath are collapsed into a single _rewrite_rpath that strips the pad and writes the final entries with one patchelf --set-rpath --force-rpath call. Two gates added to rocm_sdk.tests.core_test (runs under rocm-sdk test on every PR and nightly): testPatchelfPadStripped greps shipped files for the pad marker, and testExecutableElfLayoutIntact parses 64-bit LE ELFs under core/bin and asserts the first PT_LOAD segment is read-only. Made-with: Cursor --- build_tools/_therock_utils/py_packaging.py | 92 +++++++---- .../rocm/src/rocm_sdk/tests/core_test.py | 145 ++++++++++++++++++ cmake/therock_subproject.cmake | 15 +- cmake/therock_subproject_utils.cmake | 59 +++++++ compiler/post_hook_hipify.cmake | 40 +++++ compiler/pre_hook_amd-comgr.cmake | 4 +- compiler/pre_hook_amd-llvm.cmake | 9 +- 7 files changed, 331 insertions(+), 33 deletions(-) create mode 100644 compiler/post_hook_hipify.cmake diff --git a/build_tools/_therock_utils/py_packaging.py b/build_tools/_therock_utils/py_packaging.py index 639c96bd91c..9d9dc21a577 100644 --- a/build_tools/_therock_utils/py_packaging.py +++ b/build_tools/_therock_utils/py_packaging.py @@ -36,6 +36,21 @@ ENABLED_VLOG_LEVEL: int = 5 +# Marker substring baked into RPATH pad entries at link time by +# cmake/therock_subproject_utils.cmake. ELFs TheRock ships are linked with a +# ~1KB filler RPATH entry containing this marker so that when we later rewrite +# RPATHs below with `patchelf --set-rpath`, the replacement always fits inside +# the .dynstr allocation that was reserved at link time. Patchelf therefore +# overwrites in place and does not need to prepend a new PT_LOAD segment, +# which is the mutation that crashes execve() on RHEL 8.10 / EL 4.18 kernels. +# See https://github.com/ROCm/TheRock/issues/4271 for the full diagnosis. +# +# The same constant is duplicated in +# build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/core_test.py +# because that file ships inside the wheel and cannot import from build-only +# modules. Keep the two string literals in sync. +PATCHELF_PAD_MARKER = "__therock_patchelf_pad__" + def log(*args, vlog: int = 0, **kwargs): if vlog > ENABLED_VLOG_LEVEL: @@ -428,10 +443,11 @@ def _populate_file( # Update RPATHs on Linux. file_type = get_file_type(dest_path) if file_type == "exe" or file_type == "so": - self._extend_rpath(dest_path) - self._normalize_rpath(dest_path) + self._rewrite_rpath(dest_path, self._dep_rpath_entries(dest_path)) - def _extend_rpath(self, file_path: Path): + def _dep_rpath_entries(self, file_path: Path) -> list[str]: + """$ORIGIN-relative RPATH entries contributed by rpath_deps packages.""" + entries: list[str] = [] for dep_project, rpath in self.rpath_deps: parent_relpath = self._platform_dir.parent.relative_to( file_path.parent, walk_up=True @@ -439,40 +455,56 @@ def _extend_rpath(self, file_path: Path): dep_py_package_name = dep_project.entry.get_py_package_name( self.target_family ) - addl_rpath = f"$ORIGIN/{parent_relpath}/{dep_py_package_name}/{rpath}" - log(f" ADD_RPATH: {file_path}: {addl_rpath}") - patchelf_cl = [ - "patchelf", - "--add-rpath", - addl_rpath, - str(file_path), - ] - subprocess.check_call(patchelf_cl) - - def _normalize_rpath(self, file_path: Path): + entries.append(f"$ORIGIN/{parent_relpath}/{dep_py_package_name}/{rpath}") + return entries + + def _rewrite_rpath(self, file_path: Path, extra_rpath_entries: Sequence[str] = ()): + """Rewrite DT_RPATH in place. + + Reads the existing RPATH, strips any filler entries the build planted + at link time (identified by PATCHELF_PAD_MARKER), appends the requested + extra_rpath_entries deduplicated against what's already there, and + writes the result back with a single `patchelf --set-rpath`. + + Using `--set-rpath` means patchelf overwrites the existing .dynstr + entry in place as long as the new string fits within the space that + was reserved at link time by the pad. That's the whole point: it + avoids the .dynstr-growth path that prepends a new PT_LOAD segment + and trips the RHEL 8.10 execve() bug (see PATCHELF_PAD_MARKER docs + and issue #4271). If the replacement ever doesn't fit, patchelf will + fall back to growing .dynstr and the CI pad-marker grep will flag it. + """ existing_rpath = ( - subprocess.check_output( - [ - "patchelf", - "--print-rpath", - str(file_path), - ] - ) + subprocess.check_output(["patchelf", "--print-rpath", str(file_path)]) .decode() .strip() ) - if not existing_rpath: - return - # Possibly in the future, do manual normalization of the RPATH. - norm_rpath = existing_rpath + entries: list[str] = [] + seen: set[str] = set() + if existing_rpath: + for entry in existing_rpath.split(":"): + if not entry or PATCHELF_PAD_MARKER in entry: + continue + if entry in seen: + continue + entries.append(entry) + seen.add(entry) + for entry in extra_rpath_entries: + if entry and entry not in seen: + entries.append(entry) + seen.add(entry) + + new_rpath = ":".join(entries) + if new_rpath == existing_rpath: + return - log(f" NORMALIZE_RPATH: {file_path}: {norm_rpath}") + log(f" REWRITE_RPATH: {file_path}: {existing_rpath!r} -> {new_rpath!r}") subprocess.check_call( [ "patchelf", "--set-rpath", - norm_rpath, + new_rpath, # Forces the use of RPATH vs RUNPATH, which is more appropriate # for hermetic libraries like these since it does not allow # LD_LIBRARY_PATH to interfere. @@ -637,10 +669,12 @@ def _populate_devel_file( shutil.copy2(src_entry.path, dest_path) if not is_windows: - # Update RPATHs on Linux. + # Update RPATHs on Linux. Devel files don't pull in new + # dep-relative RPATHs, but we still run the rewrite so any link- + # time pad entries get stripped in place. file_type = get_file_type(dest_path) if file_type == "exe" or file_type == "so": - self._normalize_rpath(dest_path) + self._rewrite_rpath(dest_path) MAGIC_AR_MATCH = re.compile("ar archive") diff --git a/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/core_test.py b/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/core_test.py index e1dba224a78..06fed262e29 100644 --- a/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/core_test.py +++ b/build_tools/packaging/python/templates/rocm/src/rocm_sdk/tests/core_test.py @@ -5,8 +5,10 @@ import importlib import locale +import mmap from pathlib import Path import platform +import struct import subprocess import sys import unittest @@ -16,6 +18,17 @@ import rocm_sdk +# Keep in sync with PATCHELF_PAD_MARKER in +# build_tools/_therock_utils/py_packaging.py. See that file for why the pad +# exists and why we want the absence of this marker in shipped binaries. +PATCHELF_PAD_MARKER_BYTES = b"__therock_patchelf_pad__" +ELF_MAGIC = b"\x7fELF" +# ELF program header flag bits. +PF_X = 0x1 +PF_W = 0x2 +PF_R = 0x4 +PT_LOAD = 1 + utils.assert_is_physical_package(rocm_sdk) core_mod_name = di.ALL_PACKAGES["core"].get_py_package_name() @@ -60,6 +73,57 @@ ) +def _file_contains_bytes(path: Path, needle: bytes) -> bool: + try: + size = path.stat().st_size + except OSError: + return False + if size < len(needle): + return False + with open(path, "rb") as f: + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm: + return mm.find(needle) != -1 + + +def _is_elf(path: Path) -> bool: + try: + with open(path, "rb") as f: + return f.read(4) == ELF_MAGIC + except OSError: + return False + + +def _parse_elf64_pt_loads(path: Path): + """Return (e_entry, [(p_flags, p_vaddr, p_memsz), ...]) for PT_LOAD segments. + + Only supports 64-bit little-endian ELFs. Returns None for anything else + (32-bit, big-endian, non-ELF). ROCm only ships 64-bit LE binaries. + """ + with open(path, "rb") as f: + ehdr = f.read(64) + if len(ehdr) < 64 or not ehdr.startswith(ELF_MAGIC): + return None + if ehdr[4] != 2 or ehdr[5] != 1: + return None + e_entry, e_phoff = struct.unpack_from(" Date: Fri, 17 Apr 2026 14:42:43 -0400 Subject: [PATCH 2/3] packaging: collapse RPATH pad filler in build logs The ~1KB RPATH pad added for issue #4271 makes from-source build logs nearly unreadable when printed verbatim. Dump a compact placeholder instead: - therock_set_install_rpath now prints "(+patchelf pad)" once per target instead of the full $ORIGIN/.__therock_patchelf_pad___XXX... string; CMake still writes the real pad into INSTALL_RPATH. - py_packaging._rewrite_rpath collapses the pad entry to "" when logging the before/after RPATH; patchelf still overwrites the real .dynstr bytes in place. Shipped ELFs are unchanged. Cosmetic only. Made-with: Cursor --- build_tools/_therock_utils/py_packaging.py | 20 +++++++++++++++++++- cmake/therock_subproject_utils.cmake | 11 ++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/build_tools/_therock_utils/py_packaging.py b/build_tools/_therock_utils/py_packaging.py index 9d9dc21a577..751baf37158 100644 --- a/build_tools/_therock_utils/py_packaging.py +++ b/build_tools/_therock_utils/py_packaging.py @@ -52,6 +52,21 @@ PATCHELF_PAD_MARKER = "__therock_patchelf_pad__" +def _format_rpath_for_log(rpath: str) -> str: + """Collapse the pad filler to '' for readable REWRITE_RPATH logs. + + The pad entry is ~1KB of X's and dumping it unmodified into every log line + makes `py_packaging.log` unreadable and spooks users watching the build + (issue #4271). The on-disk value is untouched; this only affects logging. + """ + if not rpath: + return rpath + return ":".join( + "" if PATCHELF_PAD_MARKER in entry else entry + for entry in rpath.split(":") + ) + + def log(*args, vlog: int = 0, **kwargs): if vlog > ENABLED_VLOG_LEVEL: return @@ -499,7 +514,10 @@ def _rewrite_rpath(self, file_path: Path, extra_rpath_entries: Sequence[str] = ( if new_rpath == existing_rpath: return - log(f" REWRITE_RPATH: {file_path}: {existing_rpath!r} -> {new_rpath!r}") + log( + f" REWRITE_RPATH: {file_path}: " + f"{_format_rpath_for_log(existing_rpath)!r} -> {new_rpath!r}" + ) subprocess.check_call( [ "patchelf", diff --git a/cmake/therock_subproject_utils.cmake b/cmake/therock_subproject_utils.cmake index 089ec611f1b..cbd2101ab4e 100644 --- a/cmake/therock_subproject_utils.cmake +++ b/cmake/therock_subproject_utils.cmake @@ -97,12 +97,17 @@ function(therock_set_install_rpath) list(APPEND _rpath "$ORIGIN${path_suffix}") endif() endforeach() - # Append the patchelf pad on ELF platforms so py_packaging can rewrite - # RPATHs in place (see long comment above). + # Log the user-meaningful RPATH before appending the patchelf pad. The pad + # is ~1KB of filler and dumping it into every "Set RPATH" status line would + # drown out from-source build logs (issue #4271). + set(_rpath_log_suffix "") if(NOT CMAKE_SYSTEM_NAME MATCHES "Darwin" AND THEROCK_INSTALL_RPATH_PAD) + set(_rpath_log_suffix " (+patchelf pad)") + endif() + message(STATUS "Set RPATH ${_rpath}${_rpath_log_suffix} on ${ARG_TARGETS}") + if(_rpath_log_suffix) list(APPEND _rpath "${THEROCK_INSTALL_RPATH_PAD}") endif() - message(STATUS "Set RPATH ${_rpath} on ${ARG_TARGETS}") set_target_properties(${ARG_TARGETS} PROPERTIES INSTALL_RPATH "${_rpath}") endfunction() From c337c18041364ea2e3083e7b71cd4b443f44e097 Mon Sep 17 00:00:00 2001 From: Luca Bruni Date: Fri, 17 Apr 2026 15:21:16 -0400 Subject: [PATCH 3/3] Fix pre-commit failure Co-authored-by: Claude Made-with: Cursor --- build_tools/_therock_utils/py_packaging.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build_tools/_therock_utils/py_packaging.py b/build_tools/_therock_utils/py_packaging.py index 751baf37158..27a858cf84a 100644 --- a/build_tools/_therock_utils/py_packaging.py +++ b/build_tools/_therock_utils/py_packaging.py @@ -62,8 +62,7 @@ def _format_rpath_for_log(rpath: str) -> str: if not rpath: return rpath return ":".join( - "" if PATCHELF_PAD_MARKER in entry else entry - for entry in rpath.split(":") + "" if PATCHELF_PAD_MARKER in entry else entry for entry in rpath.split(":") )