iav · iav · May 10, 2026 · May 10, 2026 · May 13, 2026 · May 13, 2026
diff --git a/lib/functions/main/rootfs-image.sh b/lib/functions/main/rootfs-image.sh
@@ -13,6 +13,11 @@ function build_rootfs_and_image() {
 	# get a basic rootfs, either from cache or from scratch
 	get_or_create_rootfs_cache_chroot_sdcard # only occurrence of this; has its own logging sections
 
+	# Cache-hit path also benefits — the extracted rootfs has libc/ld-linux, so kernel
+	# binfmt_elf can run 32-bit ARM ELF natively. Idempotent on cache-miss path where
+	# create_new_rootfs_cache_via_debootstrap already activated this.
+	_native_armhf_setup_binfmt_elf || true
+
 	# deploy the qemu binary, no matter where the rootfs came from (built or cached)
 	LOG_SECTION="deploy_qemu_binary_to_chroot_image" do_with_logging deploy_qemu_binary_to_chroot "${SDCARD}" "image" # undeployed at end of this function
 

diff --git a/lib/functions/rootfs/qemu-static.sh b/lib/functions/rootfs/qemu-static.sh
@@ -17,6 +17,13 @@ function deploy_qemu_binary_to_chroot() {
 		return 0
 	fi
 
+	# Native armhf path is active: kernel binfmt_elf executes 32-bit ARM ELF via
+	# CONFIG_COMPAT, no qemu-arm-static needed inside the chroot.
+	if [[ "${ARMBIAN_NATIVE_ARMHF_VIA_BINFMT_ELF:-no}" == "yes" ]]; then
+		display_alert "Native armhf via binfmt_elf" "skipping qemu binary deployment during ${caller}" "info"
+		return 0
+	fi
+
 	# Source: try the historical name first (qemu-<arch>-static), fall back
 	# to the bare name shipped by Ubuntu resolute's qemu-user-binfmt package
 	# (e.g. /usr/bin/qemu-aarch64).
@@ -76,8 +83,19 @@ function undeploy_qemu_binary_from_chroot() {
 	declare dst_target_bkp="${dst_target}.armbian.orig"
 	declare dst_target_alt_bkp="${dst_target_alt}.armbian.orig"
 
-	# Check the binary we deployed is there. If not, panic, as we've lost control.
+	# Check the binary we deployed is there. Two reasons it might be missing:
+	#   1. ARMBIAN_NATIVE_ARMHF_VIA_BINFMT_ELF was active when the matching deploy
+	#      ran, so nothing was copied — graceful no-op.
+	#   2. Genuine state loss — panic, we lost control.
+	# We must NOT skip the removal solely on the native-armhf flag, because deploy
+	# may have run before that flag was set (rootfs-create deploys at line 134,
+	# native-armhf flips at line 149); skipping the undeploy in that case leaks
+	# the host's qemu-arm-static into the rootfs cache tarball.
 	if [[ ! -f "${dst_target}" ]]; then
+		if [[ "${ARMBIAN_NATIVE_ARMHF_VIA_BINFMT_ELF:-no}" == "yes" ]]; then
+			display_alert "Native armhf via binfmt_elf" "no qemu binary to remove during ${caller}" "debug"
+			return 0
+		fi
 		exit_with_error "Missing qemu binary during undeploy_qemu_binary_from_chroot from ${caller}"
 	fi
 
@@ -132,6 +150,227 @@ function prepare_host_binfmt_qemu() {
 	return 0
 }
 
+# Native armhf on aarch64 host: runtime-disable qemu-arm in binfmt_misc so 32-bit
+# ARM ELF falls through to kernel binfmt_elf and runs natively via CONFIG_COMPAT
+# (~12× faster than qemu emulation). Killswitch: NATIVE_ARMHF_ON_ARM64=no (never/disabled are accepted too).
+#
+# Multi-build coordination is purely kernel-level: each builder holds LOCK_SH on
+# /proc/sys/fs/binfmt_misc/qemu-arm; first-arrival `echo 0`, last-out (LOCK_EX-NB
+# succeeds → no other SH holders) `echo 1`. No userspace state, no per-builder
+# files. Trade-off: an admin's pre-existing `disabled` state is not preserved
+# across the build window.
+
+# Report the binfmt_misc qemu-arm registration state on stdout; read-only.
+#   1       — entry exists and its first line starts with "enabled"
+#   0       — entry exists but is not enabled (or could not be read)
+#   missing — entry does not exist
+function _native_armhf_observe_qemu_arm_state() {
+	declare binfmt_entry="/proc/sys/fs/binfmt_misc/qemu-arm"
+	declare first_line=""
+
+	if [[ -e "${binfmt_entry}" ]]; then
+		# Only the first line matters. A failed read leaves first_line empty,
+		# which is reported as "0" below; errors are silenced on purpose.
+		{ IFS= read -r first_line < "${binfmt_entry}"; } 2> /dev/null || true
+		case "${first_line}" in
+			enabled*) echo "1" ;;
+			*) echo "0" ;;
+		esac
+	else
+		echo "missing"
+	fi
+	return 0
+}
+
+# Try to switch this build to native armhf execution through kernel binfmt_elf
+# by disabling the binfmt_misc qemu-arm entry under a flock-based protocol.
+# Globals read:    ARCH, NATIVE_ARMHF_ON_ARM64 (no|never|disabled = killswitch)
+# Globals written: ARMBIAN_NATIVE_ARMHF_VIA_BINFMT_ELF=yes on success;
+#                  _native_armhf_lock_fd (native path) or
+#                  _native_armhf_emul_lock_fd (killswitch path) carry the lock fd.
+# Returns: 0 when native armhf is active; 1 when the build stays on qemu
+#          emulation (other ARCH/host, killswitch, or any fallback).
+# Exits via exit_with_error when a concurrent build holds the opposite mode.
+function _native_armhf_setup_binfmt_elf() {
+	# Idempotent: callers in rootfs-create.sh and rootfs-image.sh invoke this
+	# from both the cache-miss and cache-hit paths.
+	[[ "${ARMBIAN_NATIVE_ARMHF_VIA_BINFMT_ELF:-no}" == "yes" ]] && return 0
+
+	# Killswitch path already anchored by an earlier call in this process:
+	# re-opening would overwrite _native_armhf_emul_lock_fd (leaking the first
+	# fd together with its SH-lock until process exit) and register the release
+	# handler a second time. Emulation stays in effect, so answer "not native".
+	[[ -n "${_native_armhf_emul_lock_fd:-}" ]] && return 1
+
+	# Gate by ARCH/host first: native armhf binfmt_elf only applies to armhf
+	# builds on aarch64 hosts. Unrelated builds (amd64/arm64 targets, x86 host)
+	# must not touch qemu-arm at all — neither to disable nor to anchor an
+	# emulation hold via NATIVE_ARMHF_ON_ARM64=no. Killswitch evaluation lives
+	# below this gate.
+	[[ "${ARCH}" == "armhf" ]] || return 1
+	[[ "$(arch)" == "aarch64" ]] || return 1
+
+	declare killswitch=no
+	case "${NATIVE_ARMHF_ON_ARM64:-auto}" in
+		no | never | disabled) killswitch=yes ;;
+	esac
+
+	# Killswitch path: take SH-lock on qemu-arm so concurrent native-armhf
+	# builders detect us via EX-NB probe and refuse to switch qemu-arm off.
+	# Without this anchor an N-builder arriving mid-K-chroot would echo 0 and
+	# silently break K's qemu-arm-static routing.
+	if [[ "${killswitch}" == "yes" ]]; then
+		if [[ ! -e /proc/sys/fs/binfmt_misc/qemu-arm ]]; then
+			display_alert "Native armhf via binfmt_elf" "killswitch requested but qemu-arm not registered; cannot anchor emulation" "wrn"
+			return 1
+		fi
+		if ! { exec {_native_armhf_emul_lock_fd}< /proc/sys/fs/binfmt_misc/qemu-arm; } 2> /dev/null; then
+			# Make sure a failed open cannot leave a stale value behind that the
+			# re-entry guard above would mistake for a held anchor.
+			unset _native_armhf_emul_lock_fd
+			display_alert "Native armhf via binfmt_elf" "cannot open binfmt_misc/qemu-arm; killswitch cannot anchor" "wrn"
+			return 1
+		fi
+		# Blocking SH with timeout: an N-builder's EX hold (probe / state-write
+		# / EX→SH downgrade window) is sub-millisecond, but we may collide. A
+		# non-blocking SH would fall through to emulation without an anchor and
+		# let the peer complete its EX→SH transition with qemu-arm=0, breaking
+		# our killswitch chroot exec routing later. Wait briefly for the peer's
+		# transition to settle.
+		if ! flock -s -w 30 "${_native_armhf_emul_lock_fd}"; then
+			exec {_native_armhf_emul_lock_fd}>&-
+			unset _native_armhf_emul_lock_fd
+			display_alert "Native armhf via binfmt_elf" "could not acquire emulation SH-lock within 30s; concurrent native-armhf transition stuck?" "wrn"
+			return 1
+		fi
+		# Post-SH state check: the peer's transition may have completed before
+		# our SH waiters were granted. If qemu-arm is 0, peer is now native and
+		# the killswitch contract cannot be honored.
+		if [[ "$(_native_armhf_observe_qemu_arm_state)" != "1" ]]; then
+			exec {_native_armhf_emul_lock_fd}>&-
+			unset _native_armhf_emul_lock_fd
+			display_alert "Native armhf via binfmt_elf" "killswitch requested but qemu-arm already disabled by concurrent native-armhf builders" "err"
+			exit_with_error "cannot honor NATIVE_ARMHF_ON_ARM64=no: concurrent native-armhf builders have disabled qemu-arm. Wait for them to finish or run on a separate host."
+		fi
+		add_cleanup_handler trap_handler_native_armhf_release_emul_lock
+		display_alert "Native armhf via binfmt_elf" "killswitch active; emulation-mode SH-lock acquired (blocks concurrent native-armhf switchover)" "info"
+		return 1
+	fi
+
+	# Pre-flight is unreliable when qemu-arm is enabled (it interprets the
+	# arch-test stub); the authoritative check is post-disable below.
+	if ! arch-test armhf > /dev/null 2>&1; then
+		display_alert "Native armhf via binfmt_elf" "arch-test pre-flight failed; falling back to qemu-arm-static emulation" "info"
+		return 1
+	fi
+
+	# qemu-arm not registered → native already active, no anchor needed.
+	if [[ ! -e /proc/sys/fs/binfmt_misc/qemu-arm ]]; then
+		display_alert "Native armhf via binfmt_elf" "qemu-arm not registered; native armhf already in effect" "info"
+		declare -g ARMBIAN_NATIVE_ARMHF_VIA_BINFMT_ELF=yes
+		return 0
+	fi
+
+	# Group-scoped 2>/dev/null: a bare `exec {fd}< file 2>/dev/null` would
+	# persistently redirect THIS shell's stderr (since exec without a command
+	# applies redirections to the current shell), silencing every later
+	# display_alert that writes to stderr.
+	if ! { exec {_native_armhf_lock_fd}< /proc/sys/fs/binfmt_misc/qemu-arm; } 2> /dev/null; then
+		display_alert "Native armhf via binfmt_elf" "cannot open binfmt_misc/qemu-arm; falling back to qemu emulation" "wrn"
+		return 1
+	fi
+
+	# Take EX-NB first to determine whether we are alone or joining. While
+	# we hold EX, no SH/EX can enter, so state observation and the disable
+	# write are atomic w.r.t. concurrent K- or N-builders. Then downgrade
+	# EX → SH on the same fd; Linux flock(2) performs the transition under
+	# the inode's flc_lock, granting pending SH waiters only after our SH
+	# is in place — by which time qemu-arm is already 0 and any joiner
+	# sees joiner-territory (state=0).
+	#
+	# EX-NB failure means someone else holds a lock (normally SH). State
+	# distinguishes:
+	#   state=1 → killswitch K-builder holds the emulation-mode anchor;
+	#             switching qemu-arm off would corrupt its chroot exec.
+	#   state=0 → peer N-builder; we are a joiner, take SH without writing.
+	# NOTE(review): a peer that momentarily holds EX (first-arrival probe or
+	# last-out restore) also makes EX-NB fail while state reads 1, and would be
+	# reported below as a killswitch holder — presumably tolerated because the
+	# window is tiny; confirm.
+	if flock -x -n "${_native_armhf_lock_fd}"; then
+		if [[ "$(_native_armhf_observe_qemu_arm_state)" == "1" ]]; then
+			if ! echo 0 > /proc/sys/fs/binfmt_misc/qemu-arm 2> /dev/null; then
+				display_alert "Native armhf via binfmt_elf" "could not disable qemu-arm (no CAP_SYS_ADMIN?); falling back to qemu-arm-static emulation" "wrn"
+				exec {_native_armhf_lock_fd}>&-
+				unset _native_armhf_lock_fd
+				return 1
+			fi
+		fi
+		if ! flock -s "${_native_armhf_lock_fd}"; then
+			display_alert "Native armhf via binfmt_elf" "could not downgrade EX→SH on binfmt_misc/qemu-arm; falling back to qemu emulation" "wrn"
+			exec {_native_armhf_lock_fd}>&-
+			unset _native_armhf_lock_fd
+			return 1
+		fi
+	elif [[ "$(_native_armhf_observe_qemu_arm_state)" == "1" ]]; then
+		exec {_native_armhf_lock_fd}>&-
+		unset _native_armhf_lock_fd
+		display_alert "Native armhf via binfmt_elf" "concurrent build holds emulation-mode lock (NATIVE_ARMHF_ON_ARM64=no)" "err"
+		exit_with_error "cannot enable native armhf: concurrent build with NATIVE_ARMHF_ON_ARM64=no holds emulation lock. Wait for it to finish or run on a separate host."
+	else
+		if ! flock -s -w 30 "${_native_armhf_lock_fd}"; then
+			display_alert "Native armhf via binfmt_elf" "could not acquire shared flock on binfmt_misc/qemu-arm within 30s; falling back to qemu emulation" "wrn"
+			exec {_native_armhf_lock_fd}>&-
+			unset _native_armhf_lock_fd
+			return 1
+		fi
+		# Joiner state-recheck. Between observing state=0 above and acquiring
+		# SH here, the last live N-builder may have released its SH and run
+		# trap_handler_native_armhf_restore_qemu_arm, taking EX-NB and writing
+		# qemu-arm back to "1". Our blocking SH then gets granted on an empty
+		# lock — state=1 now. Continuing as a native joiner would skip the
+		# qemu-arm-static deploy while binfmt actually routes to qemu, and
+		# chroot exec fails because the qemu binary isn't in the chroot.
+		# Release SH and fall back to the qemu emulation path; the caller
+		# (prepare_host_binfmt_qemu_cross) will register qemu-arm normally.
+		if [[ "$(_native_armhf_observe_qemu_arm_state)" != "0" ]]; then
+			display_alert "Native armhf via binfmt_elf" "joiner lost race to last-out restorer; qemu-arm re-enabled, falling back to emulation" "wrn"
+			exec {_native_armhf_lock_fd}>&-
+			unset _native_armhf_lock_fd
+			return 1
+		fi
+	fi
+
+	# Register cleanup BEFORE the authoritative arch-test, so a failure
+	# there still releases the lock via the trap handler.
+	add_cleanup_handler trap_handler_native_armhf_restore_qemu_arm
+
+	# Post-disable check is authoritative: arch-test now faces what the
+	# chroot exec will face. False-positive if host kernel lacks COMPAT_VDSO
+	# (see extensions/arm64-compat-vdso, PR #9284).
+	if ! arch-test armhf > /dev/null 2>&1; then
+		display_alert "Native armhf via binfmt_elf" "post-disable verification failed (host kernel lacks COMPAT_VDSO — see extensions/arm64-compat-vdso); restoring and falling back to emulation" "wrn"
+		trap_handler_native_armhf_restore_qemu_arm
+		return 1
+	fi
+
+	display_alert "Native armhf via binfmt_elf" "kernel $(uname -r), aarch64 host with COMPAT_VDSO; qemu-arm disabled, kernel binfmt_elf takes over" "info"
+	declare -g ARMBIAN_NATIVE_ARMHF_VIA_BINFMT_ELF=yes
+	return 0
+}
+
+# Cleanup handler for the killswitch path: close the fd that carries the
+# emulation-mode SH-lock. The killswitch builder never wrote to qemu-arm, so
+# there is no state to put back and no last-out detection to perform.
+function trap_handler_native_armhf_release_emul_lock() {
+	if [[ -z "${_native_armhf_emul_lock_fd:-}" ]]; then
+		return 0 # lock was never taken, or has been released already
+	fi
+	# Closing the fd drops the flock held through it.
+	exec {_native_armhf_emul_lock_fd}>&-
+	unset _native_armhf_emul_lock_fd
+}
+
+# Cleanup handler for the native path: drop this builder's SH-lock, then, if
+# no other holder remains (LOCK_EX-NB succeeds), write qemu-arm back to enabled.
+# No-op when _native_armhf_lock_fd is unset, so it is safe to call repeatedly.
+#
+# Cleanup ordering invariant: this handler must run AFTER cleanups that kill
+# the build's subshells (umount / SDCARD / MOUNT teardown). BSD flock is per-
+# OFD, so a forked subshell inheriting our SH-fd shares the same lock entry —
+# the LOCK_EX-NB probe below would falsely block on the inherited fd of a
+# still-alive child. add_cleanup_handler PREPENDS to the handler list and
+# run_cleanup_handlers iterates it in order, so later-registered handlers
+# run first. Our setup is invoked before mount_chroot in both call sites
+# (rootfs-create.sh and rootfs-image.sh), so mount_chroot's umount handlers
+# are registered after ours, sit at the head of the list, and execute before
+# us — by the time we run, the chroot is unmounted and inheriting subshells
+# have exited. Verified empirically (SIGINT mid-chroot). If a future call
+# site registers our setup AFTER mount_chroot, this invariant inverts and
+# the EX-NB probe will spuriously fail; the documented escape hatches are
+# POSIX F_SETLK on a helper fd or explicit descendant-kill.
+function trap_handler_native_armhf_restore_qemu_arm() {
+	[[ -n "${_native_armhf_lock_fd:-}" ]] || return 0
+	# Release our own SH-lock first; otherwise the EX-NB probe below could
+	# never succeed against ourselves.
+	exec {_native_armhf_lock_fd}>&-
+	unset _native_armhf_lock_fd
+
+	[[ -e /proc/sys/fs/binfmt_misc/qemu-arm ]] || return 0
+
+	# Group-scoped 2>/dev/null on the exec — see _native_armhf_setup_binfmt_elf.
+	declare last_fd
+	if ! { exec {last_fd}< /proc/sys/fs/binfmt_misc/qemu-arm; } 2> /dev/null; then
+		return 0
+	fi
+	# EX-NB succeeds only when nobody else holds a lock: we are the last out.
+	if flock -x -n "${last_fd}"; then
+		# Best-effort restore, but report the real outcome: claiming success
+		# after a failed write would hide a host left with qemu-arm disabled.
+		if echo 1 > /proc/sys/fs/binfmt_misc/qemu-arm 2> /dev/null; then
+			display_alert "Native armhf via binfmt_elf" "last out; qemu-arm restored to enabled" "info"
+		else
+			display_alert "Native armhf via binfmt_elf" "last out, but could not re-enable qemu-arm; it remains disabled on this host" "wrn"
+		fi
+	fi
+	exec {last_fd}>&-
+}
+
 # The actual binfmt manipulations when cross-build is confirmed above.
 function prepare_host_binfmt_qemu_cross() {
 	local failed_binfmt_modprobe=0
@@ -179,6 +418,23 @@ function prepare_host_binfmt_qemu_cross() {
 			continue
 		fi
 
+		# Skip wanted_arch=arm preparation entirely when this build doesn't
+		# target armhf. The Apple-Silicon helper below mutates global kernel
+		# binfmt_misc/qemu-arm state, which is irrelevant for cross builds
+		# targeting amd64/riscv64/etc and would needlessly race with any
+		# concurrent native-armhf owner on the host.
+		if [[ "${host_arch}" == "aarch64" && "${wanted_arch}" == "arm" && "${ARCH}" != "armhf" ]]; then
+			display_alert "binfmt qemu-arm" "skipped: target ARCH=${ARCH} doesn't need qemu-arm" "debug"
+			continue
+		fi
+
+		# Native armhf activation deferred. _native_armhf_setup_binfmt_elf is
+		# called AFTER mmdebstrap (rootfs-create.sh / rootfs-image.sh) — calling
+		# it here, before the chroot has libc/ld-linux-armhf.so.3, would leave
+		# bootstrap with no qemu registration and no native interpreter inside
+		# the chroot, breaking armhf maintainer-script execution. Let prepare_host
+		# register qemu-arm normally; the post-mmdebstrap call switches to native.
+
 		if [[ ! -e "/proc/sys/fs/binfmt_misc/qemu-${wanted_arch}" || ! -e "/usr/share/binfmts/qemu-${wanted_arch}" ]]; then
 			display_alert "Updating binfmts" "update-binfmts --enable qemu-${wanted_arch}" "debug"
 
@@ -193,6 +449,22 @@ function prepare_host_binfmt_qemu_cross() {
 }
 
 function prepare_host_binfmt_qemu_cross_arm64_host_armhf_target() {
+	# Conservative guard: refuse to mutate global qemu-arm state if it is
+	# observably disabled. That state means another concurrent armbian build
+	# is using the native-armhf path and we'd clobber it by re-enabling
+	# qemu-arm here. (Reachable only via NATIVE_ARMHF_ON_ARM64=no/never/
+	# disabled opt-out — otherwise _native_armhf_setup_binfmt_elf would have
+	# already exit'd with the "concurrent native-armhf build" error before
+	# we got here.)
+	if [[ -e /proc/sys/fs/binfmt_misc/qemu-arm ]]; then
+		declare observed_qemu_arm
+		observed_qemu_arm="$(_native_armhf_observe_qemu_arm_state)"
+		if [[ "${observed_qemu_arm}" == "0" ]]; then
+			display_alert "binfmt qemu-arm" "registered but observably disabled — another concurrent build likely holds native-armhf; refusing to clobber" "err"
+			exit_with_error "qemu-arm globally disabled by another concurrent build; cannot safely re-enable. Wait for it to finish or run on a separate host."
+		fi
+	fi
+
 	display_alert "Trying to update binfmts - aarch64 mostly does 32-bit sans emulation, but Apple said no" "update-binfmts --enable qemu-${wanted_arch}" "debug"
 	run_host_command_logged update-binfmts --enable "qemu-${wanted_arch}" "&>" "/dev/null" "||" "true" # don't fail nor produce output, which can be misleading.
 
@@ -201,12 +473,13 @@ function prepare_host_binfmt_qemu_cross_arm64_host_armhf_target() {
 		run_host_command_logged arch-test "||" true
 	fi
 
-	# to check, we use arch-test; if will return 0 if _either_ the host can natively run armhf, or if qemu-arm is correctly working.
-	if arch-test arm; then
+	# to check, we use arch-test; will return 0 if _either_ the host can natively run armhf, or if qemu-arm is correctly working.
+	# Use armhf (Debian-arch) rather than arm to match the build target and the post-disable check in _native_armhf_setup_binfmt_elf.
+	if arch-test armhf; then
 		display_alert "Host can run armhf natively or emulation is correctly setup already" "no need to enable qemu-arm" "debug"
 	else
 		display_alert "arm64 host can't run armhf natively" "importing enabling qemu-arm" "debug"
-		cat <<-BINFMT_ARM_MAGIC >/usr/share/binfmts/qemu-arm
+		cat <<- BINFMT_ARM_MAGIC > /usr/share/binfmts/qemu-arm
 			package qemu-user-static
 			interpreter /usr/bin/qemu-arm-static
 			magic \x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00
@@ -221,7 +494,7 @@ function prepare_host_binfmt_qemu_cross_arm64_host_armhf_target() {
 
 		# Test again using arch-test.
 		display_alert "Checking if arm 32-bit emulation on arm64 works after enabling" "qemu-arm emulation" "info"
-		run_host_command_logged arch-test arm
+		run_host_command_logged arch-test armhf
 		display_alert "arm 32-bit emulation on arm64" "has been correctly setup" "cachehit"
 	fi
 }
diff --git a/lib/functions/rootfs/rootfs-create.sh b/lib/functions/rootfs/rootfs-create.sh
@@ -61,7 +61,7 @@ function create_new_rootfs_cache_via_debootstrap() {
 			local debootstrap_apt_mirror="http://localhost:3142/${APT_MIRROR}"
 			acng_check_status_or_restart
 			;;
-		no)     ;& # do nothing, fallthrough
+		no) ;& # do nothing, fallthrough
 		"")
 			:  # still do nothing
 			;; # stop falling
@@ -139,9 +139,14 @@ function create_new_rootfs_cache_via_debootstrap() {
 
 	skip_target_check="yes" local_apt_deb_cache_prepare "for mmdebstrap" # just for size reference in logs
 
-
 	[[ ! -f "${SDCARD}/bin/bash" ]] && exit_with_error "mmdebstrap did not produce /bin/bash"
 
+	# mmdebstrap done, libc/ld-linux are in ${SDCARD}. Disable qemu-arm in binfmt_misc
+	# so subsequent chroot apt-get/dpkg/customize calls fall through to kernel binfmt_elf
+	# and run 32-bit ARM ELF natively via CONFIG_COMPAT. mmdebstrap above used qemu-arm
+	# because its cross-arch path requires that registration to be present.
+	_native_armhf_setup_binfmt_elf || true
+
 	# Done with mmdebstrap. Clean-up its litterbox.
 	display_alert "Cleaning up after mmdebstrap" "mmdebstrap cleanup" "info"
 	run_host_command_logged rm -rf "${SDCARD}/var/cache/apt" "${SDCARD}/var/lib/apt/lists"