diff --git a/supervisor/core.py b/supervisor/core.py index 0f77bd3b992..4ba8614555a 100644 --- a/supervisor/core.py +++ b/supervisor/core.py @@ -42,6 +42,8 @@ def __init__(self, coresys: CoreSys) -> None: self.coresys: CoreSys = coresys self._state: CoreState = CoreState.INITIALIZE self.exit_code: int = 0 + self._shutdown_event: asyncio.Event = asyncio.Event() + self._startup_complete: asyncio.Event = asyncio.Event() @property def state(self) -> CoreState: @@ -82,6 +84,9 @@ async def set_state(self, new_state: CoreState) -> None: self._state = new_state await self._write_run_state() + if self._state == CoreState.RUNNING: + self._startup_complete.set() + # Don't attempt to notify anyone on CLOSE as we're about to stop the event loop if self._state != CoreState.CLOSE: self.sys_bus.fire_event(BusEvent.SUPERVISOR_STATE_CHANGE, self._state) @@ -314,6 +319,9 @@ async def stop(self) -> None: # don't process scheduler anymore await self.set_state(CoreState.STOPPING) + # Cancel shutdown monitor task before tearing down infrastructure + await self.sys_host.unload() + # Stage 1 try: async with asyncio.timeout(10): @@ -352,28 +360,54 @@ async def stop(self) -> None: self.sys_loop.stop() async def shutdown(self, *, remove_homeassistant_container: bool = False) -> None: - """Shutdown all running containers in correct order.""" + """Shutdown all running containers in correct order. + + Reentrant: if a shutdown is already in progress, subsequent calls + await completion of the existing shutdown rather than starting a second one. + """ + if self.state in STARTING_STATES: + _LOGGER.debug( + "Shutdown requested while Supervisor is still starting up, waiting for startup to complete" + ) + await self._startup_complete.wait() + + # Supervisor is already tearing down, no point running shutdown + if self.state in (CoreState.STOPPING, CoreState.CLOSE): + _LOGGER.warning("Ignoring shutdown request, Supervisor is already stopping") + return + + # Another shutdown is in progress, wait for it to complete + if self.state == CoreState.SHUTDOWN: + await self._shutdown_event.wait() + return + + # Reset event for this shutdown cycle (supports repeated use, e.g. backup restore) + self._shutdown_event.clear() + # don't process scheduler anymore if self.state == CoreState.RUNNING: await self.set_state(CoreState.SHUTDOWN) - # Shutdown Application Add-ons, using Home Assistant API - await self.sys_addons.shutdown(AddonStartup.APPLICATION) + try: + # Shutdown Application Add-ons, using Home Assistant API + await self.sys_addons.shutdown(AddonStartup.APPLICATION) - # Close Home Assistant - with suppress(HassioError): - await self.sys_homeassistant.core.stop( - remove_container=remove_homeassistant_container - ) + # Close Home Assistant + with suppress(HassioError): + await self.sys_homeassistant.core.stop( + remove_container=remove_homeassistant_container + ) - # Shutdown System Add-ons - await self.sys_addons.shutdown(AddonStartup.SERVICES) - await self.sys_addons.shutdown(AddonStartup.SYSTEM) - await self.sys_addons.shutdown(AddonStartup.INITIALIZE) + # Shutdown System Add-ons + await self.sys_addons.shutdown(AddonStartup.SERVICES) + await self.sys_addons.shutdown(AddonStartup.SYSTEM) + await self.sys_addons.shutdown(AddonStartup.INITIALIZE) - # Shutdown all Plugins - if self.state in (CoreState.STOPPING, CoreState.SHUTDOWN): - await self.sys_plugins.shutdown() + # Shutdown all Plugins + if self.state in (CoreState.STOPPING, CoreState.SHUTDOWN): + await self.sys_plugins.shutdown() + finally: + self._shutdown_event.set() async def _update_last_boot(self) -> None: """Update last boot time.""" diff --git a/supervisor/dbus/const.py b/supervisor/dbus/const.py index 8e49cc3f701..e31dc5228d6 100644 --- a/supervisor/dbus/const.py +++ b/supervisor/dbus/const.py @@ -48,6 +48,9 @@ DBUS_SIGNAL_NM_CONNECTION_ACTIVE_CHANGED = ( "org.freedesktop.NetworkManager.Connection.Active.StateChanged" ) +DBUS_SIGNAL_LOGIND_PREPARE_FOR_SHUTDOWN = ( + "org.freedesktop.login1.Manager.PrepareForShutdown" +) DBUS_SIGNAL_PROPERTIES_CHANGED = "org.freedesktop.DBus.Properties.PropertiesChanged" DBUS_SIGNAL_RAUC_INSTALLER_COMPLETED = "de.pengutronix.rauc.Installer.Completed" diff --git a/supervisor/dbus/logind.py b/supervisor/dbus/logind.py index c3a1834f4ce..b62113f5eb9 100644 --- a/supervisor/dbus/logind.py +++ b/supervisor/dbus/logind.py @@ -5,7 +5,12 @@ from dbus_fast.aio.message_bus import MessageBus from ..exceptions import DBusError, DBusInterfaceError, DBusServiceUnkownError -from .const import DBUS_NAME_LOGIND, DBUS_OBJECT_LOGIND +from ..utils.dbus import DBusSignalWrapper +from .const import ( + DBUS_NAME_LOGIND, + DBUS_OBJECT_LOGIND, + DBUS_SIGNAL_LOGIND_PREPARE_FOR_SHUTDOWN, +) from .interface import DBusInterface from .utils import dbus_connected @@ -29,7 +34,7 @@ async def connect(self, bus: MessageBus): await super().connect(bus) except DBusError: _LOGGER.warning("Can't connect to systemd-logind") - except (DBusServiceUnkownError, DBusInterfaceError): + except DBusServiceUnkownError, DBusInterfaceError: _LOGGER.warning("No systemd-logind support on the host.") @dbus_connected @@ -41,3 +46,13 @@ async def reboot(self) -> None: async def power_off(self) -> None: """Power off host computer.""" await self.connected_dbus.Manager.call("power_off", False) + + @dbus_connected + async def inhibit(self, what: str, who: str, why: str, mode: str) -> int: + """Take an inhibitor lock. Returns a file descriptor.""" + return await self.connected_dbus.Manager.call("inhibit", what, who, why, mode) + + @dbus_connected + def prepare_for_shutdown(self) -> DBusSignalWrapper: + """Return a signal wrapper for PrepareForShutdown signal.""" + return self.connected_dbus.signal(DBUS_SIGNAL_LOGIND_PREPARE_FOR_SHUTDOWN) diff --git a/supervisor/dbus/manager.py b/supervisor/dbus/manager.py index 201165f2819..ed417e31574 100644 --- a/supervisor/dbus/manager.py +++ b/supervisor/dbus/manager.py @@ -123,7 +123,7 @@ async def load(self) -> None: try: self._bus = connected_bus = await MessageBus( - bus_type=BusType.SYSTEM + bus_type=BusType.SYSTEM, negotiate_unix_fd=True ).connect() except Exception as err: raise DBusFatalError( diff --git a/supervisor/host/manager.py b/supervisor/host/manager.py index a8a94f5b928..036e92e6b16 100644 --- a/supervisor/host/manager.py +++ b/supervisor/host/manager.py @@ -1,15 +1,23 @@ """Host function like audio, D-Bus or systemd.""" +import asyncio from contextlib import suppress from functools import lru_cache import logging +import os from typing import Self from awesomeversion import AwesomeVersion from ..const import BusEvent from ..coresys import CoreSys, CoreSysAttributes -from ..exceptions import HassioError, HostLogError, PulseAudioError +from ..exceptions import ( + DBusError, + DBusNotConnectedError, + HassioError, + HostLogError, + PulseAudioError, +) from ..hardware.const import PolicyGroup from ..hardware.data import Device from .apparmor import AppArmorControl @@ -38,6 +46,7 @@ def __init__(self, coresys: CoreSys): self._network: NetworkManager = NetworkManager(coresys) self._sound: SoundControl = SoundControl(coresys) self._logs: LogsControl = LogsControl(coresys) + self._shutdown_monitor_task: asyncio.Task | None = None async def post_init(self) -> Self: """Post init actions that must occur in event loop.""" @@ -180,6 +189,63 @@ async def load(self): except HassioError as err: _LOGGER.warning("Loading host AppArmor on start failed: %s", err) + # Start monitoring for host shutdown signals (ACPI power button, etc.) + if self.sys_dbus.logind.is_connected: + self._shutdown_monitor_task = self.sys_create_task( + self._monitor_host_shutdown() + ) + + async def unload(self) -> None: + """Shutdown host manager and cancel background tasks.""" + if self._shutdown_monitor_task and not self._shutdown_monitor_task.done(): + self._shutdown_monitor_task.cancel() + with suppress(asyncio.CancelledError): + await self._shutdown_monitor_task + self._shutdown_monitor_task = None + + async def _monitor_host_shutdown(self) -> None: + """Monitor for host shutdown via logind PrepareForShutdown signal. + + Takes an inhibitor lock to delay shutdown while we gracefully stop + all running services. When PrepareForShutdown fires, runs the graceful + shutdown sequence and then releases the lock so the host can proceed. + """ + try: + inhibit_fd: int = await self.sys_dbus.logind.inhibit( + "shutdown", + "Home Assistant Supervisor", + "Gracefully stopping running services", + "delay", + ) + except (DBusError, DBusNotConnectedError) as err: + _LOGGER.warning( + "Could not take shutdown inhibitor lock from logind: %s", err + ) + return + + _LOGGER.info("Shutdown inhibitor lock acquired from logind") + + try: + async with self.sys_dbus.logind.prepare_for_shutdown() as signal: + while True: + msg = await signal.wait_for_signal() + active = msg[0] + if not active: + continue + + _LOGGER.info( + "Host shutdown/reboot detected, gracefully stopping services" + ) + await self.sys_core.shutdown() + break + except (DBusError, DBusNotConnectedError, OSError) as err: + _LOGGER.warning("Error monitoring host shutdown signal: %s", err) + finally: + if isinstance(inhibit_fd, int): + with suppress(OSError): + await self.sys_run_in_executor(os.close, inhibit_fd) + _LOGGER.info("Shutdown inhibitor lock released") + async def _hardware_events(self, device: Device) -> None: """Process hardware requests.""" if self.sys_hardware.policy.is_match_cgroup(PolicyGroup.AUDIO, device): diff --git a/tests/dbus/test_login.py b/tests/dbus/test_login.py index bada0476b7b..8c6fb388e1c 100644 --- a/tests/dbus/test_login.py +++ b/tests/dbus/test_login.py @@ -1,6 +1,8 @@ """Test login dbus interface.""" # pylint: disable=import-error +import os + from dbus_fast.aio.message_bus import MessageBus import pytest @@ -45,6 +47,39 @@ async def test_power_off(logind_service: LogindService, dbus_session_bus: Messag assert logind_service.PowerOff.calls == [(False,)] +async def test_inhibit(logind_service: LogindService, dbus_session_bus: MessageBus): + """Test taking an inhibitor lock.""" + logind_service.Inhibit.calls.clear() + logind = Logind() + + with pytest.raises(DBusNotConnectedError): + await logind.inhibit("shutdown", "test", "testing", "delay") + + await logind.connect(dbus_session_bus) + + fd = await logind.inhibit("shutdown", "Test", "Testing inhibit", "delay") + assert logind_service.Inhibit.calls == [ + ("shutdown", "Test", "Testing inhibit", "delay") + ] + if fd is not None: + os.close(fd) + + +async def test_prepare_for_shutdown_signal( + logind_service: LogindService, dbus_session_bus: MessageBus +): + """Test PrepareForShutdown signal.""" + logind = Logind() + await logind.connect(dbus_session_bus) + + async with logind.prepare_for_shutdown() as signal: + logind_service.PrepareForShutdown() + await logind_service.ping() + + msg = await signal.wait_for_signal() + assert msg == [True] + + async def test_dbus_logind_connect_error( dbus_session_bus: MessageBus, caplog: pytest.LogCaptureFixture ): diff --git a/tests/dbus_service_mocks/logind.py b/tests/dbus_service_mocks/logind.py index fbd27f362a2..d7082f2813a 100644 --- a/tests/dbus_service_mocks/logind.py +++ b/tests/dbus_service_mocks/logind.py @@ -1,6 +1,10 @@ """Mock of logind dbus service.""" +import os +import tempfile + from dbus_fast import DBusError +from dbus_fast.service import signal from .base import DBusServiceMock, dbus_method @@ -31,3 +35,15 @@ def Reboot(self, interactive: "b") -> None: @dbus_method() def PowerOff(self, interactive: "b") -> None: """PowerOff.""" + + @dbus_method() + def Inhibit(self, what: "s", who: "s", why: "s", mode: "s") -> "h": + """Take an inhibitor lock. Returns a file descriptor.""" + fd, path = tempfile.mkstemp() + os.unlink(path) + return fd + + @signal() + def PrepareForShutdown(self) -> "b": + """Signal prepare for shutdown.""" + return True diff --git a/tests/host/test_manager.py b/tests/host/test_manager.py index b43963838cf..bdf23d4a09c 100644 --- a/tests/host/test_manager.py +++ b/tests/host/test_manager.py @@ -1,18 +1,31 @@ """Test host manager.""" -from unittest.mock import patch +import asyncio +import os +from unittest.mock import AsyncMock, PropertyMock, patch from awesomeversion import AwesomeVersion import pytest +from supervisor.const import CoreState from supervisor.coresys import CoreSys from supervisor.dbus.const import MulticastProtocolEnabled +from supervisor.exceptions import DBusError from tests.dbus_service_mocks.base import DBusServiceMock +from tests.dbus_service_mocks.logind import Logind as LogindService from tests.dbus_service_mocks.rauc import Rauc as RaucService from tests.dbus_service_mocks.systemd import Systemd as SystemdService +@pytest.fixture(name="logind_service") +async def fixture_logind_service( + all_dbus_services: dict[str, DBusServiceMock | dict[str, DBusServiceMock]], +) -> LogindService: + """Return logind service mock.""" + yield all_dbus_services["logind"] + + @pytest.fixture(name="systemd_service") async def fixture_systemd_service( all_dbus_services: dict[str, DBusServiceMock | dict[str, DBusServiceMock]], @@ -71,3 +84,146 @@ async def test_reload_os( await coresys.host.reload() assert rauc_service.GetSlotStatus.calls == [()] + + +async def test_host_shutdown_on_prepare_for_shutdown_signal( + coresys: CoreSys, logind_service: LogindService +): + """Test graceful shutdown when PrepareForShutdown signal is received.""" + shutdown_called = asyncio.Event() + + async def mock_shutdown(**kwargs): + shutdown_called.set() + + await coresys.host.load() + await coresys.core.set_state(CoreState.RUNNING) + + # Give the monitor task time to start and register the signal listener + # (needs multiple yields for inhibit D-Bus call + AddMatch call) + await asyncio.sleep(0.1) + + with patch.object(coresys.core, "shutdown", side_effect=mock_shutdown): + # Emit PrepareForShutdown(true) signal as if host is shutting down + logind_service.PrepareForShutdown() + await logind_service.ping() + + async with asyncio.timeout(2): + await shutdown_called.wait() + + +async def test_host_shutdown_signal_reentrant( + coresys: CoreSys, logind_service: LogindService +): + """Test PrepareForShutdown during in-progress shutdown awaits same shutdown.""" + shutdown_called = asyncio.Event() + + async def mock_shutdown(**kwargs): + shutdown_called.set() + + await coresys.host.load() + await coresys.core.set_state(CoreState.SHUTDOWN) + + # Give the monitor task time to start and register the signal listener + await asyncio.sleep(0.1) + + with patch.object(coresys.core, "shutdown", side_effect=mock_shutdown): + logind_service.PrepareForShutdown() + await logind_service.ping() + + # shutdown() is called reentrantly - it awaits the in-progress shutdown + async with asyncio.timeout(2): + await shutdown_called.wait() + + +async def test_host_unload_cancels_monitor_task( + coresys: CoreSys, logind_service: LogindService +): + """Test unload cancels the shutdown monitor task.""" + await coresys.host.load() + await asyncio.sleep(0.1) + + assert coresys.host._shutdown_monitor_task is not None + assert not coresys.host._shutdown_monitor_task.done() + + await coresys.host.unload() + + assert coresys.host._shutdown_monitor_task is None + + +async def test_host_unload_no_monitor_task(coresys: CoreSys): + """Test unload when no monitor task was started.""" + # Don't call load(), so no monitor task exists + assert coresys.host._shutdown_monitor_task is None + await coresys.host.unload() + assert coresys.host._shutdown_monitor_task is None + + +async def test_monitor_inhibit_lock_failure( + coresys: CoreSys, + logind_service: LogindService, + caplog: pytest.LogCaptureFixture, +): + """Test monitor task logs warning when inhibit lock fails.""" + with patch.object( + coresys.dbus.logind, "inhibit", side_effect=DBusError("test error") + ): + await coresys.host.load() + await asyncio.sleep(0.1) + + assert "Could not take shutdown inhibitor lock from logind" in caplog.text + + +async def test_monitor_dbus_error_during_signal_wait( + coresys: CoreSys, + logind_service: LogindService, + caplog: pytest.LogCaptureFixture, +): + """Test monitor task handles D-Bus errors during signal monitoring.""" + with patch.object( + coresys.dbus.logind, + "prepare_for_shutdown", + side_effect=DBusError("connection lost"), + ): + await coresys.host.load() + await asyncio.sleep(0.1) + + assert "Error monitoring host shutdown signal" in caplog.text + + +async def test_inhibitor_lock_released_after_shutdown( + coresys: CoreSys, + logind_service: LogindService, + caplog: pytest.LogCaptureFixture, +): + """Test that the inhibitor lock FD is closed after shutdown completes.""" + # Mock inhibit to return a real FD (session bus doesn't negotiate unix FDs) + r_fd, w_fd = os.pipe() + os.close(w_fd) + + with patch.object( + coresys.dbus.logind, "inhibit", new_callable=AsyncMock, return_value=r_fd + ): + await coresys.host.load() + await asyncio.sleep(0.1) + + await coresys.core.set_state(CoreState.RUNNING) + + with patch.object(coresys.core, "shutdown", new_callable=AsyncMock): + logind_service.PrepareForShutdown() + await logind_service.ping() + await asyncio.sleep(0.2) + + assert "Shutdown inhibitor lock released" in caplog.text + + +async def test_no_monitor_task_without_logind(coresys: CoreSys): + """Test no monitor task is started when logind is not connected.""" + with patch.object( + type(coresys.dbus.logind), + "is_connected", + new_callable=PropertyMock, + return_value=False, + ): + await coresys.host.load() + + assert coresys.host._shutdown_monitor_task is None diff --git a/tests/os/test_data_disk.py b/tests/os/test_data_disk.py index 63b25e520cc..2b31e1654f6 100644 --- a/tests/os/test_data_disk.py +++ b/tests/os/test_data_disk.py @@ -271,6 +271,7 @@ async def test_datadisk_migrate_between_external_renames( all_dbus_services["os_agent"].emit_properties_changed({"Version": "1.5.0"}) await all_dbus_services["os_agent"].ping() + await coresys.core.set_state(CoreState.RUNNING) await coresys.os.datadisk.migrate_disk("Generic-Flash-Disk-61BCDDB6") assert datadisk_service.MarkDataMove.calls == [()] diff --git a/tests/resolution/fixup/test_system_adopt_data_disk.py b/tests/resolution/fixup/test_system_adopt_data_disk.py index 1366cebcb51..cbb24399d0e 100644 --- a/tests/resolution/fixup/test_system_adopt_data_disk.py +++ b/tests/resolution/fixup/test_system_adopt_data_disk.py @@ -5,6 +5,7 @@ from dbus_fast import DBusError, ErrorType, Variant import pytest +from supervisor.const import CoreState from supervisor.coresys import CoreSys from supervisor.resolution.const import ContextType, IssueType, SuggestionType from supervisor.resolution.data import Issue, Suggestion @@ -98,6 +99,7 @@ async def test_fixup( ["/org/freedesktop/UDisks2/block_devices/mmcblk1p3"], ] + await coresys.core.set_state(CoreState.RUNNING) await system_adopt_data_disk() assert mmcblk1p3_filesystem_service.SetLabel.calls == [ @@ -176,6 +178,7 @@ async def test_fixup_reboot_failed( ["/org/freedesktop/UDisks2/block_devices/mmcblk1p3"], ] + await coresys.core.set_state(CoreState.RUNNING) await system_adopt_data_disk() assert mmcblk1p3_filesystem_service.SetLabel.calls == [ @@ -228,6 +231,7 @@ async def test_fixup_disabled_data_disk( ["/org/freedesktop/UDisks2/block_devices/mmcblk1p3"], ] + await coresys.core.set_state(CoreState.RUNNING) await system_adopt_data_disk() assert mmcblk1p3_filesystem_service.SetLabel.calls == [ diff --git a/tests/resolution/fixup/test_system_execute_reboot.py b/tests/resolution/fixup/test_system_execute_reboot.py index 084cfcde75d..f14e4641fca 100644 --- a/tests/resolution/fixup/test_system_execute_reboot.py +++ b/tests/resolution/fixup/test_system_execute_reboot.py @@ -1,5 +1,6 @@ """Test fixup system reboot.""" +from supervisor.const import CoreState from supervisor.coresys import CoreSys from supervisor.resolution.const import ContextType, IssueType, SuggestionType from supervisor.resolution.data import Issue, Suggestion @@ -25,6 +26,7 @@ async def test_fixup( ) coresys.resolution.add_issue(Issue(IssueType.REBOOT_REQUIRED, ContextType.SYSTEM)) + await coresys.core.set_state(CoreState.RUNNING) await system_execute_reboot() assert logind_service.Reboot.calls == [(False,)] diff --git a/tests/test_core.py b/tests/test_core.py index a393fe87523..be30243e888 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,6 +1,7 @@ """Testing handling with CoreState.""" # pylint: disable=W0212 +import asyncio import datetime import errno from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch @@ -146,3 +147,83 @@ async def test_write_state_failure( assert "Can't update the Supervisor state" in caplog.text assert coresys.core.state == CoreState.RUNNING + + +async def test_shutdown_reentrant_waits(coresys: CoreSys): + """Test that concurrent shutdown calls wait for the first to complete.""" + call_count = 0 + shutdown_started = asyncio.Event() + proceed = asyncio.Event() + + original_shutdown = coresys.addons.shutdown + + async def slow_addon_shutdown(startup): + nonlocal call_count + call_count += 1 + shutdown_started.set() + await proceed.wait() + return await original_shutdown(startup) + + await coresys.core.set_state(CoreState.RUNNING) + + with patch.object(coresys.addons, "shutdown", side_effect=slow_addon_shutdown): + # Start first shutdown + task1 = asyncio.create_task(coresys.core.shutdown()) + await shutdown_started.wait() + + # Second call should wait, not start a new shutdown + task2 = asyncio.create_task(coresys.core.shutdown()) + await asyncio.sleep(0.05) + + # Let the shutdown proceed + proceed.set() + + await asyncio.gather(task1, task2) + + # Addon shutdown was only called by the first shutdown (4 startup levels) + assert call_count == 4 + assert coresys.core._shutdown_event.is_set() + + +async def test_shutdown_event_reset_between_cycles(coresys: CoreSys): + """Test that shutdown event is reset for repeated shutdown cycles (e.g. backup restore).""" + await coresys.core.set_state(CoreState.FREEZE) + + # First shutdown cycle + await coresys.core.shutdown() + assert coresys.core._shutdown_event.is_set() + + # Simulate backup restore returning to RUNNING + await coresys.core.set_state(CoreState.RUNNING) + + # Second shutdown cycle should work (event was cleared) + second_entered = False + + original_shutdown = coresys.addons.shutdown + + async def track_addon_shutdown(startup): + nonlocal second_entered + second_entered = True + return await original_shutdown(startup) + + with patch.object(coresys.addons, "shutdown", side_effect=track_addon_shutdown): + await coresys.core.shutdown() + + assert second_entered + assert coresys.core._shutdown_event.is_set() + + +@pytest.mark.parametrize( + "state", [CoreState.STOPPING, CoreState.CLOSE], ids=["stopping", "close"] +) +async def test_shutdown_ignored_during_stop( + coresys: CoreSys, caplog: pytest.LogCaptureFixture, state: CoreState +): + """Test that shutdown is ignored when Supervisor is already stopping.""" + await coresys.core.set_state(state) + + with patch.object(coresys.addons, "shutdown") as mock_addon_shutdown: + await coresys.core.shutdown() + + mock_addon_shutdown.assert_not_called() + assert "Ignoring shutdown request, Supervisor is already stopping" in caplog.text