From 634e682f48d08e09170e0c9a57a96c0ed34a6b96 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Sat, 18 Apr 2026 13:28:14 -0700 Subject: [PATCH 1/7] fix(shields): verify config lock and fail hard on re-lock failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shields-up re-lock wrapped all five kubectlExec calls in a single try/catch. If any one failed the rest were skipped, but the code still reported "Shields UP" and cleared the shields-down state — leaving the config file writable while claiming it was locked. - Run each chmod/chown/chattr independently so a single failure does not abort the remaining operations - After all attempts, verify actual on-disk state (stat + lsattr) - If verification fails, exit non-zero without updating state so shields remain DOWN until manually fixed - Add config re-lock to the auto-restore timer (was missing entirely — the timer only restored the policy, never re-locked the config file) Signed-off-by: Aaron Erickson --- src/lib/shields-timer.ts | 36 ++++++++++++++- src/lib/shields.ts | 98 ++++++++++++++++++++++++++++++++++------ 2 files changed, 119 insertions(+), 15 deletions(-) diff --git a/src/lib/shields-timer.ts b/src/lib/shields-timer.ts index 7fd2aad2c8..1a7715b805 100644 --- a/src/lib/shields-timer.ts +++ b/src/lib/shields-timer.ts @@ -7,17 +7,19 @@ // restores the captured policy snapshot. // // Usage (internal — called by shields.ts via fork()): -// node shields-timer.js +// node shields-timer.js const fs = require("fs"); const path = require("path"); +const { execFileSync } = require("child_process"); const { run } = require("./runner"); const { buildPolicySetCommand } = require("./policies"); const STATE_DIR = path.join(process.env.HOME ?? "/tmp", ".nemoclaw", "state"); const AUDIT_FILE = path.join(STATE_DIR, "shields-audit.jsonl"); +const K3S_CONTAINER = "openshell-cluster-nemoclaw"; -const [sandboxName, snapshotPath, restoreAtIso] = process.argv.slice(2); +const [sandboxName, snapshotPath, restoreAtIso, configPath, configDir] = process.argv.slice(2); const STATE_FILE = path.join(STATE_DIR, `shields-${sandboxName}.json`); const restoreAtMs = new Date(restoreAtIso).getTime(); const delayMs = Math.max(0, restoreAtMs - Date.now()); @@ -26,6 +28,14 @@ if (!sandboxName || !snapshotPath || !restoreAtIso || isNaN(restoreAtMs)) { process.exit(1); } +function kubectlExec(cmd) { + execFileSync("docker", [ + "exec", K3S_CONTAINER, + "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", + ...cmd, + ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }); +} + function appendAudit(entry) { try { fs.appendFileSync(AUDIT_FILE, JSON.stringify(entry) + "\n", { mode: 0o600 }); @@ -103,6 +113,28 @@ setTimeout(() => { process.exit(1); } + // Re-lock config file (each operation independent) + if (configPath) { + const lockErrors = []; + try { kubectlExec(["chmod", "444", configPath]); } catch { lockErrors.push("chmod 444"); } + try { kubectlExec(["chown", "root:root", configPath]); } catch { lockErrors.push("chown file"); } + if (configDir) { + try { kubectlExec(["chmod", "755", configDir]); } catch { lockErrors.push("chmod dir"); } + try { kubectlExec(["chown", "root:root", configDir]); } catch { lockErrors.push("chown dir"); } + } + try { kubectlExec(["chattr", "+i", configPath]); } catch { lockErrors.push("chattr +i"); } + + if (lockErrors.length > 0) { + appendAudit({ + action: "shields_auto_restore_lock_warning", + sandbox: sandboxName, + timestamp: now, + restored_by: "auto_timer", + warning: `Some lock operations failed: ${lockErrors.join(", ")}`, + }); + } + } + // Audit appendAudit({ action: "shields_auto_restore", diff --git a/src/lib/shields.ts b/src/lib/shields.ts index 3a4e359c81..88ca89bc46 100644 --- a/src/lib/shields.ts +++ b/src/lib/shields.ts @@ -44,6 +44,14 @@ function kubectlExec(sandboxName: string, cmd: string[]): void { ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }); } +function kubectlExecCapture(sandboxName: string, cmd: string[]): string { + return execFileSync("docker", [ + "exec", K3S_CONTAINER, + "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", + ...cmd, + ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }).toString().trim(); +} + // Re-export for tests and external consumers const MAX_TIMEOUT_SECONDS = MAX_SECONDS; const DEFAULT_TIMEOUT_SECONDS = DEFAULT_SECONDS; @@ -144,6 +152,70 @@ function unlockAgentConfig(sandboxName: string, target: { configPath: string; co } } +// --------------------------------------------------------------------------- +// Config lock — shared between shields-up, auto-restore timer, and rollback +// +// Each operation runs independently so a single failure does not skip the +// rest. After all attempts, we verify the actual on-disk state and throw +// if the config is not properly locked. +// --------------------------------------------------------------------------- + +function lockAgentConfig(sandboxName: string, target: { configPath: string; configDir: string }): void { + const errors: string[] = []; + + try { kubectlExec(sandboxName, ["chmod", "444", target.configPath]); } + catch { errors.push("chmod 444 config file"); } + + try { kubectlExec(sandboxName, ["chown", "root:root", target.configPath]); } + catch { errors.push("chown root:root config file"); } + + try { kubectlExec(sandboxName, ["chmod", "755", target.configDir]); } + catch { errors.push("chmod 755 config dir"); } + + try { kubectlExec(sandboxName, ["chown", "root:root", target.configDir]); } + catch { errors.push("chown root:root config dir"); } + + try { kubectlExec(sandboxName, ["chattr", "+i", target.configPath]); } + catch { errors.push("chattr +i config file"); } + + if (errors.length > 0) { + console.error(` Some lock operations failed: ${errors.join(", ")}`); + } + + // Verify the lock actually took effect + const issues: string[] = []; + try { + const perms = kubectlExecCapture(sandboxName, ["stat", "-c", "%a %U:%G", target.configPath]); + const [mode, owner] = perms.split(" "); + if (!/^4[0-4][0-4]$/.test(mode)) issues.push(`file mode=${mode} (expected 444)`); + if (owner !== "root:root") issues.push(`file owner=${owner} (expected root:root)`); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + issues.push(`file stat failed: ${msg}`); + } + + try { + const dirPerms = kubectlExecCapture(sandboxName, ["stat", "-c", "%a %U:%G", target.configDir]); + const [dirMode, dirOwner] = dirPerms.split(" "); + if (dirMode !== "755") issues.push(`dir mode=${dirMode} (expected 755)`); + if (dirOwner !== "root:root") issues.push(`dir owner=${dirOwner} (expected root:root)`); + } catch (err: unknown) { + const msg = err instanceof Error ? err.message : String(err); + issues.push(`dir stat failed: ${msg}`); + } + + try { + const attrs = kubectlExecCapture(sandboxName, ["lsattr", "-d", target.configPath]); + if (!attrs.includes("i")) issues.push("immutable bit not set"); + } catch { + // lsattr may not be available on all images — skip + } + + if (issues.length > 0) { + throw new Error(`Config not locked: ${issues.join(", ")}`); + } +} + // --------------------------------------------------------------------------- // shields down // --------------------------------------------------------------------------- @@ -250,7 +322,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void { const actualScript = fs.existsSync(timerScriptJs) ? timerScriptJs : timerScript; try { - const child = fork(actualScript, [sandboxName, snapshotPath, restoreAt.toISOString()], { + const child = fork(actualScript, [sandboxName, snapshotPath, restoreAt.toISOString(), target.configPath, target.configDir], { detached: true, stdio: ["ignore", "ignore", "ignore", "ipc"], }); @@ -273,13 +345,11 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void { const message = err instanceof Error ? err.message : String(err); console.error(` Cannot start auto-restore timer: ${message}`); console.error(" Rolling back — restoring policy from snapshot..."); + run(buildPolicySetCommand(snapshotPath, sandboxName), { ignoreError: true }); try { - run(buildPolicySetCommand(snapshotPath, sandboxName), { ignoreError: true }); - kubectlExec(sandboxName, ["chmod", "444", target.configPath]); - kubectlExec(sandboxName, ["chown", "root:root", target.configPath]); - kubectlExec(sandboxName, ["chattr", "+i", target.configPath]); + lockAgentConfig(sandboxName, target); } catch { - // Best effort rollback + console.error(" Warning: Rollback re-lock could not be verified. Check config manually."); } saveShieldsState(sandboxName, { shieldsDown: false, @@ -351,16 +421,18 @@ function shieldsUp(sandboxName: string): void { // 2b. Re-lock config file to read-only. // Restore the Dockerfile's original permissions and immutable bit. // Uses kubectl exec to bypass Landlock (same as shields down). + // Each operation runs independently and the result is verified. + // If verification fails, shields remain DOWN — we do not lie about state. const target = resolveAgentConfig(sandboxName); console.log(` Locking ${target.agentName} config (${target.configPath})...`); try { - kubectlExec(sandboxName, ["chmod", "444", target.configPath]); - kubectlExec(sandboxName, ["chown", "root:root", target.configPath]); - kubectlExec(sandboxName, ["chmod", "755", target.configDir]); - kubectlExec(sandboxName, ["chown", "root:root", target.configDir]); - kubectlExec(sandboxName, ["chattr", "+i", target.configPath]); - } catch { - console.error(" Warning: Could not re-lock config file."); + lockAgentConfig(sandboxName, target); + } catch (err: unknown) { + const message = err instanceof Error ? err.message : String(err); + console.error(` ERROR: ${message}`); + console.error(" Shields remain DOWN — manual intervention required."); + console.error(` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`); + process.exit(1); } // 3. Calculate duration From 537a717a9b3a116b56edb32dd73851f477d5ed4d Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Sat, 18 Apr 2026 13:42:14 -0700 Subject: [PATCH 2/7] fix(shields): parse lsattr flags field and verify timer re-lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address CodeRabbit review feedback: - Parse only the lsattr flag field (first token) instead of matching against the full output line, which could false-positive on filenames containing "i" (e.g. config.yaml) - Move the timer's shieldsDown state update to after re-lock verification succeeds — previously the timer cleared shieldsDown before re-locking, reintroducing the same false-positive this PR is fixing - Timer now verifies config lock via stat + lsattr and keeps shields DOWN if verification fails Signed-off-by: Aaron Erickson --- src/lib/shields-timer.ts | 83 +++++++++++++++++++++++++++++----------- src/lib/shields.ts | 3 +- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/src/lib/shields-timer.ts b/src/lib/shields-timer.ts index 1a7715b805..524fbcf654 100644 --- a/src/lib/shields-timer.ts +++ b/src/lib/shields-timer.ts @@ -85,23 +85,10 @@ setTimeout(() => { process.exit(1); } - // Update state first — policy restore can take seconds and callers - // check state to determine whether shields are up or down. - updateState({ - shieldsDown: false, - shieldsDownAt: null, - shieldsDownTimeout: null, - shieldsDownReason: null, - shieldsDownPolicy: null, - }); - // Restore policy (slow — openshell policy set --wait blocks) const result = run(buildPolicySetCommand(snapshotPath, sandboxName), { ignoreError: true }); if (result.status !== 0) { - // Policy restore failed — revert state so callers know shields are - // still down. The sandbox remains relaxed. - updateState({ shieldsDown: true }); appendAudit({ action: "shields_up_failed", sandbox: sandboxName, @@ -114,6 +101,7 @@ setTimeout(() => { } // Re-lock config file (each operation independent) + let lockVerified = true; if (configPath) { const lockErrors = []; try { kubectlExec(["chmod", "444", configPath]); } catch { lockErrors.push("chmod 444"); } @@ -124,25 +112,74 @@ setTimeout(() => { } try { kubectlExec(["chattr", "+i", configPath]); } catch { lockErrors.push("chattr +i"); } - if (lockErrors.length > 0) { + // Verify the lock took effect + const issues = []; + try { + const perms = execFileSync("docker", [ + "exec", K3S_CONTAINER, + "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", + "stat", "-c", "%a %U:%G", configPath, + ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }).toString().trim(); + const [mode, owner] = perms.split(" "); + if (!/^4[0-4][0-4]$/.test(mode)) issues.push(`file mode=${mode}`); + if (owner !== "root:root") issues.push(`file owner=${owner}`); + } catch { + issues.push("file stat failed"); + } + + try { + const attrs = execFileSync("docker", [ + "exec", K3S_CONTAINER, + "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", + "lsattr", "-d", configPath, + ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }).toString().trim(); + const [flags] = attrs.split(/\s+/, 1); + if (!flags.includes("i")) issues.push("immutable bit not set"); + } catch { + // lsattr may not be available — skip + } + + if (issues.length > 0 || lockErrors.length > 0) { + lockVerified = issues.length === 0; appendAudit({ action: "shields_auto_restore_lock_warning", sandbox: sandboxName, timestamp: now, restored_by: "auto_timer", - warning: `Some lock operations failed: ${lockErrors.join(", ")}`, + warning: `Lock issues: ${[...lockErrors, ...issues].join(", ")}`, + lock_verified: lockVerified, }); } } - // Audit - appendAudit({ - action: "shields_auto_restore", - sandbox: sandboxName, - timestamp: now, - restored_by: "auto_timer", - policy_snapshot: snapshotPath, - }); + // Only mark shields as UP if the lock was verified (or no config path) + if (lockVerified) { + updateState({ + shieldsDown: false, + shieldsDownAt: null, + shieldsDownTimeout: null, + shieldsDownReason: null, + shieldsDownPolicy: null, + }); + + appendAudit({ + action: "shields_auto_restore", + sandbox: sandboxName, + timestamp: now, + restored_by: "auto_timer", + policy_snapshot: snapshotPath, + }); + } else { + appendAudit({ + action: "shields_up_failed", + sandbox: sandboxName, + timestamp: now, + restored_by: "auto_timer", + error: "Config re-lock verification failed — shields remain DOWN", + }); + cleanupMarker(); + process.exit(1); + } } catch (err) { appendAudit({ action: "shields_up_failed", diff --git a/src/lib/shields.ts b/src/lib/shields.ts index 88ca89bc46..02934ed77d 100644 --- a/src/lib/shields.ts +++ b/src/lib/shields.ts @@ -206,7 +206,8 @@ function lockAgentConfig(sandboxName: string, target: { configPath: string; conf try { const attrs = kubectlExecCapture(sandboxName, ["lsattr", "-d", target.configPath]); - if (!attrs.includes("i")) issues.push("immutable bit not set"); + const [flags] = attrs.trim().split(/\s+/, 1); + if (!flags.includes("i")) issues.push("immutable bit not set"); } catch { // lsattr may not be available on all images — skip } From 36302701919506730f79890baf9a8497e10a8cd3 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Sat, 18 Apr 2026 18:04:20 -0700 Subject: [PATCH 3/7] fix(shields): treat immutable bit as best-effort on unsupported fs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chattr +i immutable flag is defense-in-depth but not all container filesystems support it. When chattr fails, the verification now falls back to mode + ownership checks (444 root:root) which are sufficient to prevent the sandbox user from modifying the config. Previously the verification unconditionally required the immutable bit, causing shields-up to fail on any filesystem where chattr is not supported — even though chmod/chown succeeded. Signed-off-by: Aaron Erickson --- src/lib/shields-timer.ts | 28 ++++++++++++++++------------ src/lib/shields.ts | 28 ++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/lib/shields-timer.ts b/src/lib/shields-timer.ts index 524fbcf654..6c0785ce91 100644 --- a/src/lib/shields-timer.ts +++ b/src/lib/shields-timer.ts @@ -110,9 +110,11 @@ setTimeout(() => { try { kubectlExec(["chmod", "755", configDir]); } catch { lockErrors.push("chmod dir"); } try { kubectlExec(["chown", "root:root", configDir]); } catch { lockErrors.push("chown dir"); } } - try { kubectlExec(["chattr", "+i", configPath]); } catch { lockErrors.push("chattr +i"); } + let chattrSupported = true; + try { kubectlExec(["chattr", "+i", configPath]); } catch { chattrSupported = false; lockErrors.push("chattr +i"); } - // Verify the lock took effect + // Verify the lock took effect. + // Mode + ownership are mandatory; immutable bit only checked if chattr succeeded. const issues = []; try { const perms = execFileSync("docker", [ @@ -127,16 +129,18 @@ setTimeout(() => { issues.push("file stat failed"); } - try { - const attrs = execFileSync("docker", [ - "exec", K3S_CONTAINER, - "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", - "lsattr", "-d", configPath, - ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }).toString().trim(); - const [flags] = attrs.split(/\s+/, 1); - if (!flags.includes("i")) issues.push("immutable bit not set"); - } catch { - // lsattr may not be available — skip + if (chattrSupported) { + try { + const attrs = execFileSync("docker", [ + "exec", K3S_CONTAINER, + "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", + "lsattr", "-d", configPath, + ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }).toString().trim(); + const [flags] = attrs.split(/\s+/, 1); + if (!flags.includes("i")) issues.push("immutable bit not set"); + } catch { + // lsattr may not be available — skip + } } if (issues.length > 0 || lockErrors.length > 0) { diff --git a/src/lib/shields.ts b/src/lib/shields.ts index 02934ed77d..150e354d6e 100644 --- a/src/lib/shields.ts +++ b/src/lib/shields.ts @@ -175,14 +175,22 @@ function lockAgentConfig(sandboxName: string, target: { configPath: string; conf try { kubectlExec(sandboxName, ["chown", "root:root", target.configDir]); } catch { errors.push("chown root:root config dir"); } + // chattr +i is defense-in-depth — some filesystems don't support it. + // Track whether it succeeded so verification knows what to expect. + let chattrSupported = true; try { kubectlExec(sandboxName, ["chattr", "+i", target.configPath]); } - catch { errors.push("chattr +i config file"); } + catch { + chattrSupported = false; + errors.push("chattr +i config file"); + } if (errors.length > 0) { console.error(` Some lock operations failed: ${errors.join(", ")}`); } - // Verify the lock actually took effect + // Verify the lock actually took effect. + // Mode + ownership are mandatory; immutable bit is only checked if + // chattr succeeded (filesystem may not support it). const issues: string[] = []; try { const perms = kubectlExecCapture(sandboxName, ["stat", "-c", "%a %U:%G", target.configPath]); @@ -204,12 +212,16 @@ function lockAgentConfig(sandboxName: string, target: { configPath: string; conf issues.push(`dir stat failed: ${msg}`); } - try { - const attrs = kubectlExecCapture(sandboxName, ["lsattr", "-d", target.configPath]); - const [flags] = attrs.trim().split(/\s+/, 1); - if (!flags.includes("i")) issues.push("immutable bit not set"); - } catch { - // lsattr may not be available on all images — skip + if (chattrSupported) { + try { + const attrs = kubectlExecCapture(sandboxName, ["lsattr", "-d", target.configPath]); + const [flags] = attrs.trim().split(/\s+/, 1); + if (!flags.includes("i")) issues.push("immutable bit not set"); + } catch { + // lsattr may not be available on all images — skip + } + } else { + console.error(" Note: Immutable bit (chattr +i) not supported — relying on mode + ownership."); } if (issues.length > 0) { From 499731e3842882c5fa5a4096c7e4620efbd2df23 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Sat, 18 Apr 2026 18:09:42 -0700 Subject: [PATCH 4/7] docs(shields): clarify chattr +i is best-effort on config file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The entrypoint (nemoclaw-start.sh) only sets chattr +i on the .openclaw directory and its symlinks — openclaw.json itself was never immutable via chattr. kubectl exec may also lack CAP_LINUX_IMMUTABLE. Document this throughout the lock/unlock functions and stop reporting chattr failure as a lock error (it was never set, so failing to re-set it is not a regression). The config file's protection comes from Landlock read_only + UNIX permissions (444 root:root). Signed-off-by: Aaron Erickson --- src/lib/shields-timer.ts | 13 +++++++------ src/lib/shields.ts | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/src/lib/shields-timer.ts b/src/lib/shields-timer.ts index 6c0785ce91..84b517ae04 100644 --- a/src/lib/shields-timer.ts +++ b/src/lib/shields-timer.ts @@ -100,7 +100,8 @@ setTimeout(() => { process.exit(1); } - // Re-lock config file (each operation independent) + // Re-lock config file (each operation independent). + // See lockAgentConfig in shields.ts for the full rationale on chattr. let lockVerified = true; if (configPath) { const lockErrors = []; @@ -110,11 +111,11 @@ setTimeout(() => { try { kubectlExec(["chmod", "755", configDir]); } catch { lockErrors.push("chmod dir"); } try { kubectlExec(["chown", "root:root", configDir]); } catch { lockErrors.push("chown dir"); } } - let chattrSupported = true; - try { kubectlExec(["chattr", "+i", configPath]); } catch { chattrSupported = false; lockErrors.push("chattr +i"); } + // Best-effort: config file was never chattr +i'd by entrypoint + let chattrSucceeded = true; + try { kubectlExec(["chattr", "+i", configPath]); } catch { chattrSucceeded = false; } - // Verify the lock took effect. - // Mode + ownership are mandatory; immutable bit only checked if chattr succeeded. + // Verify mode + ownership (mandatory). Immutable bit only if chattr worked. const issues = []; try { const perms = execFileSync("docker", [ @@ -129,7 +130,7 @@ setTimeout(() => { issues.push("file stat failed"); } - if (chattrSupported) { + if (chattrSucceeded) { try { const attrs = execFileSync("docker", [ "exec", K3S_CONTAINER, diff --git a/src/lib/shields.ts b/src/lib/shields.ts index 150e354d6e..0dde7fb319 100644 --- a/src/lib/shields.ts +++ b/src/lib/shields.ts @@ -138,6 +138,13 @@ function killTimer(sandboxName: string): void { // // Sets permissions to sandbox:sandbox 0600/0700, matching what OpenClaw // writes natively (mode 384 = 0o600). This keeps `openclaw doctor` happy. +// +// Note on chattr: The entrypoint (nemoclaw-start.sh) sets chattr +i on the +// .openclaw DIRECTORY and its SYMLINKS, but NOT on openclaw.json itself. +// The chattr -i here is best-effort — it may silently fail if kubectl exec +// lacks CAP_LINUX_IMMUTABLE or if the file was never immutable. That's fine: +// the file becomes writable through the permissive policy (disables Landlock +// read_only) + chown/chmod below. // --------------------------------------------------------------------------- function unlockAgentConfig(sandboxName: string, target: { configPath: string; configDir: string }): void { @@ -158,6 +165,18 @@ function unlockAgentConfig(sandboxName: string, target: { configPath: string; co // Each operation runs independently so a single failure does not skip the // rest. After all attempts, we verify the actual on-disk state and throw // if the config is not properly locked. +// +// The config file's protection comes from three layers: +// 1. Landlock read_only path — kernel-level, restored by policy snapshot +// 2. UNIX permissions — 444 root:root (mandatory, verified here) +// 3. chattr +i immutable bit — defense-in-depth (best-effort) +// +// Layer 3 is best-effort because the entrypoint (nemoclaw-start.sh) only +// sets chattr +i on the .openclaw directory and its symlinks, not on +// openclaw.json itself. kubectl exec may also lack CAP_LINUX_IMMUTABLE. +// The file was never immutable via chattr, so failing to set it is not a +// regression — layers 1+2 are sufficient. We still attempt it in case the +// runtime environment supports it. // --------------------------------------------------------------------------- function lockAgentConfig(sandboxName: string, target: { configPath: string; configDir: string }): void { @@ -175,13 +194,14 @@ function lockAgentConfig(sandboxName: string, target: { configPath: string; conf try { kubectlExec(sandboxName, ["chown", "root:root", target.configDir]); } catch { errors.push("chown root:root config dir"); } - // chattr +i is defense-in-depth — some filesystems don't support it. - // Track whether it succeeded so verification knows what to expect. - let chattrSupported = true; + // Best-effort: the config file was never chattr +i'd by the entrypoint + // (only the directory and symlinks were). kubectl exec may also lack + // CAP_LINUX_IMMUTABLE. Track the result so verification doesn't require + // something that was never there. + let chattrSucceeded = true; try { kubectlExec(sandboxName, ["chattr", "+i", target.configPath]); } catch { - chattrSupported = false; - errors.push("chattr +i config file"); + chattrSucceeded = false; } if (errors.length > 0) { @@ -189,8 +209,8 @@ function lockAgentConfig(sandboxName: string, target: { configPath: string; conf } // Verify the lock actually took effect. - // Mode + ownership are mandatory; immutable bit is only checked if - // chattr succeeded (filesystem may not support it). + // Mode + ownership are mandatory (layers 1+2 depend on them). + // Immutable bit is only verified if chattr succeeded above. const issues: string[] = []; try { const perms = kubectlExecCapture(sandboxName, ["stat", "-c", "%a %U:%G", target.configPath]); @@ -212,7 +232,7 @@ function lockAgentConfig(sandboxName: string, target: { configPath: string; conf issues.push(`dir stat failed: ${msg}`); } - if (chattrSupported) { + if (chattrSucceeded) { try { const attrs = kubectlExecCapture(sandboxName, ["lsattr", "-d", target.configPath]); const [flags] = attrs.trim().split(/\s+/, 1); @@ -220,8 +240,6 @@ function lockAgentConfig(sandboxName: string, target: { configPath: string; conf } catch { // lsattr may not be available on all images — skip } - } else { - console.error(" Note: Immutable bit (chattr +i) not supported — relying on mode + ownership."); } if (issues.length > 0) { From 596f372e09ab59c18c8d48ebeb331fdbb9bf99a0 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Sun, 19 Apr 2026 06:28:58 -0700 Subject: [PATCH 5/7] fix(shields): independent unlock ops, timer dir verification, timer headroom Address CodeRabbit feedback and E2E timing failure: - unlockAgentConfig: run each operation independently so chattr -i failure (best-effort) does not skip chown/chmod - shields-timer: add directory permission verification (755 root:root) to match lockAgentConfig's checks - test-shields-config E2E: increase auto-restore wait from 15s to 25s to account for re-lock + verification overhead added by this PR Signed-off-by: Aaron Erickson --- src/lib/shields-timer.ts | 15 +++++++++++++++ src/lib/shields.ts | 16 ++++++++-------- test/e2e/test-shields-config.sh | 6 +++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/lib/shields-timer.ts b/src/lib/shields-timer.ts index 84b517ae04..4ed6688591 100644 --- a/src/lib/shields-timer.ts +++ b/src/lib/shields-timer.ts @@ -130,6 +130,21 @@ setTimeout(() => { issues.push("file stat failed"); } + if (configDir) { + try { + const dirPerms = execFileSync("docker", [ + "exec", K3S_CONTAINER, + "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", + "stat", "-c", "%a %U:%G", configDir, + ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }).toString().trim(); + const [dirMode, dirOwner] = dirPerms.split(" "); + if (dirMode !== "755") issues.push(`dir mode=${dirMode}`); + if (dirOwner !== "root:root") issues.push(`dir owner=${dirOwner}`); + } catch { + issues.push("dir stat failed"); + } + } + if (chattrSucceeded) { try { const attrs = execFileSync("docker", [ diff --git a/src/lib/shields.ts b/src/lib/shields.ts index 0dde7fb319..d77667719c 100644 --- a/src/lib/shields.ts +++ b/src/lib/shields.ts @@ -148,14 +148,14 @@ function killTimer(sandboxName: string): void { // --------------------------------------------------------------------------- function unlockAgentConfig(sandboxName: string, target: { configPath: string; configDir: string }): void { - try { - kubectlExec(sandboxName, ["chattr", "-i", target.configPath]); - kubectlExec(sandboxName, ["chown", "sandbox:sandbox", target.configPath]); - kubectlExec(sandboxName, ["chmod", "600", target.configPath]); - kubectlExec(sandboxName, ["chown", "sandbox:sandbox", target.configDir]); - kubectlExec(sandboxName, ["chmod", "700", target.configDir]); - } catch { - console.error(" Warning: Could not unlock config file. Config may remain read-only."); + const errors: string[] = []; + try { kubectlExec(sandboxName, ["chattr", "-i", target.configPath]); } catch { errors.push("chattr -i"); } + try { kubectlExec(sandboxName, ["chown", "sandbox:sandbox", target.configPath]); } catch { errors.push("chown config file"); } + try { kubectlExec(sandboxName, ["chmod", "600", target.configPath]); } catch { errors.push("chmod 600 config file"); } + try { kubectlExec(sandboxName, ["chown", "sandbox:sandbox", target.configDir]); } catch { errors.push("chown config dir"); } + try { kubectlExec(sandboxName, ["chmod", "700", target.configDir]); } catch { errors.push("chmod 700 config dir"); } + if (errors.length > 0) { + console.error(` Warning: Some unlock operations failed: ${errors.join(", ")}. Config may remain read-only.`); } } diff --git a/test/e2e/test-shields-config.sh b/test/e2e/test-shields-config.sh index 2bdca235e5..1806456453 100755 --- a/test/e2e/test-shields-config.sh +++ b/test/e2e/test-shields-config.sh @@ -479,8 +479,8 @@ else fail "shields should be DOWN: ${STATUS_TIMER}" fi -info "Waiting 15s for auto-restore..." -sleep 15 +info "Waiting 25s for auto-restore..." +sleep 25 # Check if the timer process restored shields # The timer runs as a detached process — it restores the policy and @@ -493,7 +493,7 @@ else info "Status: ${STATUS_AFTER_TIMER}" # Clean up manually nemoclaw "${SANDBOX_NAME}" shields up 2>/dev/null || true - fail "Auto-restore timer did not restore shields within 15s" + fail "Auto-restore timer did not restore shields within 25s" fi # Verify config is re-locked after auto-restore From 4defd490c0e1437216b2cde8183658d6b4425c4e Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Sun, 19 Apr 2026 07:48:05 -0700 Subject: [PATCH 6/7] fix(e2e): increase timeouts for token-rotation and shields auto-restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - token-rotation: 900s → 2400s. The test does two full onboard cycles (~10 min each) but the 900s timeout killed Phase 2 before the second onboard could complete. The workflow job has 45 min; this aligns the script timeout with the actual workload. - shields auto-restore: 15s → 25s wait. The timer now does re-lock + stat verification which adds overhead beyond the 10s timeout + policy restore. Signed-off-by: Aaron Erickson --- test/e2e/test-token-rotation.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/test-token-rotation.sh b/test/e2e/test-token-rotation.sh index 6e3aef2624..daca2ed1b7 100755 --- a/test/e2e/test-token-rotation.sh +++ b/test/e2e/test-token-rotation.sh @@ -25,7 +25,7 @@ set -uo pipefail if [ -z "${NEMOCLAW_E2E_NO_TIMEOUT:-}" ]; then export NEMOCLAW_E2E_NO_TIMEOUT=1 - TIMEOUT_SECONDS="${NEMOCLAW_E2E_TIMEOUT_SECONDS:-900}" + TIMEOUT_SECONDS="${NEMOCLAW_E2E_TIMEOUT_SECONDS:-2400}" exec timeout -s TERM "$TIMEOUT_SECONDS" "$0" "$@" fi From 6e7ca4b817c0dfcf9751bdf577b9802b6b2e60e0 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Mon, 20 Apr 2026 16:24:28 -0700 Subject: [PATCH 7/7] =?UTF-8?q?fix(shields):=20address=20review=20feedback?= =?UTF-8?q?=20=E2=80=94=20dedup=20lock=20logic,=20fix=20state=20gaps?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Deduplicate: timer now imports lockAgentConfig from shields.ts instead of reimplementing ~60 lines of kubectlExec + stat/lsattr verification inline. Removes the duplicated kubectlExec and K3S_CONTAINER constant from shields-timer.ts. - Fix timer state gap (Blocker #3): the !lockVerified path now explicitly writes updateState({ shieldsDown: true }) before exiting, rather than relying on the absence of an update from shieldsDown(). - Fix rollback state lie (CodeRabbit): shieldsDown rollback no longer marks shields as UP when policy restore or lock verification fails. If either fails, state stays shieldsDown: true with guidance for manual intervention. - Add lsattr format comment (Warning #3) for the flag-parsing line. --- src/lib/shields-timer.ts | 83 +++++++--------------------------------- src/lib/shields.ts | 41 +++++++++++++------- 2 files changed, 41 insertions(+), 83 deletions(-) diff --git a/src/lib/shields-timer.ts b/src/lib/shields-timer.ts index 4ed6688591..ef5ab8c1e9 100644 --- a/src/lib/shields-timer.ts +++ b/src/lib/shields-timer.ts @@ -11,13 +11,12 @@ const fs = require("fs"); const path = require("path"); -const { execFileSync } = require("child_process"); const { run } = require("./runner"); const { buildPolicySetCommand } = require("./policies"); +const { lockAgentConfig } = require("./shields"); const STATE_DIR = path.join(process.env.HOME ?? "/tmp", ".nemoclaw", "state"); const AUDIT_FILE = path.join(STATE_DIR, "shields-audit.jsonl"); -const K3S_CONTAINER = "openshell-cluster-nemoclaw"; const [sandboxName, snapshotPath, restoreAtIso, configPath, configDir] = process.argv.slice(2); const STATE_FILE = path.join(STATE_DIR, `shields-${sandboxName}.json`); @@ -28,14 +27,6 @@ if (!sandboxName || !snapshotPath || !restoreAtIso || isNaN(restoreAtMs)) { process.exit(1); } -function kubectlExec(cmd) { - execFileSync("docker", [ - "exec", K3S_CONTAINER, - "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", - ...cmd, - ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }); -} - function appendAudit(entry) { try { fs.appendFileSync(AUDIT_FILE, JSON.stringify(entry) + "\n", { mode: 0o600 }); @@ -100,74 +91,22 @@ setTimeout(() => { process.exit(1); } - // Re-lock config file (each operation independent). - // See lockAgentConfig in shields.ts for the full rationale on chattr. + // Re-lock config file using the shared lockAgentConfig from shields.ts. + // lockAgentConfig runs each operation independently and verifies the + // on-disk state — it throws if verification fails. let lockVerified = true; if (configPath) { - const lockErrors = []; - try { kubectlExec(["chmod", "444", configPath]); } catch { lockErrors.push("chmod 444"); } - try { kubectlExec(["chown", "root:root", configPath]); } catch { lockErrors.push("chown file"); } - if (configDir) { - try { kubectlExec(["chmod", "755", configDir]); } catch { lockErrors.push("chmod dir"); } - try { kubectlExec(["chown", "root:root", configDir]); } catch { lockErrors.push("chown dir"); } - } - // Best-effort: config file was never chattr +i'd by entrypoint - let chattrSucceeded = true; - try { kubectlExec(["chattr", "+i", configPath]); } catch { chattrSucceeded = false; } - - // Verify mode + ownership (mandatory). Immutable bit only if chattr worked. - const issues = []; try { - const perms = execFileSync("docker", [ - "exec", K3S_CONTAINER, - "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", - "stat", "-c", "%a %U:%G", configPath, - ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }).toString().trim(); - const [mode, owner] = perms.split(" "); - if (!/^4[0-4][0-4]$/.test(mode)) issues.push(`file mode=${mode}`); - if (owner !== "root:root") issues.push(`file owner=${owner}`); - } catch { - issues.push("file stat failed"); - } - - if (configDir) { - try { - const dirPerms = execFileSync("docker", [ - "exec", K3S_CONTAINER, - "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", - "stat", "-c", "%a %U:%G", configDir, - ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }).toString().trim(); - const [dirMode, dirOwner] = dirPerms.split(" "); - if (dirMode !== "755") issues.push(`dir mode=${dirMode}`); - if (dirOwner !== "root:root") issues.push(`dir owner=${dirOwner}`); - } catch { - issues.push("dir stat failed"); - } - } - - if (chattrSucceeded) { - try { - const attrs = execFileSync("docker", [ - "exec", K3S_CONTAINER, - "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--", - "lsattr", "-d", configPath, - ], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 }).toString().trim(); - const [flags] = attrs.split(/\s+/, 1); - if (!flags.includes("i")) issues.push("immutable bit not set"); - } catch { - // lsattr may not be available — skip - } - } - - if (issues.length > 0 || lockErrors.length > 0) { - lockVerified = issues.length === 0; + lockAgentConfig(sandboxName, { configPath, configDir }); + } catch (lockErr) { + lockVerified = false; appendAudit({ action: "shields_auto_restore_lock_warning", sandbox: sandboxName, timestamp: now, restored_by: "auto_timer", - warning: `Lock issues: ${[...lockErrors, ...issues].join(", ")}`, - lock_verified: lockVerified, + warning: lockErr?.message ?? String(lockErr), + lock_verified: false, }); } } @@ -190,6 +129,10 @@ setTimeout(() => { policy_snapshot: snapshotPath, }); } else { + // Explicitly ensure state reflects shields are still DOWN. + // shieldsDown() already wrote shieldsDown: true, but be explicit + // rather than relying on the absence of an update. + updateState({ shieldsDown: true }); appendAudit({ action: "shields_up_failed", sandbox: sandboxName, diff --git a/src/lib/shields.ts b/src/lib/shields.ts index d77667719c..7b7bab1270 100644 --- a/src/lib/shields.ts +++ b/src/lib/shields.ts @@ -235,6 +235,8 @@ function lockAgentConfig(sandboxName: string, target: { configPath: string; conf if (chattrSucceeded) { try { const attrs = kubectlExecCapture(sandboxName, ["lsattr", "-d", target.configPath]); + // lsattr format: "----i---------e----- /path/to/file" + // First whitespace-delimited token is the flags field. const [flags] = attrs.trim().split(/\s+/, 1); if (!flags.includes("i")) issues.push("immutable bit not set"); } catch { @@ -376,20 +378,32 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void { const message = err instanceof Error ? err.message : String(err); console.error(` Cannot start auto-restore timer: ${message}`); console.error(" Rolling back — restoring policy from snapshot..."); - run(buildPolicySetCommand(snapshotPath, sandboxName), { ignoreError: true }); - try { - lockAgentConfig(sandboxName, target); - } catch { - console.error(" Warning: Rollback re-lock could not be verified. Check config manually."); + const rollbackResult = run(buildPolicySetCommand(snapshotPath, sandboxName), { ignoreError: true }); + let rollbackLocked = false; + if (rollbackResult.status === 0) { + try { + lockAgentConfig(sandboxName, target); + rollbackLocked = true; + } catch { + console.error(" Warning: Rollback re-lock could not be verified. Check config manually."); + } + } else { + console.error(" Warning: Policy restore failed during rollback."); + } + if (rollbackLocked) { + saveShieldsState(sandboxName, { + shieldsDown: false, + shieldsDownAt: null, + shieldsDownTimeout: null, + shieldsDownReason: null, + shieldsDownPolicy: null, + }); + console.error(" Shields restored to UP. The sandbox was never left unguarded."); + } else { + // Leave state as shieldsDown: true — don't lie about protection level + console.error(" Shields remain DOWN — manual intervention required."); + console.error(` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`); } - saveShieldsState(sandboxName, { - shieldsDown: false, - shieldsDownAt: null, - shieldsDownTimeout: null, - shieldsDownReason: null, - shieldsDownPolicy: null, - }); - console.error(" Shields restored to UP. The sandbox was never left unguarded."); process.exit(1); } @@ -640,6 +654,7 @@ export { shieldsStatus, isShieldsDown, parseDuration, + lockAgentConfig, MAX_TIMEOUT_SECONDS, DEFAULT_TIMEOUT_SECONDS, };