Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
634e682
fix(shields): verify config lock and fail hard on re-lock failure
ericksoa Apr 18, 2026
eec9189
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 18, 2026
537a717
fix(shields): parse lsattr flags field and verify timer re-lock
ericksoa Apr 18, 2026
3630270
fix(shields): treat immutable bit as best-effort on unsupported fs
ericksoa Apr 19, 2026
499731e
docs(shields): clarify chattr +i is best-effort on config file
ericksoa Apr 19, 2026
596f372
fix(shields): independent unlock ops, timer dir verification, timer h…
ericksoa Apr 19, 2026
11aa705
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 19, 2026
4defd49
fix(e2e): increase timeouts for token-rotation and shields auto-restore
ericksoa Apr 19, 2026
e3b7cc9
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 19, 2026
c43a37d
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 20, 2026
e963103
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 20, 2026
e445f42
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 20, 2026
893e7a6
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 20, 2026
6e7ca4b
fix(shields): address review feedback — dedup lock logic, fix state gaps
ericksoa Apr 20, 2026
81582d1
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 21, 2026
b8f2695
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 21, 2026
51b9e51
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 21, 2026
b044d86
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 21, 2026
41646f5
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 21, 2026
753c7ca
Merge branch 'main' into fix/shields-relock-verification
ericksoa Apr 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 55 additions & 23 deletions src/lib/shields-timer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,18 @@
// restores the captured policy snapshot.
//
// Usage (internal — called by shields.ts via fork()):
// node shields-timer.js <sandbox-name> <snapshot-path> <restore-at-iso>
// node shields-timer.js <sandbox-name> <snapshot-path> <restore-at-iso> <config-path> <config-dir>

const fs = require("fs");
const path = require("path");
const { run } = require("./runner");
const { buildPolicySetCommand } = require("./policies");
const { lockAgentConfig } = require("./shields");

const STATE_DIR = path.join(process.env.HOME ?? "/tmp", ".nemoclaw", "state");
const AUDIT_FILE = path.join(STATE_DIR, "shields-audit.jsonl");

const [sandboxName, snapshotPath, restoreAtIso] = process.argv.slice(2);
const [sandboxName, snapshotPath, restoreAtIso, configPath, configDir] = process.argv.slice(2);
const STATE_FILE = path.join(STATE_DIR, `shields-${sandboxName}.json`);
const restoreAtMs = new Date(restoreAtIso).getTime();
const delayMs = Math.max(0, restoreAtMs - Date.now());
Expand Down Expand Up @@ -75,23 +76,10 @@ setTimeout(() => {
process.exit(1);
}

// Update state first — policy restore can take seconds and callers
// check state to determine whether shields are up or down.
updateState({
shieldsDown: false,
shieldsDownAt: null,
shieldsDownTimeout: null,
shieldsDownReason: null,
shieldsDownPolicy: null,
});

// Restore policy (slow — openshell policy set --wait blocks)
const result = run(buildPolicySetCommand(snapshotPath, sandboxName), { ignoreError: true });

if (result.status !== 0) {
// Policy restore failed — revert state so callers know shields are
// still down. The sandbox remains relaxed.
updateState({ shieldsDown: true });
appendAudit({
action: "shields_up_failed",
sandbox: sandboxName,
Expand All @@ -103,14 +91,58 @@ setTimeout(() => {
process.exit(1);
}

// Audit
appendAudit({
action: "shields_auto_restore",
sandbox: sandboxName,
timestamp: now,
restored_by: "auto_timer",
policy_snapshot: snapshotPath,
});
// Re-lock config file using the shared lockAgentConfig from shields.ts.
// lockAgentConfig runs each operation independently and verifies the
// on-disk state — it throws if verification fails.
let lockVerified = true;
if (configPath) {
try {
lockAgentConfig(sandboxName, { configPath, configDir });
} catch (lockErr) {
lockVerified = false;
appendAudit({
action: "shields_auto_restore_lock_warning",
sandbox: sandboxName,
timestamp: now,
restored_by: "auto_timer",
warning: lockErr?.message ?? String(lockErr),
lock_verified: false,
});
}
}

// Only mark shields as UP if the lock was verified (or no config path)
if (lockVerified) {
updateState({
shieldsDown: false,
shieldsDownAt: null,
shieldsDownTimeout: null,
shieldsDownReason: null,
shieldsDownPolicy: null,
});

appendAudit({
action: "shields_auto_restore",
sandbox: sandboxName,
timestamp: now,
restored_by: "auto_timer",
policy_snapshot: snapshotPath,
});
} else {
// Explicitly ensure state reflects shields are still DOWN.
// shieldsDown() already wrote shieldsDown: true, but be explicit
// rather than relying on the absence of an update.
updateState({ shieldsDown: true });
appendAudit({
action: "shields_up_failed",
sandbox: sandboxName,
timestamp: now,
restored_by: "auto_timer",
error: "Config re-lock verification failed — shields remain DOWN",
});
cleanupMarker();
process.exit(1);
}
} catch (err) {
appendAudit({
action: "shields_up_failed",
Expand Down
178 changes: 148 additions & 30 deletions src/lib/shields.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ function kubectlExec(sandboxName: string, cmd: string[]): void {
], { stdio: ["ignore", "pipe", "pipe"], timeout: 15000 });
}

// Run a command inside the sandbox's agent container (via the k3s docker
// container) and return its trimmed stdout. Mirrors kubectlExec, which
// discards output; this variant is used by verification code that needs
// to inspect stat/lsattr results.
function kubectlExecCapture(sandboxName: string, cmd: string[]): string {
  const dockerArgs = [
    "exec",
    K3S_CONTAINER,
    "kubectl", "exec", "-n", "openshell", sandboxName, "-c", "agent", "--",
    ...cmd,
  ];
  const output = execFileSync("docker", dockerArgs, {
    stdio: ["ignore", "pipe", "pipe"],
    timeout: 15000,
  });
  return output.toString().trim();
}

// Re-export for tests and external consumers
const MAX_TIMEOUT_SECONDS = MAX_SECONDS;
const DEFAULT_TIMEOUT_SECONDS = DEFAULT_SECONDS;
Expand Down Expand Up @@ -130,17 +138,114 @@ function killTimer(sandboxName: string): void {
//
// Sets permissions to sandbox:sandbox 0600/0700, matching what OpenClaw
// writes natively (mode 384 = 0o600). This keeps `openclaw doctor` happy.
//
// Note on chattr: The entrypoint (nemoclaw-start.sh) sets chattr +i on the
// .openclaw DIRECTORY and its SYMLINKS, but NOT on openclaw.json itself.
// The chattr -i here is best-effort — it may silently fail if kubectl exec
// lacks CAP_LINUX_IMMUTABLE or if the file was never immutable. That's fine:
// the file becomes writable through the permissive policy (disables Landlock
// read_only) + chown/chmod below.
Comment thread
coderabbitai[bot] marked this conversation as resolved.
// ---------------------------------------------------------------------------

// Unlock the agent config so the sandbox user can edit it while shields
// are down: clear the (best-effort) immutable bit, then hand ownership
// back to sandbox:sandbox with 0600/0700 — the permissions OpenClaw
// writes natively, which keeps `openclaw doctor` happy.
//
// Each operation runs independently so a single failure does not skip
// the rest; failures are collected and reported once as a warning.
function unlockAgentConfig(sandboxName: string, target: { configPath: string; configDir: string }): void {
  // Declarative op table: label (for the warning message) + kubectl exec argv.
  const ops: Array<{ label: string; cmd: string[] }> = [
    { label: "chattr -i", cmd: ["chattr", "-i", target.configPath] },
    { label: "chown config file", cmd: ["chown", "sandbox:sandbox", target.configPath] },
    { label: "chmod 600 config file", cmd: ["chmod", "600", target.configPath] },
    { label: "chown config dir", cmd: ["chown", "sandbox:sandbox", target.configDir] },
    { label: "chmod 700 config dir", cmd: ["chmod", "700", target.configDir] },
  ];
  const errors: string[] = [];
  for (const { label, cmd } of ops) {
    try {
      kubectlExec(sandboxName, cmd);
    } catch {
      errors.push(label);
    }
  }
  if (errors.length > 0) {
    console.error(` Warning: Some unlock operations failed: ${errors.join(", ")}. Config may remain read-only.`);
  }
}

// ---------------------------------------------------------------------------
// Config lock — shared between shields-up, auto-restore timer, and rollback
//
// Each operation runs independently so a single failure does not skip the
// rest. After all attempts, we verify the actual on-disk state and throw
// if the config is not properly locked.
//
// The config file's protection comes from three layers:
// 1. Landlock read_only path — kernel-level, restored by policy snapshot
// 2. UNIX permissions — 444 root:root (mandatory, verified here)
// 3. chattr +i immutable bit — defense-in-depth (best-effort)
//
// Layer 3 is best-effort because the entrypoint (nemoclaw-start.sh) only
// sets chattr +i on the .openclaw directory and its symlinks, not on
// openclaw.json itself. kubectl exec may also lack CAP_LINUX_IMMUTABLE.
// The file was never immutable via chattr, so failing to set it is not a
// regression — layers 1+2 are sufficient. We still attempt it in case the
// runtime environment supports it.
// ---------------------------------------------------------------------------
Comment thread
coderabbitai[bot] marked this conversation as resolved.

function lockAgentConfig(sandboxName: string, target: { configPath: string; configDir: string }): void {
const errors: string[] = [];

try { kubectlExec(sandboxName, ["chmod", "444", target.configPath]); }
catch { errors.push("chmod 444 config file"); }

try { kubectlExec(sandboxName, ["chown", "root:root", target.configPath]); }
catch { errors.push("chown root:root config file"); }

try { kubectlExec(sandboxName, ["chmod", "755", target.configDir]); }
catch { errors.push("chmod 755 config dir"); }

try { kubectlExec(sandboxName, ["chown", "root:root", target.configDir]); }
catch { errors.push("chown root:root config dir"); }

// Best-effort: the config file was never chattr +i'd by the entrypoint
// (only the directory and symlinks were). kubectl exec may also lack
// CAP_LINUX_IMMUTABLE. Track the result so verification doesn't require
// something that was never there.
let chattrSucceeded = true;
try { kubectlExec(sandboxName, ["chattr", "+i", target.configPath]); }
catch {
chattrSucceeded = false;
}

if (errors.length > 0) {
console.error(` Some lock operations failed: ${errors.join(", ")}`);
}

// Verify the lock actually took effect.
// Mode + ownership are mandatory (layers 1+2 depend on them).
// Immutable bit is only verified if chattr succeeded above.
const issues: string[] = [];
try {
kubectlExec(sandboxName, ["chattr", "-i", target.configPath]);
kubectlExec(sandboxName, ["chown", "sandbox:sandbox", target.configPath]);
kubectlExec(sandboxName, ["chmod", "600", target.configPath]);
kubectlExec(sandboxName, ["chown", "sandbox:sandbox", target.configDir]);
kubectlExec(sandboxName, ["chmod", "700", target.configDir]);
} catch {
console.error(" Warning: Could not unlock config file. Config may remain read-only.");
const perms = kubectlExecCapture(sandboxName, ["stat", "-c", "%a %U:%G", target.configPath]);
const [mode, owner] = perms.split(" ");
if (!/^4[0-4][0-4]$/.test(mode)) issues.push(`file mode=${mode} (expected 444)`);
if (owner !== "root:root") issues.push(`file owner=${owner} (expected root:root)`);
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
issues.push(`file stat failed: ${msg}`);
}

try {
const dirPerms = kubectlExecCapture(sandboxName, ["stat", "-c", "%a %U:%G", target.configDir]);
const [dirMode, dirOwner] = dirPerms.split(" ");
if (dirMode !== "755") issues.push(`dir mode=${dirMode} (expected 755)`);
if (dirOwner !== "root:root") issues.push(`dir owner=${dirOwner} (expected root:root)`);
} catch (err: unknown) {
const msg = err instanceof Error ? err.message : String(err);
issues.push(`dir stat failed: ${msg}`);
}

if (chattrSucceeded) {
try {
const attrs = kubectlExecCapture(sandboxName, ["lsattr", "-d", target.configPath]);
// lsattr format: "----i---------e----- /path/to/file"
// First whitespace-delimited token is the flags field.
const [flags] = attrs.trim().split(/\s+/, 1);
if (!flags.includes("i")) issues.push("immutable bit not set");
} catch {
// lsattr may not be available on all images — skip
}
}

if (issues.length > 0) {
throw new Error(`Config not locked: ${issues.join(", ")}`);
}
}

Expand Down Expand Up @@ -250,7 +355,7 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
const actualScript = fs.existsSync(timerScriptJs) ? timerScriptJs : timerScript;

try {
const child = fork(actualScript, [sandboxName, snapshotPath, restoreAt.toISOString()], {
const child = fork(actualScript, [sandboxName, snapshotPath, restoreAt.toISOString(), target.configPath, target.configDir], {
detached: true,
stdio: ["ignore", "ignore", "ignore", "ipc"],
});
Expand All @@ -273,22 +378,32 @@ function shieldsDown(sandboxName: string, opts: ShieldsDownOpts = {}): void {
const message = err instanceof Error ? err.message : String(err);
console.error(` Cannot start auto-restore timer: ${message}`);
console.error(" Rolling back — restoring policy from snapshot...");
try {
run(buildPolicySetCommand(snapshotPath, sandboxName), { ignoreError: true });
kubectlExec(sandboxName, ["chmod", "444", target.configPath]);
kubectlExec(sandboxName, ["chown", "root:root", target.configPath]);
kubectlExec(sandboxName, ["chattr", "+i", target.configPath]);
} catch {
// Best effort rollback
const rollbackResult = run(buildPolicySetCommand(snapshotPath, sandboxName), { ignoreError: true });
let rollbackLocked = false;
if (rollbackResult.status === 0) {
try {
lockAgentConfig(sandboxName, target);
rollbackLocked = true;
} catch {
console.error(" Warning: Rollback re-lock could not be verified. Check config manually.");
}
} else {
console.error(" Warning: Policy restore failed during rollback.");
}
if (rollbackLocked) {
saveShieldsState(sandboxName, {
shieldsDown: false,
shieldsDownAt: null,
shieldsDownTimeout: null,
shieldsDownReason: null,
shieldsDownPolicy: null,
});
console.error(" Shields restored to UP. The sandbox was never left unguarded.");
} else {
// Leave state as shieldsDown: true — don't lie about protection level
console.error(" Shields remain DOWN — manual intervention required.");
console.error(` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`);
}
saveShieldsState(sandboxName, {
shieldsDown: false,
shieldsDownAt: null,
shieldsDownTimeout: null,
shieldsDownReason: null,
shieldsDownPolicy: null,
});
console.error(" Shields restored to UP. The sandbox was never left unguarded.");
process.exit(1);
}

Expand Down Expand Up @@ -351,16 +466,18 @@ function shieldsUp(sandboxName: string): void {
// 2b. Re-lock config file to read-only.
// Restore the Dockerfile's original permissions and immutable bit.
// Uses kubectl exec to bypass Landlock (same as shields down).
// Each operation runs independently and the result is verified.
// If verification fails, shields remain DOWN — we do not lie about state.
const target = resolveAgentConfig(sandboxName);
console.log(` Locking ${target.agentName} config (${target.configPath})...`);
try {
kubectlExec(sandboxName, ["chmod", "444", target.configPath]);
kubectlExec(sandboxName, ["chown", "root:root", target.configPath]);
kubectlExec(sandboxName, ["chmod", "755", target.configDir]);
kubectlExec(sandboxName, ["chown", "root:root", target.configDir]);
kubectlExec(sandboxName, ["chattr", "+i", target.configPath]);
} catch {
console.error(" Warning: Could not re-lock config file.");
lockAgentConfig(sandboxName, target);
} catch (err: unknown) {
const message = err instanceof Error ? err.message : String(err);
console.error(` ERROR: ${message}`);
console.error(" Shields remain DOWN — manual intervention required.");
console.error(` Re-lock manually via kubectl exec, then run: nemoclaw ${sandboxName} shields up`);
process.exit(1);
}

// 3. Calculate duration
Expand Down Expand Up @@ -537,6 +654,7 @@ export {
shieldsStatus,
isShieldsDown,
parseDuration,
lockAgentConfig,
MAX_TIMEOUT_SECONDS,
DEFAULT_TIMEOUT_SECONDS,
};
6 changes: 3 additions & 3 deletions test/e2e/test-shields-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -485,8 +485,8 @@ else
fail "shields should be DOWN: ${STATUS_TIMER}"
fi

info "Waiting 15s for auto-restore..."
sleep 15
info "Waiting 25s for auto-restore..."
sleep 25

# Check if the timer process restored shields
# The timer runs as a detached process — it restores the policy and
Expand All @@ -499,7 +499,7 @@ else
info "Status: ${STATUS_AFTER_TIMER}"
# Clean up manually
nemoclaw "${SANDBOX_NAME}" shields up 2>/dev/null || true
fail "Auto-restore timer did not restore shields within 15s"
fail "Auto-restore timer did not restore shields within 25s"
fi

# Verify config is re-locked after auto-restore
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/test-token-rotation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ set -uo pipefail

if [ -z "${NEMOCLAW_E2E_NO_TIMEOUT:-}" ]; then
export NEMOCLAW_E2E_NO_TIMEOUT=1
TIMEOUT_SECONDS="${NEMOCLAW_E2E_TIMEOUT_SECONDS:-900}"
TIMEOUT_SECONDS="${NEMOCLAW_E2E_TIMEOUT_SECONDS:-2400}"
exec timeout -s TERM "$TIMEOUT_SECONDS" "$0" "$@"
fi

Expand Down
Loading