diff --git a/src/lib/onboard-session.test.ts b/src/lib/onboard-session.test.ts
index d72ad02367..75e75f7a3b 100644
--- a/src/lib/onboard-session.test.ts
+++ b/src/lib/onboard-session.test.ts
@@ -173,6 +173,88 @@ describe("onboard session", () => {
     expect(written.pid).toBe(process.pid);
   });
 
+  it("regression #1281: stale-cleanup race does not unlink a fresh lock claimed by another process", () => {
+    // Reproduces the race: the lock file we read as 'stale' gets replaced
+    // with a fresh claim from a faster concurrent process between our
+    // read and our unlink. The slower process must NOT unlink the fresh
+    // lock, otherwise both processes end up thinking they hold the lock.
+    fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true });
+
+    // 1. Lay down a stale lock from a dead PID (PID 999999 on the test box).
+    const staleLock = JSON.stringify({
+      pid: 999999,
+      startedAt: "2026-03-25T00:00:00.000Z",
+      command: "nemoclaw onboard",
+    });
+    fs.writeFileSync(session.LOCK_FILE, staleLock, { mode: 0o600 });
+
+    // 2. Wrap fs.statSync so the swap happens just before stat #2:
+    //    - stat #1 (inside acquireOnboardLock): reads the stale inode
+    //      and returns it unmodified. readFileSync then reads the
+    //      ORIGINAL stale lock (dead PID 999999), isProcessAlive
+    //      returns false, and acquireOnboardLock enters the stale-
+    //      cleanup path calling unlinkIfInodeMatches.
+    //    - stat #2 (inside unlinkIfInodeMatches): BEFORE the actual
+    //      stat, swap the file for a fresh claim. stat #2 then sees
+    //      a different inode → must skip the unlink.
+    //
+    //    CodeRabbit correctly flagged the original test: swapping on
+    //    stat #1 caused readFileSync to see the live PID and exit
+    //    via isProcessAlive, never reaching unlinkIfInodeMatches.
+    let statCallCount = 0;
+    const originalStatSync = fs.statSync;
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (fs as any).statSync = function (...args: unknown[]) {
+      statCallCount += 1;
+      // Just before stat #2 (inside unlinkIfInodeMatches), simulate
+      // the race: a concurrent fast process unlinks the stale lock
+      // and writes a fresh claim. stat #2 then sees a new inode.
+      if (statCallCount === 2) {
+        // Write the fresh claim to a temp file first, then rename over
+        // the stale lock. This guarantees a different inode even on
+        // tmpfs/overlayfs which can reuse inodes after unlink+recreate.
+        const tmpClaim = session.LOCK_FILE + ".race-tmp";
+        fs.writeFileSync(
+          tmpClaim,
+          JSON.stringify({
+            pid: process.ppid,
+            startedAt: new Date().toISOString(),
+            command: "nemoclaw onboard (fresh claim from concurrent process)",
+          }),
+          { mode: 0o600 },
+        );
+        fs.renameSync(tmpClaim, session.LOCK_FILE);
+      }
+      return (originalStatSync as unknown as (...a: unknown[]) => unknown).apply(fs, args);
+    };
+
+    try {
+      // The acquire call will see EEXIST (stale lock present), stat it,
+      // then the swap happens, then the second stat (inside the cleanup
+      // helper) sees a different inode → must NOT unlink.
+      const result = session.acquireOnboardLock("nemoclaw onboard --resume");
+      // The fresh lock that the simulated concurrent process wrote
+      // should still be on disk after acquireOnboardLock returns.
+      expect(fs.existsSync(session.LOCK_FILE)).toBe(true);
+      const onDisk = JSON.parse(fs.readFileSync(session.LOCK_FILE, "utf8"));
+      // The lock content should be the fresh claim, NOT the stale one
+      // and NOT a new one written by acquireOnboardLock after a wrong
+      // unlink.
+      expect(onDisk.command).toContain("fresh claim from concurrent process");
+      // The fresh claim is held by a different live PID (process.ppid),
+      // so acquireOnboardLock MUST report acquisition failure and
+      // surface that pid as the holder. This is the mutual-exclusion
+      // loser path — without it, the regression would only verify the
+      // fresh file survived, not that the contender correctly stood
+      // down.
+      expect(result.acquired).toBe(false);
+      expect(result.holderPid).toBe(process.ppid);
+    } finally {
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      (fs as any).statSync = originalStatSync;
+    }
+  });
+
   it("treats unreadable or transient lock contents as a retry, not a stale lock", () => {
     fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true });
     fs.writeFileSync(session.LOCK_FILE, "{not-json", { mode: 0o600 });
diff --git a/src/lib/onboard-session.ts b/src/lib/onboard-session.ts
index 1f470bb6a3..ef2cfbb28d 100644
--- a/src/lib/onboard-session.ts
+++ b/src/lib/onboard-session.ts
@@ -334,6 +334,13 @@ function isProcessAlive(pid: number): boolean {
   }
 }
 
+// File descriptor we hold across the lifetime of an acquired lock. On
+// release, fstat(fd).ino vs stat(path).ino confirms the on-disk path
+// still resolves to the file we created — closing the residual TOCTOU
+// window in the inode-only check by tying ownership to a live
+// descriptor rather than a value re-read from disk. See #1281.
+let heldLockFd: number | null = null;
+
 export function acquireOnboardLock(command: string | null = null): LockResult {
   ensureSessionDir();
   const payload = JSON.stringify(
@@ -346,17 +353,38 @@ export function acquireOnboardLock(command: string | null = null): LockResult {
     2,
   );
 
-  for (let attempt = 0; attempt < 2; attempt++) {
+  // The retry budget here used to be 2, which is the bare minimum needed
+  // for "see-stale → cleanup → reclaim". With the inode-verified cleanup
+  // below it can take a few additional spins under contention because
+  // multiple concurrent stale-cleaners can race and lose to each other
+  // before one reclaims, so give the loop a little more room.
+  // See issue #1281.
+  const MAX_ATTEMPTS = 5;
+
+  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
+    let fd: number;
     try {
-      fs.writeFileSync(LOCK_FILE, payload, { flag: "wx", mode: 0o600 });
-      return { acquired: true, lockFile: LOCK_FILE, stale: false };
+      // openSync(..., "wx", mode) is the atomic create-or-fail
+      // primitive. We hold the resulting fd at module scope so
+      // releaseOnboardLock() can later confirm the on-disk path still
+      // resolves to the same file we created (fstat ino vs stat ino).
+      fd = fs.openSync(LOCK_FILE, "wx", 0o600);
     } catch (error: unknown) {
       if ((error as NodeJS.ErrnoException)?.code !== "EEXIST") {
         throw error;
       }
 
+      // Capture both the parsed lock and the inode so we can verify the
+      // file we're about to unlink is STILL the same stale file we read.
+      // Without the inode check, two concurrent processes can both read
+      // the same stale lock, and the slower one will unlink the fresh
+      // lock the faster one just claimed, breaking mutual exclusion.
+      // See issue #1281.
       let existing: LockInfo | null;
+      let staleInode: bigint | null;
       try {
+        const stat = fs.statSync(LOCK_FILE, { bigint: true });
+        staleInode = stat.ino;
         existing = parseLockFile(fs.readFileSync(LOCK_FILE, "utf8"));
       } catch (readError: unknown) {
         if ((readError as NodeJS.ErrnoException)?.code === "ENOENT") {
@@ -365,9 +393,12 @@ export function acquireOnboardLock(command: string | null = null): LockResult {
         throw readError;
       }
       if (!existing) {
+        // Malformed lock file — leave it on disk (a human or another
+        // process may be mid-write) and retry. Pre-#1281 behavior
+        // preserved: never unlink a malformed lock automatically.
         continue;
       }
-      if (existing && isProcessAlive(existing.pid)) {
+      if (isProcessAlive(existing.pid)) {
         return {
           acquired: false,
           lockFile: LOCK_FILE,
@@ -378,20 +409,109 @@ export function acquireOnboardLock(command: string | null = null): LockResult {
         };
       }
 
-      try {
-        fs.unlinkSync(LOCK_FILE);
-      } catch (unlinkError: unknown) {
-        if ((unlinkError as NodeJS.ErrnoException)?.code !== "ENOENT") {
-          throw unlinkError;
-        }
-      }
+      // Stale: unlink ONLY if the file on disk is still the same inode
+      // we just read. If a concurrent process already cleaned up and
+      // claimed the lock, the inode will have changed and we'll fall
+      // through to the next iteration where openSync(wx) will either
+      // succeed (we win) or fail EEXIST against the new holder (and we
+      // re-read it).
+      unlinkIfInodeMatches(LOCK_FILE, staleInode);
+      continue;
+    }
+
+    // Atomic create succeeded — write the payload and keep the fd open
+    // for the lifetime of the lock so releaseOnboardLock() can verify
+    // ownership via the live descriptor.
+    try {
+      fs.writeSync(fd, payload);
+    } catch (writeError) {
+      try { fs.closeSync(fd); } catch { /* ignore */ }
+      try { fs.unlinkSync(LOCK_FILE); } catch { /* ignore */ }
+      throw writeError;
     }
+    heldLockFd = fd;
+    return { acquired: true, lockFile: LOCK_FILE, stale: false };
   }
 
   return { acquired: false, lockFile: LOCK_FILE, stale: true };
 }
 
+/**
+ * Unlink LOCK_FILE only if its current inode equals `expectedInode`.
+ * The dual stat-then-unlink is the only portable POSIX primitive Node
+ * exposes for this — there's no atomic "unlink-if-inode" syscall — so
+ * a sufficiently unlucky race can still slip through. The window is
+ * orders of magnitude smaller than the unconditional unlink it
+ * replaces, and the outer loop will detect a wrong unlink on its next
+ * `writeFileSync(wx)` attempt because either we re-create the file
+ * or we observe the new lock with a different inode.
+ */
+function unlinkIfInodeMatches(filePath: string, expectedInode: bigint | null): void {
+  if (expectedInode === null) {
+    return;
+  }
+  try {
+    const stat = fs.statSync(filePath, { bigint: true });
+    if (stat.ino !== expectedInode) {
+      // Someone else replaced the file. Leave it alone.
+      return;
+    }
+  } catch (statError: unknown) {
+    if ((statError as NodeJS.ErrnoException)?.code === "ENOENT") {
+      return;
+    }
+    throw statError;
+  }
+  try {
+    fs.unlinkSync(filePath);
+  } catch (unlinkError: unknown) {
+    if ((unlinkError as NodeJS.ErrnoException)?.code !== "ENOENT") {
+      throw unlinkError;
+    }
+  }
+}
+
 export function releaseOnboardLock(): void {
+  // Preferred path: we hold the fd from a successful acquireOnboardLock.
+  // Verify the on-disk path still resolves to the same file (fstat ino
+  // == stat ino) before unlinking. If they disagree, another process
+  // has already replaced the lock and we must NOT touch their file.
+  if (heldLockFd !== null) {
+    const fd = heldLockFd;
+    heldLockFd = null;
+    try {
+      const fdStat = fs.fstatSync(fd, { bigint: true });
+      let pathInode: bigint | null = null;
+      try {
+        const pathStat = fs.statSync(LOCK_FILE, { bigint: true });
+        pathInode = pathStat.ino;
+      } catch (error: unknown) {
+        if ((error as NodeJS.ErrnoException)?.code !== "ENOENT") {
+          // Unexpected — fall through to closing the fd.
+        }
+      }
+      if (pathInode !== null && pathInode === fdStat.ino) {
+        try {
+          fs.unlinkSync(LOCK_FILE);
+        } catch (unlinkError: unknown) {
+          if ((unlinkError as NodeJS.ErrnoException)?.code !== "ENOENT") {
+            // Best effort — surfacing this would mask the real error.
+          }
+        }
+      }
+    } catch {
+      // fstat can fail if the fd was already closed somehow; nothing
+      // safe to do beyond closing it below.
+    } finally {
+      try { fs.closeSync(fd); } catch { /* ignore */ }
+    }
+    return;
+  }
+
+  // Fallback (no fd held — e.g., a test wrote the lock file directly,
+  // or a previous release already ran): preserve the legacy pid-based
+  // behavior so we never unlink a malformed lock and never unlink a
+  // lock owned by another pid.
   try {
     if (!fs.existsSync(LOCK_FILE)) return;
     let existing: LockInfo | null = null;