diff --git a/src/lib/onboard-session.test.ts b/src/lib/onboard-session.test.ts index d72ad02367..75e75f7a3b 100644 --- a/src/lib/onboard-session.test.ts +++ b/src/lib/onboard-session.test.ts @@ -173,6 +173,88 @@ describe("onboard session", () => { expect(written.pid).toBe(process.pid); }); + it("regression #1281: stale-cleanup race does not unlink a fresh lock claimed by another process", () => { + // Reproduces the race: the lock file we read as 'stale' gets replaced + // with a fresh claim from a faster concurrent process between our + // read and our unlink. The slower process must NOT unlink the fresh + // lock, otherwise both processes end up thinking they hold the lock. + fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true }); + + // 1. Lay down a stale lock from a dead PID (PID 999999 on the test box). + const staleLock = JSON.stringify({ + pid: 999999, + startedAt: "2026-03-25T00:00:00.000Z", + command: "nemoclaw onboard", + }); + fs.writeFileSync(session.LOCK_FILE, staleLock, { mode: 0o600 }); + + // 2. Wrap fs.statSync so the swap happens just before stat #2: + // - stat #1 (inside acquireOnboardLock): reads the stale inode + // and returns it unmodified. readFileSync then reads the + // ORIGINAL stale lock (dead PID 999999), isProcessAlive + // returns false, and acquireOnboardLock enters the stale- + // cleanup path calling unlinkIfInodeMatches. + // - stat #2 (inside unlinkIfInodeMatches): BEFORE the actual + // stat, swap the file for a fresh claim. stat #2 then sees + // a different inode → must skip the unlink. + // + // CodeRabbit correctly flagged the original test: swapping on + // stat #1 caused readFileSync to see the live PID and exit + // via isProcessAlive, never reaching unlinkIfInodeMatches. + let statCallCount = 0; + const originalStatSync = fs.statSync; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (fs as any).statSync = function (...args: unknown[]) { + statCallCount += 1; + // Just before stat #2 (inside unlinkIfInodeMatches), simulate + // the race: a concurrent fast process unlinks the stale lock + // and writes a fresh claim. stat #2 then sees a new inode. + if (statCallCount === 2) { + // Write the fresh claim to a temp file first, then rename over + // the stale lock. This guarantees a different inode even on + // tmpfs/overlayfs which can reuse inodes after unlink+recreate. + const tmpClaim = session.LOCK_FILE + ".race-tmp"; + fs.writeFileSync( + tmpClaim, + JSON.stringify({ + pid: process.ppid, + startedAt: new Date().toISOString(), + command: "nemoclaw onboard (fresh claim from concurrent process)", + }), + { mode: 0o600 }, + ); + fs.renameSync(tmpClaim, session.LOCK_FILE); + } + return (originalStatSync as unknown as (...a: unknown[]) => unknown).apply(fs, args); + }; + + try { + // The acquire call will see EEXIST (stale lock present), stat it, + // then the swap happens, then the second stat (inside the cleanup + // helper) sees a different inode → must NOT unlink. + const result = session.acquireOnboardLock("nemoclaw onboard --resume"); + // The fresh lock that the simulated concurrent process wrote + // should still be on disk after acquireOnboardLock returns. + expect(fs.existsSync(session.LOCK_FILE)).toBe(true); + const onDisk = JSON.parse(fs.readFileSync(session.LOCK_FILE, "utf8")); + // The lock content should be the fresh claim, NOT the stale one + // and NOT a new one written by acquireOnboardLock after a wrong + // unlink. + expect(onDisk.command).toContain("fresh claim from concurrent process"); + // The fresh claim is held by a different live PID (process.ppid), + // so acquireOnboardLock MUST report acquisition failure and + // surface that pid as the holder. This is the mutual-exclusion + // loser path — without it, the regression would only verify the + // fresh file survived, not that the contender correctly stood + // down. + expect(result.acquired).toBe(false); + expect(result.holderPid).toBe(process.ppid); + } finally { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (fs as any).statSync = originalStatSync; + } + }); + it("treats unreadable or transient lock contents as a retry, not a stale lock", () => { fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true }); fs.writeFileSync(session.LOCK_FILE, "{not-json", { mode: 0o600 }); diff --git a/src/lib/onboard-session.ts b/src/lib/onboard-session.ts index 1f470bb6a3..ef2cfbb28d 100644 --- a/src/lib/onboard-session.ts +++ b/src/lib/onboard-session.ts @@ -334,6 +334,13 @@ function isProcessAlive(pid: number): boolean { } } +// File descriptor we hold across the lifetime of an acquired lock. On +// release, fstat(fd).ino vs stat(path).ino confirms the on-disk path +// still resolves to the file we created — closing the residual TOCTOU +// window in the inode-only check by tying ownership to a live +// descriptor rather than a value re-read from disk. See #1281. +let heldLockFd: number | null = null; + export function acquireOnboardLock(command: string | null = null): LockResult { ensureSessionDir(); const payload = JSON.stringify( @@ -346,17 +353,38 @@ export function acquireOnboardLock(command: string | null = null): LockResult { 2, ); - for (let attempt = 0; attempt < 2; attempt++) { + // The retry budget here used to be 2, which is the bare minimum needed + // for "see-stale → cleanup → reclaim". With the inode-verified cleanup + // below it can take a few additional spins under contention because + // multiple concurrent stale-cleaners can race and lose to each other + // before one reclaims, so give the loop a little more room. + // See issue #1281. + const MAX_ATTEMPTS = 5; + + for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { + let fd: number; try { - fs.writeFileSync(LOCK_FILE, payload, { flag: "wx", mode: 0o600 }); - return { acquired: true, lockFile: LOCK_FILE, stale: false }; + // openSync(..., "wx", mode) is the atomic create-or-fail + // primitive. We hold the resulting fd at module scope so + // releaseOnboardLock() can later confirm the on-disk path still + // resolves to the same file we created (fstat ino vs stat ino). + fd = fs.openSync(LOCK_FILE, "wx", 0o600); } catch (error: unknown) { if ((error as NodeJS.ErrnoException)?.code !== "EEXIST") { throw error; } + // Capture both the parsed lock and the inode so we can verify the + // file we're about to unlink is STILL the same stale file we read. + // Without the inode check, two concurrent processes can both read + // the same stale lock, and the slower one will unlink the fresh + // lock the faster one just claimed, breaking mutual exclusion. + // See issue #1281. let existing: LockInfo | null; + let staleInode: bigint | null; try { + const stat = fs.statSync(LOCK_FILE, { bigint: true }); + staleInode = stat.ino; existing = parseLockFile(fs.readFileSync(LOCK_FILE, "utf8")); } catch (readError: unknown) { if ((readError as NodeJS.ErrnoException)?.code === "ENOENT") { @@ -365,9 +393,12 @@ export function acquireOnboardLock(command: string | null = null): LockResult { throw readError; } if (!existing) { + // Malformed lock file — leave it on disk (a human or another + // process may be mid-write) and retry. Pre-#1281 behavior + // preserved: never unlink a malformed lock automatically. continue; } - if (existing && isProcessAlive(existing.pid)) { + if (isProcessAlive(existing.pid)) { return { acquired: false, lockFile: LOCK_FILE, @@ -378,20 +409,109 @@ export function acquireOnboardLock(command: string | null = null): LockResult { }; } - try { - fs.unlinkSync(LOCK_FILE); - } catch (unlinkError: unknown) { - if ((unlinkError as NodeJS.ErrnoException)?.code !== "ENOENT") { - throw unlinkError; - } - } + // Stale: unlink ONLY if the file on disk is still the same inode + // we just read. If a concurrent process already cleaned up and + // claimed the lock, the inode will have changed and we'll fall + // through to the next iteration where openSync(wx) will either + // succeed (we win) or fail EEXIST against the new holder (and we + // re-read it). + unlinkIfInodeMatches(LOCK_FILE, staleInode); + continue; + } + + // Atomic create succeeded — write the payload and keep the fd open + // for the lifetime of the lock so releaseOnboardLock() can verify + // ownership via the live descriptor. + try { + fs.writeSync(fd, payload); + } catch (writeError) { + try { fs.closeSync(fd); } catch { /* ignore */ } + try { fs.unlinkSync(LOCK_FILE); } catch { /* ignore */ } + throw writeError; } + heldLockFd = fd; + return { acquired: true, lockFile: LOCK_FILE, stale: false }; } return { acquired: false, lockFile: LOCK_FILE, stale: true }; } +/** + * Unlink LOCK_FILE only if its current inode equals `expectedInode`. + * The dual stat-then-unlink is the only portable POSIX primitive Node + * exposes for this — there's no atomic "unlink-if-inode" syscall — so + * a sufficiently unlucky race can still slip through. The window is + * orders of magnitude smaller than the unconditional unlink it + * replaces, and the outer loop will detect a wrong unlink on its next + * `writeFileSync(wx)` attempt because either we re-create the file + * or we observe the new lock with a different inode. + */ +function unlinkIfInodeMatches(filePath: string, expectedInode: bigint | null): void { + if (expectedInode === null) { + return; + } + try { + const stat = fs.statSync(filePath, { bigint: true }); + if (stat.ino !== expectedInode) { + // Someone else replaced the file. Leave it alone. + return; + } + } catch (statError: unknown) { + if ((statError as NodeJS.ErrnoException)?.code === "ENOENT") { + return; + } + throw statError; + } + try { + fs.unlinkSync(filePath); + } catch (unlinkError: unknown) { + if ((unlinkError as NodeJS.ErrnoException)?.code !== "ENOENT") { + throw unlinkError; + } + } +} + export function releaseOnboardLock(): void { + // Preferred path: we hold the fd from a successful acquireOnboardLock. + // Verify the on-disk path still resolves to the same file (fstat ino + // == stat ino) before unlinking. If they disagree, another process + // has already replaced the lock and we must NOT touch their file. + if (heldLockFd !== null) { + const fd = heldLockFd; + heldLockFd = null; + try { + const fdStat = fs.fstatSync(fd, { bigint: true }); + let pathInode: bigint | null = null; + try { + const pathStat = fs.statSync(LOCK_FILE, { bigint: true }); + pathInode = pathStat.ino; + } catch (error: unknown) { + if ((error as NodeJS.ErrnoException)?.code !== "ENOENT") { + // Unexpected — fall through to closing the fd. + } + } + if (pathInode !== null && pathInode === fdStat.ino) { + try { + fs.unlinkSync(LOCK_FILE); + } catch (unlinkError: unknown) { + if ((unlinkError as NodeJS.ErrnoException)?.code !== "ENOENT") { + // Best effort — surfacing this would mask the real error. + } + } + } + } catch { + // fstat can fail if the fd was already closed somehow; nothing + // safe to do beyond closing it below. + } finally { + try { fs.closeSync(fd); } catch { /* ignore */ } + } + return; + } + + // Fallback (no fd held — e.g., a test wrote the lock file directly, + // or a previous release already ran): preserve the legacy pid-based + // behavior so we never unlink a malformed lock and never unlink a + // lock owned by another pid. try { if (!fs.existsSync(LOCK_FILE)) return; let existing: LockInfo | null = null;