diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4e33e585fb..a8eecc04d4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -183,7 +183,7 @@ repos: hooks: - id: hadolint name: hadolint - entry: hadolint + entry: scripts/run-hadolint.sh language: system files: (Dockerfile[^/]*|.*\.dockerfile)$ types: [file] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3af1c82c7f..07abbf3208 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,3 +1,6 @@ + + + # Contributing to NVIDIA NemoClaw Thank you for your interest in contributing to NVIDIA NemoClaw. This guide covers how to set up your development environment, run tests, and submit changes. @@ -18,7 +21,7 @@ Install the following before you begin. - Python 3.11+ (for blueprint and documentation builds) - Docker (running) - [uv](https://docs.astral.sh/uv/) (for Python dependency management) -- [hadolint](https://github.com/hadolint/hadolint) (Dockerfile linter — `brew install hadolint` on macOS) +- [hadolint](https://github.com/hadolint/hadolint) (Dockerfile linter — either install it locally, e.g. `brew install hadolint` on macOS, or keep Docker available for the pinned container fallback) ## Getting Started @@ -92,6 +95,8 @@ All git hooks are managed by [prek](https://prek.j178.dev/), a fast, single-bina For a full manual check: `npx prek run --all-files`. For scoped runs: `npx prek run --from-ref --to-ref HEAD`. +If `hadolint` is not on your `PATH`, the local `hadolint` hook falls back to the pinned `hadolint/hadolint:v2.14.0` Docker image automatically. Dockerfile linting is still required, so contributors must have either a local `hadolint` binary or a working Docker daemon. + If you still have `core.hooksPath` set from an old Husky setup, Git will ignore `.git/hooks`. Run `git config --unset core.hooksPath` in this repo, then `npm install` so `prek install` (via `prepare`) can register the hooks. `make check` remains the primary documented linter entry point. 
diff --git a/scripts/run-hadolint.sh b/scripts/run-hadolint.sh new file mode 100755 index 0000000000..63f3d58f37 --- /dev/null +++ b/scripts/run-hadolint.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +readonly HADOLINT_IMAGE="${HADOLINT_IMAGE:-hadolint/hadolint:v2.14.0}" + +run_via_docker() { + if ! command -v docker >/dev/null 2>&1; then + printf '%s\n' "hadolint is not installed and Docker is unavailable." >&2 + printf '%s\n' "Install hadolint locally or make Docker available, then rerun prek." >&2 + return 127 + fi + + if ! docker info >/dev/null 2>&1; then + printf '%s\n' "hadolint is not installed and Docker is not ready." >&2 + printf '%s\n' "Start Docker or install hadolint locally, then rerun prek." >&2 + return 1 + fi + + printf '%s\n' "hadolint not found on PATH; linting Dockerfiles via Docker image ${HADOLINT_IMAGE}" >&2 + + exec docker run --rm \ + -v "${PWD}:${PWD}" \ + -w "${PWD}" \ + "${HADOLINT_IMAGE}" \ + hadolint "$@" +} + +main() { + if command -v hadolint >/dev/null 2>&1; then + exec hadolint "$@" + fi + + run_via_docker "$@" +} + +main "$@" diff --git a/src/lib/ansi-utils.ts b/src/lib/ansi-utils.ts new file mode 100644 index 0000000000..25b4a7060d --- /dev/null +++ b/src/lib/ansi-utils.ts @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Strip ANSI escape sequences from terminal-oriented output. + * Covers CSI (color, erase, cursor), OSC, and C1 two-byte escapes per ECMA-48. 
+ */ +export const ANSI_RE = /\x1B(?:\[[0-?]*[ -/]*[@-~]|\][^\x07]*(?:\x07|\x1B\\)|[@-_])/g; + +export function stripAnsi(value: string): string { + return String(value || "").replace(ANSI_RE, ""); +} diff --git a/src/lib/dashboard.test.ts b/src/lib/dashboard.test.ts index ba1d51f4b4..90e1218902 100644 --- a/src/lib/dashboard.test.ts +++ b/src/lib/dashboard.test.ts @@ -120,4 +120,13 @@ describe("buildControlUiUrls", () => { const urls = buildControlUiUrls("my-token", 19000); expect(urls).toEqual(["http://127.0.0.1:19000/#token=my-token"]); }); + + it("honors an explicit chatUiUrl override instead of reading only CHAT_UI_URL from the environment", () => { + process.env.CHAT_UI_URL = "https://env-dashboard.example.com"; + const urls = buildControlUiUrls("tok", 19999, "https://override.example.com"); + expect(urls).toEqual([ + "http://127.0.0.1:19999/#token=tok", + "https://override.example.com/#token=tok", + ]); + }); }); diff --git a/src/lib/dashboard.ts b/src/lib/dashboard.ts index f3d313f890..ba3e357fdf 100644 --- a/src/lib/dashboard.ts +++ b/src/lib/dashboard.ts @@ -40,11 +40,14 @@ export function resolveDashboardForwardTarget( export function buildControlUiUrls( token: string | null = null, port: number = CONTROL_UI_PORT, + chatUiUrl: string | null = null, ): string[] { const hash = token ? `#token=${token}` : ""; const baseUrl = `http://127.0.0.1:${port}`; const urls = [`${baseUrl}${CONTROL_UI_PATH}${hash}`]; - const chatUi = (process.env.CHAT_UI_URL || "").trim().replace(/\/$/, ""); + const chatUi = String(chatUiUrl ?? process.env.CHAT_UI_URL ?? 
"") + .trim() + .replace(/\/$/, ""); if (chatUi && /^https?:\/\//i.test(chatUi) && chatUi !== baseUrl) { urls.push(`${chatUi}${CONTROL_UI_PATH}${hash}`); } diff --git a/src/lib/onboard-bootstrap.test.ts b/src/lib/onboard-bootstrap.test.ts new file mode 100644 index 0000000000..11cb3e3185 --- /dev/null +++ b/src/lib/onboard-bootstrap.test.ts @@ -0,0 +1,149 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import { createRequire } from "node:module"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +const require = createRequire(import.meta.url); +const bootstrapDistPath = require.resolve("../../dist/lib/onboard-bootstrap"); +const persistentDriverDistPath = require.resolve("../../dist/lib/onboard-persistent-driver"); +const flowStateDistPath = require.resolve("../../dist/lib/onboard-flow-state"); +const sessionDistPath = require.resolve("../../dist/lib/onboard-session"); +const originalHome = process.env.HOME; +let tmpDir: string; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-bootstrap-")); + process.env.HOME = tmpDir; + delete require.cache[bootstrapDistPath]; + delete require.cache[persistentDriverDistPath]; + delete require.cache[flowStateDistPath]; + delete require.cache[sessionDistPath]; +}); + +afterEach(() => { + delete require.cache[bootstrapDistPath]; + delete require.cache[persistentDriverDistPath]; + delete require.cache[flowStateDistPath]; + delete require.cache[sessionDistPath]; + fs.rmSync(tmpDir, { recursive: true, force: true }); + if (originalHome === undefined) { + delete process.env.HOME; + } else { + process.env.HOME = originalHome; + } +}); + +describe("initializeOnboardRun", () => { + it("creates a fresh session and resolves --from paths", () => { + const { initializeOnboardRun } = 
require("../../dist/lib/onboard-bootstrap"); + + const result = initializeOnboardRun({ + resume: false, + mode: "non-interactive", + requestedFromDockerfile: "./Dockerfile.custom", + requestedAgent: "hermes", + }); + + expect(result.ok).toBe(true); + if (!result.ok) { + throw new Error("expected fresh onboarding initialization to succeed"); + } + expect(result.value.session.mode).toBe("non-interactive"); + expect(result.value.session.agent).toBe("hermes"); + expect(result.value.fromDockerfile).toBe(path.resolve("./Dockerfile.custom")); + expect(result.value.driver.session?.metadata.fromDockerfile).toBe( + path.resolve("./Dockerfile.custom"), + ); + }); + + it("returns a friendly error when no resumable session exists", () => { + const { initializeOnboardRun } = require("../../dist/lib/onboard-bootstrap"); + + const result = initializeOnboardRun({ + resume: true, + mode: "interactive", + requestedFromDockerfile: null, + requestedAgent: null, + }); + + expect(result).toEqual({ + ok: false, + lines: [" No resumable onboarding session was found.", " Run: nemoclaw onboard"], + }); + }); + + it("reports resume conflicts using the shared formatter", () => { + const onboardSession = require("../../dist/lib/onboard-session"); + const { initializeOnboardRun } = require("../../dist/lib/onboard-bootstrap"); + + onboardSession.saveSession( + onboardSession.createSession({ + sandboxName: "alpha", + provider: "nvidia-prod", + model: "meta/llama-3.3-70b-instruct", + }), + ); + + const result = initializeOnboardRun({ + resume: true, + mode: "interactive", + requestedFromDockerfile: null, + requestedAgent: null, + getResumeConflicts: (session: { sandboxName: string | null; provider: string | null }) => [ + { field: "sandbox", requested: "beta", recorded: session.sandboxName }, + { field: "provider", requested: "openai-api", recorded: session.provider }, + ], + }); + + expect(result).toEqual({ + ok: false, + lines: [ + " Resumable state belongs to sandbox 'alpha', not 'beta'.", + " 
Resumable state recorded provider 'nvidia-prod', not 'openai-api'.", + " Run: nemoclaw onboard # start a fresh onboarding session", + " Or rerun with the original settings to continue that session.", + ], + }); + }); + + it("loads a resumable session, reuses the recorded Dockerfile, and clears failure state", () => { + const onboardSession = require("../../dist/lib/onboard-session"); + const { initializeOnboardRun } = require("../../dist/lib/onboard-bootstrap"); + + onboardSession.saveSession( + onboardSession.createSession({ + mode: "interactive", + status: "failed", + sandboxName: "alpha", + metadata: { gatewayName: "nemoclaw", fromDockerfile: "/tmp/Recorded.Dockerfile" }, + failure: { + step: "policies", + message: "policy apply failed", + recordedAt: "2026-04-17T00:00:00.000Z", + }, + }), + ); + + const result = initializeOnboardRun({ + resume: true, + mode: "non-interactive", + requestedFromDockerfile: null, + requestedAgent: null, + getResumeConflicts: () => [], + }); + + expect(result.ok).toBe(true); + if (!result.ok) { + throw new Error("expected resume initialization to succeed"); + } + expect(result.value.fromDockerfile).toBe("/tmp/Recorded.Dockerfile"); + expect(result.value.session.mode).toBe("non-interactive"); + expect(result.value.session.status).toBe("in_progress"); + expect(result.value.session.failure).toBeNull(); + }); +}); diff --git a/src/lib/onboard-bootstrap.ts b/src/lib/onboard-bootstrap.ts new file mode 100644 index 0000000000..c6cc7356bf --- /dev/null +++ b/src/lib/onboard-bootstrap.ts @@ -0,0 +1,100 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import path from "node:path"; + +import { PersistentOnboardDriver } from "./onboard-persistent-driver"; +import { buildResumeConflictLines, type ResumeConfigConflict } from "./onboard-resume"; +import { createSession, type Session } from "./onboard-session"; + +export interface InitializeOnboardRunOptions { + resume: boolean; + mode: Session["mode"]; + requestedFromDockerfile: string | null; + requestedAgent: string | null; + getResumeConflicts?: (session: Session) => ResumeConfigConflict[]; +} + +export interface InitializedOnboardRun { + driver: PersistentOnboardDriver; + session: Session; + fromDockerfile: string | null; +} + +export interface InitializeOnboardRunFailure { + ok: false; + lines: string[]; +} + +export interface InitializeOnboardRunSuccess { + ok: true; + value: InitializedOnboardRun; +} + +export type InitializeOnboardRunResult = + | InitializeOnboardRunFailure + | InitializeOnboardRunSuccess; + +export function initializeOnboardRun( + options: InitializeOnboardRunOptions, +): InitializeOnboardRunResult { + const driver = new PersistentOnboardDriver({ resume: options.resume }); + + if (options.resume) { + const session = driver.session; + if (!session || session.resumable === false) { + return { + ok: false, + lines: [" No resumable onboarding session was found.", " Run: nemoclaw onboard"], + }; + } + + const sessionFrom = session.metadata.fromDockerfile || null; + const fromDockerfile = options.requestedFromDockerfile + ? path.resolve(options.requestedFromDockerfile) + : sessionFrom + ? path.resolve(sessionFrom) + : null; + const resumeConflicts = options.getResumeConflicts?.(session) ?? 
[]; + if (resumeConflicts.length > 0) { + return { + ok: false, + lines: buildResumeConflictLines(resumeConflicts), + }; + } + + const updatedSession = driver.update((current) => { + current.mode = options.mode; + current.failure = null; + current.status = "in_progress"; + return current; + }); + return { + ok: true, + value: { + driver, + session: updatedSession, + fromDockerfile, + }, + }; + } + + const fromDockerfile = options.requestedFromDockerfile + ? path.resolve(options.requestedFromDockerfile) + : null; + const session = driver.replaceSession( + createSession({ + mode: options.mode, + agent: options.requestedAgent, + metadata: { gatewayName: "nemoclaw", fromDockerfile: fromDockerfile || null }, + }), + ); + return { + ok: true, + value: { + driver, + session, + fromDockerfile, + }, + }; +} diff --git a/src/lib/onboard-dashboard-print.test.ts b/src/lib/onboard-dashboard-print.test.ts new file mode 100644 index 0000000000..4ae90e72ee --- /dev/null +++ b/src/lib/onboard-dashboard-print.test.ts @@ -0,0 +1,98 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. 
+import { + getDashboardProviderLabel, + printOnboardDashboard, +} from "../../dist/lib/onboard-dashboard-print"; + +describe("onboard-dashboard-print", () => { + it("maps known provider ids to user-facing labels", () => { + expect(getDashboardProviderLabel("nvidia-prod")).toBe("NVIDIA Endpoints"); + expect(getDashboardProviderLabel("openai-api")).toBe("OpenAI"); + expect(getDashboardProviderLabel("anthropic-prod")).toBe("Anthropic"); + expect(getDashboardProviderLabel("gemini-api")).toBe("Google Gemini"); + expect(getDashboardProviderLabel("custom-provider")).toBe("custom-provider"); + }); + + it("prints the tokenized OpenClaw dashboard when a token is available", () => { + const lines: string[] = []; + printOnboardDashboard("alpha", "gpt-5.4", "openai-api", null, null, { + getNimStatus: () => ({ running: false }), + fetchGatewayAuthTokenFromSandbox: () => "secret-token", + getDashboardAccessInfo: () => [ + { label: "Dashboard", url: "http://127.0.0.1:18789/#token=secret-token" }, + ], + getDashboardGuidanceLines: () => ["Port 18789 must be forwarded before opening these URLs."], + note: (message) => lines.push(`note:${message}`), + log: (message = "") => lines.push(message), + printAgentDashboardUi: () => { + throw new Error("should not enter agent dashboard path"); + }, + buildControlUiUrls: () => [], + getWslHostAddress: () => null, + buildAuthenticatedDashboardUrl: (baseUrl, token) => `${baseUrl}#token=${token}`, + }); + + expect(lines).toContain(" OpenClaw UI (tokenized URL; treat it like a password)"); + expect(lines).toContain(" Dashboard: http://127.0.0.1:18789/#token=secret-token"); + expect(lines).not.toContainEqual(expect.stringMatching(/^note:/)); + }); + + it("prints agent dashboard UI and appends a WSL URL when needed", () => { + const lines: string[] = []; + const printAgentDashboardUi = vi.fn(); + printOnboardDashboard( + "alpha", + "meta/llama-3.3-70b-instruct", + "nvidia-prod", + "nim-123", + { name: "hermes" }, + { + getNimStatus: () => ({ 
running: true }), + fetchGatewayAuthTokenFromSandbox: () => "secret-token", + getDashboardAccessInfo: () => [], + getDashboardGuidanceLines: () => [], + note: (message) => lines.push(`note:${message}`), + log: (message = "") => lines.push(message), + printAgentDashboardUi, + buildControlUiUrls: (token, port) => [`http://127.0.0.1:${port}/#token=${token}`], + getWslHostAddress: () => "172.24.240.1", + buildAuthenticatedDashboardUrl: (baseUrl, token) => `${baseUrl}#token=${token}`, + }, + ); + + expect(printAgentDashboardUi).toHaveBeenCalledTimes(1); + const buildUrls = printAgentDashboardUi.mock.calls[0][3].buildControlUiUrls; + expect(buildUrls("secret-token", 19999)).toEqual([ + "http://127.0.0.1:19999/#token=secret-token", + "http://172.24.240.1:19999/#token=secret-token", + ]); + }); + + it("prints fallback token guidance when the token cannot be fetched", () => { + const lines: string[] = []; + printOnboardDashboard("alpha", "gpt-5.4", "openai-api", null, null, { + getNimStatus: () => ({ running: false }), + fetchGatewayAuthTokenFromSandbox: () => null, + getDashboardAccessInfo: () => [{ label: "Dashboard", url: "http://127.0.0.1:18789/" }], + getDashboardGuidanceLines: () => ["No dashboard URLs were generated."], + note: (message) => lines.push(`note:${message}`), + log: (message = "") => lines.push(message), + printAgentDashboardUi: () => { + throw new Error("should not enter agent dashboard path"); + }, + buildControlUiUrls: () => [], + getWslHostAddress: () => null, + buildAuthenticatedDashboardUrl: (baseUrl, token) => `${baseUrl}#token=${token}`, + }); + + expect(lines).toContain(" OpenClaw UI"); + expect(lines).toContain( + " Token: nemoclaw alpha connect → jq -r '.gateway.auth.token' /sandbox/.openclaw/openclaw.json", + ); + expect(lines).toContain("note: Could not read gateway token from the sandbox (download failed)."); + }); +}); diff --git a/src/lib/onboard-dashboard-print.ts b/src/lib/onboard-dashboard-print.ts new file mode 100644 index 
0000000000..70477902a6 --- /dev/null +++ b/src/lib/onboard-dashboard-print.ts @@ -0,0 +1,114 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { DashboardAccessInfo } from "./onboard-dashboard"; + +export function getDashboardProviderLabel(provider: string): string { + if (provider === "nvidia-prod" || provider === "nvidia-nim") return "NVIDIA Endpoints"; + if (provider === "openai-api") return "OpenAI"; + if (provider === "anthropic-prod") return "Anthropic"; + if (provider === "compatible-anthropic-endpoint") { + return "Other Anthropic-compatible endpoint"; + } + if (provider === "gemini-api") return "Google Gemini"; + if (provider === "compatible-endpoint") return "Other OpenAI-compatible endpoint"; + if (provider === "vllm-local") return "Local vLLM"; + if (provider === "ollama-local") return "Local Ollama"; + return provider; +} + +export interface PrintOnboardDashboardDeps { + getNimStatus: (sandboxName: string, nimContainer: string | null) => { running: boolean }; + fetchGatewayAuthTokenFromSandbox: (sandboxName: string) => string | null; + getDashboardAccessInfo: ( + sandboxName: string, + options: { token: string | null }, + ) => DashboardAccessInfo[]; + getDashboardGuidanceLines: (dashboardAccess: DashboardAccessInfo[]) => string[]; + note: (message: string) => void; + log: (message?: string) => void; + printAgentDashboardUi: <TAgent>( + sandboxName: string, + token: string | null, + agent: TAgent, + deps: { + note: (message: string) => void; + buildControlUiUrls: (token: string | null, port: number) => string[]; + }, + ) => void; + buildControlUiUrls: (token: string | null, port: number) => string[]; + getWslHostAddress: () => string | null; + buildAuthenticatedDashboardUrl: (baseUrl: string, token: string | null) => string; +} + +export function printOnboardDashboard<TAgent>( + sandboxName: string, + model: string, + provider: string, + nimContainer: string | 
null, + agent: TAgent | null, + deps: PrintOnboardDashboardDeps, +): void { + const nimStat = deps.getNimStatus(sandboxName, nimContainer); + const nimLabel = nimStat.running ? "running" : "not running"; + const providerLabel = getDashboardProviderLabel(provider); + const token = deps.fetchGatewayAuthTokenFromSandbox(sandboxName); + const dashboardAccess = deps.getDashboardAccessInfo(sandboxName, { token }); + const guidanceLines = deps.getDashboardGuidanceLines(dashboardAccess); + + deps.log(""); + deps.log(` ${"─".repeat(50)}`); + deps.log(` Sandbox ${sandboxName} (Landlock + seccomp + netns)`); + deps.log(` Model ${model} (${providerLabel})`); + deps.log(` NIM ${nimLabel}`); + deps.log(` ${"─".repeat(50)}`); + deps.log(` Run: nemoclaw ${sandboxName} connect`); + deps.log(` Status: nemoclaw ${sandboxName} status`); + deps.log(` Logs: nemoclaw ${sandboxName} logs --follow`); + deps.log(""); + + if (agent) { + deps.printAgentDashboardUi(sandboxName, token, agent, { + note: deps.note, + buildControlUiUrls: (tokenValue, port) => { + const urls = deps.buildControlUiUrls(tokenValue, port); + const wslHostAddress = deps.getWslHostAddress(); + if (wslHostAddress) { + const wslUrl = deps.buildAuthenticatedDashboardUrl( + `http://${wslHostAddress}:${port}/`, + tokenValue, + ); + if (!urls.includes(wslUrl)) { + urls.push(wslUrl); + } + } + return urls; + }, + }); + } else if (token) { + deps.log(" OpenClaw UI (tokenized URL; treat it like a password)"); + for (const line of guidanceLines) { + deps.log(` ${line}`); + } + for (const entry of dashboardAccess) { + deps.log(` ${entry.label}: ${entry.url}`); + } + } else { + deps.note(" Could not read gateway token from the sandbox (download failed)."); + deps.log(" OpenClaw UI"); + for (const line of guidanceLines) { + deps.log(` ${line}`); + } + for (const entry of dashboardAccess) { + deps.log(` ${entry.label}: ${entry.url}`); + } + deps.log( + ` Token: nemoclaw ${sandboxName} connect → jq -r '.gateway.auth.token' 
/sandbox/.openclaw/openclaw.json`, + ); + deps.log( + " append #token= to the URL, or see /tmp/gateway.log inside the sandbox.", + ); + } + deps.log(` ${"─".repeat(50)}`); + deps.log(""); +} diff --git a/src/lib/onboard-dashboard.test.ts b/src/lib/onboard-dashboard.test.ts new file mode 100644 index 0000000000..a69fa47b7a --- /dev/null +++ b/src/lib/onboard-dashboard.test.ts @@ -0,0 +1,155 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. +import { + buildAuthenticatedDashboardUrl, + ensureDashboardForward, + fetchGatewayAuthTokenFromSandbox, + getDashboardAccessInfo, + getDashboardForwardPort, + getDashboardForwardStartCommand, + getDashboardForwardTarget, + getDashboardGuidanceLines, + getWslHostAddress, +} from "../../dist/lib/onboard-dashboard"; + +const originalEnv = process.env.CHAT_UI_URL; + +beforeEach(() => { + delete process.env.CHAT_UI_URL; +}); + +afterEach(() => { + if (originalEnv !== undefined) { + process.env.CHAT_UI_URL = originalEnv; + } else { + delete process.env.CHAT_UI_URL; + } +}); + +describe("onboard-dashboard", () => { + it("fetches a gateway auth token from a downloaded sandbox config", () => { + const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-dashboard-token-")); + const sandboxDir = path.join(tmpRoot, "nested", "sandbox"); + fs.mkdirSync(sandboxDir, { recursive: true }); + fs.writeFileSync( + path.join(sandboxDir, "openclaw.json"), + JSON.stringify({ gateway: { auth: { token: "secret-token" } } }), + ); + const runOpenshell = vi.fn((_args, _opts) => { + const destDir = _args[4]; + fs.cpSync(tmpRoot, destDir, { recursive: true }); + return { status: 0 }; + }); + try { + 
expect(fetchGatewayAuthTokenFromSandbox("alpha", { runOpenshell })).toBe("secret-token"); + } finally { + fs.rmSync(tmpRoot, { recursive: true, force: true }); + } + }); + + it("derives dashboard forward info and authenticated URLs", () => { + expect(getDashboardForwardPort("http://127.0.0.1:19999")).toBe("19999"); + expect(getDashboardForwardTarget("http://127.0.0.1:19999", { isWsl: false })).toBe( + "19999", + ); + expect(getDashboardForwardTarget("http://127.0.0.1:19999", { isWsl: true })).toBe( + "0.0.0.0:19999", + ); + expect(buildAuthenticatedDashboardUrl("http://127.0.0.1:19999/", "secret-token")).toBe( + "http://127.0.0.1:19999/#token=secret-token", + ); + }); + + it("builds dashboard access info and WSL guidance", () => { + process.env.CHAT_UI_URL = "https://env-dashboard.example.com"; + const access = getDashboardAccessInfo("the-crucible", { + token: "secret-token", + chatUiUrl: "http://127.0.0.1:19999", + env: { WSL_DISTRO_NAME: "Ubuntu" }, + platform: "linux", + release: "6.6.87.2-microsoft-standard-WSL2", + runCapture: (command) => (command.includes("hostname -I") ? "172.24.240.1\n" : ""), + }); + + // Explicit chatUiUrl must override process.env.CHAT_UI_URL when building URLs. 
+ expect(access).toEqual([ + { label: "Dashboard", url: "http://127.0.0.1:19999/#token=secret-token" }, + { label: "VS Code/WSL", url: "http://172.24.240.1:19999/#token=secret-token" }, + ]); + expect( + getDashboardGuidanceLines(access, { + chatUiUrl: "http://127.0.0.1:19999", + env: { WSL_DISTRO_NAME: "Ubuntu" }, + platform: "linux", + release: "6.6.87.2-microsoft-standard-WSL2", + }), + ).toEqual([ + "Port 19999 must be forwarded before opening these URLs.", + "WSL detected: if localhost fails in Windows, use the WSL host IP shown by `hostname -I`.", + ]); + }); + + it("builds dashboard forward start commands with the correct target", () => { + const command = getDashboardForwardStartCommand("the-crucible", { + chatUiUrl: "http://127.0.0.1:19999", + openshellBinary: "/usr/bin/openshell", + isWsl: false, + openshellShellCommand: (args, options = {}) => { + const binary = options.openshellBinary || "openshell"; + return [binary, ...args].join(" "); + }, + }); + + expect(command).toContain("forward start --background 19999 the-crucible"); + }); + + it("restores the dashboard forward and warns when the background forward start fails", () => { + const warnings: string[] = []; + const calls: string[] = []; + ensureDashboardForward("the-crucible", { + chatUiUrl: "https://chat.example.com", + runOpenshell: (args) => { + calls.push(args.join(" ")); + return args.includes("start") ? { status: 1 } : { status: 0 }; + }, + warningWriter: (message = "") => warnings.push(message), + }); + + expect(calls).toEqual([ + "forward stop 18789", + "forward start --background 0.0.0.0:18789 the-crucible", + ]); + expect(warnings).toEqual([ + "! 
Port 18789 forward did not start — port may be in use by another process.", + " Check: docker ps --format 'table {{.Names}}\\t{{.Ports}}' | grep 18789", + " Free the port, then reconnect: nemoclaw the-crucible connect", + ]); + }); + + it("uses CHAT_UI_URL as the fallback forward source when chatUiUrl is omitted", () => { + process.env.CHAT_UI_URL = "https://chat.example.com"; + const calls: string[] = []; + ensureDashboardForward("the-crucible", { + runOpenshell: (args) => { + calls.push(args.join(" ")); + return { status: 0 }; + }, + }); + + expect(calls).toEqual([ + "forward stop 18789", + "forward start --background 0.0.0.0:18789 the-crucible", + ]); + }); + + it("returns null for WSL host lookups outside WSL", () => { + expect(getWslHostAddress({ isWsl: false })).toBeNull(); + }); +}); diff --git a/src/lib/onboard-dashboard.ts b/src/lib/onboard-dashboard.ts new file mode 100644 index 0000000000..e6d5f19b15 --- /dev/null +++ b/src/lib/onboard-dashboard.ts @@ -0,0 +1,233 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { buildControlUiUrls, resolveDashboardForwardTarget } from "./dashboard"; +import { DASHBOARD_PORT } from "./ports"; +import { isWsl } from "./platform"; + +const CONTROL_UI_PORT = DASHBOARD_PORT; + +function findOpenclawJsonPath(dir: string): string | null { + const directPath = path.join(dir, ".openclaw", "openclaw.json"); + if (fs.existsSync(directPath)) return directPath; + if (!fs.existsSync(dir)) return null; + const entries = fs.readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + const filePath = path.join(dir, entry.name); + if (entry.isDirectory()) { + const found = findOpenclawJsonPath(filePath); + if (found) return found; + } else if (entry.name === "openclaw.json") { + return filePath; + } + } + return null; +} + +export interface FetchGatewayAuthTokenDeps { + runOpenshell: ( + args: string[], + opts?: { ignoreError?: boolean; stdio?: [string, string, string] }, + ) => { status: number }; +} + +/** + * Pull gateway.auth.token from the sandbox image via openshell sandbox download + * so onboard can print copy-paste Control UI URLs with #token=. 
+ */ +export function fetchGatewayAuthTokenFromSandbox( + sandboxName: string, + deps: FetchGatewayAuthTokenDeps, +): string | null { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-token-")); + try { + const destDir = `${tmpDir}${path.sep}`; + const result = deps.runOpenshell( + ["sandbox", "download", sandboxName, "/sandbox/.openclaw/openclaw.json", destDir], + { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] }, + ); + if (result.status !== 0) return null; + const jsonPath = findOpenclawJsonPath(tmpDir); + if (!jsonPath) return null; + const cfg = JSON.parse(fs.readFileSync(jsonPath, "utf-8")); + const token = cfg && cfg.gateway && cfg.gateway.auth && cfg.gateway.auth.token; + return typeof token === "string" && token.length > 0 ? token : null; + } catch { + return null; + } finally { + try { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } catch { + // ignore cleanup errors + } + } +} + +export function getDashboardForwardPort( + chatUiUrl = process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`, +): string { + const forwardTarget = resolveDashboardForwardTarget(chatUiUrl); + return forwardTarget.includes(":") + ? (forwardTarget.split(":").pop() ?? String(CONTROL_UI_PORT)) + : forwardTarget; +} + +export function getDashboardForwardTarget( + chatUiUrl = process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`, + options: { isWsl?: boolean; platform?: NodeJS.Platform; release?: string; env?: NodeJS.ProcessEnv } = {}, +): string { + const port = getDashboardForwardPort(chatUiUrl); + return isWsl(options) ? 
`0.0.0.0:${port}` : resolveDashboardForwardTarget(chatUiUrl); +} + +export function getDashboardForwardStartCommand( + sandboxName: string, + options: { + chatUiUrl?: string; + openshellBinary?: string; + isWsl?: boolean; + platform?: NodeJS.Platform; + release?: string; + env?: NodeJS.ProcessEnv; + openshellShellCommand: (args: string[], options?: { openshellBinary?: string }) => string; + }, +): string { + const chatUiUrl = + options.chatUiUrl || process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`; + const forwardTarget = getDashboardForwardTarget(chatUiUrl, options); + return `${options.openshellShellCommand( + ["forward", "start", "--background", forwardTarget, sandboxName], + options, + )}`; +} + +export function buildAuthenticatedDashboardUrl(baseUrl: string, token: string | null = null): string { + if (!token) return baseUrl; + return `${baseUrl}#token=${encodeURIComponent(token)}`; +} + +export function getWslHostAddress( + options: { + wslHostAddress?: string; + isWsl?: boolean; + platform?: NodeJS.Platform; + release?: string; + env?: NodeJS.ProcessEnv; + runCapture?: (command: string, options?: { ignoreError?: boolean }) => string; + } = {}, +): string | null { + if (options.wslHostAddress) { + return options.wslHostAddress; + } + if (!isWsl(options)) { + return null; + } + const runCaptureFn = options.runCapture ?? 
(() => ""); + const output = runCaptureFn("hostname -I 2>/dev/null", { ignoreError: true }); + const candidates = String(output || "") + .trim() + .split(/\s+/) + .filter(Boolean); + return candidates[0] || null; +} + +export interface DashboardAccessInfo { + label: string; + url: string; +} + +export function getDashboardAccessInfo( + sandboxName: string, + options: { + token?: string | null; + chatUiUrl?: string; + wslHostAddress?: string; + isWsl?: boolean; + platform?: NodeJS.Platform; + release?: string; + env?: NodeJS.ProcessEnv; + runCapture?: (command: string, options?: { ignoreError?: boolean }) => string; + fetchToken?: (sandboxName: string) => string | null; + } = {}, +): DashboardAccessInfo[] { + const token = Object.prototype.hasOwnProperty.call(options, "token") + ? (options.token ?? null) + : options.fetchToken?.(sandboxName) ?? null; + const chatUiUrl = + options.chatUiUrl || process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`; + const dashboardPort = Number(getDashboardForwardPort(chatUiUrl)); + const dashboardAccess = buildControlUiUrls(token, dashboardPort, chatUiUrl).map((url, index) => ({ + label: index === 0 ? 
"Dashboard" : `Alt ${index}`, + url: buildAuthenticatedDashboardUrl(url, null), + })); + + const wslHostAddress = getWslHostAddress(options); + if (wslHostAddress) { + const wslUrl = buildAuthenticatedDashboardUrl( + `http://${wslHostAddress}:${dashboardPort}/`, + token, + ); + if (!dashboardAccess.some((access) => access.url === wslUrl)) { + dashboardAccess.push({ label: "VS Code/WSL", url: wslUrl }); + } + } + + return dashboardAccess; +} + +export function getDashboardGuidanceLines( + dashboardAccess: DashboardAccessInfo[] = [], + options: { + chatUiUrl?: string; + isWsl?: boolean; + platform?: NodeJS.Platform; + release?: string; + env?: NodeJS.ProcessEnv; + } = {}, +): string[] { + const dashboardPort = getDashboardForwardPort( + options.chatUiUrl || process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`, + ); + const guidance = [`Port ${dashboardPort} must be forwarded before opening these URLs.`]; + if (isWsl(options)) { + guidance.push( + "WSL detected: if localhost fails in Windows, use the WSL host IP shown by `hostname -I`.", + ); + } + if (dashboardAccess.length === 0) { + guidance.push("No dashboard URLs were generated."); + } + return guidance; +} + +export function ensureDashboardForward( + sandboxName: string, + deps: { + chatUiUrl?: string; + runOpenshell: ( + args: string[], + opts?: { ignoreError?: boolean; stdio?: [string, string, string] }, + ) => { status: number }; + warningWriter?: (message?: string) => void; + }, +): void { + const chatUiUrl = + deps.chatUiUrl || process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`; + const portToStop = getDashboardForwardPort(chatUiUrl); + const forwardTarget = getDashboardForwardTarget(chatUiUrl); + deps.runOpenshell(["forward", "stop", portToStop], { ignoreError: true }); + const fwdResult = deps.runOpenshell( + ["forward", "start", "--background", forwardTarget, sandboxName], + { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] }, + ); + if (fwdResult && fwdResult.status 
!== 0) { + const warn = deps.warningWriter ?? console.warn; + warn(`! Port ${portToStop} forward did not start — port may be in use by another process.`); + warn(` Check: docker ps --format 'table {{.Names}}\\t{{.Ports}}' | grep ${portToStop}`); + warn(` Free the port, then reconnect: nemoclaw ${sandboxName} connect`); + } +} diff --git a/src/lib/onboard-driver.test.ts b/src/lib/onboard-driver.test.ts new file mode 100644 index 0000000000..72953c7fb3 --- /dev/null +++ b/src/lib/onboard-driver.test.ts @@ -0,0 +1,200 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. +import { InMemoryOnboardDriver } from "../../dist/lib/onboard-driver"; + +describe("InMemoryOnboardDriver", () => { + it("round-trips resumable checkpoints through persisted sessions", () => { + const driver = InMemoryOnboardDriver.fresh({ + mode: "non-interactive", + requestedSandboxName: "alpha", + }); + + driver.enterWorkflow(); + expect(driver.state.phase).toBe("preflight"); + expect(driver.reloadForResume().state.phase).toBe("preflight"); + + driver.finishPreflight(); + expect(driver.state.phase).toBe("gateway"); + expect(driver.reloadForResume().state.phase).toBe("gateway"); + + driver.finishGateway(); + expect(driver.state.phase).toBe("provider_selection"); + expect(driver.reloadForResume().state.phase).toBe("provider_selection"); + + driver.finishProviderSelection({ + provider: "openai-api", + model: "gpt-5.4", + endpointUrl: "https://api.openai.com/v1", + credentialEnv: "OPENAI_API_KEY", + preferredInferenceApi: "responses", + }); + expect(driver.state.phase).toBe("inference"); + expect(driver.reloadForResume().state.phase).toBe("inference"); + + driver.finishInference(); + expect(driver.state.phase).toBe("messaging"); + expect(driver.reloadForResume().state.phase).toBe("messaging"); + + 
driver.finishMessaging(["telegram", "slack"]); + expect(driver.state.phase).toBe("sandbox"); + expect(driver.reloadForResume().state.phase).toBe("sandbox"); + + driver.finishSandbox("alpha", { fetchEnabled: true }); + expect(driver.state.phase).toBe("runtime_setup"); + expect(driver.reloadForResume().state.phase).toBe("runtime_setup"); + + driver.finishRuntimeSetup(); + expect(driver.state.phase).toBe("policies"); + expect(driver.reloadForResume().state.phase).toBe("policies"); + + driver.finishPolicies(["npm", "telegram"]); + expect(driver.state.phase).toBe("complete"); + + const resumed = driver.reloadForResume(); + expect(resumed.state.phase).toBe("complete"); + expect(resumed.state.ctx.sandboxName).toBe("alpha"); + expect(resumed.state.ctx.messagingChannels).toEqual(["telegram", "slack"]); + expect(resumed.state.ctx.policyPresets).toEqual(["npm", "telegram"]); + }); + + it("resumes runtime_setup failures from the canonical phase even when the persisted step is openclaw", () => { + const driver = InMemoryOnboardDriver.fresh({ requestedSandboxName: "alpha" }); + driver + .enterWorkflow() + .finishPreflight() + .finishGateway() + .finishProviderSelection({ provider: "openai-api", model: "gpt-5.4" }) + .finishInference() + .finishMessaging([]) + .finishSandbox("alpha") + .fail("OpenClaw bootstrap failed", "runtime_boot_failed"); + + const resumed = driver.reloadForResume(); + expect(resumed.state.phase).toBe("failed"); + if (resumed.state.phase !== "failed") { + throw new Error("expected failed state"); + } + expect(resumed.state.failedFrom).toBe("runtime_setup"); + expect(resumed.state.error.code).toBe("persisted_runtime_setup_failure"); + expect(resumed.session.steps.runtime_setup.status).toBe("failed"); + expect(resumed.session.steps.openclaw.status).toBe("failed"); + }); + + it("keeps agent runtime sessions on the agent_setup path while exposing canonical runtime_setup", () => { + const driver = InMemoryOnboardDriver.fresh({ + requestedSandboxName: "alpha", + 
runtimeTarget: { kind: "agent", agentName: "hermes" }, + }); + driver + .enterWorkflow() + .finishPreflight() + .finishGateway() + .finishProviderSelection({ provider: "nvidia-nim", model: "meta/llama-3.3-70b-instruct" }) + .finishInference() + .finishMessaging(["slack"]) + .finishSandbox("alpha") + .finishRuntimeSetup(); + + const resumed = driver.reloadForResume(); + expect(resumed.state.phase).toBe("policies"); + expect(resumed.session.steps.agent_setup.status).toBe("complete"); + expect(resumed.session.steps.runtime_setup.status).toBe("complete"); + expect(resumed.state.ctx.runtimeTarget).toEqual({ kind: "agent", agentName: "hermes" }); + }); + + it("returns an immutable cloned state snapshot", () => { + const driver = InMemoryOnboardDriver.fresh({ requestedSandboxName: "alpha" }); + driver.enterWorkflow().finishPreflight(); + + const snapshot = driver.state as { phase: string }; + expect(Object.isFrozen(snapshot)).toBe(true); + expect(snapshot).not.toBe(driver.state); + try { + snapshot.phase = "boot"; + } catch { + // expected in strict mode + } + expect(driver.state.phase).toBe("gateway"); + + driver.finishGateway(); + expect(snapshot.phase).toBe("gateway"); + expect(driver.state.phase).toBe("provider_selection"); + }); + + it("clears provider-specific metadata when a later selection omits it", () => { + const driver = InMemoryOnboardDriver.fresh({ requestedSandboxName: "alpha" }); + driver + .enterWorkflow() + .finishPreflight() + .finishGateway() + .finishProviderSelection({ + provider: "compatible-openai", + model: "stale-model", + endpointUrl: "https://old.example.com/v1", + credentialEnv: "COMPATIBLE_API_KEY", + preferredInferenceApi: "responses", + nimContainer: "nim-stale", + }); + + const resumed = driver.reloadForResume(); + resumed.finishProviderSelection({ + provider: "openai-api", + model: "gpt-5.4", + endpointUrl: null, + credentialEnv: null, + preferredInferenceApi: null, + nimContainer: null, + }); + + 
expect(resumed.session.provider).toBe("openai-api"); + expect(resumed.session.model).toBe("gpt-5.4"); + expect(resumed.session.endpointUrl).toBeNull(); + expect(resumed.session.credentialEnv).toBeNull(); + expect(resumed.session.preferredInferenceApi).toBeNull(); + expect(resumed.session.nimContainer).toBeNull(); + }); + + it("defensively copies messaging channels and policy presets before storing them", () => { + const driver = InMemoryOnboardDriver.fresh({ requestedSandboxName: "alpha" }); + const channels = ["telegram"]; + const presets = ["npm"]; + + driver + .enterWorkflow() + .finishPreflight() + .finishGateway() + .finishProviderSelection({ provider: "openai-api", model: "gpt-5.4" }) + .finishInference() + .finishMessaging(channels) + .finishSandbox("alpha") + .finishRuntimeSetup() + .finishPolicies(presets); + + channels.push("slack"); + presets.push("pypi"); + + expect(driver.session.messagingChannels).toEqual(["telegram"]); + expect(driver.session.policyPresets).toEqual(["npm"]); + if (driver.state.phase !== "complete") { + throw new Error("expected complete state"); + } + expect(driver.state.ctx.messagingChannels).toEqual(["telegram"]); + expect(driver.state.ctx.policyPresets).toEqual(["npm"]); + }); + + it("uses the sanitized failure message in the public state", () => { + const driver = InMemoryOnboardDriver.fresh({ requestedSandboxName: "alpha" }); + driver.fail("NVIDIA_API_KEY=nvapi-secret Bearer topsecret"); + + if (driver.state.phase !== "failed") { + throw new Error("expected failed state"); + } + expect(driver.state.error.message).toContain("NVIDIA_API_KEY="); + expect(driver.state.error.message).toContain("Bearer "); + expect(driver.state.error.message).not.toContain("nvapi-secret"); + expect(driver.state.error.message).not.toContain("topsecret"); + }); +}); diff --git a/src/lib/onboard-driver.ts b/src/lib/onboard-driver.ts new file mode 100644 index 0000000000..566d4e89ed --- /dev/null +++ b/src/lib/onboard-driver.ts @@ -0,0 +1,382 @@ +// 
SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import {
+  createInitialOnboardContext,
+  transitionOnboardState,
+  type OnboardFlowState,
+  type OnboardRuntimeTarget,
+} from "./onboard-fsm";
+import { deriveOnboardFlowState } from "./onboard-flow-state";
+import {
+  applySessionComplete,
+  applyStepComplete,
+  applyStepFailed,
+  createSession,
+  normalizeSession,
+  type Session,
+} from "./onboard-session";
+import type { WebSearchConfig } from "./web-search";
+
+export interface InMemoryOnboardDriverOptions {
+  mode?: Session["mode"];
+  runtimeTarget?: OnboardRuntimeTarget;
+  fromDockerfile?: string | null;
+  requestedSandboxName?: string | null;
+}
+
+function sessionOverridesFromOptions(options: InMemoryOnboardDriverOptions): Partial<Session> {
+  return {
+    mode: options.mode,
+    agent: options.runtimeTarget?.kind === "agent" ? options.runtimeTarget.agentName : null,
+    metadata: {
+      gatewayName: "nemoclaw",
+      fromDockerfile: options.fromDockerfile ?? null,
+    },
+    sandboxName: options.requestedSandboxName ??
null,
+  };
+}
+
+type ValidFailurePhase =
+  | "preflight"
+  | "gateway"
+  | "provider_selection"
+  | "inference"
+  | "messaging"
+  | "sandbox"
+  | "runtime_setup"
+  | "policies";
+
+const VALID_FAILURE_PHASES = new Set<ValidFailurePhase>([
+  "preflight",
+  "gateway",
+  "provider_selection",
+  "inference",
+  "messaging",
+  "sandbox",
+  "runtime_setup",
+  "policies",
+]);
+
+function isValidFailurePhase(phase: string): phase is ValidFailurePhase {
+  return VALID_FAILURE_PHASES.has(phase as ValidFailurePhase);
+}
+
+function deepFreeze<T>(value: T): T {
+  if (typeof value !== "object" || value === null) {
+    return value;
+  }
+  if (Object.isFrozen(value)) {
+    return value;
+  }
+  Object.freeze(value);
+  for (const child of Object.values(value as Record<string, unknown>)) {
+    deepFreeze(child);
+  }
+  return value;
+}
+
+export class InMemoryOnboardDriver {
+  #session: Session;
+  #state: OnboardFlowState;
+  #requestedSandboxName: string | null;
+
+  private constructor(session: Session, state: OnboardFlowState, requestedSandboxName: string | null) {
+    this.#session = session;
+    this.#state = state;
+    this.#requestedSandboxName = requestedSandboxName;
+  }
+
+  static fresh(options: InMemoryOnboardDriverOptions = {}): InMemoryOnboardDriver {
+    const session = createSession(sessionOverridesFromOptions(options));
+    const state = deriveOnboardFlowState(session, {
+      resume: false,
+      requestedSandboxName: options.requestedSandboxName ?? null,
+    });
+    return new InMemoryOnboardDriver(session, state, options.requestedSandboxName ?? null);
+  }
+
+  static resume(session: Session, options: Pick<InMemoryOnboardDriverOptions, "requestedSandboxName"> = {}): InMemoryOnboardDriver {
+    const state = deriveOnboardFlowState(session, {
+      resume: true,
+      requestedSandboxName: options.requestedSandboxName ?? session.sandboxName,
+    });
+    return new InMemoryOnboardDriver(
+      session,
+      state,
+      options.requestedSandboxName ??
session.sandboxName, + ); + } + + get session(): Session { + const cloned = normalizeSession(JSON.parse(JSON.stringify(this.#session))); + if (!cloned) { + throw new Error("Failed to clone onboarding session"); + } + return cloned; + } + + get state(): OnboardFlowState { + return deepFreeze(structuredClone(this.#state)); + } + + enterWorkflow(): this { + if (this.#state.phase !== "boot") { + return this; + } + this.#state = transitionOnboardState(this.#state, { type: "SESSION_READY" }); + return this; + } + + finishPreflight(): this { + applyStepComplete(this.#session, "preflight"); + if (this.#state.phase === "preflight") { + this.#state = transitionOnboardState(this.#state, { type: "PREFLIGHT_PASSED" }); + } else { + this.#state = deriveOnboardFlowState(this.#session, { + resume: true, + requestedSandboxName: this.#requestedSandboxName, + }); + } + return this; + } + + finishGateway(): this { + applyStepComplete(this.#session, "gateway"); + if (this.#state.phase === "gateway") { + this.#state = transitionOnboardState(this.#state, { type: "SESSION_READY" }); + } else { + this.#state = deriveOnboardFlowState(this.#session, { + resume: true, + requestedSandboxName: this.#requestedSandboxName, + }); + } + return this; + } + + finishProviderSelection(selection: { + provider: string; + model: string; + endpointUrl?: string | null; + credentialEnv?: string | null; + preferredInferenceApi?: string | null; + nimContainer?: string | null; + }): this { + applyStepComplete(this.#session, "provider_selection", { + provider: selection.provider, + model: selection.model, + endpointUrl: selection.endpointUrl ?? null, + credentialEnv: selection.credentialEnv ?? null, + preferredInferenceApi: selection.preferredInferenceApi ?? null, + nimContainer: selection.nimContainer ?? 
null, + }); + if (this.#state.phase === "provider_selection") { + this.#state = transitionOnboardState(this.#state, { + type: "PROVIDER_SELECTED", + selection: { + provider: selection.provider, + model: selection.model, + endpointUrl: selection.endpointUrl ?? null, + credentialEnv: selection.credentialEnv ?? null, + preferredInferenceApi: selection.preferredInferenceApi ?? null, + nimContainer: selection.nimContainer ?? null, + }, + }); + } else { + this.#state = deriveOnboardFlowState(this.#session, { + resume: true, + requestedSandboxName: this.#requestedSandboxName, + }); + } + return this; + } + + finishInference(): this { + applyStepComplete(this.#session, "inference", { + sandboxName: this.#session.sandboxName ?? undefined, + provider: this.#session.provider ?? undefined, + model: this.#session.model ?? undefined, + nimContainer: this.#session.nimContainer ?? undefined, + }); + if (this.#state.phase === "inference") { + this.#state = transitionOnboardState(this.#state, { type: "INFERENCE_CONFIGURED" }); + } else { + this.#state = deriveOnboardFlowState(this.#session, { + resume: true, + requestedSandboxName: this.#requestedSandboxName, + }); + } + return this; + } + + finishMessaging(messagingChannels: string[]): this { + const channels = [...messagingChannels]; + applyStepComplete(this.#session, "messaging", { + sandboxName: this.#session.sandboxName ?? undefined, + provider: this.#session.provider ?? undefined, + model: this.#session.model ?? 
undefined, + messagingChannels: channels, + }); + if (this.#state.phase === "messaging") { + this.#state = transitionOnboardState(this.#state, { + type: "MESSAGING_CONFIGURED", + messagingChannels: channels, + }); + } else { + this.#state = deriveOnboardFlowState(this.#session, { + resume: true, + requestedSandboxName: this.#requestedSandboxName, + }); + } + return this; + } + + finishSandbox(sandboxName: string, webSearchConfig: WebSearchConfig | null = null): this { + applyStepComplete(this.#session, "sandbox", { + sandboxName, + provider: this.#session.provider ?? undefined, + model: this.#session.model ?? undefined, + nimContainer: this.#session.nimContainer ?? undefined, + webSearchConfig, + }); + if (this.#state.phase === "sandbox") { + this.#state = transitionOnboardState(this.#state, { + type: "SANDBOX_READY", + sandboxName, + webSearchConfig, + }); + } else { + this.#state = deriveOnboardFlowState(this.#session, { + resume: true, + requestedSandboxName: this.#requestedSandboxName, + }); + } + return this; + } + + finishRuntimeSetup(): this { + const runtimeStep = this.#session.agent ? "agent_setup" : "openclaw"; + applyStepComplete(this.#session, runtimeStep, { + sandboxName: this.#session.sandboxName ?? undefined, + provider: this.#session.provider ?? undefined, + model: this.#session.model ?? undefined, + }); + if (this.#state.phase === "runtime_setup") { + this.#state = transitionOnboardState(this.#state, { type: "RUNTIME_CONFIGURED" }); + } else { + this.#state = deriveOnboardFlowState(this.#session, { + resume: true, + requestedSandboxName: this.#requestedSandboxName, + }); + } + return this; + } + + finishPolicies(policyPresets: string[]): this { + const presets = [...policyPresets]; + applyStepComplete(this.#session, "policies", { + sandboxName: this.#session.sandboxName ?? undefined, + provider: this.#session.provider ?? undefined, + model: this.#session.model ?? 
undefined, + policyPresets: presets, + }); + applySessionComplete(this.#session, { + sandboxName: this.#session.sandboxName ?? undefined, + provider: this.#session.provider ?? undefined, + model: this.#session.model ?? undefined, + policyPresets: presets, + }); + if (this.#state.phase === "policies") { + this.#state = transitionOnboardState(this.#state, { + type: "POLICIES_APPLIED", + policyPresets: presets, + }); + } else { + this.#state = deriveOnboardFlowState(this.#session, { + resume: true, + requestedSandboxName: this.#requestedSandboxName, + }); + } + return this; + } + + fail(message: string, code = "driver_failure"): this { + const failurePhase = this.#state.phase === "boot" ? "preflight" : this.#state.phase; + if (isValidFailurePhase(failurePhase)) { + const failureStep = + failurePhase === "runtime_setup" + ? this.#session.agent + ? "agent_setup" + : "openclaw" + : failurePhase; + applyStepFailed(this.#session, failureStep, message); + this.#state = deriveOnboardFlowState(this.#session, { + resume: true, + requestedSandboxName: this.#requestedSandboxName, + }); + if (this.#state.phase === "failed") { + this.#state = { + ...this.#state, + error: { + code, + message: this.#session.failure?.message ?? message, + recoverable: this.#session.resumable, + }, + }; + } + } + return this; + } + + reloadForResume(): InMemoryOnboardDriver { + return InMemoryOnboardDriver.resume(this.session, { + requestedSandboxName: this.#requestedSandboxName, + }); + } + + reset(): this { + const session = createSession({ + ...sessionOverridesFromOptions({ + mode: this.#session.mode, + runtimeTarget: this.#session.agent + ? 
{ kind: "agent", agentName: this.#session.agent } + : { kind: "openclaw" }, + fromDockerfile: this.#session.metadata.fromDockerfile, + requestedSandboxName: this.#requestedSandboxName, + }), + status: "in_progress", + resumable: true, + }); + this.#session = session; + this.#state = createInitialOnboardStateFromSession(session, this.#requestedSandboxName); + return this; + } +} + +function createInitialOnboardStateFromSession( + session: Session, + requestedSandboxName: string | null, +): OnboardFlowState { + return { + phase: "boot", + ctx: createInitialOnboardContext({ + mode: session.mode, + resume: false, + runtimeTarget: session.agent + ? { kind: "agent", agentName: session.agent } + : { kind: "openclaw" }, + fromDockerfile: session.metadata.fromDockerfile, + requestedSandboxName, + sandboxName: session.sandboxName, + provider: session.provider, + model: session.model, + endpointUrl: session.endpointUrl, + credentialEnv: session.credentialEnv, + preferredInferenceApi: session.preferredInferenceApi, + nimContainer: session.nimContainer, + webSearchConfig: session.webSearchConfig, + messagingChannels: session.messagingChannels ?? [], + policyPresets: session.policyPresets ?? [], + }), + }; +} diff --git a/src/lib/onboard-entry.test.ts b/src/lib/onboard-entry.test.ts new file mode 100644 index 0000000000..b33d8ef331 --- /dev/null +++ b/src/lib/onboard-entry.test.ts @@ -0,0 +1,147 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. 
+import { runOnboardingEntry } from "../../dist/lib/onboard-entry"; + +describe("runOnboardingEntry", () => { + it("drives the shell, initialization, orchestration, and dashboard rendering", async () => { + const releaseOnboardLock = vi.fn(); + const printDashboard = vi.fn(); + const applyShellState = vi.fn(); + const createOnboardRunContext = vi.fn((initializedRun) => ({ + driver: { session: { lastStepStarted: null } }, + fromDockerfile: initializedRun.fromDockerfile, + session: initializedRun.session, + updateSession: vi.fn(), + startStep: vi.fn(), + completeStep: vi.fn(), + skipStep: vi.fn(), + failStep: vi.fn(), + completeSession: vi.fn(), + })); + const buildOrchestratorDeps = vi.fn(() => ({ kind: "deps" })); + const runOnboardingOrchestratorMock = vi.fn(async () => ({ + sandboxName: "alpha", + model: "gpt-5.4", + provider: "openai-api", + nimContainer: null, + agent: null, + policyResult: { kind: "complete", policyPresets: ["npm"] }, + })); + const onceProcessExit = vi.fn(); + + await runOnboardingEntry( + { + resume: true, + agent: "hermes", + acceptThirdPartySoftware: true, + }, + { + env: {}, + resolveShellState: () => ({ + nonInteractive: true, + recreateSandbox: false, + resume: true, + dangerouslySkipPermissions: true, + requestedFromDockerfile: "/tmp/Custom.Dockerfile", + }), + applyShellState, + getDangerouslySkipPermissionsWarningLines: () => ["warn-1", "warn-2"], + ensureUsageNoticeConsent: async () => true, + validateRequestedProviderHint: vi.fn(), + acquireOnboardLock: vi.fn(() => ({ acquired: true, lockFile: "/tmp/onboard.lock", stale: false })), + buildOnboardLockCommand: vi.fn(() => "nemoclaw onboard --resume --non-interactive"), + getOnboardLockConflictLines: vi.fn(() => []), + releaseOnboardLock, + clearGatewayEnv: vi.fn(), + initializeOnboardRun: vi.fn(() => ({ + ok: true as const, + value: { + driver: { session: { lastStepStarted: null } }, + session: { mode: "non-interactive" }, + fromDockerfile: "/tmp/Custom.Dockerfile", + }, + })) as 
never, + getResumeConflicts: vi.fn(() => []), + createOnboardRunContext: createOnboardRunContext as never, + getOnboardBannerLines: () => ["", " NemoClaw Onboarding", " (resume mode)", " ==================="], + buildOrchestratorDeps: buildOrchestratorDeps as never, + runOnboardingOrchestrator: runOnboardingOrchestratorMock as never, + printDashboard, + note: vi.fn(), + log: vi.fn(), + error: vi.fn(), + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + onceProcessExit, + }, + ); + + expect(applyShellState).toHaveBeenCalledWith({ + nonInteractive: true, + recreateSandbox: false, + resume: true, + dangerouslySkipPermissions: true, + requestedFromDockerfile: "/tmp/Custom.Dockerfile", + }); + expect(createOnboardRunContext).toHaveBeenCalledTimes(1); + expect(buildOrchestratorDeps).toHaveBeenCalledTimes(1); + expect(runOnboardingOrchestratorMock).toHaveBeenCalledTimes(1); + expect(printDashboard).toHaveBeenCalledWith("alpha", "gpt-5.4", "openai-api", null, null); + expect(releaseOnboardLock).toHaveBeenCalledTimes(1); + expect(onceProcessExit).toHaveBeenCalledTimes(2); + }); + + it("prints lock conflict guidance and exits before initialization", async () => { + const error = vi.fn(); + await expect( + runOnboardingEntry( + { + resume: false, + agent: null, + acceptThirdPartySoftware: false, + }, + { + env: {}, + resolveShellState: () => ({ + nonInteractive: false, + recreateSandbox: false, + resume: false, + dangerouslySkipPermissions: false, + requestedFromDockerfile: null, + }), + applyShellState: vi.fn(), + getDangerouslySkipPermissionsWarningLines: () => [], + ensureUsageNoticeConsent: async () => true, + validateRequestedProviderHint: vi.fn(), + acquireOnboardLock: vi.fn(() => ({ acquired: false, lockFile: "/tmp/onboard.lock", stale: false })), + buildOnboardLockCommand: vi.fn(() => "nemoclaw onboard"), + getOnboardLockConflictLines: vi.fn(() => ["line-1", "line-2"]), + releaseOnboardLock: vi.fn(), + clearGatewayEnv: vi.fn(), + 
initializeOnboardRun: vi.fn(() => {
+            throw new Error("should not initialize when lock is held");
+          }) as never,
+          createOnboardRunContext: vi.fn() as never,
+          getOnboardBannerLines: () => [],
+          buildOrchestratorDeps: vi.fn() as never,
+          runOnboardingOrchestrator: vi.fn() as never,
+          printDashboard: vi.fn(),
+          note: vi.fn(),
+          log: vi.fn(),
+          error,
+          exit: ((code: number) => {
+            throw new Error(`exit:${code}`);
+          }) as never,
+          onceProcessExit: vi.fn(),
+        },
+      ),
+    ).rejects.toThrow("exit:1");
+
+    expect(error).toHaveBeenNthCalledWith(1, "line-1");
+    expect(error).toHaveBeenNthCalledWith(2, "line-2");
+  });
+});
diff --git a/src/lib/onboard-entry.ts b/src/lib/onboard-entry.ts
new file mode 100644
index 0000000000..e4760d2a37
--- /dev/null
+++ b/src/lib/onboard-entry.ts
@@ -0,0 +1,190 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import type {
+  InitializeOnboardRunOptions,
+  InitializeOnboardRunResult,
+  InitializedOnboardRun,
+} from "./onboard-bootstrap";
+import type { OnboardOrchestratorDeps, OnboardOrchestratorResult } from "./onboard-orchestrator";
+import type { OnboardRunContext } from "./onboard-run-context";
+import type { OnboardShellInput, OnboardShellState } from "./onboard-shell";
+import type { LockResult, Session } from "./onboard-session";
+
+export interface RunOnboardingEntryDeps<
+  TGpu = unknown,
+  TAgent extends { name: string } = { name: string },
+> {
+  env: NodeJS.ProcessEnv;
+  resolveShellState: (opts: OnboardShellInput, env: NodeJS.ProcessEnv) => OnboardShellState;
+  applyShellState: (state: OnboardShellState) => void;
+  getDangerouslySkipPermissionsWarningLines: () => string[];
+  ensureUsageNoticeConsent: (options: {
+    nonInteractive: boolean;
+    acceptedByFlag: boolean;
+    writeLine: (message?: string) => void;
+  }) => Promise<boolean>;
+  validateRequestedProviderHint: () => void;
+  acquireOnboardLock: (command: string) => LockResult;
+  
buildOnboardLockCommand: (
+    state: Pick<OnboardShellState, "resume" | "nonInteractive" | "requestedFromDockerfile">,
+  ) => string;
+  getOnboardLockConflictLines: (lockResult: LockResult) => string[];
+  releaseOnboardLock: () => void;
+  clearGatewayEnv: () => void;
+  initializeOnboardRun: (
+    options: InitializeOnboardRunOptions,
+  ) => InitializeOnboardRunResult;
+  getResumeConflicts?: (
+    session: Session,
+    shellState: OnboardShellState,
+    requestedAgent: string | null,
+  ) => NonNullable<InitializeOnboardRunOptions["getResumeConflicts"]> extends (
+    ...args: never[]
+  ) => infer T
+    ? T
+    : never;
+  createOnboardRunContext: (initializedRun: InitializedOnboardRun) => OnboardRunContext;
+  getOnboardBannerLines: (
+    state: Pick<OnboardShellState, "nonInteractive" | "resume">,
+  ) => string[];
+  buildOrchestratorDeps: (
+    runContext: OnboardRunContext,
+    shellState: OnboardShellState,
+    requestedAgent: string | null,
+  ) => OnboardOrchestratorDeps<TGpu, TAgent>;
+  runOnboardingOrchestrator: (
+    runContext: OnboardRunContext,
+    deps: OnboardOrchestratorDeps<TGpu, TAgent>,
+  ) => Promise<OnboardOrchestratorResult<TAgent>>;
+  printDashboard: (
+    sandboxName: string,
+    model: string,
+    provider: string,
+    nimContainer?: string | null,
+    agent?: TAgent | null,
+  ) => void;
+  note: (message: string) => void;
+  log: (message?: string) => void;
+  error: (message?: string) => void;
+  exit: (code: number) => never;
+  onceProcessExit: (handler: (code: number) => void) => void;
+}
+
+export async function runOnboardingEntry<
+  TGpu = unknown,
+  TAgent extends { name: string } = { name: string },
+>(
+  opts: OnboardShellInput & {
+    acceptThirdPartySoftware?: boolean;
+    agent?: string | null;
+  },
+  deps: RunOnboardingEntryDeps<TGpu, TAgent>,
+): Promise<void> {
+  const shellState = deps.resolveShellState(opts, deps.env);
+  deps.applyShellState(shellState);
+
+  const { dangerouslySkipPermissions, requestedFromDockerfile, resume } = shellState;
+  if (dangerouslySkipPermissions) {
+    for (const line of deps.getDangerouslySkipPermissionsWarningLines()) {
+      deps.error(line);
+    }
+  }
+
+  deps.clearGatewayEnv();
+  const noticeAccepted = await deps.ensureUsageNoticeConsent({
+    nonInteractive: shellState.nonInteractive,
+    acceptedByFlag:
opts.acceptThirdPartySoftware === true, + writeLine: deps.error, + }); + if (!noticeAccepted) { + deps.exit(1); + } + + // Validate NEMOCLAW_PROVIDER early so invalid values fail before preflight. + deps.validateRequestedProviderHint(); + + const lockResult = deps.acquireOnboardLock( + deps.buildOnboardLockCommand({ + resume, + nonInteractive: shellState.nonInteractive, + requestedFromDockerfile, + }), + ); + if (!lockResult.acquired) { + for (const line of deps.getOnboardLockConflictLines(lockResult)) { + deps.error(line); + } + deps.exit(1); + } + + let lockReleased = false; + const releaseOnboardLock = () => { + if (lockReleased) return; + lockReleased = true; + deps.releaseOnboardLock(); + }; + deps.onceProcessExit(releaseOnboardLock); + + try { + const initializedRun = deps.initializeOnboardRun({ + resume, + mode: shellState.nonInteractive ? "non-interactive" : "interactive", + requestedFromDockerfile, + requestedAgent: opts.agent || null, + getResumeConflicts: deps.getResumeConflicts + ? 
(session) => deps.getResumeConflicts!(session, shellState, opts.agent || null) + : undefined, + }); + if (!initializedRun.ok) { + for (const line of initializedRun.lines) { + deps.error(line); + } + deps.exit(1); + } + + const runContext = deps.createOnboardRunContext(initializedRun.value); + let completed = false; + deps.onceProcessExit((code) => { + if (!completed && code !== 0) { + const failedStep = runContext.driver.session?.lastStepStarted; + if (failedStep) { + runContext.failStep(failedStep, "Onboarding exited before the step completed."); + } + } + }); + + for (const line of deps.getOnboardBannerLines({ + nonInteractive: shellState.nonInteractive, + resume, + })) { + if (line.length === 0) { + deps.log(""); + } else if (line.startsWith(" (")) { + deps.note(line); + } else { + deps.log(line); + } + } + + const orchestrationResult = await deps.runOnboardingOrchestrator( + runContext, + deps.buildOrchestratorDeps(runContext, shellState, opts.agent || null), + ); + if (orchestrationResult.policyResult.kind === "sandbox_not_ready") { + deps.error(`\n${orchestrationResult.policyResult.message}`); + deps.exit(1); + } + + completed = true; + deps.printDashboard( + orchestrationResult.sandboxName, + orchestrationResult.model, + orchestrationResult.provider, + orchestrationResult.nimContainer, + orchestrationResult.agent, + ); + } finally { + releaseOnboardLock(); + } +} diff --git a/src/lib/onboard-flow-state.test.ts b/src/lib/onboard-flow-state.test.ts new file mode 100644 index 0000000000..1a4a937515 --- /dev/null +++ b/src/lib/onboard-flow-state.test.ts @@ -0,0 +1,298 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +// Import from compiled dist/ so coverage is attributed correctly. 
+import { + deriveOnboardFlowState, + getEffectiveMessagingStepState, + getResumeExecutablePhase, + hasCompletedOnboardStep, + hasReachedOnboardPhase, +} from "../../dist/lib/onboard-flow-state"; +import { createSession } from "../../dist/lib/onboard-session"; + +describe("onboard-flow-state", () => { + it("maps resumable checkpoints to the next canonical phase", () => { + const checkpoints = [ + { + name: "fresh session", + setup: () => createSession(), + expectedPhase: "preflight", + }, + { + name: "after preflight", + setup: () => { + const session = createSession(); + session.steps.preflight.status = "complete"; + return session; + }, + expectedPhase: "gateway", + }, + { + name: "after gateway", + setup: () => { + const session = createSession(); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + return session; + }, + expectedPhase: "provider_selection", + }, + { + name: "after provider selection", + setup: () => { + const session = createSession({ provider: "openai-api", model: "gpt-5.4" }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + return session; + }, + expectedPhase: "inference", + }, + { + name: "after inference", + setup: () => { + const session = createSession({ provider: "openai-api", model: "gpt-5.4" }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + session.steps.inference.status = "complete"; + return session; + }, + expectedPhase: "messaging", + }, + { + name: "after messaging", + setup: () => { + const session = createSession({ + provider: "openai-api", + model: "gpt-5.4", + messagingChannels: ["telegram"], + }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + session.steps.inference.status = 
"complete"; + session.steps.messaging.status = "complete"; + return session; + }, + expectedPhase: "sandbox", + }, + { + name: "after sandbox", + setup: () => { + const session = createSession({ + provider: "openai-api", + model: "gpt-5.4", + sandboxName: "alpha", + }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + session.steps.inference.status = "complete"; + session.steps.messaging.status = "complete"; + session.steps.sandbox.status = "complete"; + return session; + }, + expectedPhase: "runtime_setup", + }, + { + name: "after runtime setup", + setup: () => { + const session = createSession({ + provider: "openai-api", + model: "gpt-5.4", + sandboxName: "alpha", + }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + session.steps.inference.status = "complete"; + session.steps.messaging.status = "complete"; + session.steps.sandbox.status = "complete"; + session.steps.runtime_setup.status = "complete"; + return session; + }, + expectedPhase: "policies", + }, + ] as const; + + for (const checkpoint of checkpoints) { + const state = deriveOnboardFlowState(checkpoint.setup()); + expect(state.phase, checkpoint.name).toBe(checkpoint.expectedPhase); + } + }); + + it("derives boot when no session exists", () => { + const state = deriveOnboardFlowState(null, { requestedSandboxName: "alpha" }); + expect(state.phase).toBe("boot"); + expect(state.ctx.requestedSandboxName).toBe("alpha"); + }); + + it("derives the next resumable phase from completed checkpoints", () => { + const session = createSession({ + provider: "openai-api", + model: "gpt-5.4", + sandboxName: "alpha", + messagingChannels: ["telegram"], + }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + 
session.steps.inference.status = "complete"; + session.steps.messaging.status = "complete"; + + const state = deriveOnboardFlowState(session); + expect(state.phase).toBe("sandbox"); + expect(state.ctx.provider).toBe("openai-api"); + expect(state.ctx.model).toBe("gpt-5.4"); + expect(state.ctx.messagingChannels).toEqual(["telegram"]); + }); + + it("treats sandbox-complete legacy sessions as having completed messaging", () => { + const session = createSession({ + provider: "openai-api", + model: "gpt-5.4", + sandboxName: "alpha", + messagingChannels: ["telegram"], + }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + session.steps.inference.status = "complete"; + session.steps.sandbox.status = "complete"; + + const messaging = getEffectiveMessagingStepState(session); + const state = deriveOnboardFlowState(session); + + expect(messaging.status).toBe("complete"); + expect(state.phase).toBe("runtime_setup"); + expect(state.ctx.sandboxName).toBe("alpha"); + }); + + it("canonicalizes runtime-step failures to runtime_setup", () => { + const session = createSession({ + status: "failed", + resumable: true, + sandboxName: "alpha", + provider: "openai-api", + model: "gpt-5.4", + failure: { + step: "openclaw", + message: "gateway boot failed", + recordedAt: "2026-04-17T00:00:00.000Z", + }, + }); + session.steps.openclaw.status = "failed"; + + const state = deriveOnboardFlowState(session); + expect(state.phase).toBe("failed"); + if (state.phase !== "failed") { + throw new Error("expected failed phase"); + } + expect(state.failedFrom).toBe("runtime_setup"); + expect(state.error.code).toBe("persisted_runtime_setup_failure"); + expect(state.error.recoverable).toBe(true); + }); + + it("reports canonical phase progress and completed steps for resumable failures", () => { + const session = createSession({ + status: "failed", + resumable: true, + provider: "openai-api", + model: 
"gpt-5.4", + sandboxName: "alpha", + failure: { + step: "sandbox", + message: "sandbox creation failed", + recordedAt: "2026-04-17T00:00:00.000Z", + }, + }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + session.steps.inference.status = "complete"; + session.steps.messaging.status = "complete"; + session.steps.sandbox.status = "failed"; + + const state = deriveOnboardFlowState(session); + expect(getResumeExecutablePhase(state)).toBe("sandbox"); + expect(hasReachedOnboardPhase(state, "messaging")).toBe(true); + expect(hasReachedOnboardPhase(state, "runtime_setup")).toBe(false); + expect(hasCompletedOnboardStep(state, "messaging")).toBe(true); + expect(hasCompletedOnboardStep(state, "sandbox")).toBe(false); + }); + + it("treats legacy session.agent='openclaw' as the default runtime target", () => { + const session = createSession({ + agent: "openclaw", + sandboxName: "alpha", + provider: "openai-api", + model: "gpt-5.4", + status: "complete", + resumable: false, + }); + session.steps.policies.status = "complete"; + + const state = deriveOnboardFlowState(session); + expect(state.phase).toBe("complete"); + expect(state.ctx.runtimeTarget).toEqual({ kind: "openclaw" }); + }); + + it("rewinds a malformed completed session instead of manufacturing empty sandbox/provider/model fields", () => { + const session = createSession({ + status: "complete", + resumable: false, + provider: "openai-api", + model: "gpt-5.4", + sandboxName: null, + messagingChannels: ["telegram"], + policyPresets: ["npm"], + }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + session.steps.inference.status = "complete"; + session.steps.messaging.status = "complete"; + session.steps.sandbox.status = "complete"; + session.steps.runtime_setup.status = "complete"; + session.steps.policies.status = 
"complete"; + + const state = deriveOnboardFlowState(session); + expect(state.phase).toBe("sandbox"); + expect(state.ctx.provider).toBe("openai-api"); + expect(state.ctx.model).toBe("gpt-5.4"); + }); + + it("rewinds malformed selection checkpoints instead of promoting empty provider/model placeholders", () => { + const session = createSession({ sandboxName: "alpha", provider: null, model: null }); + session.steps.preflight.status = "complete"; + session.steps.gateway.status = "complete"; + session.steps.provider_selection.status = "complete"; + + const state = deriveOnboardFlowState(session); + expect(state.phase).toBe("provider_selection"); + }); + + it("keeps agent runtime targets and completed policy state", () => { + const session = createSession({ + status: "complete", + resumable: false, + agent: "hermes", + sandboxName: "alpha", + provider: "nvidia-nim", + model: "meta/llama-3.3-70b-instruct", + messagingChannels: ["slack"], + policyPresets: ["npm", "slack"], + }); + session.steps.policies.status = "complete"; + + const state = deriveOnboardFlowState(session); + expect(state.phase).toBe("complete"); + expect(state.ctx.runtimeTarget).toEqual({ kind: "agent", agentName: "hermes" }); + expect(state.ctx.messagingChannels).toEqual(["slack"]); + expect(state.ctx.policyPresets).toEqual(["npm", "slack"]); + }); +}); diff --git a/src/lib/onboard-flow-state.ts b/src/lib/onboard-flow-state.ts new file mode 100644 index 0000000000..b058815929 --- /dev/null +++ b/src/lib/onboard-flow-state.ts @@ -0,0 +1,260 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { + createInitialOnboardContext, + createInitialOnboardState, + type OnboardBaseContext, + type OnboardFlowState, + type OnboardStepState, + type OnboardVisibleStep, +} from "./onboard-fsm"; +import type { Session } from "./onboard-session"; + +function buildContext( + session: Session, + options: { resume?: boolean; requestedSandboxName?: string | null } = {}, +): OnboardBaseContext { + return createInitialOnboardContext({ + mode: session.mode, + resume: options.resume ?? true, + runtimeTarget: + session.agent && session.agent !== "openclaw" + ? { kind: "agent", agentName: session.agent } + : { kind: "openclaw" }, + fromDockerfile: session.metadata.fromDockerfile, + requestedSandboxName: options.requestedSandboxName ?? session.sandboxName, + sandboxName: session.sandboxName, + provider: session.provider, + model: session.model, + endpointUrl: session.endpointUrl, + credentialEnv: session.credentialEnv, + preferredInferenceApi: session.preferredInferenceApi, + nimContainer: session.nimContainer, + webSearchConfig: session.webSearchConfig, + messagingChannels: session.messagingChannels ?? [], + policyPresets: session.policyPresets ?? 
[], + }); +} + +const PHASE_ORDER = [ + "boot", + "preflight", + "gateway", + "provider_selection", + "inference", + "messaging", + "sandbox", + "runtime_setup", + "policies", + "complete", +] as const satisfies readonly OnboardFlowState["phase"][]; + +const STEP_TO_NEXT_PHASE = { + preflight: "gateway", + gateway: "provider_selection", + provider_selection: "inference", + inference: "messaging", + messaging: "sandbox", + sandbox: "runtime_setup", + runtime_setup: "policies", + policies: "complete", +} as const satisfies Record; + +function cloneStepState(step: OnboardStepState): OnboardStepState { + return { + status: step.status, + startedAt: step.startedAt, + completedAt: step.completedAt, + error: step.error, + }; +} + +export function getEffectiveMessagingStepState(session: Session): OnboardStepState { + const recorded = session.steps.messaging; + if (recorded.status !== "pending") { + return cloneStepState(recorded); + } + + const sandboxState = session.steps.sandbox; + if ( + sandboxState.status === "in_progress" || + sandboxState.status === "complete" || + sandboxState.status === "failed" || + Array.isArray(session.messagingChannels) + ) { + return { + status: "complete", + startedAt: sandboxState.startedAt, + completedAt: sandboxState.startedAt, + error: null, + }; + } + + return cloneStepState(recorded); +} + +function hasSelectionContext( + session: Session, +): session is Session & { provider: string; model: string } { + return typeof session.provider === "string" && typeof session.model === "string"; +} + +function hasSandboxContext( + session: Session, +): session is Session & { provider: string; model: string; sandboxName: string } { + return hasSelectionContext(session) && typeof session.sandboxName === "string"; +} + +function getFailureOrigin(session: Session): + | Exclude + | null { + const step = session.failure?.step ?? 
session.lastStepStarted; + if (step === "openclaw" || step === "agent_setup" || step === "runtime_setup") { + return "runtime_setup"; + } + if ( + step === "preflight" || + step === "gateway" || + step === "provider_selection" || + step === "inference" || + step === "messaging" || + step === "sandbox" || + step === "policies" + ) { + return step; + } + return null; +} + +export function getResumeExecutablePhase(state: OnboardFlowState): (typeof PHASE_ORDER)[number] { + return state.phase === "failed" ? state.failedFrom : state.phase; +} + +export function hasReachedOnboardPhase( + state: OnboardFlowState, + phase: (typeof PHASE_ORDER)[number], +): boolean { + return ( + PHASE_ORDER.indexOf(getResumeExecutablePhase(state)) >= PHASE_ORDER.indexOf(phase) + ); +} + +export function hasCompletedOnboardStep( + state: OnboardFlowState, + step: OnboardVisibleStep, +): boolean { + return hasReachedOnboardPhase(state, STEP_TO_NEXT_PHASE[step]); +} + +export function deriveOnboardFlowState( + session: Session | null, + options: { resume?: boolean; requestedSandboxName?: string | null } = {}, +): OnboardFlowState { + if (!session) { + return createInitialOnboardState({ + resume: options.resume ?? false, + requestedSandboxName: options.requestedSandboxName ?? null, + }); + } + + const ctx = buildContext(session, options); + const messagingState = getEffectiveMessagingStepState(session); + + if ( + (session.status === "complete" || session.steps.policies.status === "complete") && + hasSandboxContext(session) + ) { + return { + phase: "complete", + ctx: { + ...ctx, + sandboxName: session.sandboxName, + provider: session.provider, + model: session.model, + policyPresets: session.policyPresets ?? [], + }, + }; + } + + if (session.status === "failed") { + const failedFrom = getFailureOrigin(session) ?? "preflight"; + return { + phase: "failed", + ctx, + failedFrom, + error: { + code: `persisted_${failedFrom}_failure`, + message: session.failure?.message ?? 
"Onboarding failed.", + recoverable: session.resumable, + }, + }; + } + + if (session.steps.runtime_setup.status === "complete" && hasSandboxContext(session)) { + return { + phase: "policies", + ctx: { + ...ctx, + sandboxName: session.sandboxName, + provider: session.provider, + model: session.model, + }, + }; + } + + if (session.steps.sandbox.status === "complete" && hasSandboxContext(session)) { + return { + phase: "runtime_setup", + ctx: { + ...ctx, + sandboxName: session.sandboxName, + provider: session.provider, + model: session.model, + }, + }; + } + + if (messagingState.status === "complete" && hasSelectionContext(session)) { + return { + phase: "sandbox", + ctx: { + ...ctx, + provider: session.provider, + model: session.model, + }, + }; + } + + if (session.steps.inference.status === "complete" && hasSelectionContext(session)) { + return { + phase: "messaging", + ctx: { + ...ctx, + provider: session.provider, + model: session.model, + }, + }; + } + + if (session.steps.provider_selection.status === "complete" && hasSelectionContext(session)) { + return { + phase: "inference", + ctx: { + ...ctx, + provider: session.provider, + model: session.model, + }, + }; + } + + if (session.steps.gateway.status === "complete") { + return { phase: "provider_selection", ctx }; + } + + if (session.steps.preflight.status === "complete") { + return { phase: "gateway", ctx }; + } + + return { phase: "preflight", ctx }; +} diff --git a/src/lib/onboard-fsm.test.ts b/src/lib/onboard-fsm.test.ts new file mode 100644 index 0000000000..ac252940da --- /dev/null +++ b/src/lib/onboard-fsm.test.ts @@ -0,0 +1,156 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, expectTypeOf, it } from "vitest"; + +// Import from compiled dist/ so coverage is attributed correctly. 
+import { + createEmptyStepLedger, + createInitialOnboardState, + isOnboardStepName, + ONBOARD_STEP_ALIAS_TO_VISIBLE, + ONBOARD_STEP_META, + transitionOnboardState, +} from "../../dist/lib/onboard-fsm"; + +describe("onboard-fsm", () => { + it("defines numbered visible step metadata for all user-facing steps", () => { + expect(ONBOARD_STEP_META.preflight.number).toBe(1); + expect(ONBOARD_STEP_META.messaging.number).toBe(5); + expect(ONBOARD_STEP_META.runtime_setup.number).toBe(7); + expect(ONBOARD_STEP_META.policies.number).toBe(8); + }); + + it("builds a step ledger that includes canonical and legacy runtime step names", () => { + const ledger = createEmptyStepLedger(); + + expect(Object.keys(ledger)).toEqual([ + "preflight", + "gateway", + "provider_selection", + "inference", + "messaging", + "sandbox", + "runtime_setup", + "policies", + "openclaw", + "agent_setup", + ]); + expect(ledger.messaging.status).toBe("pending"); + expect(ledger.runtime_setup.status).toBe("pending"); + expect(ledger.openclaw.status).toBe("pending"); + expect(ledger.agent_setup.status).toBe("pending"); + }); + + it("maps legacy runtime steps to the canonical visible step", () => { + expect(ONBOARD_STEP_ALIAS_TO_VISIBLE.openclaw).toBe("runtime_setup"); + expect(ONBOARD_STEP_ALIAS_TO_VISIBLE.agent_setup).toBe("runtime_setup"); + }); + + it("recognizes valid step names including the new messaging/runtime_setup pair", () => { + expect(isOnboardStepName("messaging")).toBe(true); + expect(isOnboardStepName("runtime_setup")).toBe(true); + expect(isOnboardStepName("openclaw")).toBe(true); + expect(isOnboardStepName("agent_setup")).toBe(true); + expect(isOnboardStepName("not-a-step")).toBe(false); + }); + + it("transitions through the happy path with progressively richer context", () => { + const boot = createInitialOnboardState({ resume: true, requestedSandboxName: "demo-box" }); + const preflight = transitionOnboardState(boot, { type: "SESSION_READY" }); + const gateway = 
transitionOnboardState(preflight, { type: "PREFLIGHT_PASSED" }); + const selection = transitionOnboardState(gateway, { type: "SESSION_READY" }); + const inference = transitionOnboardState(selection, { + type: "PROVIDER_SELECTED", + selection: { + provider: "nvidia-nim", + model: "meta/llama-3.3-70b-instruct", + endpointUrl: "https://integrate.api.nvidia.com/v1", + credentialEnv: "NVIDIA_API_KEY", + preferredInferenceApi: "openai-completions", + nimContainer: null, + }, + }); + const messaging = transitionOnboardState(inference, { type: "INFERENCE_CONFIGURED" }); + const sandbox = transitionOnboardState(messaging, { + type: "MESSAGING_CONFIGURED", + messagingChannels: ["telegram", "slack"], + }); + const runtime = transitionOnboardState(sandbox, { + type: "SANDBOX_READY", + sandboxName: "demo-box", + webSearchConfig: { fetchEnabled: true }, + }); + const policies = transitionOnboardState(runtime, { type: "RUNTIME_CONFIGURED" }); + const complete = transitionOnboardState(policies, { + type: "POLICIES_APPLIED", + policyPresets: ["npm", "pypi", "telegram"], + }); + + expect(complete.phase).toBe("complete"); + expect(complete.ctx.resume).toBe(true); + expect(complete.ctx.provider).toBe("nvidia-nim"); + expect(complete.ctx.model).toBe("meta/llama-3.3-70b-instruct"); + expect(complete.ctx.messagingChannels).toEqual(["telegram", "slack"]); + expect(complete.ctx.sandboxName).toBe("demo-box"); + expect(complete.ctx.webSearchConfig).toEqual({ fetchEnabled: true }); + expect(complete.ctx.policyPresets).toEqual(["npm", "pypi", "telegram"]); + }); + + it("throws a clear error when a runtime transition is unsupported", () => { + const state = createInitialOnboardState(); + expect(() => + transitionOnboardState( + state as never, + { type: "PREFLIGHT_PASSED" } as never, + ), + ).toThrow(/Invalid onboarding transition: boot -> PREFLIGHT_PASSED/); + }); + + it("captures the failed phase and supports typed reset", () => { + const boot = createInitialOnboardState({ mode: 
"non-interactive" }); + const preflight = transitionOnboardState(boot, { type: "SESSION_READY" }); + const failed = transitionOnboardState(preflight, { + type: "FAIL", + error: { + code: "docker_unreachable", + message: "Docker is installed but not reachable.", + recoverable: true, + }, + }); + + expect(failed.phase).toBe("failed"); + expect(failed.failedFrom).toBe("preflight"); + expect(failed.error.recoverable).toBe(true); + + const reset = transitionOnboardState(failed, { + type: "RESET", + ctx: failed.ctx, + }); + expect(reset.phase).toBe("boot"); + expect(reset.ctx.mode).toBe("non-interactive"); + }); + + it("exposes context narrowing that tsc can prove", () => { + const boot = createInitialOnboardState(); + const preflight = transitionOnboardState(boot, { type: "SESSION_READY" }); + const gateway = transitionOnboardState(preflight, { type: "PREFLIGHT_PASSED" }); + const selection = transitionOnboardState(gateway, { type: "SESSION_READY" }); + const inference = transitionOnboardState(selection, { + type: "PROVIDER_SELECTED", + selection: { + provider: "openai-api", + model: "gpt-5.4", + endpointUrl: "https://api.openai.com/v1", + credentialEnv: "OPENAI_API_KEY", + preferredInferenceApi: "responses", + nimContainer: null, + }, + }); + + expectTypeOf(inference.phase).toEqualTypeOf<"inference">(); + expectTypeOf(inference.ctx.provider).toEqualTypeOf(); + expectTypeOf(inference.ctx.model).toEqualTypeOf(); + expectTypeOf(inference.ctx.sandboxName).toEqualTypeOf(); + }); +}); diff --git a/src/lib/onboard-fsm.ts b/src/lib/onboard-fsm.ts new file mode 100644 index 0000000000..0291e66339 --- /dev/null +++ b/src/lib/onboard-fsm.ts @@ -0,0 +1,421 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import type { WebSearchConfig } from "./web-search"; + +export const ONBOARD_VISIBLE_STEPS = [ + "preflight", + "gateway", + "provider_selection", + "inference", + "messaging", + "sandbox", + "runtime_setup", + "policies", +] as const; + +export const ONBOARD_RUNTIME_STEP_ALIASES = ["openclaw", "agent_setup"] as const; + +export const ONBOARD_SESSION_STEPS = [ + ...ONBOARD_VISIBLE_STEPS, + ...ONBOARD_RUNTIME_STEP_ALIASES, +] as const; + +export type OnboardVisibleStep = (typeof ONBOARD_VISIBLE_STEPS)[number]; +export type OnboardRuntimeStepAlias = (typeof ONBOARD_RUNTIME_STEP_ALIASES)[number]; +export type OnboardStepName = (typeof ONBOARD_SESSION_STEPS)[number]; + +export type OnboardMode = "interactive" | "non-interactive"; +export type OnboardRunStatus = "in_progress" | "complete" | "failed"; +export type OnboardStepStatus = "pending" | "in_progress" | "complete" | "failed" | "skipped"; + +export interface OnboardStepMeta { + number: number; + title: string; +} + +export const ONBOARD_STEP_META = { + preflight: { number: 1, title: "Preflight checks" }, + gateway: { number: 2, title: "Starting OpenShell gateway" }, + provider_selection: { number: 3, title: "Configuring inference (NIM)" }, + inference: { number: 4, title: "Setting up inference provider" }, + messaging: { number: 5, title: "Messaging channels" }, + sandbox: { number: 6, title: "Creating sandbox" }, + runtime_setup: { number: 7, title: "Setting up runtime inside sandbox" }, + policies: { number: 8, title: "Policy presets" }, +} as const satisfies Record; + +export const ONBOARD_STEP_ALIAS_TO_VISIBLE = { + openclaw: "runtime_setup", + agent_setup: "runtime_setup", +} as const satisfies Record; + +export interface OnboardStepState { + status: OnboardStepStatus; + startedAt: string | null; + completedAt: string | null; + error: string | null; +} + +export type OnboardStepLedger = { + [K in OnboardStepName]: OnboardStepState; +}; + +export interface 
OnboardRuntimeTargetOpenClaw { + kind: "openclaw"; +} + +export interface OnboardRuntimeTargetAgent { + kind: "agent"; + agentName: string; +} + +export type OnboardRuntimeTarget = OnboardRuntimeTargetOpenClaw | OnboardRuntimeTargetAgent; + +export interface OnboardBaseContext { + mode: OnboardMode; + resume: boolean; + runtimeTarget: OnboardRuntimeTarget; + fromDockerfile: string | null; + requestedSandboxName: string | null; + sandboxName: string | null; + provider: string | null; + model: string | null; + endpointUrl: string | null; + credentialEnv: string | null; + preferredInferenceApi: string | null; + nimContainer: string | null; + webSearchConfig: WebSearchConfig | null; + messagingChannels: readonly string[]; + policyPresets: readonly string[]; +} + +export interface OnboardSelectionContext extends OnboardBaseContext { + provider: string; + model: string; +} + +export interface OnboardSandboxContext extends OnboardSelectionContext { + sandboxName: string; +} + +export interface OnboardPoliciesContext extends OnboardSandboxContext { + policyPresets: readonly string[]; +} + +export type OnboardFlowState = + | { phase: "boot"; ctx: OnboardBaseContext } + | { phase: "preflight"; ctx: OnboardBaseContext } + | { phase: "gateway"; ctx: OnboardBaseContext } + | { phase: "provider_selection"; ctx: OnboardBaseContext } + | { phase: "inference"; ctx: OnboardSelectionContext } + | { phase: "messaging"; ctx: OnboardSelectionContext } + | { phase: "sandbox"; ctx: OnboardSelectionContext } + | { phase: "runtime_setup"; ctx: OnboardSandboxContext } + | { phase: "policies"; ctx: OnboardSandboxContext } + | { phase: "complete"; ctx: OnboardPoliciesContext } + | { + phase: "failed"; + ctx: OnboardBaseContext; + failedFrom: Exclude; + error: { + code: string; + message: string; + recoverable: boolean; + }; + }; + +export type OnboardFlowEvent = + | { type: "SESSION_READY" } + | { type: "PREFLIGHT_PASSED" } + | { + type: "PROVIDER_SELECTED"; + selection: { + provider: string; 
+ model: string; + endpointUrl: string | null; + credentialEnv: string | null; + preferredInferenceApi: string | null; + nimContainer: string | null; + }; + } + | { type: "INFERENCE_CONFIGURED" } + | { type: "MESSAGING_CONFIGURED"; messagingChannels: readonly string[] } + | { + type: "SANDBOX_READY"; + sandboxName: string; + webSearchConfig: WebSearchConfig | null; + } + | { type: "RUNTIME_CONFIGURED" } + | { type: "POLICIES_APPLIED"; policyPresets: readonly string[] } + | { + type: "FAIL"; + error: { + code: string; + message: string; + recoverable: boolean; + }; + } + | { type: "RESET"; ctx: OnboardBaseContext }; + +const ONBOARD_NEXT_PHASE = { + boot: { SESSION_READY: "preflight" }, + preflight: { PREFLIGHT_PASSED: "gateway", FAIL: "failed" }, + gateway: { SESSION_READY: "provider_selection", FAIL: "failed" }, + provider_selection: { PROVIDER_SELECTED: "inference", FAIL: "failed" }, + inference: { INFERENCE_CONFIGURED: "messaging", FAIL: "failed" }, + messaging: { MESSAGING_CONFIGURED: "sandbox", FAIL: "failed" }, + sandbox: { SANDBOX_READY: "runtime_setup", FAIL: "failed" }, + runtime_setup: { RUNTIME_CONFIGURED: "policies", FAIL: "failed" }, + policies: { POLICIES_APPLIED: "complete", FAIL: "failed" }, + complete: { RESET: "boot" }, + failed: { RESET: "boot" }, +} as const; + +type OnboardPhase = keyof typeof ONBOARD_NEXT_PHASE; +type OnboardStateByPhase = { + [P in OnboardFlowState["phase"]]: Extract; +}; + +type StateOf

= OnboardStateByPhase[P]; +type EventOf = Extract; +type AllowedEvent

= keyof (typeof ONBOARD_NEXT_PHASE)[P] & OnboardFlowEvent["type"]; +type NextPhase

> = + ((typeof ONBOARD_NEXT_PHASE)[P][E]) & OnboardPhase; + +type OnboardTransitionTable = { + [P in OnboardPhase]: { + [E in AllowedEvent

]: ( + state: StateOf

, + event: EventOf, + ) => StateOf>; + }; +}; + + +export function createEmptyStepLedger(): OnboardStepLedger { + const emptyStep = (): OnboardStepState => ({ + status: "pending", + startedAt: null, + completedAt: null, + error: null, + }); + return Object.fromEntries( + ONBOARD_SESSION_STEPS.map((stepName) => [stepName, emptyStep()]), + ) as OnboardStepLedger; +} + +export function isOnboardStepName(value: unknown): value is OnboardStepName { + return typeof value === "string" && (ONBOARD_SESSION_STEPS as readonly string[]).includes(value); +} + +export function toVisibleStepName(stepName: OnboardStepName): OnboardVisibleStep { + return stepName in ONBOARD_STEP_ALIAS_TO_VISIBLE + ? ONBOARD_STEP_ALIAS_TO_VISIBLE[stepName as OnboardRuntimeStepAlias] + : (stepName as OnboardVisibleStep); +} + +export function createInitialOnboardContext( + overrides: Partial = {}, +): OnboardBaseContext { + return { + mode: overrides.mode ?? "interactive", + resume: overrides.resume ?? false, + runtimeTarget: overrides.runtimeTarget ?? { kind: "openclaw" }, + fromDockerfile: overrides.fromDockerfile ?? null, + requestedSandboxName: overrides.requestedSandboxName ?? null, + sandboxName: overrides.sandboxName ?? null, + provider: overrides.provider ?? null, + model: overrides.model ?? null, + endpointUrl: overrides.endpointUrl ?? null, + credentialEnv: overrides.credentialEnv ?? null, + preferredInferenceApi: overrides.preferredInferenceApi ?? null, + nimContainer: overrides.nimContainer ?? null, + webSearchConfig: overrides.webSearchConfig ?? null, + messagingChannels: overrides.messagingChannels ?? [], + policyPresets: overrides.policyPresets ?? [], + }; +} + +export function createInitialOnboardState( + ctx: Partial = {}, +): Extract { + return { + phase: "boot", + ctx: createInitialOnboardContext(ctx), + }; +} + +function failFrom

>(phase: P) { + return (state: StateOf

, event: EventOf<"FAIL">): StateOf<"failed"> => ({ + phase: "failed", + ctx: state.ctx, + failedFrom: phase, + error: event.error, + }); +} + +const ONBOARD_TRANSITIONS = { + boot: { + SESSION_READY: (state) => ({ phase: "preflight", ctx: state.ctx }), + }, + preflight: { + PREFLIGHT_PASSED: (state) => ({ phase: "gateway", ctx: state.ctx }), + FAIL: failFrom("preflight"), + }, + gateway: { + SESSION_READY: (state) => ({ phase: "provider_selection", ctx: state.ctx }), + FAIL: failFrom("gateway"), + }, + provider_selection: { + PROVIDER_SELECTED: (state, event) => ({ + phase: "inference", + ctx: { + ...state.ctx, + provider: event.selection.provider, + model: event.selection.model, + endpointUrl: event.selection.endpointUrl, + credentialEnv: event.selection.credentialEnv, + preferredInferenceApi: event.selection.preferredInferenceApi, + nimContainer: event.selection.nimContainer, + }, + }), + FAIL: failFrom("provider_selection"), + }, + inference: { + INFERENCE_CONFIGURED: (state) => ({ phase: "messaging", ctx: state.ctx }), + FAIL: failFrom("inference"), + }, + messaging: { + MESSAGING_CONFIGURED: (state, event) => ({ + phase: "sandbox", + ctx: { ...state.ctx, messagingChannels: [...event.messagingChannels] }, + }), + FAIL: failFrom("messaging"), + }, + sandbox: { + SANDBOX_READY: (state, event) => ({ + phase: "runtime_setup", + ctx: { + ...state.ctx, + sandboxName: event.sandboxName, + webSearchConfig: event.webSearchConfig, + }, + }), + FAIL: failFrom("sandbox"), + }, + runtime_setup: { + RUNTIME_CONFIGURED: (state) => ({ phase: "policies", ctx: state.ctx }), + FAIL: failFrom("runtime_setup"), + }, + policies: { + POLICIES_APPLIED: (state, event) => ({ + phase: "complete", + ctx: { ...state.ctx, policyPresets: [...event.policyPresets] }, + }), + FAIL: failFrom("policies"), + }, + complete: { + RESET: (_state, event) => ({ phase: "boot", ctx: event.ctx }), + }, + failed: { + RESET: (_state, event) => ({ phase: "boot", ctx: event.ctx }), + }, +} satisfies 
OnboardTransitionTable; + +export function transitionOnboardState( + state: StateOf<"boot">, + event: EventOf<"SESSION_READY">, +): StateOf<"preflight">; +export function transitionOnboardState( + state: StateOf<"preflight">, + event: EventOf<"PREFLIGHT_PASSED">, +): StateOf<"gateway">; +export function transitionOnboardState( + state: StateOf<"preflight">, + event: EventOf<"FAIL">, +): StateOf<"failed">; +export function transitionOnboardState( + state: StateOf<"gateway">, + event: EventOf<"SESSION_READY">, +): StateOf<"provider_selection">; +export function transitionOnboardState( + state: StateOf<"gateway">, + event: EventOf<"FAIL">, +): StateOf<"failed">; +export function transitionOnboardState( + state: StateOf<"provider_selection">, + event: EventOf<"PROVIDER_SELECTED">, +): StateOf<"inference">; +export function transitionOnboardState( + state: StateOf<"provider_selection">, + event: EventOf<"FAIL">, +): StateOf<"failed">; +export function transitionOnboardState( + state: StateOf<"inference">, + event: EventOf<"INFERENCE_CONFIGURED">, +): StateOf<"messaging">; +export function transitionOnboardState( + state: StateOf<"inference">, + event: EventOf<"FAIL">, +): StateOf<"failed">; +export function transitionOnboardState( + state: StateOf<"messaging">, + event: EventOf<"MESSAGING_CONFIGURED">, +): StateOf<"sandbox">; +export function transitionOnboardState( + state: StateOf<"messaging">, + event: EventOf<"FAIL">, +): StateOf<"failed">; +export function transitionOnboardState( + state: StateOf<"sandbox">, + event: EventOf<"SANDBOX_READY">, +): StateOf<"runtime_setup">; +export function transitionOnboardState( + state: StateOf<"sandbox">, + event: EventOf<"FAIL">, +): StateOf<"failed">; +export function transitionOnboardState( + state: StateOf<"runtime_setup">, + event: EventOf<"RUNTIME_CONFIGURED">, +): StateOf<"policies">; +export function transitionOnboardState( + state: StateOf<"runtime_setup">, + event: EventOf<"FAIL">, +): StateOf<"failed">; +export 
function transitionOnboardState( + state: StateOf<"policies">, + event: EventOf<"POLICIES_APPLIED">, +): StateOf<"complete">; +export function transitionOnboardState( + state: StateOf<"policies">, + event: EventOf<"FAIL">, +): StateOf<"failed">; +export function transitionOnboardState( + state: StateOf<"complete">, + event: EventOf<"RESET">, +): StateOf<"boot">; +export function transitionOnboardState( + state: StateOf<"failed">, + event: EventOf<"RESET">, +): StateOf<"boot">; +export function transitionOnboardState( + state: OnboardFlowState, + event: OnboardFlowEvent, +): OnboardFlowState { + const phaseTransitions = ONBOARD_TRANSITIONS[state.phase] as Record< + string, + (current: OnboardFlowState, nextEvent: OnboardFlowEvent) => OnboardFlowState + >; + const handler = phaseTransitions[event.type]; + if (typeof handler !== "function") { + throw new Error( + `Invalid onboarding transition: ${state.phase} -> ${event.type} (allowed: ${Object.keys(phaseTransitions).join(", ")})`, + ); + } + return handler(state, event); +} + +export function assertNever(value: never): never { + throw new Error(`Unexpected value: ${String(value)}`); +} diff --git a/src/lib/onboard-gateway-liveness.ts b/src/lib/onboard-gateway-liveness.ts new file mode 100644 index 0000000000..746a5b191e --- /dev/null +++ b/src/lib/onboard-gateway-liveness.ts @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +export interface GatewayLivenessDeps { + run: ( + command: string | string[], + options?: { ignoreError?: boolean; suppressOutput?: boolean }, + ) => { status: number; stdout?: string; stderr?: string }; +} + +/** + * Probe whether the gateway Docker container is actually running. + * openshell CLI metadata can be stale after a manual `docker rm`, so this + * verifies the container is live before trusting a "healthy" reuse state. + * + * Returns "running" | "missing" | "unknown". 
+ * - "running" — container exists and State.Running is true + * - "missing" — container was removed or exists but is stopped (not reusable) + * - "unknown" — any other failure (daemon down, timeout, etc.) + * + * Callers should only trigger stale-metadata cleanup on "missing", not on + * "unknown", to avoid destroying a healthy gateway when Docker is temporarily + * unavailable. See #2020. + */ +export function verifyGatewayContainerRunning( + gatewayName: string, + deps: GatewayLivenessDeps, +): "running" | "missing" | "unknown" { + const containerName = `openshell-cluster-${gatewayName}`; + const result = deps.run( + `docker inspect --type container --format '{{.State.Running}}' ${containerName}`, + { ignoreError: true, suppressOutput: true }, + ); + if (result.status === 0 && String(result.stdout || "").trim() === "true") { + return "running"; + } + // Container exists but is stopped (exit 0, Running !== "true") + if (result.status === 0) { + return "missing"; + } + const stderr = String(result.stderr || ""); + if (stderr.includes("No such object") || stderr.includes("No such container")) { + return "missing"; + } + return "unknown"; +} diff --git a/src/lib/onboard-gateway-runtime.test.ts b/src/lib/onboard-gateway-runtime.test.ts new file mode 100644 index 0000000000..298b67f3ef --- /dev/null +++ b/src/lib/onboard-gateway-runtime.test.ts @@ -0,0 +1,248 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; + +vi.mock("p-retry", () => ({ + default: async ( + fn: () => Promise, + opts?: { onFailedAttempt?: (error: Error & { attemptNumber: number; retriesLeft: number }) => void }, + ) => { + try { + return await fn(); + } catch (error) { + opts?.onFailedAttempt?.(Object.assign(error as Error, { attemptNumber: 1, retriesLeft: 0 })); + throw error; + } + }, +})); +// Import from compiled dist/ so coverage is attributed correctly. +import { + getGatewayStartEnv, + recoverGatewayRuntime, + startGatewayWithOptions, +} from "../../dist/lib/onboard-gateway-runtime"; + +describe("onboard-gateway-runtime", () => { + it("builds a pinned gateway image environment from the installed OpenShell version", () => { + expect(getGatewayStartEnv("0.0.24")).toEqual({ + OPENSHELL_CLUSTER_IMAGE: "ghcr.io/nvidia/openshell/cluster:0.0.24", + IMAGE_TAG: "0.0.24", + }); + expect(getGatewayStartEnv(null)).toEqual({}); + }); + + it("reuses an already healthy gateway without attempting a restart", async () => { + const log = vi.fn(); + const runOpenshell = vi.fn(() => ({ status: 0 })); + const processEnv: NodeJS.ProcessEnv = {}; + + await startGatewayWithOptions( + null, + { + gatewayName: "nemoclaw", + gatewayPort: 8080, + scriptsDir: "/repo/scripts", + processEnv, + showHeader: vi.fn(), + log, + error: vi.fn(), + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + openshellShellCommand: (args) => args.join(" "), + streamGatewayStart: async () => ({ status: 0, output: "" }), + runCaptureOpenshell: (args) => (args[0] === "status" ? 
"Gateway status: Connected" : "Gateway: nemoclaw"), + runOpenshell, + isGatewayHealthy: () => true, + hasStaleGateway: () => false, + redact: (value) => value, + compactText: (value) => value.trim(), + envInt: (_name, fallback) => fallback, + sleep: () => {}, + getInstalledOpenshellVersion: () => "0.0.24", + getContainerRuntime: () => "docker", + shouldPatchCoredns: () => false, + run: () => ({ status: 0 }), + destroyGateway: vi.fn(), + pruneKnownHostsEntries: (value) => value, + }, + { exitOnFailure: true }, + ); + + expect(log).toHaveBeenCalledWith(" ✓ Reusing existing gateway"); + expect(runOpenshell).toHaveBeenCalledWith(["gateway", "select", "nemoclaw"], { + ignoreError: true, + }); + expect(processEnv.OPENSHELL_GATEWAY).toBe("nemoclaw"); + }); + + it("starts the gateway, patches CoreDNS when needed, and selects it afterward", async () => { + const log = vi.fn(); + const runCaptureOpenshell = vi + .fn() + .mockReturnValueOnce("Gateway status: Disconnected") + .mockReturnValueOnce("Gateway: nemoclaw") + .mockReturnValueOnce("Gateway: openshell") + .mockReturnValueOnce("") + .mockReturnValueOnce("Gateway status: Connected\nGateway: nemoclaw") + .mockReturnValueOnce("Gateway: nemoclaw") + .mockReturnValueOnce("Gateway: nemoclaw"); + const run = vi.fn(() => ({ status: 0 })); + const runOpenshell = vi.fn(() => ({ status: 0 })); + const processEnv: NodeJS.ProcessEnv = {}; + + await startGatewayWithOptions( + null, + { + gatewayName: "nemoclaw", + gatewayPort: 8080, + scriptsDir: "/repo/scripts", + processEnv, + showHeader: vi.fn(), + log, + error: vi.fn(), + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + openshellShellCommand: (args) => `openshell ${args.join(" ")}`, + streamGatewayStart: async (_command, env) => { + expect(env.OPENSHELL_CLUSTER_IMAGE).toBe("ghcr.io/nvidia/openshell/cluster:0.0.24"); + return { status: 0, output: "starting gateway" }; + }, + runCaptureOpenshell, + runOpenshell, + isGatewayHealthy: (status) => 
status.includes("Connected") && status.includes("nemoclaw"), + hasStaleGateway: () => false, + redact: (value) => value, + compactText: (value) => value.trim(), + envInt: (_name, fallback) => fallback === 12 ? 1 : fallback === 5 ? 0 : fallback, + sleep: vi.fn(), + getInstalledOpenshellVersion: () => "0.0.24", + getContainerRuntime: () => "docker", + shouldPatchCoredns: () => true, + run, + destroyGateway: vi.fn(), + pruneKnownHostsEntries: (value) => value, + }, + { exitOnFailure: true }, + ); + + expect(log).toHaveBeenCalledWith(" Waiting for gateway health..."); + expect(log).toHaveBeenCalledWith(" ✓ Gateway is healthy"); + expect(log).toHaveBeenCalledWith(" Patching CoreDNS DNS forwarding..."); + expect(run).toHaveBeenCalledWith( + ["bash", "/repo/scripts/fix-coredns.sh", "nemoclaw"], + { ignoreError: true }, + ); + expect(runOpenshell).toHaveBeenLastCalledWith(["gateway", "select", "nemoclaw"], { + ignoreError: true, + }); + expect(processEnv.OPENSHELL_GATEWAY).toBe("nemoclaw"); + }); + + it("prints doctor logs and exits when gateway startup fails with exitOnFailure", async () => { + vi.useFakeTimers(); + const error = vi.fn(); + try { + const promise = startGatewayWithOptions( + null, + { + gatewayName: "nemoclaw", + gatewayPort: 8080, + scriptsDir: "/repo/scripts", + processEnv: {}, + showHeader: vi.fn(), + log: vi.fn(), + error, + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + openshellShellCommand: (args) => args.join(" "), + streamGatewayStart: async () => ({ status: 1, output: "ERROR gateway failed" }), + runCaptureOpenshell: (args) => + args.includes("doctor") + ? "ERROR k3s cluster crashed: OOMKilled\nGateway auth token: nvapi-fakecredential-9999" + : "", + runOpenshell: vi.fn(() => ({ status: 0 })), + isGatewayHealthy: () => false, + hasStaleGateway: () => true, + redact: (value) => value.replace(/nvapi-[^\s]+/g, ""), + compactText: (value) => value.trim(), + envInt: (_name, fallback) => fallback === 12 ? 
0 : fallback, + sleep: vi.fn(), + getInstalledOpenshellVersion: () => null, + getContainerRuntime: () => "docker", + shouldPatchCoredns: () => false, + run: vi.fn(() => ({ status: 0 })), + destroyGateway: vi.fn(), + pruneKnownHostsEntries: (value) => value, + }, + { exitOnFailure: true }, + ).then( + () => { + throw new Error("expected gateway startup to fail"); + }, + (error) => error, + ); + await vi.advanceTimersByTimeAsync(100_000); + const failure = await promise; + expect(failure).toBeInstanceOf(Error); + expect(failure.message).toBe("exit:1"); + } finally { + vi.useRealTimers(); + } + + expect(error).toHaveBeenCalledWith(" Gateway failed to start after 3 attempts."); + expect(error).toHaveBeenCalledWith(" Gateway logs:"); + expect(error.mock.calls.join("\n")).toContain("OOMKilled"); + expect(error.mock.calls.join("\n")).not.toContain("nvapi-fakecredential-9999"); + }); + + it("recovers gateway runtime by restarting, polling health, and patching CoreDNS when needed", async () => { + const runCaptureOpenshell = vi + .fn() + .mockReturnValueOnce("Disconnected") + .mockReturnValueOnce("Connected Gateway: nemoclaw"); + const runOpenshell = vi.fn(() => ({ status: 0, stdout: "", stderr: "" })); + const run = vi.fn(() => ({ status: 0 })); + const processEnv: NodeJS.ProcessEnv = {}; + + const ok = await recoverGatewayRuntime({ + gatewayName: "nemoclaw", + gatewayPort: 8080, + processEnv, + runCaptureOpenshell, + runOpenshell, + isSelectedGateway: () => true, + getGatewayStartEnv: () => ({ OPENSHELL_CLUSTER_IMAGE: "ghcr.io/nvidia/openshell/cluster:0.0.24" }), + envInt: (_name, fallback) => fallback === 10 ? 1 : fallback === 2 ? 
0 : fallback, + sleep: vi.fn(), + redact: (value) => value, + compactText: (value) => value.trim(), + getContainerRuntime: () => "docker", + shouldPatchCoredns: () => true, + run, + scriptsDir: "/repo/scripts", + error: vi.fn(), + }); + + expect(ok).toBe(true); + expect(runOpenshell).toHaveBeenCalledWith(["gateway", "select", "nemoclaw"], { + ignoreError: true, + }); + expect(runOpenshell).toHaveBeenCalledWith( + ["gateway", "start", "--name", "nemoclaw", "--port", "8080"], + { + ignoreError: true, + env: { OPENSHELL_CLUSTER_IMAGE: "ghcr.io/nvidia/openshell/cluster:0.0.24" }, + suppressOutput: true, + }, + ); + expect(run).toHaveBeenCalledWith( + ["bash", "/repo/scripts/fix-coredns.sh", "nemoclaw"], + { ignoreError: true }, + ); + expect(processEnv.OPENSHELL_GATEWAY).toBe("nemoclaw"); + }); +}); diff --git a/src/lib/onboard-gateway-runtime.ts b/src/lib/onboard-gateway-runtime.ts new file mode 100644 index 0000000000..b25b3466ba --- /dev/null +++ b/src/lib/onboard-gateway-runtime.ts @@ -0,0 +1,333 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { execFileSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import pRetry from "p-retry"; + +import { ANSI_RE } from "./ansi-utils"; + +export interface GatewayStartResult { + status: number; + output: string; +} + +export interface GatewayStartEnv { + OPENSHELL_CLUSTER_IMAGE?: string; + IMAGE_TAG?: string; +} + +export function getGatewayStartEnv(openshellVersion: string | null): GatewayStartEnv { + const gatewayEnv: GatewayStartEnv = {}; + const stableGatewayImage = openshellVersion + ? 
`ghcr.io/nvidia/openshell/cluster:${openshellVersion}` + : null; + if (stableGatewayImage) { + gatewayEnv.OPENSHELL_CLUSTER_IMAGE = stableGatewayImage; + gatewayEnv.IMAGE_TAG = openshellVersion || undefined; + } + return gatewayEnv; +} + +export interface StartGatewayDeps { + gatewayName: string; + gatewayPort: number; + scriptsDir: string; + processEnv: NodeJS.ProcessEnv; + processArch?: string; + showHeader: () => void; + log: (message?: string) => void; + error: (message?: string) => void; + exit: (code: number) => never; + openshellShellCommand: (args: string[]) => string; + streamGatewayStart: ( + command: string, + env: NodeJS.ProcessEnv, + ) => Promise; + runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string; + runOpenshell: ( + args: string[], + opts?: { + ignoreError?: boolean; + suppressOutput?: boolean; + env?: Record; + stdio?: [string, string, string]; + }, + ) => { status: number; stdout?: string; stderr?: string }; + isGatewayHealthy: ( + statusOutput: string, + gwInfoOutput: string, + activeGatewayInfoOutput: string, + ) => boolean; + hasStaleGateway: (gwInfoOutput: string) => boolean; + redact: (value: string) => string; + compactText: (value: string) => string; + envInt: (name: string, fallback: number) => number; + sleep: (seconds: number) => void; + getInstalledOpenshellVersion: () => string | null; + getContainerRuntime: () => string; + shouldPatchCoredns: (runtime: string) => boolean; + run: (command: string | string[], opts?: { ignoreError?: boolean }) => unknown; + destroyGateway: () => void; + pruneKnownHostsEntries: (contents: string) => string; + execFileSyncImpl?: typeof execFileSync; + fsImpl?: Pick; + osImpl?: Pick; +} + +export interface StartGatewayOptions { + exitOnFailure?: boolean; +} + +function clearGatewayKnownHosts( + gatewayName: string, + pruneKnownHostsEntries: (contents: string) => string, + execFileSyncImpl: typeof execFileSync, + fsImpl: Pick, + homeDir: string, +): void { + try { + 
execFileSyncImpl("ssh-keygen", ["-R", `openshell-${gatewayName}`], { stdio: "ignore" }); + } catch { + /* ssh-keygen -R may fail if entry doesn't exist — safe to ignore */ + } + + const knownHostsPath = path.join(homeDir, ".ssh", "known_hosts"); + if (fsImpl.existsSync(knownHostsPath)) { + try { + const kh = fsImpl.readFileSync(knownHostsPath, "utf8"); + const cleaned = pruneKnownHostsEntries(kh); + if (cleaned !== kh) fsImpl.writeFileSync(knownHostsPath, cleaned); + } catch { + /* best-effort cleanup — ignore read/write errors */ + } + } +} + +export async function startGatewayWithOptions( + _gpu: TGpu, + deps: StartGatewayDeps, + options: StartGatewayOptions = {}, +): Promise { + const exitOnFailure = options.exitOnFailure ?? true; + const execFileSyncImpl = deps.execFileSyncImpl ?? execFileSync; + const fsImpl = deps.fsImpl ?? fs; + const osImpl = deps.osImpl ?? os; + + deps.showHeader(); + + const gatewayStatus = deps.runCaptureOpenshell(["status"], { ignoreError: true }); + const gwInfo = deps.runCaptureOpenshell(["gateway", "info", "-g", deps.gatewayName], { + ignoreError: true, + }); + const activeGatewayInfo = deps.runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); + if (deps.isGatewayHealthy(gatewayStatus, gwInfo, activeGatewayInfo)) { + deps.log(" ✓ Reusing existing gateway"); + deps.runOpenshell(["gateway", "select", deps.gatewayName], { ignoreError: true }); + deps.processEnv.OPENSHELL_GATEWAY = deps.gatewayName; + return; + } + + if (deps.hasStaleGateway(gwInfo)) { + deps.log(" Stale gateway detected — attempting restart without destroy..."); + } + + clearGatewayKnownHosts( + deps.gatewayName, + deps.pruneKnownHostsEntries, + execFileSyncImpl, + fsImpl, + osImpl.homedir(), + ); + + const gwArgs = ["--name", deps.gatewayName, "--port", String(deps.gatewayPort)]; + const gatewayEnv = getGatewayStartEnv(deps.getInstalledOpenshellVersion()); + if (gatewayEnv.OPENSHELL_CLUSTER_IMAGE) { + deps.log(` Using pinned OpenShell gateway image: 
${gatewayEnv.OPENSHELL_CLUSTER_IMAGE}`); + } + + const retries = exitOnFailure ? 2 : 0; + try { + await pRetry( + async () => { + const startResult = await deps.streamGatewayStart( + deps.openshellShellCommand(["gateway", "start", ...gwArgs]), + { + ...deps.processEnv, + ...gatewayEnv, + }, + ); + if (startResult.status !== 0) { + const lines = String(deps.redact(startResult.output || "")) + .split("\n") + .map((line) => deps.compactText(line.replace(ANSI_RE, ""))) + .filter(Boolean) + .map((line) => ` ${line}`); + if (lines.length > 0) { + deps.log(` Gateway start returned before healthy:\n${lines.join("\n")}`); + } + } + deps.log(" Waiting for gateway health..."); + + const isArm64 = (deps.processArch ?? process.arch) === "arm64"; + const healthPollCount = deps.envInt("NEMOCLAW_HEALTH_POLL_COUNT", isArm64 ? 30 : 12); + const healthPollInterval = deps.envInt( + "NEMOCLAW_HEALTH_POLL_INTERVAL", + isArm64 ? 10 : 5, + ); + for (let i = 0; i < healthPollCount; i++) { + deps.runCaptureOpenshell(["gateway", "select", deps.gatewayName], { ignoreError: true }); + const status = deps.runCaptureOpenshell(["status"], { ignoreError: true }); + const namedInfo = deps.runCaptureOpenshell(["gateway", "info", "-g", deps.gatewayName], { + ignoreError: true, + }); + const currentInfo = deps.runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); + if (deps.isGatewayHealthy(status, namedInfo, currentInfo)) { + return; + } + if (i < healthPollCount - 1) deps.sleep(healthPollInterval); + } + + throw new Error("Gateway failed to start"); + }, + { + retries, + minTimeout: 10_000, + factor: 3, + onFailedAttempt: (error) => { + deps.log( + ` Gateway start attempt ${error.attemptNumber} failed. 
${error.retriesLeft} retries left...`, + ); + if (error.retriesLeft > 0 && exitOnFailure) { + deps.destroyGateway(); + } + }, + }, + ); + } catch { + if (exitOnFailure) { + deps.error(` Gateway failed to start after ${retries + 1} attempts.`); + deps.error(" Gateway state preserved for diagnostics."); + deps.error(""); + try { + const logs = deps.redact( + deps.runCaptureOpenshell(["doctor", "logs", "--name", deps.gatewayName], { + ignoreError: true, + }), + ); + if (logs) { + deps.error(" Gateway logs:"); + for (const line of String(logs) + .split("\n") + .map((line) => line.replace(/\r/g, "").replace(ANSI_RE, "")) + .filter(Boolean)) { + deps.error(` ${line}`); + } + deps.error(""); + } + } catch { + // doctor logs unavailable — fall through to manual instructions + } + deps.error(" Troubleshooting:"); + deps.error(` openshell doctor logs --name ${deps.gatewayName}`); + deps.error(" openshell doctor check"); + deps.exit(1); + } + throw new Error("Gateway failed to start"); + } + + deps.log(" ✓ Gateway is healthy"); + const runtime = deps.getContainerRuntime(); + if (deps.shouldPatchCoredns(runtime)) { + deps.log(" Patching CoreDNS DNS forwarding..."); + // Pass the script path and gateway name as discrete argv entries so + // deps.gatewayName cannot alter shell parsing if it ever stops being a + // fixed internal constant. 
+ deps.run(["bash", path.join(deps.scriptsDir, "fix-coredns.sh"), deps.gatewayName], { + ignoreError: true, + }); + } + deps.sleep(5); + deps.runOpenshell(["gateway", "select", deps.gatewayName], { ignoreError: true }); + deps.processEnv.OPENSHELL_GATEWAY = deps.gatewayName; +} + +export interface RecoverGatewayRuntimeDeps { + gatewayName: string; + gatewayPort: number; + processEnv: NodeJS.ProcessEnv; + runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string; + runOpenshell: ( + args: string[], + opts?: { + ignoreError?: boolean; + suppressOutput?: boolean; + env?: Record; + stdio?: [string, string, string]; + }, + ) => { status: number; stdout?: string; stderr?: string }; + isSelectedGateway: (statusOutput: string, gatewayName?: string) => boolean; + getGatewayStartEnv: () => GatewayStartEnv; + envInt: (name: string, fallback: number) => number; + sleep: (seconds: number) => void; + redact: (value: string) => string; + compactText: (value: string) => string; + getContainerRuntime: () => string; + shouldPatchCoredns: (runtime: string) => boolean; + run: (command: string | string[], opts?: { ignoreError?: boolean }) => unknown; + scriptsDir: string; + error: (message?: string) => void; +} + +export async function recoverGatewayRuntime(deps: RecoverGatewayRuntimeDeps): Promise { + deps.runOpenshell(["gateway", "select", deps.gatewayName], { ignoreError: true }); + let status = deps.runCaptureOpenshell(["status"], { ignoreError: true }); + if (status.includes("Connected") && deps.isSelectedGateway(status, deps.gatewayName)) { + deps.processEnv.OPENSHELL_GATEWAY = deps.gatewayName; + return true; + } + + const startResult = deps.runOpenshell( + ["gateway", "start", "--name", deps.gatewayName, "--port", String(deps.gatewayPort)], + { + ignoreError: true, + env: deps.getGatewayStartEnv() as Record, + suppressOutput: true, + }, + ); + if (startResult.status !== 0) { + const diagnostic = deps.compactText( + deps.redact(`${startResult.stderr || 
""} ${startResult.stdout || ""}`), + ); + deps.error(` Gateway restart failed (exit ${startResult.status}).`); + if (diagnostic) { + deps.error(` ${diagnostic.slice(0, 240)}`); + } + } + deps.runOpenshell(["gateway", "select", deps.gatewayName], { ignoreError: true }); + + const recoveryPollCount = deps.envInt("NEMOCLAW_HEALTH_POLL_COUNT", 10); + const recoveryPollInterval = deps.envInt("NEMOCLAW_HEALTH_POLL_INTERVAL", 2); + for (let i = 0; i < recoveryPollCount; i++) { + status = deps.runCaptureOpenshell(["status"], { ignoreError: true }); + if (status.includes("Connected") && deps.isSelectedGateway(status, deps.gatewayName)) { + deps.processEnv.OPENSHELL_GATEWAY = deps.gatewayName; + const runtime = deps.getContainerRuntime(); + if (deps.shouldPatchCoredns(runtime)) { + // Pass the script path and gateway name as discrete argv entries so + // deps.gatewayName cannot alter shell parsing if it ever stops being a + // fixed internal constant. + deps.run(["bash", path.join(deps.scriptsDir, "fix-coredns.sh"), deps.gatewayName], { + ignoreError: true, + }); + } + return true; + } + if (i < recoveryPollCount - 1) deps.sleep(recoveryPollInterval); + } + + return false; +} diff --git a/src/lib/onboard-gateway-start-stream.ts b/src/lib/onboard-gateway-start-stream.ts new file mode 100644 index 0000000000..0356753b64 --- /dev/null +++ b/src/lib/onboard-gateway-start-stream.ts @@ -0,0 +1,163 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +export interface GatewayStartStreamResult { + status: number; + output: string; +} + +export interface GatewayStartStreamDeps { + spawn: typeof import("node:child_process").spawn; + root: string; + envInt: (name: string, fallback: number) => number; +} + +/** Spawn `openshell gateway start` and stream its output with progress heartbeats. 
*/ +export function streamGatewayStart( + command: string, + env: NodeJS.ProcessEnv = process.env, + deps: GatewayStartStreamDeps, +): Promise { + const child = deps.spawn("bash", ["-lc", command], { + cwd: deps.root, + env, + stdio: ["ignore", "pipe", "pipe"], + }); + + const lines: string[] = []; + let pending = ""; + let settled = false; + let resolvePromise: (result: GatewayStartStreamResult) => void; + let lastPrintedLine = ""; + let currentPhase = "cluster"; + let lastHeartbeatBucket = -1; + let lastOutputAt = Date.now(); + const startedAt = Date.now(); + + function getDisplayWidth() { + return Math.max(60, Number(process.stdout.columns || 100)); + } + + function trimDisplayLine(line: string) { + const width = getDisplayWidth(); + const maxLen = Math.max(40, width - 4); + if (line.length <= maxLen) return line; + return `${line.slice(0, Math.max(0, maxLen - 3))}...`; + } + + function printProgressLine(line: string) { + const display = trimDisplayLine(line); + if (display !== lastPrintedLine) { + console.log(display); + lastPrintedLine = display; + } + } + + function elapsedSeconds() { + return Math.max(0, Math.floor((Date.now() - startedAt) / 1000)); + } + + function setPhase(nextPhase: string | null) { + if (!nextPhase || nextPhase === currentPhase) return; + currentPhase = nextPhase; + const phaseLine = + nextPhase === "install" + ? " Installing OpenShell components..." + : nextPhase === "pod" + ? " Starting OpenShell gateway pod..." + : nextPhase === "health" + ? " Waiting for gateway health..." 
+ : " Starting gateway cluster..."; + printProgressLine(phaseLine); + } + + function classifyLine(line: string) { + if (/ApplyJob|helm-install-openshell|Applying HelmChart/i.test(line)) return "install"; + if ( + /openshell-0|Observed pod startup duration|MountVolume\.MountDevice succeeded/i.test(line) + ) { + return "pod"; + } + if (/Gateway .* ready\.?$/i.test(line)) return "health"; + return null; + } + + function flushLine(rawLine: string) { + const line = rawLine.replace(/\r/g, "").trimEnd(); + if (!line) return; + lines.push(line); + lastOutputAt = Date.now(); + const nextPhase = classifyLine(line); + if (nextPhase) setPhase(nextPhase); + } + + function onChunk(chunk: Buffer | string) { + pending += chunk.toString(); + const parts = pending.split("\n"); + pending = parts.pop() ?? ""; + parts.forEach(flushLine); + } + + function finish(result: GatewayStartStreamResult) { + if (settled) return; + settled = true; + if (pending) flushLine(pending); + clearInterval(heartbeatTimer); + resolvePromise(result); + } + + child.stdout?.on("data", onChunk); + child.stderr?.on("data", onChunk); + + printProgressLine(" Starting gateway cluster..."); + const heartbeatTimer = setInterval(() => { + if (settled) return; + const elapsed = elapsedSeconds(); + const bucket = Math.floor(elapsed / 10); + if (bucket === lastHeartbeatBucket) return; + if (Date.now() - lastOutputAt < 3000 && elapsed < 10) return; + const heartbeatLine = + currentPhase === "install" + ? ` Still installing OpenShell components... (${elapsed}s elapsed)` + : currentPhase === "pod" + ? ` Still starting OpenShell gateway pod... (${elapsed}s elapsed)` + : currentPhase === "health" + ? ` Still waiting for gateway health... (${elapsed}s elapsed)` + : ` Still starting gateway cluster... (${elapsed}s elapsed)`; + printProgressLine(heartbeatLine); + lastHeartbeatBucket = bucket; + }, 5000); + heartbeatTimer.unref?.(); + + // Hard timeout to prevent indefinite hangs if the openshell process + // never exits (e.g. 
Docker daemon unresponsive, k3s restart loop). (#1830) + // On timeout, send SIGTERM and let the `close` event resolve the promise + // so the child has actually exited before the caller proceeds to retry. + const GATEWAY_START_TIMEOUT = deps.envInt("NEMOCLAW_GATEWAY_START_TIMEOUT", 600) * 1000; + let killedByTimeout = false; + const killTimer = setTimeout(() => { + killedByTimeout = true; + lines.push("[NemoClaw] Gateway start timed out — killing process."); + child.kill("SIGTERM"); + // If SIGTERM is ignored, force-kill after 10s. + setTimeout(() => { + if (!settled) child.kill("SIGKILL"); + }, 10_000).unref?.(); + }, GATEWAY_START_TIMEOUT); + killTimer.unref?.(); + + return new Promise((resolve) => { + resolvePromise = resolve; + child.on("error", (error) => { + clearTimeout(killTimer); + const detail = error instanceof Error ? error.message : String(error); + lines.push(detail); + finish({ status: 1, output: lines.join("\n") }); + }); + child.on("close", (code) => { + clearTimeout(killTimer); + const exitCode = killedByTimeout ? 1 : (code ?? 1); + finish({ status: exitCode, output: lines.join("\n") }); + }); + }); +} diff --git a/src/lib/onboard-host-flow.test.ts b/src/lib/onboard-host-flow.test.ts new file mode 100644 index 0000000000..fd78004a4f --- /dev/null +++ b/src/lib/onboard-host-flow.test.ts @@ -0,0 +1,164 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. 
+import { runHostPreparationFlow } from "../../dist/lib/onboard-host-flow"; + +describe("runHostPreparationFlow", () => { + it("runs preflight and gateway startup on a fresh flow", async () => { + const events: string[] = []; + const startGateway = vi.fn(async () => { + events.push("start-gateway"); + }); + + const result = await runHostPreparationFlow({ + resume: false, + hasCompletedPreflight: false, + hasCompletedGateway: false, + preflight: async () => ({ gpu: "spark" }), + detectGpu: () => ({ gpu: "cached" }), + getGatewayStatus: () => "status", + getNamedGatewayInfo: () => "gw-info", + getActiveGatewayInfo: () => "active-info", + getGatewayReuseState: () => "missing", + verifyGatewayContainerRunning: () => "running", + stopDashboardForward: () => events.push("stop-forward"), + destroyGateway: () => events.push("destroy-gateway"), + clearRegistryAll: () => events.push("clear-registry"), + startGateway, + onNote: (message) => events.push(`note:${message}`), + onLog: (message) => events.push(`log:${message}`), + onSkip: (step, detail, reason = "resume") => events.push(`skip:${step}:${detail}:${reason}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + }); + + expect(result).toEqual({ gpu: { gpu: "spark" }, gatewayReuseState: "missing" }); + expect(startGateway).toHaveBeenCalledWith({ gpu: "spark" }); + expect(events).toEqual([ + "start:preflight", + "complete:preflight", + "start:gateway", + "start-gateway", + "complete:gateway", + ]); + }); + + it("skips preflight and gateway when resume can reuse a healthy gateway", async () => { + const events: string[] = []; + + const result = await runHostPreparationFlow({ + resume: true, + hasCompletedPreflight: true, + hasCompletedGateway: true, + preflight: async () => { + throw new Error("should not rerun preflight"); + }, + detectGpu: () => ({ gpu: "cached" }), + getGatewayStatus: () => "status", + getNamedGatewayInfo: () => "gw-info", + 
getActiveGatewayInfo: () => "active-info", + getGatewayReuseState: () => "healthy", + verifyGatewayContainerRunning: () => "running", + stopDashboardForward: () => events.push("stop-forward"), + destroyGateway: () => events.push("destroy-gateway"), + clearRegistryAll: () => events.push("clear-registry"), + startGateway: async () => { + throw new Error("should not rerun gateway"); + }, + onNote: (message) => events.push(`note:${message}`), + onLog: (message) => events.push(`log:${message}`), + onSkip: (step, detail, reason = "resume") => events.push(`skip:${step}:${detail}:${reason}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + }); + + expect(result).toEqual({ gpu: { gpu: "cached" }, gatewayReuseState: "healthy" }); + expect(events).toEqual([ + "skip:preflight:cached:resume", + "skip:gateway:running:resume", + ]); + }); + + it("cleans up stale gateway metadata before restarting the gateway", async () => { + const events: string[] = []; + + const result = await runHostPreparationFlow({ + resume: true, + hasCompletedPreflight: true, + hasCompletedGateway: true, + preflight: async () => { + throw new Error("should not rerun preflight"); + }, + detectGpu: () => ({ gpu: "cached" }), + getGatewayStatus: () => "status", + getNamedGatewayInfo: () => "gw-info", + getActiveGatewayInfo: () => "active-info", + getGatewayReuseState: () => "healthy", + verifyGatewayContainerRunning: () => "missing", + stopDashboardForward: () => events.push("stop-forward"), + destroyGateway: () => events.push("destroy-gateway"), + clearRegistryAll: () => events.push("clear-registry"), + startGateway: async () => { + events.push("start-gateway"); + }, + onNote: (message) => events.push(`note:${message}`), + onLog: (message) => events.push(`log:${message}`), + onSkip: (step, detail, reason = "resume") => events.push(`skip:${step}:${detail}:${reason}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: 
(step) => events.push(`complete:${step}`), + }); + + expect(result).toEqual({ gpu: { gpu: "cached" }, gatewayReuseState: "missing" }); + expect(events).toEqual([ + "skip:preflight:cached:resume", + "log: Gateway metadata is stale (container not running). Cleaning up...", + "stop-forward", + "destroy-gateway", + "clear-registry", + "log: ✓ Stale gateway metadata cleaned up", + "note: [resume] Recorded gateway state is unavailable; recreating it.", + "start:gateway", + "start-gateway", + "complete:gateway", + ]); + }); + + it("warns and reuses the gateway when Docker state cannot be probed", async () => { + const events: string[] = []; + + const result = await runHostPreparationFlow({ + resume: false, + hasCompletedPreflight: true, + hasCompletedGateway: false, + preflight: async () => ({ gpu: "fresh" }), + detectGpu: () => ({ gpu: "cached" }), + getGatewayStatus: () => "status", + getNamedGatewayInfo: () => "gw-info", + getActiveGatewayInfo: () => "active-info", + getGatewayReuseState: () => "healthy", + verifyGatewayContainerRunning: () => "unknown", + stopDashboardForward: () => events.push("stop-forward"), + destroyGateway: () => events.push("destroy-gateway"), + clearRegistryAll: () => events.push("clear-registry"), + startGateway: async () => { + throw new Error("should not restart gateway when metadata stays healthy"); + }, + onNote: (message) => events.push(`note:${message}`), + onLog: (message) => events.push(`log:${message}`), + onSkip: (step, detail, reason = "resume") => events.push(`skip:${step}:${detail}:${reason}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + }); + + expect(result).toEqual({ gpu: { gpu: "fresh" }, gatewayReuseState: "healthy" }); + expect(events).toEqual([ + "start:preflight", + "complete:preflight", + "log: Warning: could not verify gateway container state (Docker may be unavailable). 
Proceeding with cached health status.", + "skip:gateway:running:reuse", + "note: Reusing healthy NemoClaw gateway.", + ]); + }); +}); diff --git a/src/lib/onboard-host-flow.ts b/src/lib/onboard-host-flow.ts new file mode 100644 index 0000000000..57b7a26c65 --- /dev/null +++ b/src/lib/onboard-host-flow.ts @@ -0,0 +1,98 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { GatewayReuseState } from "./gateway-state"; + +export interface HostPreparationResult { + gpu: TGpu; + gatewayReuseState: GatewayReuseState; +} + +export interface HostPreparationDeps { + resume: boolean; + hasCompletedPreflight: boolean; + hasCompletedGateway: boolean; + preflight: () => Promise; + detectGpu: () => TGpu; + getGatewayStatus: () => string; + getNamedGatewayInfo: () => string; + getActiveGatewayInfo: () => string; + getGatewayReuseState: ( + statusOutput: string, + gwInfoOutput: string, + activeGatewayInfoOutput: string, + ) => GatewayReuseState; + verifyGatewayContainerRunning: () => "running" | "missing" | "unknown"; + stopDashboardForward: () => void; + destroyGateway: () => void; + clearRegistryAll: () => void; + startGateway: (gpu: TGpu) => Promise; + onNote: (message: string) => void; + onLog: (message: string) => void; + onSkip: (stepName: "preflight" | "gateway", detail: string, reason?: "resume" | "reuse") => void; + onStartStep: (stepName: "preflight" | "gateway") => void; + onCompleteStep: (stepName: "preflight" | "gateway") => void; +} + +export async function runHostPreparationFlow( + deps: HostPreparationDeps, +): Promise> { + let gpu: TGpu; + if (deps.resume && deps.hasCompletedPreflight) { + deps.onSkip("preflight", "cached"); + gpu = deps.detectGpu(); + } else { + deps.onStartStep("preflight"); + gpu = await deps.preflight(); + deps.onCompleteStep("preflight"); + } + + const gatewayStatus = deps.getGatewayStatus(); + const gatewayInfo = 
deps.getNamedGatewayInfo(); + const activeGatewayInfo = deps.getActiveGatewayInfo(); + let gatewayReuseState = deps.getGatewayReuseState(gatewayStatus, gatewayInfo, activeGatewayInfo); + + // Verify the gateway container is actually running — openshell CLI metadata + // can be stale after a manual `docker rm`. See #2020. + if (gatewayReuseState === "healthy") { + const containerState = deps.verifyGatewayContainerRunning(); + if (containerState === "missing") { + deps.onLog(" Gateway metadata is stale (container not running). Cleaning up..."); + deps.stopDashboardForward(); + deps.destroyGateway(); + deps.clearRegistryAll(); + gatewayReuseState = "missing"; + deps.onLog(" ✓ Stale gateway metadata cleaned up"); + } else if (containerState === "unknown") { + deps.onLog( + " Warning: could not verify gateway container state (Docker may be unavailable). Proceeding with cached health status.", + ); + } + } + + const canReuseHealthyGateway = gatewayReuseState === "healthy"; + const resumeGateway = deps.resume && deps.hasCompletedGateway && canReuseHealthyGateway; + if (resumeGateway) { + deps.onSkip("gateway", "running"); + } else if (!deps.resume && canReuseHealthyGateway) { + deps.onSkip("gateway", "running", "reuse"); + deps.onNote(" Reusing healthy NemoClaw gateway."); + } else { + if (deps.hasCompletedGateway) { + if (gatewayReuseState === "active-unnamed") { + deps.onNote(" [resume] Gateway is active but named metadata is missing; recreating it safely."); + } else if (gatewayReuseState === "foreign-active") { + deps.onNote(" [resume] A different OpenShell gateway is active; NemoClaw will not reuse it."); + } else if (gatewayReuseState === "stale") { + deps.onNote(" [resume] Recorded gateway is unhealthy; recreating it."); + } else { + deps.onNote(" [resume] Recorded gateway state is unavailable; recreating it."); + } + } + deps.onStartStep("gateway"); + await deps.startGateway(gpu); + deps.onCompleteStep("gateway"); + } + + return { gpu, gatewayReuseState }; +} diff 
--git a/src/lib/onboard-inference-loop.test.ts b/src/lib/onboard-inference-loop.test.ts new file mode 100644 index 0000000000..909318acaf --- /dev/null +++ b/src/lib/onboard-inference-loop.test.ts @@ -0,0 +1,226 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. +import { runInferenceSelectionLoop } from "../../dist/lib/onboard-inference-loop"; + +describe("runInferenceSelectionLoop", () => { + it("runs provider selection and inference setup on a fresh flow", async () => { + const events: string[] = []; + const setupNim = vi.fn(async () => ({ + provider: "openai-api", + model: "gpt-5.4", + endpointUrl: "https://api.openai.com/v1", + credentialEnv: "OPENAI_API_KEY", + preferredInferenceApi: "responses", + nimContainer: null, + })); + const setupInference = vi.fn(async () => {}); + + const result = await runInferenceSelectionLoop( + { + sandboxName: "alpha", + model: null, + provider: null, + endpointUrl: null, + credentialEnv: null, + preferredInferenceApi: null, + nimContainer: null, + }, + { + gpu: null, + resume: false, + hasCompletedProviderSelection: false, + hasCompletedInference: false, + setupNim, + setupInference, + isInferenceRouteReady: () => false, + hydrateCredentialEnv: () => events.push("hydrate"), + getOpenshellBinary: () => "/usr/bin/openshell", + setOpenshellBinary: (binary) => events.push(`set-binary:${binary}`), + clearSensitiveEnv: (credentialEnv) => events.push(`clear-env:${credentialEnv}`), + updateSandboxNimContainer: () => events.push("update-nim"), + onSkip: (step, detail) => events.push(`skip:${step}:${detail}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + }, + ); + + expect(result.provider).toBe("openai-api"); + expect(result.model).toBe("gpt-5.4"); + 
expect(setupNim).toHaveBeenCalledTimes(1); + expect(setupInference).toHaveBeenCalledWith( + "alpha", + "gpt-5.4", + "openai-api", + "https://api.openai.com/v1", + "OPENAI_API_KEY", + ); + expect(events).toEqual([ + "start:provider_selection", + "complete:provider_selection", + "set-binary:/usr/bin/openshell", + "start:inference", + "clear-env:OPENAI_API_KEY", + "complete:inference", + ]); + }); + + it("reuses completed selection/inference state on resume", async () => { + const events: string[] = []; + + const result = await runInferenceSelectionLoop( + { + sandboxName: "alpha", + model: "meta/llama-3.3-70b-instruct", + provider: "nvidia-prod", + endpointUrl: "https://integrate.api.nvidia.com/v1", + credentialEnv: "NVIDIA_API_KEY", + preferredInferenceApi: "openai-completions", + nimContainer: "nim-123", + }, + { + gpu: null, + resume: true, + hasCompletedProviderSelection: true, + hasCompletedInference: true, + setupNim: async () => { + throw new Error("should not rerun selection"); + }, + setupInference: async () => { + throw new Error("should not rerun inference"); + }, + isInferenceRouteReady: () => true, + hydrateCredentialEnv: (credentialEnv) => events.push(`hydrate:${credentialEnv}`), + getOpenshellBinary: () => "/usr/bin/openshell", + setOpenshellBinary: (binary) => events.push(`set-binary:${binary}`), + clearSensitiveEnv: (credentialEnv) => events.push(`clear-env:${credentialEnv}`), + updateSandboxNimContainer: (sandboxName, nimContainer) => + events.push(`update-nim:${sandboxName}:${nimContainer}`), + onSkip: (step, detail) => events.push(`skip:${step}:${detail}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + }, + ); + + expect(result.nimContainer).toBe("nim-123"); + expect(events).toEqual([ + "skip:provider_selection:nvidia-prod / meta/llama-3.3-70b-instruct", + "hydrate:NVIDIA_API_KEY", + "set-binary:/usr/bin/openshell", + "skip:inference:nvidia-prod / meta/llama-3.3-70b-instruct", 
+ "update-nim:alpha:nim-123", + "complete:inference", + "clear-env:NVIDIA_API_KEY", + ]); + }); + + it("clears hydrated credentials even when setupInference throws", async () => { + const events: string[] = []; + + await expect( + runInferenceSelectionLoop( + { + sandboxName: "alpha", + model: "gpt-5.4", + provider: "openai-api", + endpointUrl: "https://api.openai.com/v1", + credentialEnv: "OPENAI_API_KEY", + preferredInferenceApi: "responses", + nimContainer: null, + }, + { + gpu: null, + resume: true, + hasCompletedProviderSelection: true, + hasCompletedInference: false, + setupNim: async () => { + throw new Error("should not rerun selection"); + }, + setupInference: async () => { + throw new Error("boom"); + }, + isInferenceRouteReady: () => false, + hydrateCredentialEnv: (credentialEnv) => events.push(`hydrate:${credentialEnv}`), + getOpenshellBinary: () => "/usr/bin/openshell", + setOpenshellBinary: (binary) => events.push(`set-binary:${binary}`), + clearSensitiveEnv: (credentialEnv) => events.push(`clear-env:${credentialEnv}`), + updateSandboxNimContainer: () => events.push("update-nim"), + onSkip: (step, detail) => events.push(`skip:${step}:${detail}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + }, + ), + ).rejects.toThrow("boom"); + + expect(events).toEqual([ + "skip:provider_selection:openai-api / gpt-5.4", + "hydrate:OPENAI_API_KEY", + "set-binary:/usr/bin/openshell", + "start:inference", + "clear-env:OPENAI_API_KEY", + "clear-env:OPENAI_API_KEY", + ]); + }); + + it("retries provider selection when inference requests a reselection", async () => { + const selections = [ + { + provider: "openai-api", + model: "gpt-5.4", + endpointUrl: "https://api.openai.com/v1", + credentialEnv: "OPENAI_API_KEY", + preferredInferenceApi: "responses", + nimContainer: null, + }, + { + provider: "nvidia-prod", + model: "meta/llama-3.3-70b-instruct", + endpointUrl: "https://integrate.api.nvidia.com/v1", 
+ credentialEnv: "NVIDIA_API_KEY", + preferredInferenceApi: "openai-completions", + nimContainer: null, + }, + ]; + const setupNim = vi.fn(async () => selections.shift()!); + const setupInference = vi + .fn() + .mockResolvedValueOnce({ retry: "selection" }) + .mockResolvedValueOnce(undefined); + + const result = await runInferenceSelectionLoop( + { + sandboxName: "alpha", + model: null, + provider: null, + endpointUrl: null, + credentialEnv: null, + preferredInferenceApi: null, + nimContainer: null, + }, + { + gpu: null, + resume: true, + hasCompletedProviderSelection: true, + hasCompletedInference: false, + setupNim, + setupInference, + isInferenceRouteReady: () => false, + hydrateCredentialEnv: () => {}, + getOpenshellBinary: () => "/usr/bin/openshell", + setOpenshellBinary: () => {}, + clearSensitiveEnv: () => {}, + updateSandboxNimContainer: () => {}, + onSkip: () => {}, + onStartStep: () => {}, + onCompleteStep: () => {}, + }, + ); + + expect(setupNim).toHaveBeenCalledTimes(2); + expect(setupInference).toHaveBeenCalledTimes(2); + expect(result.provider).toBe("nvidia-prod"); + expect(result.model).toBe("meta/llama-3.3-70b-instruct"); + }); +}); diff --git a/src/lib/onboard-inference-loop.ts b/src/lib/onboard-inference-loop.ts new file mode 100644 index 0000000000..b0b37b611b --- /dev/null +++ b/src/lib/onboard-inference-loop.ts @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +export interface InferenceSelectionResult { + model: string; + provider: string; + endpointUrl: string | null; + credentialEnv: string | null; + preferredInferenceApi: string | null; + nimContainer: string | null; +} + +export interface InferenceLoopState { + sandboxName: string | null; + model: string | null; + provider: string | null; + endpointUrl: string | null; + credentialEnv: string | null; + preferredInferenceApi: string | null; + nimContainer: string | null; +} + +export interface InferenceLoopResult extends InferenceLoopState {} + +export interface InferenceLoopDeps { + gpu: TGpu; + resume: boolean; + hasCompletedProviderSelection: boolean; + hasCompletedInference: boolean; + setupNim: (gpu: TGpu) => Promise; + setupInference: ( + sandboxName: string | null, + model: string, + provider: string, + endpointUrl: string | null, + credentialEnv: string | null, + ) => Promise<{ retry?: "selection" } | void>; + isInferenceRouteReady: (provider: string, model: string) => boolean; + hydrateCredentialEnv: (credentialEnv: string | null) => void; + getOpenshellBinary: () => string; + setOpenshellBinary: (binary: string) => void; + clearSensitiveEnv: (credentialEnv: string | null) => void; + updateSandboxNimContainer: (sandboxName: string | null, nimContainer: string) => void; + onSkip: (stepName: "provider_selection" | "inference", detail: string) => void; + onStartStep: ( + stepName: "provider_selection" | "inference", + updates?: { sandboxName?: string | null; provider?: string | null; model?: string | null }, + ) => void; + onCompleteStep: ( + stepName: "provider_selection" | "inference", + updates?: { + sandboxName?: string | null; + provider?: string | null; + model?: string | null; + endpointUrl?: string | null; + credentialEnv?: string | null; + preferredInferenceApi?: string | null; + nimContainer?: string | null; + }, + ) => void; +} + +function hasResolvedSelection( + state: InferenceLoopState, +): state is 
InferenceLoopState & { provider: string; model: string } { + return typeof state.provider === "string" && typeof state.model === "string"; +} + +export async function runInferenceSelectionLoop( + initialState: InferenceLoopState, + deps: InferenceLoopDeps, +): Promise { + const state: InferenceLoopState = { ...initialState }; + let forceProviderSelection = false; + + while (true) { + const resumeProviderSelection = + !forceProviderSelection && + deps.resume && + deps.hasCompletedProviderSelection && + hasResolvedSelection(state); + + if (resumeProviderSelection) { + deps.onSkip("provider_selection", `${state.provider} / ${state.model}`); + deps.hydrateCredentialEnv(state.credentialEnv); + } else { + deps.onStartStep("provider_selection", { sandboxName: state.sandboxName }); + const selection = await deps.setupNim(deps.gpu); + state.model = selection.model; + state.provider = selection.provider; + state.endpointUrl = selection.endpointUrl; + state.credentialEnv = selection.credentialEnv; + state.preferredInferenceApi = selection.preferredInferenceApi; + state.nimContainer = selection.nimContainer; + deps.onCompleteStep("provider_selection", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + endpointUrl: state.endpointUrl, + credentialEnv: state.credentialEnv, + preferredInferenceApi: state.preferredInferenceApi, + nimContainer: state.nimContainer, + }); + } + + const hydratedCredentialEnv = resumeProviderSelection ? 
state.credentialEnv : null; + try { + if (!hasResolvedSelection(state)) { + throw new Error("Provider selection did not produce a provider/model pair."); + } + + deps.setOpenshellBinary(deps.getOpenshellBinary()); + + const resumeInference = + !forceProviderSelection && + deps.resume && + deps.hasCompletedInference && + deps.isInferenceRouteReady(state.provider, state.model); + + if (resumeInference) { + deps.onSkip("inference", `${state.provider} / ${state.model}`); + if (state.nimContainer) { + deps.updateSandboxNimContainer(state.sandboxName, state.nimContainer); + } + deps.onCompleteStep("inference", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + nimContainer: state.nimContainer, + }); + break; + } + + deps.onStartStep("inference", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + }); + let inferenceResult: { retry?: "selection" } | void; + try { + inferenceResult = await deps.setupInference( + state.sandboxName, + state.model, + state.provider, + state.endpointUrl, + state.credentialEnv, + ); + } finally { + deps.clearSensitiveEnv(state.credentialEnv); + } + if (inferenceResult?.retry === "selection") { + forceProviderSelection = true; + continue; + } + if (state.nimContainer) { + deps.updateSandboxNimContainer(state.sandboxName, state.nimContainer); + } + deps.onCompleteStep("inference", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + nimContainer: state.nimContainer, + }); + break; + } finally { + if (hydratedCredentialEnv !== null) { + deps.clearSensitiveEnv(hydratedCredentialEnv); + } + } + } + + return state; +} diff --git a/src/lib/onboard-inference-provider.ts b/src/lib/onboard-inference-provider.ts new file mode 100644 index 0000000000..80cfdc7ca4 --- /dev/null +++ b/src/lib/onboard-inference-provider.ts @@ -0,0 +1,169 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +export async function runSetupInference( + sandboxName: string, + model: string, + provider: string, + endpointUrl: string | null = null, + credentialEnv: string | null = null, + deps: any, +): Promise<{ retry?: "selection"; ok?: true }> { + deps.step(4, 8, "Setting up inference provider"); + deps.runOpenshell(["gateway", "select", deps.gatewayName], { ignoreError: true }); + + if ( + provider === "nvidia-prod" || + provider === "nvidia-nim" || + provider === "openai-api" || + provider === "anthropic-prod" || + provider === "compatible-anthropic-endpoint" || + provider === "gemini-api" || + provider === "compatible-endpoint" + ) { + const config = + provider === "nvidia-nim" + ? deps.remoteProviderConfig.build + : Object.values(deps.remoteProviderConfig).find((entry: any) => entry.providerName === provider); + while (true) { + const resolvedCredentialEnv = credentialEnv || (config && config.credentialEnv); + const resolvedEndpointUrl = endpointUrl || (config && config.endpointUrl); + const credentialValue = deps.hydrateCredentialEnv(resolvedCredentialEnv); + const env = + resolvedCredentialEnv && credentialValue + ? 
{ [resolvedCredentialEnv]: credentialValue } + : {}; + const providerResult = deps.upsertProvider( + provider, + config.providerType, + resolvedCredentialEnv, + resolvedEndpointUrl, + env, + ); + if (!providerResult.ok) { + console.error(` ${providerResult.message}`); + if (deps.isNonInteractive()) { + process.exit(providerResult.status || 1); + } + const retry = await deps.promptValidationRecovery( + config.label, + deps.classifyApplyFailure(providerResult.message), + resolvedCredentialEnv, + config.helpUrl, + ); + if (retry === "credential" || retry === "retry") { + continue; + } + if (retry === "selection" || retry === "model") { + return { retry: "selection" }; + } + process.exit(providerResult.status || 1); + } + const args = ["inference", "set"]; + if (config.skipVerify) { + args.push("--no-verify"); + } + args.push("--provider", provider, "--model", model); + const applyResult = deps.runOpenshell(args, { ignoreError: true }); + if (applyResult.status === 0) { + break; + } + const message = + deps.compactText(deps.redact(`${applyResult.stderr || ""} ${applyResult.stdout || ""}`)) || + `Failed to configure inference provider '${provider}'.`; + console.error(` ${message}`); + if (deps.isNonInteractive()) { + process.exit(applyResult.status || 1); + } + const retry = await deps.promptValidationRecovery( + config.label, + deps.classifyApplyFailure(message), + resolvedCredentialEnv, + config.helpUrl, + ); + if (retry === "credential" || retry === "retry") { + continue; + } + if (retry === "selection" || retry === "model") { + return { retry: "selection" }; + } + process.exit(applyResult.status || 1); + } + } else if (provider === "vllm-local") { + const validation = deps.validateLocalProvider(provider); + if (!validation.ok) { + console.error(` ${validation.message}`); + process.exit(1); + } + const baseUrl = deps.getLocalProviderBaseUrl(provider); + const providerResult = deps.upsertProvider("vllm-local", "openai", "OPENAI_API_KEY", baseUrl, { + OPENAI_API_KEY: 
"dummy", + }); + if (!providerResult.ok) { + console.error(` ${providerResult.message}`); + process.exit(providerResult.status || 1); + } + deps.runOpenshell([ + "inference", + "set", + "--no-verify", + "--provider", + "vllm-local", + "--model", + model, + "--timeout", + String(deps.localInferenceTimeoutSecs), + ]); + } else if (provider === "ollama-local") { + const validation = deps.validateLocalProvider(provider); + if (!validation.ok) { + console.error(` ${validation.message}`); + if (deps.processPlatform === "darwin") { + console.error(" On macOS, local inference also depends on OpenShell host routing support."); + } + process.exit(1); + } + const baseUrl = deps.getLocalProviderBaseUrl(provider); + let ollamaCredential = "ollama"; + if (!deps.isWsl()) { + deps.ensureOllamaAuthProxy(); + const proxyToken = deps.getOllamaProxyToken(); + if (!proxyToken) { + console.error(" Ollama auth proxy token is not set. Re-run onboard to initialize the proxy."); + process.exit(1); + } + ollamaCredential = proxyToken; + deps.persistProxyToken(proxyToken); + } + const providerResult = deps.upsertProvider("ollama-local", "openai", "OPENAI_API_KEY", baseUrl, { + OPENAI_API_KEY: ollamaCredential, + }); + if (!providerResult.ok) { + console.error(` ${providerResult.message}`); + process.exit(providerResult.status || 1); + } + deps.runOpenshell([ + "inference", + "set", + "--no-verify", + "--provider", + "ollama-local", + "--model", + model, + "--timeout", + String(deps.localInferenceTimeoutSecs), + ]); + console.log(` Priming Ollama model: ${model}`); + deps.run(deps.getOllamaWarmupCommand(model), { ignoreError: true }); + const probe = deps.validateOllamaModel(model); + if (!probe.ok) { + console.error(` ${probe.message}`); + process.exit(1); + } + } + + deps.verifyInferenceRoute(provider, model); + deps.updateSandbox(sandboxName, { model, provider }); + console.log(` ✓ Inference route set: ${provider} / ${model}`); + return { ok: true }; +} diff --git 
a/src/lib/onboard-inference-validation.ts b/src/lib/onboard-inference-validation.ts new file mode 100644 index 0000000000..273affe2c1 --- /dev/null +++ b/src/lib/onboard-inference-validation.ts @@ -0,0 +1,620 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +function replaceNamedCredential( + envName: string, + label: string, + helpUrl: string | null = null, + validator: ((value: string) => string | null) | null = null, + deps: any, +): Promise { + if (helpUrl) { + console.log(""); + console.log(` Get your ${label} from: ${helpUrl}`); + console.log(""); + } + + return (async () => { + while (true) { + const key = deps.normalizeCredentialValue(await deps.prompt(` ${label}: `, { secret: true })); + if (!key) { + console.error(` ${label} is required.`); + continue; + } + const validationError = typeof validator === "function" ? validator(key) : null; + if (validationError) { + console.error(validationError); + continue; + } + deps.saveCredential(envName, key); + process.env[envName] = key; + console.log(""); + console.log(` Key saved to ~/.nemoclaw/credentials.json (mode 600)`); + console.log(""); + return key; + } + })(); +} + +export async function promptValidationRecovery( + label: string, + recovery: any, + credentialEnv: string | null = null, + helpUrl: string | null = null, + deps: any, +): Promise { + if (deps.isNonInteractive()) { + process.exit(1); + } + + if (recovery.kind === "credential" && credentialEnv) { + console.log( + ` ${label} authorization failed. Re-enter the API key or choose a different provider/model.`, + ); + console.log(" ⚠️ Do NOT paste your API key here — use the options below:"); + const choice = ( + await deps.prompt(" Options: retry (re-enter key), back (change provider), exit [retry]: ", { + secret: true, + }) + ) + .trim() + .toLowerCase(); + // Guard against the user accidentally pasting an API key at this prompt. 
+ // Tokens don't contain spaces; human sentences do — the no-space + length check + // avoids false-positives on long typed sentences. + const API_KEY_PREFIXES = ["nvapi-", "ghp_", "gcm-", "sk-", "gpt-", "gemini-", "nvcf-"]; + const looksLikeToken = + API_KEY_PREFIXES.some((prefix) => choice.startsWith(prefix)) || + (!choice.includes(" ") && choice.length > 40) || + // Regex fallback: base64-safe token pattern (20+ chars, no spaces, mixed alphanum) + /^[A-Za-z0-9_\-\.]{20,}$/.test(choice); + const validator = credentialEnv === "NVIDIA_API_KEY" ? deps.validateNvidiaApiKeyValue : null; + if (looksLikeToken) { + console.log(" ⚠️ That looks like an API key — do not paste credentials here."); + console.log(" Treating as 'retry'. You will be prompted to enter the key securely."); + await replaceNamedCredential(credentialEnv, `${label} API key`, helpUrl, validator, deps); + return "credential"; + } + if (choice === "back") { + console.log(" Returning to provider selection."); + console.log(""); + return "selection"; + } + if (choice === "exit" || choice === "quit") { + deps.exitOnboardFromPrompt(); + } + if (choice === "" || choice === "retry") { + await replaceNamedCredential(credentialEnv, `${label} API key`, helpUrl, validator, deps); + return "credential"; + } + console.log(" Please choose a provider/model again."); + console.log(""); + return "selection"; + } + + if (recovery.kind === "transport") { + console.log(deps.getTransportRecoveryMessage(recovery.failure || {})); + const choice = (await deps.prompt(" Type 'retry', 'back', or 'exit' [retry]: ")) + .trim() + .toLowerCase(); + if (choice === "back") { + console.log(" Returning to provider selection."); + console.log(""); + return "selection"; + } + if (choice === "exit" || choice === "quit") { + deps.exitOnboardFromPrompt(); + } + if (choice === "" || choice === "retry") { + console.log(""); + return "retry"; + } + console.log(" Please choose a provider/model again."); + console.log(""); + return "selection"; + 
} + + if (recovery.kind === "model") { + console.log(` Please enter a different ${label} model name.`); + console.log(""); + return "model"; + } + + console.log(" Please choose a provider/model again."); + console.log(""); + return "selection"; +} + +function parseJsonObject(body: string): any { + if (!body) return null; + try { + return JSON.parse(body); + } catch { + return null; + } +} + +export function hasResponsesToolCall(body: string): boolean { + const parsed = parseJsonObject(body); + if (!parsed || !Array.isArray(parsed.output)) return false; + + const stack = [...parsed.output]; + while (stack.length > 0) { + const item = stack.pop(); + if (!item || typeof item !== "object") continue; + if (item.type === "function_call" || item.type === "tool_call") return true; + if (Array.isArray(item.content)) { + stack.push(...item.content); + } + } + + return false; +} + +export function shouldRequireResponsesToolCalling(provider: string): boolean { + return ( + provider === "nvidia-prod" || provider === "gemini-api" || provider === "compatible-endpoint" + ); +} + +// Google Gemini rejects requests that carry both an Authorization: Bearer +// header and a ?key= query parameter ("Multiple authentication credentials +// received"). Send the API key as ?key= only for Gemini. See issue #1960. +export function getProbeAuthMode(provider: string): "query-param" | undefined { + return provider === "gemini-api" ? "query-param" : undefined; +} + +// Per-validation-probe curl timing. Tighter than the default 60s in +// getCurlTimingArgs() because validation must not hang the wizard for a +// minute on a misbehaving model. See issue #1601 (Bug 3). 
/**
 * Curl timing arguments for validation probes.
 * WSL gets longer timeouts because its virtualized network stack is slower
 * to complete TLS handshakes (see the retry comment in probeOpenAiLikeEndpoint).
 */
export function getValidationProbeCurlArgs(opts: any = {}, deps: any): string[] {
  if (deps.isWsl(opts)) {
    return ["--connect-timeout", "20", "--max-time", "30"];
  }
  return ["--connect-timeout", "10", "--max-time", "15"];
}

/**
 * Probe the OpenAI Responses API with `tool_choice: "required"` and verify the
 * backend actually emits a tool call (via hasResponsesToolCall, defined earlier
 * in this file). Auth is sent as a Bearer header unless options.authMode is
 * "query-param", in which case the key is appended to the URL query string.
 * Returns the raw probe result on success/transport failure, or a synthesized
 * failure object when the HTTP call succeeded but no tool call came back.
 */
function probeResponsesToolCalling(
  endpointUrl: string,
  model: string,
  apiKey: string,
  options: any = {},
  deps: any,
): any {
  const useQueryParam = options.authMode === "query-param";
  const normalizedKey = apiKey ? deps.normalizeCredentialValue(apiKey) : "";
  // Strip trailing slashes so path concatenation below never doubles them.
  const baseUrl = String(endpointUrl).replace(/\/+$/, "");
  const authHeader = !useQueryParam && normalizedKey
    ? ["-H", `Authorization: Bearer ${normalizedKey}`]
    : [];
  const url = useQueryParam && normalizedKey
    ? `${baseUrl}/responses?key=${encodeURIComponent(normalizedKey)}`
    : `${baseUrl}/responses`;
  const result = deps.runCurlProbe([
    "-sS",
    ...getValidationProbeCurlArgs({}, deps),
    "-H",
    "Content-Type: application/json",
    ...authHeader,
    "-d",
    JSON.stringify({
      model,
      input: "Call the emit_ok function with value OK. Do not answer with plain text.",
      tool_choice: "required",
      tools: [
        {
          type: "function",
          name: "emit_ok",
          description: "Returns the probe value for validation.",
          parameters: {
            type: "object",
            properties: {
              value: { type: "string" },
            },
            required: ["value"],
            additionalProperties: false,
          },
        },
      ],
    }),
    url,
  ]);

  if (!result.ok) {
    return result;
  }
  if (hasResponsesToolCall(result.body)) {
    return result;
  }
  // HTTP-level success but no tool call in the body: report as a failure so
  // callers fall back to /chat/completions.
  return {
    ok: false,
    httpStatus: result.httpStatus,
    curlStatus: result.curlStatus,
    body: result.body,
    stderr: result.stderr,
    message: `HTTP ${result.httpStatus}: Responses API did not return a tool call`,
  };
}

/**
 * Probe an OpenAI-compatible endpoint, trying /responses first (optionally
 * requiring tool calling and/or validating streaming SSE events) and falling
 * back to /chat/completions. Collects per-probe failures, retries once with
 * doubled timeouts on connection/timeout errors, and reframes the NVCF
 * "Function not found for account" error with an actionable message.
 *
 * options: authMode ("query-param" | header default), requireResponsesToolCalling,
 * skipResponsesProbe, probeStreaming. Returns { ok, api, label } on success or
 * { ok: false, message, failures } on failure.
 */
function probeOpenAiLikeEndpoint(
  endpointUrl: string,
  model: string,
  apiKey: string,
  options: any = {},
  deps: any,
): any {
  const useQueryParam = options.authMode === "query-param";
  const normalizedKey = apiKey ? deps.normalizeCredentialValue(apiKey) : "";
  const baseUrl = String(endpointUrl).replace(/\/+$/, "");
  const authHeader = !useQueryParam && normalizedKey
    ? ["-H", `Authorization: Bearer ${normalizedKey}`]
    : [];
  // Builds the probe URL, moving the key into the query string when the
  // provider authenticates that way (e.g. Gemini-style endpoints).
  const appendKey = (requestPath: string) =>
    useQueryParam && normalizedKey
      ? `${baseUrl}${requestPath}?key=${encodeURIComponent(normalizedKey)}`
      : `${baseUrl}${requestPath}`;

  const responsesProbe =
    options.requireResponsesToolCalling === true
      ? {
          name: "Responses API with tool calling",
          api: "openai-responses",
          execute: () =>
            probeResponsesToolCalling(endpointUrl, model, apiKey, { authMode: options.authMode }, deps),
        }
      : {
          name: "Responses API",
          api: "openai-responses",
          execute: () =>
            deps.runCurlProbe([
              "-sS",
              ...getValidationProbeCurlArgs({}, deps),
              "-H",
              "Content-Type: application/json",
              ...authHeader,
              "-d",
              JSON.stringify({
                model,
                input: "Reply with exactly: OK",
              }),
              appendKey("/responses"),
            ]),
        };

  const chatCompletionsProbe = {
    name: "Chat Completions API",
    api: "openai-completions",
    execute: () =>
      deps.runCurlProbe([
        "-sS",
        ...getValidationProbeCurlArgs({}, deps),
        "-H",
        "Content-Type: application/json",
        ...authHeader,
        "-d",
        JSON.stringify({
          model,
          messages: [{ role: "user", content: "Reply with exactly: OK" }],
        }),
        appendKey("/chat/completions"),
      ]),
  };

  // NVIDIA Build does not expose /v1/responses; probing it always returns
  // "404 page not found" and only adds noise to error messages. Skip it
  // entirely for that provider. See issue #1601.
  const probes = options.skipResponsesProbe
    ? [chatCompletionsProbe]
    : [responsesProbe, chatCompletionsProbe];

  // NOTE(review): `const failures = []` infers never[] under strict TS; an
  // explicit element type would be needed for `strict: true` — confirm tsconfig.
  const failures = [];
  for (const probe of probes) {
    const result = probe.execute();
    if (result.ok) {
      // Streaming event validation — catch backends like SGLang that return
      // valid non-streaming responses but emit incomplete SSE events in
      // streaming mode. Only run for /responses probes on custom endpoints
      // where probeStreaming was requested.
      if (probe.api === "openai-responses" && options.probeStreaming === true) {
        const streamResult = deps.runStreamingEventProbe([
          "-sS",
          ...getValidationProbeCurlArgs({}, deps),
          "-H",
          "Content-Type: application/json",
          ...authHeader,
          "-d",
          JSON.stringify({
            model,
            input: "Reply with exactly: OK",
            stream: true,
          }),
          appendKey("/responses"),
        ]);
        if (!streamResult.ok && streamResult.missingEvents.length > 0) {
          // Backend responds but lacks required streaming events — fall back
          // to /chat/completions silently.
          console.log(` ℹ ${streamResult.message}`);
          failures.push({
            name: probe.name + " (streaming)",
            httpStatus: 0,
            curlStatus: 0,
            message: streamResult.message,
            body: "",
          });
          continue;
        }
        if (!streamResult.ok) {
          // Transport or execution failure — surface as a hard error instead
          // of silently switching APIs.
          return {
            ok: false,
            message: `${probe.name} (streaming): ${streamResult.message}`,
            failures: [
              {
                name: probe.name + " (streaming)",
                httpStatus: 0,
                curlStatus: 0,
                message: streamResult.message,
                body: "",
              },
            ],
          };
        }
      }
      return { ok: true, api: probe.api, label: probe.name };
    }
    // Preserve the raw response body alongside the summarized message so the
    // NVCF "Function not found for account" detector below can fall back to
    // the raw body if summarizeProbeError ever stops surfacing the marker
    // through `message`.
    failures.push({
      name: probe.name,
      httpStatus: result.httpStatus,
      curlStatus: result.curlStatus,
      message: result.message,
      body: result.body,
    });
  }

  // Single retry with doubled timeouts on timeout/connection failure.
  // WSL2's virtualized network stack can cause the initial probe to time out
  // before the TLS handshake completes. See issue #987.
  // curl exit codes: 28 = operation timed out, 6 = couldn't resolve host,
  // 7 = couldn't connect.
  const isTimeoutOrConnFailure = (cs: number) => cs === 28 || cs === 6 || cs === 7;
  let retriedAfterTimeout = false;
  // Only the FIRST recorded failure's curl status is inspected here.
  if (failures.length > 0 && isTimeoutOrConnFailure(failures[0].curlStatus)) {
    retriedAfterTimeout = true;
    const baseArgs = getValidationProbeCurlArgs({}, deps);
    // Doubles every purely-numeric arg (the timeout values); flag args pass through.
    const doubledArgs = baseArgs.map((arg) =>
      /^\d+$/.test(arg) ? String(Number(arg) * 2) : arg,
    );
    // NOTE(review): this retry always uses a Bearer header and a bare
    // /chat/completions URL, ignoring options.authMode/appendKey used above —
    // confirm query-param providers are never expected to reach this path.
    const retryResult = deps.runCurlProbe([
      "-sS",
      ...doubledArgs,
      "-H",
      "Content-Type: application/json",
      ...(apiKey ? ["-H", `Authorization: Bearer ${deps.normalizeCredentialValue(apiKey)}`] : []),
      "-d",
      JSON.stringify({
        model,
        messages: [{ role: "user", content: "Reply with exactly: OK" }],
      }),
      `${String(endpointUrl).replace(/\/+$/, "")}/chat/completions`,
    ]);
    if (retryResult.ok) {
      return { ok: true, api: "openai-completions", label: "Chat Completions API" };
    }
  }

  // Detect the NVCF "Function not found for account" error and reframe it
  // with an actionable next step instead of dumping the raw NVCF body.
  // See issue #1601 (Bug 2).
  const accountFailure = failures.find(
    (failure) =>
      deps.isNvcfFunctionNotFoundForAccount(failure.message) ||
      deps.isNvcfFunctionNotFoundForAccount(failure.body),
  );
  if (accountFailure) {
    return {
      ok: false,
      message: deps.nvcfFunctionNotFoundMessage(model),
      failures,
    };
  }

  const baseMessage = failures.map((failure) => `${failure.name}: ${failure.message}`).join(" | ");
  const wslHint =
    deps.isWsl() && retriedAfterTimeout
      ? " · WSL2 detected — network verification may be slower than expected. " +
        "Run `nemoclaw onboard` with the `--skip-verify` flag if this endpoint is known to be reachable."
      : "";
  return {
    ok: false,
    message: baseMessage + wslHint,
    failures,
  };
}

/**
 * Probe an Anthropic-compatible endpoint via POST /v1/messages using x-api-key
 * auth and the 2023-06-01 API version header. Returns { ok, api, label } on
 * success or { ok: false, message, failures } with a single failure entry.
 */
function probeAnthropicEndpoint(
  endpointUrl: string,
  model: string,
  apiKey: string,
  deps: any,
): any {
  const result = deps.runCurlProbe([
    "-sS",
    ...deps.getCurlTimingArgs(),
    "-H",
    `x-api-key: ${deps.normalizeCredentialValue(apiKey)}`,
    "-H",
    "anthropic-version: 2023-06-01",
    "-H",
    "content-type: application/json",
    "-d",
    JSON.stringify({
      model,
      max_tokens: 16,
      messages: [{ role: "user", content: "Reply with exactly: OK" }],
    }),
    `${String(endpointUrl).replace(/\/+$/, "")}/v1/messages`,
  ]);
  if (result.ok) {
    return { ok: true, api: "anthropic-messages", label: "Anthropic Messages API" };
  }
  return {
    ok: false,
    message: result.message,
    failures: [
      {
        name: "Anthropic Messages API",
        httpStatus: result.httpStatus,
        curlStatus: result.curlStatus,
        message: result.message,
      },
    ],
  };
}

/**
 * Validate an OpenAI-like provider selection during onboarding. On probe
 * failure: exits the process in non-interactive mode, otherwise offers the
 * interactive recovery prompt (promptValidationRecovery, defined earlier in
 * this file). credentialEnv may be null for unauthenticated endpoints.
 * Resolves to { ok: true, api } or { ok: false, retry }.
 */
export async function validateOpenAiLikeSelection(
  label: string,
  endpointUrl: string,
  model: string,
  credentialEnv: string | null = null,
  retryMessage = "Please choose a provider/model again.",
  helpUrl: string | null = null,
  options: any = {},
  deps: any,
): Promise {
  const apiKey = credentialEnv ? deps.getCredential(credentialEnv) : "";
  const probe = probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, options, deps);
  if (!probe.ok) {
    console.error(` ${label} endpoint validation failed.`);
    console.error(` ${probe.message}`);
    if (deps.isNonInteractive()) {
      process.exit(1);
    }
    const retry = await promptValidationRecovery(
      label,
      deps.getProbeRecovery(probe),
      credentialEnv,
      helpUrl,
      deps,
    );
    if (retry === "selection") {
      console.log(` ${retryMessage}`);
      console.log("");
    }
    return { ok: false, retry };
  }
  console.log(` ${probe.label} available — OpenClaw will use ${probe.api}.`);
  return { ok: true, api: probe.api };
}

/**
 * Anthropic counterpart of validateOpenAiLikeSelection: probes /v1/messages
 * and drives the same non-interactive exit / interactive recovery flow.
 */
export async function validateAnthropicSelectionWithRetryMessage(
  label: string,
  endpointUrl: string,
  model: string,
  credentialEnv: string,
  retryMessage = "Please choose a provider/model again.",
  helpUrl: string | null = null,
  deps: any,
): Promise {
  const apiKey = deps.getCredential(credentialEnv);
  const probe = probeAnthropicEndpoint(endpointUrl, model, apiKey, deps);
  if (!probe.ok) {
    console.error(` ${label} endpoint validation failed.`);
    console.error(` ${probe.message}`);
    if (deps.isNonInteractive()) {
      process.exit(1);
    }
    const retry = await promptValidationRecovery(
      label,
      deps.getProbeRecovery(probe),
      credentialEnv,
      helpUrl,
      deps,
    );
    if (retry === "selection") {
      console.log(` ${retryMessage}`);
      console.log("");
    }
    return { ok: false, retry };
  }
  console.log(` ${probe.label} available — OpenClaw will use ${probe.api}.`);
  return { ok: true, api: probe.api };
}

/**
 * Validate a user-supplied ("custom") OpenAI-compatible endpoint. Stricter
 * than the stock path: requires Responses tool calling and streaming-event
 * validation, honors NEMOCLAW_PREFERRED_API via shouldForceCompletionsApi,
 * and allows model retry in the recovery prompt.
 */
export async function validateCustomOpenAiLikeSelection(
  label: string,
  endpointUrl: string,
  model: string,
  credentialEnv: string,
  helpUrl: string | null = null,
  deps: any,
): Promise {
  const apiKey = deps.getCredential(credentialEnv);
  const probe = probeOpenAiLikeEndpoint(
    endpointUrl,
    model,
    apiKey,
    {
      requireResponsesToolCalling: true,
      skipResponsesProbe: deps.shouldForceCompletionsApi(process.env.NEMOCLAW_PREFERRED_API),
      probeStreaming: true,
    },
    deps,
  );
  if (probe.ok) {
    console.log(` ${probe.label} available — OpenClaw will use ${probe.api}.`);
    return { ok: true, api: probe.api };
  }
  console.error(` ${label} endpoint validation failed.`);
  console.error(` ${probe.message}`);
  if (deps.isNonInteractive()) {
    process.exit(1);
  }
  const retry = await promptValidationRecovery(
    label,
    deps.getProbeRecovery(probe, { allowModelRetry: true }),
    credentialEnv,
    helpUrl,
    deps,
  );
  if (retry === "selection") {
    console.log(" Please choose a provider/model again.");
    console.log("");
  }
  return { ok: false, retry };
}

/**
 * Validate a user-supplied ("custom") Anthropic-compatible endpoint; same
 * flow as validateCustomOpenAiLikeSelection but against /v1/messages.
 */
export async function validateCustomAnthropicSelection(
  label: string,
  endpointUrl: string,
  model: string,
  credentialEnv: string,
  helpUrl: string | null = null,
  deps: any,
): Promise {
  const apiKey = deps.getCredential(credentialEnv);
  const probe = probeAnthropicEndpoint(endpointUrl, model, apiKey, deps);
  if (probe.ok) {
    console.log(` ${probe.label} available — OpenClaw will use ${probe.api}.`);
    return { ok: true, api: probe.api };
  }
  console.error(` ${label} endpoint validation failed.`);
  console.error(` ${probe.message}`);
  if (deps.isNonInteractive()) {
    process.exit(1);
  }
  const retry = await promptValidationRecovery(
    label,
    deps.getProbeRecovery(probe, { allowModelRetry: true }),
    credentialEnv,
    helpUrl,
    deps,
  );
  if (retry === "selection") {
    console.log(" Please choose a provider/model again.");
    console.log("");
  }
  return { ok: false, retry };
}
diff --git a/src/lib/onboard-messaging.test.ts b/src/lib/onboard-messaging.test.ts
new file mode 100644
index 0000000000..cd50445a65
--- /dev/null
+++ b/src/lib/onboard-messaging.test.ts
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { describe, expect, it, vi } from "vitest";
// Import from compiled dist/ so coverage is attributed correctly.
import { MESSAGING_CHANNELS, setupMessagingChannels } from "../../dist/lib/onboard-messaging";

describe("onboard-messaging", () => {
  it("exports the expected messaging channel definitions", () => {
    expect(MESSAGING_CHANNELS.map((channel) => channel.name)).toEqual([
      "telegram",
      "discord",
      "slack",
    ]);
  });

  it("returns configured channels in non-interactive mode and probes Telegram reachability once", async () => {
    const note = vi.fn();
    const checkTelegramReachability = vi.fn(async () => {});

    // Telegram token comes from env, Slack token from the credential store,
    // Discord has neither — so only telegram and slack should be detected.
    const result = await setupMessagingChannels({
      step: vi.fn(),
      isNonInteractive: () => true,
      note,
      getCredential: (envKey) => (envKey === "SLACK_BOT_TOKEN" ? "xoxb-token" : null),
      normalizeCredentialValue: (value) => String(value || ""),
      prompt: async () => "",
      promptOrDefault: async () => "n",
      saveCredential: vi.fn(),
      checkTelegramReachability,
      env: {
        NEMOCLAW_NON_INTERACTIVE: "1",
        TELEGRAM_BOT_TOKEN: "123456:ABC-telegram-token",
      } as NodeJS.ProcessEnv,
    });

    expect(result).toEqual(["telegram", "slack"]);
    expect(note).toHaveBeenCalledWith(
      " [non-interactive] Messaging tokens detected: telegram, slack",
    );
    expect(checkTelegramReachability).toHaveBeenCalledWith(
      "123456:ABC-telegram-token",
    );
  });

  it("returns an empty array when no messaging tokens are configured", async () => {
    const note = vi.fn();

    const result = await setupMessagingChannels({
      step: vi.fn(),
      isNonInteractive: () => true,
      note,
      getCredential: () => null,
      normalizeCredentialValue: (value) => String(value || ""),
      prompt: async () => "",
      promptOrDefault: async () => "n",
      saveCredential: vi.fn(),
      checkTelegramReachability: vi.fn(async () => {}),
      env: { NEMOCLAW_NON_INTERACTIVE: "1" } as NodeJS.ProcessEnv,
    });

    expect(result).toEqual([]);
    expect(note).toHaveBeenCalledWith(
      " [non-interactive] No messaging tokens configured. Skipping.",
    );
  });
});
diff --git a/src/lib/onboard-messaging.ts b/src/lib/onboard-messaging.ts
new file mode 100644
index 0000000000..ba930cba23
--- /dev/null
+++ b/src/lib/onboard-messaging.ts
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

/**
 * Static description of one messaging channel the onboarding wizard can
 * configure: which env var holds its token, the prompts/help text shown to
 * the user, and the optional per-channel extras (server ID, allowlist user
 * ID, @mention mode, Slack app-level token).
 */
export interface MessagingChannelDefinition {
  name: string;
  envKey: string;
  description: string;
  help: string;
  label: string;
  userIdEnvKey?: string;
  userIdHelp?: string;
  userIdLabel?: string;
  // "dm": allowlist gates direct messages; "guild": optional allowlist on top
  // of server membership (see the skippedReason branch in setupMessagingChannels).
  allowIdsMode?: "dm" | "guild";
  serverIdEnvKey?: string;
  serverIdHelp?: string;
  serverIdLabel?: string;
  requireMentionEnvKey?: string;
  requireMentionHelp?: string;
  appTokenEnvKey?: string;
  appTokenHelp?: string;
  appTokenLabel?: string;
}

/** All channels the wizard knows about, in the order they are listed/toggled. */
export const MESSAGING_CHANNELS: MessagingChannelDefinition[] = [
  {
    name: "telegram",
    envKey: "TELEGRAM_BOT_TOKEN",
    description: "Telegram bot messaging",
    help: "Create a bot via @BotFather on Telegram, then copy the token.",
    label: "Telegram Bot Token",
    userIdEnvKey: "TELEGRAM_ALLOWED_IDS",
    userIdHelp: "Send /start to @userinfobot on Telegram to get your numeric user ID.",
    userIdLabel: "Telegram User ID (for DM access)",
    allowIdsMode: "dm",
  },
  {
    name: "discord",
    envKey: "DISCORD_BOT_TOKEN",
    description: "Discord bot messaging",
    help: "Discord Developer Portal → Applications → Bot → Reset/Copy Token.",
    label: "Discord Bot Token",
    serverIdEnvKey: "DISCORD_SERVER_ID",
    serverIdHelp:
      "Enable Developer Mode in Discord, then right-click your server and copy the Server ID.",
    serverIdLabel: "Discord Server ID (for guild workspace access)",
    requireMentionEnvKey: "DISCORD_REQUIRE_MENTION",
    requireMentionHelp:
      "Choose whether the bot should reply only when @mentioned or to all messages in this server.",
    userIdEnvKey: "DISCORD_USER_ID",
    userIdHelp:
      "Optional: enable Developer Mode in Discord, then right-click your user/avatar and copy the User ID. Leave blank to allow any member of the configured server to message the bot.",
    userIdLabel: "Discord User ID (optional guild allowlist)",
    allowIdsMode: "guild",
  },
  {
    name: "slack",
    envKey: "SLACK_BOT_TOKEN",
    description: "Slack bot messaging",
    help: "Slack API → Your Apps → OAuth & Permissions → Bot User OAuth Token (xoxb-...).",
    label: "Slack Bot Token",
    appTokenEnvKey: "SLACK_APP_TOKEN",
    appTokenHelp: "Slack API → Your Apps → Basic Information → App-Level Tokens (xapp-...).",
    appTokenLabel: "Slack App Token (Socket Mode)",
  },
];

/**
 * Injected dependencies for setupMessagingChannels. env/input/output default
 * to process.env / process.stdin / process.stderr, making the function fully
 * testable without touching real terminal state (see onboard-messaging.test.ts).
 */
export interface SetupMessagingChannelsDeps {
  step: (current: number, total: number, message: string) => void;
  isNonInteractive: () => boolean;
  note: (message: string) => void;
  getCredential: (envKey: string) => string | null;
  normalizeCredentialValue: (value: unknown) => string;
  prompt: (question: string, options?: { secret?: boolean }) => Promise;
  promptOrDefault: (
    question: string,
    envVar: string | null,
    defaultValue: string,
  ) => Promise;
  saveCredential: (envKey: string, token: string) => void;
  checkTelegramReachability: (token: string) => Promise;
  env?: NodeJS.ProcessEnv;
  input?: NodeJS.ReadStream;
  output?: NodeJS.WriteStream;
}

/**
 * Onboarding step 5/8: configure messaging channels.
 *
 * Non-interactive mode: reports which channel tokens are already present
 * (credential store or env) and probes Telegram reachability if configured.
 * Interactive mode: renders a raw-mode toggle list on `output`, then walks
 * each selected channel through token entry and channel-specific follow-ups
 * (Discord server ID / reply mode / allowlist, etc.), mutating `env` in place.
 * Returns the list of selected/configured channel names.
 */
export async function setupMessagingChannels(
  deps: SetupMessagingChannelsDeps,
): Promise {
  const env = deps.env ?? process.env;
  const input = deps.input ?? process.stdin;
  // Output goes to stderr by default so stdout stays clean for piping.
  const output = deps.output ?? process.stderr;

  deps.step(5, 8, "Messaging channels");

  // Credential store wins over raw env; empty strings normalize to null.
  const getMessagingToken = (envKey: string): string | null =>
    deps.getCredential(envKey) || deps.normalizeCredentialValue(env[envKey]) || null;

  if (deps.isNonInteractive() || env.NEMOCLAW_NON_INTERACTIVE === "1") {
    const found = MESSAGING_CHANNELS.filter((channel) => getMessagingToken(channel.envKey)).map(
      (channel) => channel.name,
    );
    if (found.length > 0) {
      deps.note(` [non-interactive] Messaging tokens detected: ${found.join(", ")}`);
      if (found.includes("telegram")) {
        await deps.checkTelegramReachability(getMessagingToken("TELEGRAM_BOT_TOKEN") || "");
      }
    } else {
      deps.note(" [non-interactive] No messaging tokens configured. Skipping.");
    }
    return found;
  }

  // Pre-enable any channel that already has a token.
  const enabled = new Set(
    MESSAGING_CHANNELS.filter((channel) => getMessagingToken(channel.envKey)).map(
      (channel) => channel.name,
    ),
  );

  // Redraw math: showList writes (channels + 3) newline-terminated lines plus
  // the prompt line; CSI <n>A moves the cursor back to the first of them and
  // CSI J clears to end of screen before redrawing.
  const linesAbovePrompt = MESSAGING_CHANNELS.length + 3;
  let firstDraw = true;
  const showList = () => {
    if (!firstDraw) {
      output.write(`\r\x1b[${linesAbovePrompt}A\x1b[J`);
    }
    firstDraw = false;
    output.write("\n");
    output.write(" Available messaging channels:\n");
    MESSAGING_CHANNELS.forEach((channel, index) => {
      const marker = enabled.has(channel.name) ? "●" : "○";
      const status = getMessagingToken(channel.envKey) ? " (configured)" : "";
      output.write(
        ` [${index + 1}] ${marker} ${channel.name} — ${channel.description}${status}\n`,
      );
    });
    output.write("\n");
    // NOTE(review): "1-3" hard-codes the current channel count; would need
    // updating if MESSAGING_CHANNELS grows (the toggle logic itself is generic).
    output.write(" Press 1-3 to toggle, Enter when done: ");
  };

  showList();

  // Raw-mode key loop: digits toggle channels, Enter finishes, Ctrl-C
  // restores the terminal and re-raises SIGINT.
  await new Promise((resolve, reject) => {
    let rawModeEnabled = false;
    let finished = false;

    function cleanup() {
      input.removeListener("data", onData);
      if (rawModeEnabled && typeof input.setRawMode === "function") {
        input.setRawMode(false);
      }
    }

    function finish() {
      if (finished) return;
      finished = true;
      cleanup();
      output.write("\n");
      resolve();
    }

    function onData(chunk: Buffer | string) {
      const text = chunk.toString("utf8");
      // A chunk may carry several keypresses (e.g. pasted input) — handle each.
      for (let i = 0; i < text.length; i += 1) {
        const ch = text[i];
        if (ch === "\u0003") {
          // Ctrl-C: raw mode swallows the signal, so restore the TTY and
          // re-deliver SIGINT to the process ourselves.
          cleanup();
          reject(Object.assign(new Error("Prompt interrupted"), { code: "SIGINT" }));
          process.kill(process.pid, "SIGINT");
          return;
        }
        if (ch === "\r" || ch === "\n") {
          finish();
          return;
        }
        const num = parseInt(ch, 10);
        if (num >= 1 && num <= MESSAGING_CHANNELS.length) {
          const channel = MESSAGING_CHANNELS[num - 1];
          if (enabled.has(channel.name)) {
            enabled.delete(channel.name);
          } else {
            enabled.add(channel.name);
          }
          showList();
        }
      }
    }

    input.setEncoding("utf8");
    if (typeof input.resume === "function") {
      input.resume();
    }
    if (typeof input.setRawMode === "function") {
      input.setRawMode(true);
      rawModeEnabled = true;
    }
    input.on("data", onData);
  });

  const selected = Array.from(enabled);
  if (selected.length === 0) {
    console.log(" Skipping messaging channels.");
    return [];
  }

  // Per-channel configuration: token first, then channel-specific follow-ups.
  for (const name of selected) {
    const channel = MESSAGING_CHANNELS.find((entry) => entry.name === name);
    if (!channel) {
      console.log(` Unknown channel: ${name}`);
      continue;
    }
    if (getMessagingToken(channel.envKey)) {
      console.log(` ✓ ${channel.name} — already configured`);
    } else {
      console.log("");
      console.log(` ${channel.help}`);
      const token = deps.normalizeCredentialValue(
        await deps.prompt(` ${channel.label}: `, { secret: true }),
      );
      if (token) {
        deps.saveCredential(channel.envKey, token);
        env[channel.envKey] = token;
        console.log(` ✓ ${channel.name} token saved`);
      } else {
        // No token means the follow-up prompts below would be pointless.
        console.log(` Skipped ${channel.name} (no token entered)`);
        continue;
      }
    }
    // Discord-style: guild server ID (only written to env, not the credential store).
    if (channel.serverIdEnvKey) {
      const existingServerIds = env[channel.serverIdEnvKey] || "";
      if (existingServerIds) {
        console.log(` ✓ ${channel.name} — server ID already set: ${existingServerIds}`);
      } else {
        console.log(` ${channel.serverIdHelp}`);
        const serverId = (await deps.prompt(` ${channel.serverIdLabel}: `)).trim();
        if (serverId) {
          env[channel.serverIdEnvKey] = serverId;
          console.log(` ✓ ${channel.name} server ID saved`);
        } else {
          console.log(` Skipped ${channel.name} server ID (guild channels stay disabled)`);
        }
      }
    }
    // Reply mode only makes sense once a server ID exists; "1" = @mentions only.
    if (channel.requireMentionEnvKey && channel.serverIdEnvKey && env[channel.serverIdEnvKey]) {
      const existingRequireMention = env[channel.requireMentionEnvKey];
      if (existingRequireMention === "0" || existingRequireMention === "1") {
        const mode = existingRequireMention === "0" ? "all messages" : "@mentions only";
        console.log(` ✓ ${channel.name} — reply mode already set: ${mode}`);
      } else {
        console.log(` ${channel.requireMentionHelp}`);
        const answer = (await deps.prompt(" Reply only when @mentioned? [Y/n]: "))
          .trim()
          .toLowerCase();
        env[channel.requireMentionEnvKey] = answer === "n" || answer === "no" ? "0" : "1";
        const mode = env[channel.requireMentionEnvKey] === "0" ? "all messages" : "@mentions only";
        console.log(` ✓ ${channel.name} reply mode saved: ${mode}`);
      }
    }
    // User allowlist: for guild channels it is gated on a configured server ID;
    // for DM channels (no serverIdEnvKey) it is always offered.
    if (channel.userIdEnvKey && (!channel.serverIdEnvKey || env[channel.serverIdEnvKey])) {
      const existingIds = env[channel.userIdEnvKey] || "";
      if (existingIds) {
        console.log(` ✓ ${channel.name} — allowed IDs already set: ${existingIds}`);
      } else {
        console.log(` ${channel.userIdHelp}`);
        const userId = (await deps.prompt(` ${channel.userIdLabel}: `)).trim();
        if (userId) {
          env[channel.userIdEnvKey] = userId;
          console.log(` ✓ ${channel.name} user ID saved`);
        } else {
          const skippedReason =
            channel.allowIdsMode === "guild"
              ? "any member in the configured server can message the bot"
              : "bot will require manual pairing";
          console.log(` Skipped ${channel.name} user ID (${skippedReason})`);
        }
      }
    }
  }
  console.log("");

  // Interactive-mode reachability check, mirroring the non-interactive branch.
  if (
    !deps.isNonInteractive() &&
    selected.includes("telegram") &&
    getMessagingToken("TELEGRAM_BOT_TOKEN")
  ) {
    await deps.checkTelegramReachability(getMessagingToken("TELEGRAM_BOT_TOKEN") || "");
  }

  return selected;
}
diff --git a/src/lib/onboard-nim-setup.ts b/src/lib/onboard-nim-setup.ts
new file mode 100644
index 0000000000..39caedfffd
--- /dev/null
+++ b/src/lib/onboard-nim-setup.ts
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0 + +export async function runSetupNim(gpu: any, deps: any): Promise { + deps.step(3, 8, "Configuring inference (NIM)"); + + let model = null; + let provider = deps.remoteProviderConfig.build.providerName; + let nimContainer = null; + let endpointUrl = deps.remoteProviderConfig.build.endpointUrl; + let credentialEnv = deps.remoteProviderConfig.build.credentialEnv; + let preferredInferenceApi = null; + + const hasOllama = !!deps.runCapture("command -v ollama", { ignoreError: true }); + const ollamaRunning = !!deps.runCapture( + `curl -sf http://127.0.0.1:${deps.ollamaPort}/api/tags 2>/dev/null`, + { + ignoreError: true, + }, + ); + const vllmRunning = !!deps.runCapture( + `curl -sf http://127.0.0.1:${deps.vllmPort}/v1/models 2>/dev/null`, + { + ignoreError: true, + }, + ); + const requestedProvider = deps.isNonInteractive() ? deps.getNonInteractiveProvider() : null; + const requestedModel = deps.isNonInteractive() + ? deps.getNonInteractiveModel(requestedProvider || "build") + : null; + const options: Array<{ key: string; label: string }> = []; + options.push({ key: "build", label: "NVIDIA Endpoints" }); + options.push({ key: "openai", label: "OpenAI" }); + options.push({ key: "custom", label: "Other OpenAI-compatible endpoint" }); + options.push({ key: "anthropic", label: "Anthropic" }); + options.push({ key: "anthropicCompatible", label: "Other Anthropic-compatible endpoint" }); + options.push({ key: "gemini", label: "Google Gemini" }); + if (hasOllama || ollamaRunning) { + options.push({ + key: "ollama", + label: + `Local Ollama (localhost:${deps.ollamaPort})${ollamaRunning ? " — running" : ""}` + + (ollamaRunning ? 
" (suggested)" : ""), + }); + } + if (deps.experimental && gpu && gpu.nimCapable) { + options.push({ key: "nim-local", label: "Local NVIDIA NIM [experimental]" }); + } + if (deps.experimental && vllmRunning) { + options.push({ + key: "vllm", + label: "Local vLLM [experimental] — running", + }); + } + if (!hasOllama && deps.processPlatform === "darwin") { + options.push({ key: "install-ollama", label: "Install Ollama (macOS)" }); + } + + if (options.length > 1) { + selectionLoop: while (true) { + let selected: { key: string; label: string } | undefined; + + if (deps.isNonInteractive()) { + const providerKey = requestedProvider || "build"; + selected = options.find((option) => option.key === providerKey); + if (!selected) { + console.error( + ` Requested provider '${providerKey}' is not available in this environment.`, + ); + process.exit(1); + } + deps.note(` [non-interactive] Provider: ${selected.key}`); + } else { + const suggestions = []; + if (vllmRunning) suggestions.push("vLLM"); + if (ollamaRunning) suggestions.push("Ollama"); + if (suggestions.length > 0) { + console.log( + ` Detected local inference option${suggestions.length > 1 ? "s" : ""}: ${suggestions.join(", ")}`, + ); + console.log(""); + } + + console.log(""); + console.log(" Inference options:"); + options.forEach((option, index) => { + console.log(` ${index + 1}) ${option.label}`); + }); + console.log(""); + + const envProviderHint = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase(); + const envProviderIdx = envProviderHint + ? options.findIndex((option) => option.key.toLowerCase() === envProviderHint) + : -1; + const defaultIdx = + (envProviderIdx >= 0 ? 
envProviderIdx : options.findIndex((option) => option.key === "build")) + 1; + const choice = await deps.prompt(` Choose [${defaultIdx}]: `); + const idx = parseInt(choice || String(defaultIdx), 10) - 1; + selected = options[idx] || options[defaultIdx - 1]; + } + + if (selected && deps.remoteProviderConfig[selected.key]) { + const remoteConfig = deps.remoteProviderConfig[selected.key]; + provider = remoteConfig.providerName; + credentialEnv = remoteConfig.credentialEnv; + endpointUrl = remoteConfig.endpointUrl; + preferredInferenceApi = null; + + if (selected.key === "custom") { + const _envUrl = (process.env.NEMOCLAW_ENDPOINT_URL || "").trim(); + const endpointInput = deps.isNonInteractive() + ? _envUrl + : + (await deps.prompt( + _envUrl + ? ` OpenAI-compatible base URL [${_envUrl}]: ` + : " OpenAI-compatible base URL (e.g., https://openrouter.ai): ", + )) || _envUrl; + const navigation = deps.getNavigationChoice(endpointInput); + if (navigation === "back") { + console.log(" Returning to provider selection."); + console.log(""); + continue selectionLoop; + } + if (navigation === "exit") { + deps.exitOnboardFromPrompt(); + } + endpointUrl = deps.normalizeProviderBaseUrl(endpointInput, "openai"); + if (!endpointUrl) { + console.error(" Endpoint URL is required for Other OpenAI-compatible endpoint."); + if (deps.isNonInteractive()) { + process.exit(1); + } + console.log(""); + continue selectionLoop; + } + } else if (selected.key === "anthropicCompatible") { + const _envUrl = (process.env.NEMOCLAW_ENDPOINT_URL || "").trim(); + const endpointInput = deps.isNonInteractive() + ? _envUrl + : + (await deps.prompt( + _envUrl + ? 
` Anthropic-compatible base URL [${_envUrl}]: ` + : " Anthropic-compatible base URL (e.g., https://proxy.example.com): ", + )) || _envUrl; + const navigation = deps.getNavigationChoice(endpointInput); + if (navigation === "back") { + console.log(" Returning to provider selection."); + console.log(""); + continue selectionLoop; + } + if (navigation === "exit") { + deps.exitOnboardFromPrompt(); + } + endpointUrl = deps.normalizeProviderBaseUrl(endpointInput, "anthropic"); + if (!endpointUrl) { + console.error(" Endpoint URL is required for Other Anthropic-compatible endpoint."); + if (deps.isNonInteractive()) { + process.exit(1); + } + console.log(""); + continue selectionLoop; + } + } + + if (selected.key === "build") { + const _nvProviderKey = (process.env.NEMOCLAW_PROVIDER_KEY || "").trim(); + if (_nvProviderKey && !process.env.NVIDIA_API_KEY) { + process.env.NVIDIA_API_KEY = _nvProviderKey; + } + if (deps.isNonInteractive()) { + if (!process.env.NVIDIA_API_KEY) { + console.error( + " NVIDIA_API_KEY (or NEMOCLAW_PROVIDER_KEY) is required for NVIDIA Endpoints in non-interactive mode.", + ); + process.exit(1); + } + const keyError = deps.validateNvidiaApiKeyValue(process.env.NVIDIA_API_KEY); + if (keyError) { + console.error(keyError); + console.error(` Get a key from ${deps.remoteProviderConfig.build.helpUrl}`); + process.exit(1); + } + } else { + await deps.ensureApiKey(); + } + const _envModel = (process.env.NEMOCLAW_MODEL || "").trim(); + model = + requestedModel || + (deps.isNonInteractive() + ? 
deps.defaultCloudModel + : await deps.promptCloudModel({ defaultModelId: _envModel || undefined })) || + deps.defaultCloudModel; + if (model === deps.backToSelection) { + console.log(" Returning to provider selection."); + console.log(""); + continue selectionLoop; + } + } else { + const _providerKeyHint = (process.env.NEMOCLAW_PROVIDER_KEY || "").trim(); + if (_providerKeyHint && !process.env[credentialEnv]) { + process.env[credentialEnv] = _providerKeyHint; + } + + if (deps.isNonInteractive()) { + if (!process.env[credentialEnv]) { + console.error( + ` ${credentialEnv} (or NEMOCLAW_PROVIDER_KEY) is required for ${remoteConfig.label} in non-interactive mode.`, + ); + process.exit(1); + } + } else { + await deps.ensureNamedCredential( + credentialEnv, + remoteConfig.label + " API key", + remoteConfig.helpUrl, + ); + } + const _envModelRemote = (process.env.NEMOCLAW_MODEL || "").trim(); + const defaultModel = requestedModel || _envModelRemote || remoteConfig.defaultModel; + let modelValidator = null; + if (selected.key === "openai" || selected.key === "gemini") { + const modelAuthMode = deps.getProbeAuthMode(provider); + modelValidator = (candidate: string) => + deps.validateOpenAiLikeModel( + remoteConfig.label, + endpointUrl, + candidate, + deps.getCredential(credentialEnv), + ...(modelAuthMode ? 
[{ authMode: modelAuthMode }] : []), + ); + } else if (selected.key === "anthropic") { + modelValidator = (candidate: string) => + deps.validateAnthropicModel( + endpointUrl || deps.anthropicEndpointUrl, + candidate, + deps.getCredential(credentialEnv), + ); + } + while (true) { + if (deps.isNonInteractive()) { + model = defaultModel; + } else if (remoteConfig.modelMode === "curated") { + model = await deps.promptRemoteModel( + remoteConfig.label, + selected.key, + defaultModel, + modelValidator, + ); + } else { + model = await deps.promptInputModel(remoteConfig.label, defaultModel, modelValidator); + } + if (model === deps.backToSelection) { + console.log(" Returning to provider selection."); + console.log(""); + continue selectionLoop; + } + + if (selected.key === "custom") { + const validation = await deps.validateCustomOpenAiLikeSelection( + remoteConfig.label, + endpointUrl, + model, + credentialEnv, + remoteConfig.helpUrl, + ); + if (validation.ok) { + const explicitApi = (process.env.NEMOCLAW_PREFERRED_API || "") + .trim() + .toLowerCase(); + if ( + explicitApi && + explicitApi !== "openai-completions" && + explicitApi !== "chat-completions" + ) { + preferredInferenceApi = validation.api; + } else { + if (validation.api !== "openai-completions") { + console.log( + " ℹ Using chat completions API (compatible endpoints may not support the Responses API developer role)", + ); + } + preferredInferenceApi = "openai-completions"; + } + break; + } + if ( + validation.retry === "credential" || + validation.retry === "retry" || + validation.retry === "model" + ) { + continue; + } + if (validation.retry === "selection") { + continue selectionLoop; + } + } else if (selected.key === "anthropicCompatible") { + const validation = await deps.validateCustomAnthropicSelection( + remoteConfig.label, + endpointUrl || deps.anthropicEndpointUrl, + model, + credentialEnv, + remoteConfig.helpUrl, + ); + if (validation.ok) { + preferredInferenceApi = validation.api; + break; + } + 
if ( + validation.retry === "credential" || + validation.retry === "retry" || + validation.retry === "model" + ) { + continue; + } + if (validation.retry === "selection") { + continue selectionLoop; + } + } else { + const retryMessage = "Please choose a provider/model again."; + if (selected.key === "anthropic") { + const validation = await deps.validateAnthropicSelectionWithRetryMessage( + remoteConfig.label, + endpointUrl || deps.anthropicEndpointUrl, + model, + credentialEnv, + retryMessage, + remoteConfig.helpUrl, + ); + if (validation.ok) { + preferredInferenceApi = validation.api; + break; + } + if ( + validation.retry === "credential" || + validation.retry === "retry" || + validation.retry === "model" + ) { + continue; + } + } else { + const validation = await deps.validateOpenAiLikeSelection( + remoteConfig.label, + endpointUrl, + model, + credentialEnv, + retryMessage, + remoteConfig.helpUrl, + { + requireResponsesToolCalling: deps.shouldRequireResponsesToolCalling(provider), + skipResponsesProbe: deps.shouldSkipResponsesProbe(provider), + authMode: deps.getProbeAuthMode(provider), + }, + ); + if (validation.ok) { + preferredInferenceApi = validation.api; + break; + } + if ( + validation.retry === "credential" || + validation.retry === "retry" || + validation.retry === "model" + ) { + continue; + } + } + continue selectionLoop; + } + } + } + + if (selected.key === "build") { + while (true) { + const validation = await deps.validateOpenAiLikeSelection( + remoteConfig.label, + endpointUrl, + model, + credentialEnv, + "Please choose a provider/model again.", + remoteConfig.helpUrl, + { + requireResponsesToolCalling: deps.shouldRequireResponsesToolCalling(provider), + skipResponsesProbe: deps.shouldSkipResponsesProbe(provider), + authMode: deps.getProbeAuthMode(provider), + }, + ); + if (validation.ok) { + preferredInferenceApi = validation.api; + break; + } + if (validation.retry === "credential" || validation.retry === "retry") { + continue; + } + continue 
selectionLoop; + } + } + + console.log(` Using ${remoteConfig.label} with model: ${model}`); + break; + } else if (selected && selected.key === "nim-local") { + const models = deps.nim.listModels().filter((entry: any) => entry.minGpuMemoryMB <= gpu.totalMemoryMB); + if (models.length === 0) { + console.log(" No NIM models fit your GPU VRAM. Falling back to cloud API."); + } else { + let sel; + if (deps.isNonInteractive()) { + if (requestedModel) { + sel = models.find((entry: any) => entry.name === requestedModel); + if (!sel) { + console.error(` Unsupported NEMOCLAW_MODEL for NIM: ${requestedModel}`); + process.exit(1); + } + } else { + sel = models[0]; + } + deps.note(` [non-interactive] NIM model: ${sel.name}`); + } else { + console.log(""); + console.log(" Models that fit your GPU:"); + models.forEach((entry: any, index: number) => { + console.log(` ${index + 1}) ${entry.name} (min ${entry.minGpuMemoryMB} MB)`); + }); + console.log(""); + + const modelChoice = await deps.prompt(` Choose model [1]: `); + const midx = parseInt(modelChoice || "1", 10) - 1; + sel = models[midx] || models[0]; + } + model = sel.name; + + console.log(` Pulling NIM image for ${model}...`); + deps.nim.pullNimImage(model); + + console.log(" Starting NIM container..."); + nimContainer = deps.nim.startNimContainerByName( + deps.nim.containerName(deps.gatewayName), + model, + ); + + console.log(" Waiting for NIM to become healthy..."); + if (!deps.nim.waitForNimHealth()) { + console.error(" NIM failed to start. 
Falling back to cloud API."); + model = null; + nimContainer = null; + } else { + provider = "vllm-local"; + credentialEnv = "OPENAI_API_KEY"; + endpointUrl = deps.getLocalProviderBaseUrl(provider); + const validation = await deps.validateOpenAiLikeSelection( + "Local NVIDIA NIM", + endpointUrl, + model, + credentialEnv, + ); + if ( + validation.retry === "selection" || + validation.retry === "back" || + validation.retry === "model" + ) { + continue selectionLoop; + } + if (!validation.ok) { + continue selectionLoop; + } + preferredInferenceApi = validation.api; + if (preferredInferenceApi !== "openai-completions") { + console.log( + " ℹ Using chat completions API (tool-call-parser requires /v1/chat/completions)", + ); + } + preferredInferenceApi = "openai-completions"; + } + } + break; + } else if (selected && selected.key === "ollama") { + if (!ollamaRunning) { + console.log(" Starting Ollama..."); + if (deps.isWsl()) { + deps.run(`ollama serve > /dev/null 2>&1 &`, { ignoreError: true }); + } else { + deps.run( + `OLLAMA_HOST=127.0.0.1:${deps.ollamaPort} ollama serve > /dev/null 2>&1 &`, + { ignoreError: true }, + ); + } + deps.sleep(2); + if (!deps.isWsl()) deps.printOllamaExposureWarning(); + } + if (deps.isWsl()) { + console.log(` ✓ Using Ollama on localhost:${deps.ollamaPort}`); + } else { + deps.startOllamaAuthProxy(); + console.log( + ` ✓ Using Ollama on localhost:${deps.ollamaPort} (proxy on :${deps.ollamaProxyPort})`, + ); + } + provider = "ollama-local"; + credentialEnv = "OPENAI_API_KEY"; + endpointUrl = deps.getLocalProviderBaseUrl(provider); + while (true) { + const installedModels = deps.getOllamaModelOptions(); + if (deps.isNonInteractive()) { + model = requestedModel || deps.getDefaultOllamaModel(gpu); + } else { + model = await deps.promptOllamaModel(gpu); + } + if (model === deps.backToSelection) { + console.log(" Returning to provider selection."); + console.log(""); + continue selectionLoop; + } + const probe = deps.prepareOllamaModel(model, 
installedModels); + if (!probe.ok) { + console.error(` ${probe.message}`); + if (deps.isNonInteractive()) { + process.exit(1); + } + console.log(" Choose a different Ollama model or select Other."); + console.log(""); + continue; + } + const validation = await deps.validateOpenAiLikeSelection( + "Local Ollama", + deps.getLocalProviderValidationBaseUrl(provider), + model, + null, + "Choose a different Ollama model or select Other.", + ); + if (validation.retry === "selection" || validation.retry === "back") { + continue selectionLoop; + } + if (!validation.ok) { + continue; + } + if (validation.api !== "openai-completions") { + console.log( + " ℹ Using chat completions API (Ollama tool calls require /v1/chat/completions)", + ); + } + preferredInferenceApi = "openai-completions"; + break; + } + break; + } else if (selected && selected.key === "install-ollama") { + console.log(" Installing Ollama via Homebrew..."); + deps.run("brew install ollama", { ignoreError: true }); + console.log(" Starting Ollama..."); + deps.run(`OLLAMA_HOST=127.0.0.1:${deps.ollamaPort} ollama serve > /dev/null 2>&1 &`, { + ignoreError: true, + }); + deps.sleep(2); + deps.startOllamaAuthProxy(); + console.log( + ` ✓ Using Ollama on localhost:${deps.ollamaPort} (proxy on :${deps.ollamaProxyPort})`, + ); + provider = "ollama-local"; + credentialEnv = "OPENAI_API_KEY"; + endpointUrl = deps.getLocalProviderBaseUrl(provider); + while (true) { + const installedModels = deps.getOllamaModelOptions(); + if (deps.isNonInteractive()) { + model = requestedModel || deps.getDefaultOllamaModel(gpu); + } else { + model = await deps.promptOllamaModel(gpu); + } + if (model === deps.backToSelection) { + console.log(" Returning to provider selection."); + console.log(""); + continue selectionLoop; + } + const probe = deps.prepareOllamaModel(model, installedModels); + if (!probe.ok) { + console.error(` ${probe.message}`); + if (deps.isNonInteractive()) { + process.exit(1); + } + console.log(" Choose a different 
Ollama model or select Other."); + console.log(""); + continue; + } + const validation = await deps.validateOpenAiLikeSelection( + "Local Ollama", + deps.getLocalProviderValidationBaseUrl(provider), + model, + null, + "Choose a different Ollama model or select Other.", + ); + if (validation.retry === "selection" || validation.retry === "back") { + continue selectionLoop; + } + if (!validation.ok) { + continue; + } + if (validation.api !== "openai-completions") { + console.log( + " ℹ Using chat completions API (Ollama tool calls require /v1/chat/completions)", + ); + } + preferredInferenceApi = "openai-completions"; + break; + } + break; + } else if (selected && selected.key === "vllm") { + console.log(` ✓ Using existing vLLM on localhost:${deps.vllmPort}`); + provider = "vllm-local"; + credentialEnv = "OPENAI_API_KEY"; + endpointUrl = deps.getLocalProviderBaseUrl(provider); + const vllmModelsRaw = deps.runCapture( + `curl -sf http://127.0.0.1:${deps.vllmPort}/v1/models 2>/dev/null`, + { + ignoreError: true, + }, + ); + try { + const vllmModels = JSON.parse(vllmModelsRaw); + if (vllmModels.data && vllmModels.data.length > 0) { + model = vllmModels.data[0].id; + if (!deps.isSafeModelId(model)) { + console.error(` Detected model ID contains invalid characters: ${model}`); + process.exit(1); + } + console.log(` Detected model: ${model}`); + } else { + console.error(" Could not detect model from vLLM. Please specify manually."); + process.exit(1); + } + } catch { + console.error( + ` Could not query vLLM models endpoint. 
Is vLLM running on localhost:${deps.vllmPort}?`, + ); + process.exit(1); + } + const validation = await deps.validateOpenAiLikeSelection( + "Local vLLM", + deps.getLocalProviderValidationBaseUrl(provider), + model, + credentialEnv, + ); + if ( + validation.retry === "selection" || + validation.retry === "back" || + validation.retry === "model" + ) { + continue selectionLoop; + } + if (!validation.ok) { + continue selectionLoop; + } + preferredInferenceApi = validation.api; + if (preferredInferenceApi !== "openai-completions") { + console.log( + " ℹ Using chat completions API (tool-call-parser requires /v1/chat/completions)", + ); + } + preferredInferenceApi = "openai-completions"; + break; + } + } + } + + return { model, provider, endpointUrl, credentialEnv, preferredInferenceApi, nimContainer }; +} diff --git a/src/lib/onboard-ollama-models.ts b/src/lib/onboard-ollama-models.ts new file mode 100644 index 0000000000..328ac1f4f4 --- /dev/null +++ b/src/lib/onboard-ollama-models.ts @@ -0,0 +1,95 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { spawnSync } from "node:child_process"; + +export interface OllamaModelDeps { + getOllamaModelOptions: () => string[]; + getBootstrapOllamaModelOptions: (gpu?: unknown) => string[]; + getDefaultOllamaModel: (gpu?: unknown) => string; + prompt: (question: string, options?: { secret?: boolean }) => Promise<string>; + promptManualModelId: (question: string, providerLabel: string) => Promise<string>; + shellQuote: (value: string) => string; + root: string; + getOllamaWarmupCommand: (model: string) => string; + run: ( + command: string | string[], + options?: { ignoreError?: boolean; suppressOutput?: boolean }, + ) => { status: number; stdout?: string; stderr?: string }; + validateOllamaModel: (model: string) => { ok: boolean; message?: string }; +} + +export async function promptOllamaModel(gpu: unknown = null, deps: OllamaModelDeps): Promise<string> { + const installed = deps.getOllamaModelOptions(); + const options = installed.length > 0 ? installed : deps.getBootstrapOllamaModelOptions(gpu); + const defaultModel = deps.getDefaultOllamaModel(gpu); + const defaultIndex = Math.max(0, options.indexOf(defaultModel)); + + console.log(""); + console.log(installed.length > 0 ? " Ollama models:" : " Ollama starter models:"); + options.forEach((option, index) => { + console.log(` ${index + 1}) ${option}`); + }); + console.log(` ${options.length + 1}) Other...`); + if (installed.length === 0) { + console.log(""); + console.log(" No local Ollama models are installed yet. 
Choose one to pull and load now."); + } + console.log(""); + + const choice = await deps.prompt(` Choose model [${defaultIndex + 1}]: `); + const index = parseInt(choice || String(defaultIndex + 1), 10) - 1; + if (index >= 0 && index < options.length) { + return options[index]!; + } + return deps.promptManualModelId(" Ollama model id: ", "Ollama"); +} + +export function printOllamaExposureWarning(): void { + console.log(""); + console.log(" ⚠ Ollama is binding to 0.0.0.0 so the sandbox can reach it via Docker."); + console.log(" This exposes the Ollama API to your local network (no auth required)."); + console.log(" On public WiFi, any device on the same network can send prompts to your GPU."); + console.log(" See: CNVD-2025-04094, CVE-2024-37032"); + console.log(""); +} + +function pullOllamaModel(model: string, deps: OllamaModelDeps): boolean { + const result = spawnSync("bash", ["-c", `ollama pull ${deps.shellQuote(model)}`], { + cwd: deps.root, + encoding: "utf8", + stdio: "inherit", + timeout: 600_000, + env: { ...process.env }, + }); + if (result.signal === "SIGTERM") { + console.error( + " Model pull timed out after 10 minutes. Try a smaller model or check your network connection.", + ); + return false; + } + return result.status === 0; +} + +export function prepareOllamaModel( + model: string, + installedModels: string[] = [], + deps: OllamaModelDeps, +): { ok: boolean; message?: string } { + const alreadyInstalled = installedModels.includes(model); + if (!alreadyInstalled) { + console.log(` Pulling Ollama model: ${model}`); + if (!pullOllamaModel(model, deps)) { + return { + ok: false, + message: + `Failed to pull Ollama model '${model}'. 
` + + "Check the model name and that Ollama can access the registry, then try another model.", + }; + } + } + + console.log(` Loading Ollama model: ${model}`); + deps.run(deps.getOllamaWarmupCommand(model), { ignoreError: true }); + return deps.validateOllamaModel(model); +} diff --git a/src/lib/onboard-ollama-proxy.ts b/src/lib/onboard-ollama-proxy.ts new file mode 100644 index 0000000000..0e5e33b913 --- /dev/null +++ b/src/lib/onboard-ollama-proxy.ts @@ -0,0 +1,175 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const PROXY_STATE_DIR = path.join(os.homedir(), ".nemoclaw"); +const PROXY_TOKEN_PATH = path.join(PROXY_STATE_DIR, "ollama-proxy-token"); +const PROXY_PID_PATH = path.join(PROXY_STATE_DIR, "ollama-auth-proxy.pid"); + +let ollamaProxyToken: string | null = null; + +export interface OllamaProxyDeps { + runCapture: (command: string | string[], opts?: { ignoreError?: boolean }) => string; + run: ( + command: string | string[], + opts?: { ignoreError?: boolean; suppressOutput?: boolean }, + ) => { status: number; stdout?: string; stderr?: string }; + spawn: typeof import("node:child_process").spawn; + sleep: (seconds: number) => void; + scriptsDir: string; + ollamaProxyPort: number; + ollamaPort: number; +} + +function ensureProxyStateDir(): void { + if (!fs.existsSync(PROXY_STATE_DIR)) { + fs.mkdirSync(PROXY_STATE_DIR, { recursive: true }); + } +} + +export function persistProxyToken(token: string): void { + ensureProxyStateDir(); + fs.writeFileSync(PROXY_TOKEN_PATH, token, { mode: 0o600 }); + // mode only applies on creation; ensure permissions on existing files too + fs.chmodSync(PROXY_TOKEN_PATH, 0o600); +} + +function loadPersistedProxyToken(): string | null { + try { + if (fs.existsSync(PROXY_TOKEN_PATH)) { + const token = fs.readFileSync(PROXY_TOKEN_PATH, 
"utf-8").trim(); + return token || null; + } + } catch { + /* ignore */ + } + return null; +} + +function persistProxyPid(pid: number | null | undefined): void { + if (typeof pid !== "number" || !Number.isInteger(pid) || pid <= 0) return; + const resolvedPid = pid; + ensureProxyStateDir(); + fs.writeFileSync(PROXY_PID_PATH, `${resolvedPid}\n`, { mode: 0o600 }); + fs.chmodSync(PROXY_PID_PATH, 0o600); +} + +function loadPersistedProxyPid(): number | null { + try { + if (!fs.existsSync(PROXY_PID_PATH)) return null; + const raw = fs.readFileSync(PROXY_PID_PATH, "utf-8").trim(); + const pid = Number.parseInt(raw, 10); + return Number.isInteger(pid) && pid > 0 ? pid : null; + } catch { + return null; + } +} + +function clearPersistedProxyPid(): void { + try { + if (fs.existsSync(PROXY_PID_PATH)) { + fs.unlinkSync(PROXY_PID_PATH); + } + } catch { + /* ignore */ + } +} + +function isOllamaProxyProcess(pid: number | null | undefined, deps: OllamaProxyDeps): boolean { + if (typeof pid !== "number" || !Number.isInteger(pid) || pid <= 0) return false; + const resolvedPid = pid; + const cmdline = deps.runCapture(["ps", "-p", String(resolvedPid), "-o", "args="], { + ignoreError: true, + }); + return Boolean(cmdline && cmdline.includes("ollama-auth-proxy.js")); +} + +function spawnOllamaAuthProxy(token: string, deps: OllamaProxyDeps): number | null { + const child = deps.spawn(process.execPath, [path.join(deps.scriptsDir, "ollama-auth-proxy.js")], { + detached: true, + stdio: "ignore", + env: { + ...process.env, + OLLAMA_PROXY_TOKEN: token, + OLLAMA_PROXY_PORT: String(deps.ollamaProxyPort), + OLLAMA_BACKEND_PORT: String(deps.ollamaPort), + }, + }); + child.unref(); + persistProxyPid(child.pid); + return child.pid ?? 
null; +} + +function killStaleProxy(deps: OllamaProxyDeps): void { + try { + const persistedPid = loadPersistedProxyPid(); + if (isOllamaProxyProcess(persistedPid, deps)) { + deps.run(["kill", String(persistedPid)], { ignoreError: true, suppressOutput: true }); + } + clearPersistedProxyPid(); + + // Best-effort cleanup for older proxy processes created before the PID file + // existed. Only kill processes that are actually the auth proxy, not + // unrelated services that happen to use the same port. + const pidOutput = deps.runCapture(["lsof", "-ti", `:${deps.ollamaProxyPort}`], { + ignoreError: true, + }); + if (pidOutput && pidOutput.trim()) { + for (const pid of pidOutput.trim().split(/\s+/)) { + if (isOllamaProxyProcess(Number.parseInt(pid, 10), deps)) { + deps.run(["kill", pid], { ignoreError: true, suppressOutput: true }); + } + } + deps.sleep(1); + } + } catch { + /* ignore */ + } +} + +export function startOllamaAuthProxy(deps: OllamaProxyDeps): void { + const crypto = require("crypto"); + killStaleProxy(deps); + + ollamaProxyToken = crypto.randomBytes(24).toString("hex"); + // Don't persist yet — wait until provider is confirmed in setupInference. + // If the user backs out to a different provider, the token stays in memory + // only and is discarded. + const pid = spawnOllamaAuthProxy(ollamaProxyToken as string, deps); + deps.sleep(1); + if (!isOllamaProxyProcess(pid, deps)) { + console.error(` Warning: Ollama auth proxy did not start on :${deps.ollamaProxyPort}`); + } +} + +/** + * Ensure the auth proxy is running — called on sandbox connect to recover + * from host reboots where the background proxy process was lost. + */ +export function ensureOllamaAuthProxy(deps: OllamaProxyDeps): void { + // Try to load persisted token first — if none, this isn't an Ollama setup. 
+ const token = loadPersistedProxyToken(); + if (!token) return; + + const pid = loadPersistedProxyPid(); + if (isOllamaProxyProcess(pid, deps)) { + ollamaProxyToken = token; + return; + } + + // Proxy not running — restart it with the persisted token. + killStaleProxy(deps); + ollamaProxyToken = token; + spawnOllamaAuthProxy(token, deps); + deps.sleep(1); +} + +export function getOllamaProxyToken(): string | null { + if (ollamaProxyToken) return ollamaProxyToken; + // Fall back to persisted token (resume / reconnect scenario) + ollamaProxyToken = loadPersistedProxyToken(); + return ollamaProxyToken; +} diff --git a/src/lib/onboard-openclaw-setup.ts b/src/lib/onboard-openclaw-setup.ts new file mode 100644 index 0000000000..4ca890c38c --- /dev/null +++ b/src/lib/onboard-openclaw-setup.ts @@ -0,0 +1,77 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; + +export interface OpenclawSetupDeps { + step: (current: number, total: number, message: string) => void; + getProviderSelectionConfig: (provider: string, model: string) => Record<string, unknown> | null; + writeSandboxConfigSyncFile: (script: string) => string; + openshellShellCommand: (args: string[], options?: { openshellBinary?: string }) => string; + shellQuote: (value: string) => string; + run: ( + command: string | string[], + options?: { stdio?: [string, string, string] }, + ) => unknown; + cleanupTempDir: (filePath: string, expectedPrefix: string) => void; + fetchGatewayAuthTokenFromSandbox: (sandboxName: string) => string | null; + log: (message: string) => void; + secureTempFile: (prefix: string, ext?: string) => string; +} + +export function buildSandboxConfigSyncScript(selectionConfig: Record<string, unknown>): string { + // openclaw.json is immutable (root:root 444, Landlock read-only) — never + // write to it at runtime. 
Model routing is handled by the host-side + // gateway (`openshell inference set` in Step 5), not from inside the + // sandbox. We only write the NemoClaw selection config (~/.nemoclaw/). + return ` +set -euo pipefail +mkdir -p ~/.nemoclaw +cat > ~/.nemoclaw/config.json <<'EOF_NEMOCLAW_CFG' +${JSON.stringify(selectionConfig, null, 2)} +EOF_NEMOCLAW_CFG +exit +`.trim(); +} + +export function isOpenclawReady(sandboxName: string, deps: Pick<OpenclawSetupDeps, "fetchGatewayAuthTokenFromSandbox">): boolean { + return Boolean(deps.fetchGatewayAuthTokenFromSandbox(sandboxName)); +} + +export function writeSandboxConfigSyncFile( + script: string, + deps: Pick<OpenclawSetupDeps, "secureTempFile">, +): string { + const scriptFile = deps.secureTempFile("nemoclaw-sync", ".sh"); + fs.writeFileSync(scriptFile, `${script}\n`, { mode: 0o600 }); + return scriptFile; +} + +export function setupOpenclaw( + sandboxName: string, + model: string, + provider: string, + deps: OpenclawSetupDeps, +): void { + deps.step(7, 8, "Setting up OpenClaw inside sandbox"); + + const selectionConfig = deps.getProviderSelectionConfig(provider, model); + if (selectionConfig) { + const sandboxConfig = { + ...selectionConfig, + onboardedAt: new Date().toISOString(), + }; + const script = buildSandboxConfigSyncScript(sandboxConfig); + const scriptFile = deps.writeSandboxConfigSyncFile(script); + try { + deps.run( + `${deps.openshellShellCommand(["sandbox", "connect", sandboxName])} < ${deps.shellQuote(scriptFile)}`, + { stdio: ["ignore", "ignore", "inherit"] }, + ); + } finally { + deps.cleanupTempDir(scriptFile, "nemoclaw-sync"); + } + } + + deps.log(" ✓ OpenClaw gateway launched inside sandbox"); +} diff --git a/src/lib/onboard-openshell-version.ts b/src/lib/onboard-openshell-version.ts new file mode 100644 index 0000000000..2738e7d4bf --- /dev/null +++ b/src/lib/onboard-openshell-version.ts @@ -0,0 +1,86 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import path from "node:path"; + +export interface InstalledOpenshellVersionDeps { + runCapture?: (command: string, opts?: { ignoreError?: boolean }) => string; +} + +export function getInstalledOpenshellVersion( + versionOutput: string | null = null, + deps: InstalledOpenshellVersionDeps = {}, +): string | null { + const output = String( + versionOutput ?? deps.runCapture?.("openshell -V", { ignoreError: true }) ?? "", + ).trim(); + const match = output.match(/openshell\s+([0-9]+\.[0-9]+\.[0-9]+)/i); + if (!match) return null; + return match[1]; +} + +/** + * Compare two semver-like x.y.z strings. Returns true iff `left >= right`. + * Non-numeric or missing components are treated as 0. + */ +export function versionGte(left = "0.0.0", right = "0.0.0"): boolean { + const lhs = String(left) + .split(".") + .map((part) => Number.parseInt(part, 10) || 0); + const rhs = String(right) + .split(".") + .map((part) => Number.parseInt(part, 10) || 0); + const length = Math.max(lhs.length, rhs.length); + for (let index = 0; index < length; index += 1) { + const a = lhs[index] || 0; + const b = rhs[index] || 0; + if (a > b) return true; + if (a < b) return false; + } + return true; +} + +/** + * Read a semver field from nemoclaw-blueprint/blueprint.yaml. Returns null if + * the blueprint or field is missing or unparseable — callers must treat null + * as "no constraint configured" so a malformed install does not become a hard + * onboard blocker. See #1317. + */ +function getBlueprintVersionField(field: string, rootDir: string): string | null { + try { + // Lazy require: yaml is already a dependency via the policy helpers but + // pulling it at module load would slow down `nemoclaw --help` for users + // who never reach the preflight path. 
+ // eslint-disable-next-line @typescript-eslint/no-require-imports + const YAML = require("yaml"); + const blueprintPath = path.join(rootDir, "nemoclaw-blueprint", "blueprint.yaml"); + if (!fs.existsSync(blueprintPath)) return null; + const raw = fs.readFileSync(blueprintPath, "utf8"); + const parsed = YAML.parse(raw); + const value = parsed && parsed[field]; + if (typeof value !== "string") return null; + const trimmed = value.trim(); + if (!/^[0-9]+\.[0-9]+\.[0-9]+/.test(trimmed)) return null; + return trimmed; + } catch { + return null; + } +} + +export function getBlueprintMinOpenshellVersion(rootDir: string): string | null { + return getBlueprintVersionField("min_openshell_version", rootDir); +} + +export function getBlueprintMaxOpenshellVersion(rootDir: string): string | null { + return getBlueprintVersionField("max_openshell_version", rootDir); +} + +export function getStableGatewayImageRef( + versionOutput: string | null = null, + deps: InstalledOpenshellVersionDeps = {}, +): string | null { + const version = getInstalledOpenshellVersion(versionOutput, deps); + if (!version) return null; + return `ghcr.io/nvidia/openshell/cluster:${version}`; +} diff --git a/src/lib/onboard-openshell.test.ts b/src/lib/onboard-openshell.test.ts new file mode 100644 index 0000000000..8c9232180f --- /dev/null +++ b/src/lib/onboard-openshell.test.ts @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. 
+import { + installOpenshell, + isOpenshellInstalled, + waitForSandboxReady, +} from "../../dist/lib/onboard-openshell"; + +describe("onboard-openshell", () => { + it("detects whether OpenShell is installed", () => { + expect(isOpenshellInstalled(() => "/usr/bin/openshell")).toBe(true); + expect(isOpenshellInstalled(() => null)).toBe(false); + }); + + it("installs openshell and computes the future-shell PATH hint", () => { + const spawnSync = vi.fn(() => ({ status: 0, stdout: "", stderr: "" })); + const result = installOpenshell({ + scriptPath: "/repo/scripts/install-openshell.sh", + rootDir: "/repo", + env: { HOME: "/home/test", PATH: "/usr/local/bin:/usr/bin" }, + spawnSync, + existsSync: (filePath) => filePath === "/home/test/.local/bin/openshell", + resolveOpenshell: () => null, + getFutureShellPathHint: (binDir, pathValue) => + pathValue.includes(binDir) ? null : `export PATH=\"${binDir}:$PATH\"`, + }); + + expect(spawnSync).toHaveBeenCalledWith("bash", ["/repo/scripts/install-openshell.sh"], { + cwd: "/repo", + env: { HOME: "/home/test", PATH: "/usr/local/bin:/usr/bin" }, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + timeout: 300000, + }); + expect(result).toEqual({ + installed: true, + localBin: "/home/test/.local/bin", + futureShellPathHint: 'export PATH="/home/test/.local/bin:$PATH"', + updatedPathValue: "/home/test/.local/bin:/usr/local/bin:/usr/bin", + openshellBinary: "/home/test/.local/bin/openshell", + }); + }); + + it("returns a failure result and forwards installer output on install errors", () => { + const errorWriter = vi.fn(); + const result = installOpenshell({ + scriptPath: "/repo/scripts/install-openshell.sh", + rootDir: "/repo", + env: { HOME: "/home/test", PATH: "/usr/local/bin:/usr/bin" }, + spawnSync: () => ({ status: 1, stdout: "stdout failure", stderr: "stderr failure" }), + existsSync: () => false, + resolveOpenshell: () => null, + getFutureShellPathHint: () => null, + errorWriter, + }); + + expect(result).toEqual({ + 
installed: false, + localBin: null, + futureShellPathHint: null, + updatedPathValue: null, + openshellBinary: null, + }); + expect(errorWriter).toHaveBeenCalledWith("stdout failurestderr failure"); + }); + + it("waits for the sandbox pod to reach the Running phase", () => { + const calls: string[][] = []; + const result = waitForSandboxReady( + "alpha", + { + runCaptureOpenshell: (args) => { + calls.push(args); + return calls.length === 3 ? "Running" : "Pending"; + }, + sleep: vi.fn(), + }, + 5, + 1, + ); + + expect(result).toBe(true); + expect(calls).toHaveLength(3); + expect(calls[0]).toEqual([ + "doctor", + "exec", + "--", + "kubectl", + "-n", + "openshell", + "get", + "pod", + "alpha", + "-o", + "jsonpath={.status.phase}", + ]); + }); +}); diff --git a/src/lib/onboard-openshell.ts b/src/lib/onboard-openshell.ts new file mode 100644 index 0000000000..5c9ae5dc9c --- /dev/null +++ b/src/lib/onboard-openshell.ts @@ -0,0 +1,116 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import path from "node:path"; +import type { SpawnSyncReturns } from "node:child_process"; + +export interface InstallOpenshellResult { + installed: boolean; + localBin: string | null; + futureShellPathHint: string | null; + updatedPathValue: string | null; + openshellBinary: string | null; +} + +export function isOpenshellInstalled(resolveOpenshell: () => string | null): boolean { + return resolveOpenshell() !== null; +} + +export interface InstallOpenshellDeps { + scriptPath: string; + rootDir: string; + env: NodeJS.ProcessEnv; + spawnSync: ( + command: string, + args: string[], + options: { + cwd: string; + env: NodeJS.ProcessEnv; + stdio: ["ignore", "pipe", "pipe"]; + encoding: BufferEncoding; + timeout: number; + }, + ) => Pick<SpawnSyncReturns<string>, "status" | "stdout" | "stderr">; + existsSync: (filePath: string) => boolean; + resolveOpenshell: () => string | null; + getFutureShellPathHint: (binDir: string, pathValue: string) => string | null; + errorWriter?: (message?: string) => void; +} + +export function installOpenshell(deps: InstallOpenshellDeps): InstallOpenshellResult { + const errorWriter = deps.errorWriter ?? console.error; + const result = deps.spawnSync("bash", [deps.scriptPath], { + cwd: deps.rootDir, + env: deps.env, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + timeout: 300_000, + }); + if (result.status !== 0) { + const output = `${result.stdout || ""}${result.stderr || ""}`.trim(); + if (output) { + errorWriter(output); + } + return { + installed: false, + localBin: null, + futureShellPathHint: null, + updatedPathValue: null, + openshellBinary: null, + }; + } + + const localBin = deps.env.XDG_BIN_HOME || path.join(deps.env.HOME || "", ".local", "bin"); + const openshellPath = path.join(localBin, "openshell"); + const openshellExists = deps.existsSync(openshellPath); + const futureShellPathHint = openshellExists + ? 
deps.getFutureShellPathHint(localBin, deps.env.PATH || "") + : null; + const updatedPathValue = + openshellExists && futureShellPathHint + ? `${localBin}${path.delimiter}${deps.env.PATH || ""}` + : null; + const openshellBinary = deps.resolveOpenshell() ?? (openshellExists ? openshellPath : null); + return { + installed: openshellExists || openshellBinary !== null, + localBin, + futureShellPathHint, + updatedPathValue, + openshellBinary, + }; +} + +export interface WaitForSandboxReadyDeps { + runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string; + sleep: (seconds: number) => void; +} + +export function waitForSandboxReady( + sandboxName: string, + deps: WaitForSandboxReadyDeps, + attempts = 10, + delaySeconds = 2, +): boolean { + for (let i = 0; i < attempts; i += 1) { + const podPhase = deps.runCaptureOpenshell( + [ + "doctor", + "exec", + "--", + "kubectl", + "-n", + "openshell", + "get", + "pod", + sandboxName, + "-o", + "jsonpath={.status.phase}", + ], + { ignoreError: true }, + ); + if (podPhase === "Running") return true; + deps.sleep(delaySeconds); + } + return false; +} diff --git a/src/lib/onboard-orchestrator-deps.test.ts b/src/lib/onboard-orchestrator-deps.test.ts new file mode 100644 index 0000000000..dead0b11e3 --- /dev/null +++ b/src/lib/onboard-orchestrator-deps.test.ts @@ -0,0 +1,156 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import { createRequire } from "node:module"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const require = createRequire(import.meta.url); +const bootstrapDistPath = require.resolve("../../dist/lib/onboard-bootstrap"); +const contextDistPath = require.resolve("../../dist/lib/onboard-run-context"); +const depsDistPath = require.resolve("../../dist/lib/onboard-orchestrator-deps"); +const originalEnv = { ...process.env }; +let tmpDir: string; + +const clearDistModuleCache = () => { + delete require.cache[bootstrapDistPath]; + delete require.cache[contextDistPath]; + delete require.cache[depsDistPath]; +}; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-orchestrator-deps-")); + clearDistModuleCache(); + process.env.HOME = tmpDir; + process.env.NVIDIA_API_KEY = "test-key"; +}); + +afterEach(() => { + clearDistModuleCache(); + fs.rmSync(tmpDir, { recursive: true, force: true }); + for (const key of Object.keys(process.env)) { + delete process.env[key]; + } + Object.assign(process.env, originalEnv); +}); + +describe("createOnboardingOrchestratorDeps", () => { + it("builds orchestrator deps that wire legacy helpers into the extracted flows", async () => { + const { initializeOnboardRun } = require("../../dist/lib/onboard-bootstrap"); + const { createOnboardRunContext } = require("../../dist/lib/onboard-run-context"); + const { createOnboardingOrchestratorDeps } = require("../../dist/lib/onboard-orchestrator-deps"); + + const initializedRun = initializeOnboardRun({ + resume: false, + mode: "interactive", + requestedFromDockerfile: null, + requestedAgent: "hermes", + }); + expect(initializedRun.ok).toBe(true); + if (!initializedRun.ok) { + throw new Error("expected onboarding initialization to succeed"); + } + + const runContext = createOnboardRunContext(initializedRun.value); + 
const runCaptureOpenshell = vi.fn(() => "ok"); + const runOpenshell = vi.fn(() => ({ status: 0 })); + const updateSandbox = vi.fn(); + const handleAgentSetup = vi.fn(async () => {}); + const setupPoliciesWithSelection = vi.fn(async (_sandboxName, options) => { + options.onSelection(["npm"]); + return ["npm"]; + }); + + const deps = createOnboardingOrchestratorDeps(runContext, { + resume: true, + dangerouslySkipPermissions: false, + requestedAgent: "hermes", + gatewayName: "nemoclaw", + dashboardPort: 18789, + resolveAgent: () => ({ name: "hermes" }), + note: () => {}, + log: () => {}, + skippedStepMessage: () => {}, + step: () => {}, + preflight: async () => null, + detectGpu: () => null, + runCaptureOpenshell, + getGatewayReuseState: () => "missing", + verifyGatewayContainerRunning: () => "running", + runOpenshell, + destroyGateway: () => {}, + clearRegistryAll: () => {}, + startGateway: async () => {}, + setupNim: async () => ({ + model: "gpt-5.4", + provider: "openai-api", + endpointUrl: "https://api.openai.com/v1", + credentialEnv: "OPENAI_API_KEY", + preferredInferenceApi: "responses", + nimContainer: null, + }), + setupInference: async () => {}, + isInferenceRouteReady: () => false, + hydrateCredentialEnv: () => {}, + getOpenshellBinary: () => "/usr/bin/openshell", + updateSandbox, + setupMessagingChannels: async () => ["telegram"], + configureWebSearch: async () => null, + ensureValidatedBraveSearchCredential: async () => null, + getSandboxReuseState: () => "missing", + removeSandbox: () => {}, + repairRecordedSandbox: () => {}, + createSandbox: async () => "alpha", + handleAgentSetup, + openshellShellCommand: () => "openshell shell cmd", + buildSandboxConfigSyncScript: () => "echo config", + writeSandboxConfigSyncFile: () => "/tmp/config.sh", + cleanupTempDir: () => {}, + isOpenclawReady: () => false, + setupOpenclaw: async () => {}, + waitForSandboxReady: () => true, + applyPermissivePolicy: () => {}, + arePolicyPresetsApplied: () => false, + 
setupPoliciesWithSelection, + }); + + expect(deps.resume).toBe(true); + expect(deps.requestedAgent).toBe("hermes"); + expect(deps.host.run.name).toBe("runHostPreparationFlow"); + expect(deps.inference.run.name).toBe("runInferenceSelectionLoop"); + expect(deps.sandbox.run.name).toBe("runSandboxProvisioningFlow"); + expect(deps.runtime.run.name).toBe("runRuntimeSetupFlow"); + expect(deps.policy.run.name).toBe("runPolicySetupFlow"); + + deps.host.getNamedGatewayInfo(); + deps.host.getActiveGatewayInfo(); + deps.host.stopDashboardForward(); + deps.inference.setOpenshellBinary("/tmp/openshell"); + deps.inference.clearSensitiveEnv("NVIDIA_API_KEY"); + deps.sandbox.persistRegistryModelProvider("alpha", { model: "gpt-5.4", provider: "openai-api" }); + await deps.runtime.handleAgentSetup("alpha", "gpt-5.4", "openai-api", { name: "hermes" }, true, { id: 1 }); + await deps.policy.setupPoliciesWithSelection("alpha", { + selectedPresets: ["npm"], + enabledChannels: [], + webSearchConfig: null, + provider: "openai-api", + onSelection: () => {}, + }); + + expect(runCaptureOpenshell).toHaveBeenNthCalledWith(1, ["gateway", "info", "-g", "nemoclaw"], { + ignoreError: true, + }); + expect(runCaptureOpenshell).toHaveBeenNthCalledWith(2, ["gateway", "info"], { + ignoreError: true, + }); + expect(runOpenshell).toHaveBeenCalledWith(["forward", "stop", "18789"], { ignoreError: true }); + expect(process.env.NEMOCLAW_OPENSHELL_BIN).toBe("/tmp/openshell"); + expect(process.env.NVIDIA_API_KEY).toBeUndefined(); + expect(updateSandbox).toHaveBeenCalledWith("alpha", { model: "gpt-5.4", provider: "openai-api" }); + expect(handleAgentSetup).toHaveBeenCalled(); + expect(setupPoliciesWithSelection).toHaveBeenCalled(); + }); +}); diff --git a/src/lib/onboard-orchestrator-deps.ts b/src/lib/onboard-orchestrator-deps.ts new file mode 100644 index 0000000000..ebe2705132 --- /dev/null +++ b/src/lib/onboard-orchestrator-deps.ts @@ -0,0 +1,242 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { GatewayReuseState } from "./gateway-state"; +import { runHostPreparationFlow } from "./onboard-host-flow"; +import { runInferenceSelectionLoop } from "./onboard-inference-loop"; +import { runPolicySetupFlow } from "./onboard-policy-flow"; +import type { OnboardOrchestratorDeps } from "./onboard-orchestrator"; +import type { OnboardRunContext } from "./onboard-run-context"; +import { runRuntimeSetupFlow } from "./onboard-runtime-flow"; +import { runSandboxProvisioningFlow } from "./onboard-sandbox-flow"; +import type { Session } from "./onboard-session"; +import type { WebSearchConfig } from "./web-search"; + +export interface CreateOnboardingOrchestratorDepsInput< + TGpu = unknown, + TAgent extends { name: string } = { name: string }, +> { + resume: boolean; + dangerouslySkipPermissions: boolean; + requestedAgent: string | null; + gatewayName: string; + dashboardPort: number; + resolveAgent: (options: { agentFlag?: string | null; session?: Session | null }) => TAgent | null; + note: (message: string) => void; + log: (message: string) => void; + skippedStepMessage: ( + stepName: string, + detail: string | null, + reason?: "resume" | "reuse", + ) => void; + step: (current: number, total: number, message: string) => void; + preflight: () => Promise; + detectGpu: () => TGpu; + runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string; + getGatewayReuseState: ( + statusOutput: string, + gwInfoOutput: string, + activeGatewayInfoOutput: string, + ) => GatewayReuseState; + verifyGatewayContainerRunning: () => "running" | "missing" | "unknown"; + runOpenshell: ( + args: string[], + opts?: { ignoreError?: boolean }, + ) => { status: number; stdout?: string; stderr?: string }; + destroyGateway: () => void; + clearRegistryAll: () => void; + startGateway: (gpu: TGpu) => Promise; + setupNim: (gpu: TGpu) => Promise<{ + model: string; + provider: string; 
+ endpointUrl: string | null; + credentialEnv: string | null; + preferredInferenceApi: string | null; + nimContainer: string | null; + }>; + setupInference: ( + sandboxName: string | null, + model: string, + provider: string, + endpointUrl: string | null, + credentialEnv: string | null, + ) => Promise<{ retry?: "selection" } | void>; + isInferenceRouteReady: (provider: string, model: string) => boolean; + hydrateCredentialEnv: (credentialEnv: string | null) => void; + getOpenshellBinary: () => string; + updateSandbox: (sandboxName: string | null, patch: Record) => void; + setupMessagingChannels: () => Promise; + configureWebSearch: (_existing: null) => Promise; + ensureValidatedBraveSearchCredential: () => Promise; + getSandboxReuseState: (sandboxName: string | null) => string; + removeSandbox: (sandboxName: string) => void; + repairRecordedSandbox: (sandboxName: string) => void; + createSandbox: ( + gpu: TGpu, + model: string, + provider: string, + preferredInferenceApi: string | null, + sandboxName: string | null, + webSearchConfig: WebSearchConfig | null, + messagingChannels: string[], + fromDockerfile: string | null, + agent: TAgent | null, + dangerouslySkipPermissions: boolean, + ) => Promise; + handleAgentSetup: ( + sandboxName: string, + model: string, + provider: string, + agent: TAgent, + resume: boolean, + session: unknown, + ctx: { + step: (current: number, total: number, message: string) => void; + runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string; + openshellShellCommand: (args: string[], options?: { openshellBinary?: string }) => string; + buildSandboxConfigSyncScript: (config: Record) => string; + writeSandboxConfigSyncFile: (script: string) => string; + cleanupTempDir: (file: string, prefix: string) => void; + startRecordedStep: (stepName: string, updates: Record) => void; + skippedStepMessage: (stepName: string, sandboxName: string) => void; + }, + ) => Promise; + openshellShellCommand: (args: string[], options?: { 
openshellBinary?: string }) => string; + buildSandboxConfigSyncScript: (config: Record) => string; + writeSandboxConfigSyncFile: (script: string) => string; + cleanupTempDir: (file: string, prefix: string) => void; + isOpenclawReady: (sandboxName: string) => boolean; + setupOpenclaw: (sandboxName: string, model: string, provider: string) => Promise; + waitForSandboxReady: (sandboxName: string) => boolean; + applyPermissivePolicy: (sandboxName: string) => void; + arePolicyPresetsApplied: (sandboxName: string, selectedPresets: string[]) => boolean; + setupPoliciesWithSelection: ( + sandboxName: string, + options: { + selectedPresets: string[] | null; + enabledChannels: string[]; + webSearchConfig: WebSearchConfig | null; + provider: string; + onSelection: (policyPresets: string[]) => void; + }, + ) => Promise; +} + +export function createOnboardingOrchestratorDeps< + TGpu = unknown, + TAgent extends { name: string } = { name: string }, +>( + runContext: OnboardRunContext, + input: CreateOnboardingOrchestratorDepsInput, +): OnboardOrchestratorDeps { + return { + resume: input.resume, + dangerouslySkipPermissions: input.dangerouslySkipPermissions, + requestedAgent: input.requestedAgent, + resolveAgent: input.resolveAgent, + note: input.note, + log: input.log, + skippedStepMessage: input.skippedStepMessage, + showPolicyHeader: () => { + input.step(8, 8, "Policy presets"); + }, + host: { + run: runHostPreparationFlow, + preflight: input.preflight, + detectGpu: input.detectGpu, + getGatewayStatus: () => input.runCaptureOpenshell(["status"], { ignoreError: true }), + getNamedGatewayInfo: () => + input.runCaptureOpenshell(["gateway", "info", "-g", input.gatewayName], { + ignoreError: true, + }), + getActiveGatewayInfo: () => input.runCaptureOpenshell(["gateway", "info"], { ignoreError: true }), + getGatewayReuseState: input.getGatewayReuseState, + verifyGatewayContainerRunning: input.verifyGatewayContainerRunning, + stopDashboardForward: () => { + 
input.runOpenshell(["forward", "stop", String(input.dashboardPort)], { ignoreError: true }); + }, + destroyGateway: input.destroyGateway, + clearRegistryAll: input.clearRegistryAll, + startGateway: input.startGateway, + }, + inference: { + run: runInferenceSelectionLoop, + setupNim: input.setupNim, + setupInference: input.setupInference, + isInferenceRouteReady: input.isInferenceRouteReady, + hydrateCredentialEnv: input.hydrateCredentialEnv, + getOpenshellBinary: input.getOpenshellBinary, + setOpenshellBinary: (binary) => { + process.env.NEMOCLAW_OPENSHELL_BIN = binary; + }, + clearSensitiveEnv: (credentialEnv) => { + if (credentialEnv) { + delete process.env[credentialEnv]; + } + }, + updateSandboxNimContainer: (nextSandboxName, nextNimContainer) => { + input.updateSandbox(nextSandboxName, { nimContainer: nextNimContainer }); + }, + }, + sandbox: { + run: runSandboxProvisioningFlow, + setupMessagingChannels: input.setupMessagingChannels, + configureWebSearch: input.configureWebSearch, + ensureValidatedBraveSearchCredential: input.ensureValidatedBraveSearchCredential, + getSandboxReuseState: input.getSandboxReuseState, + removeSandbox: input.removeSandbox, + repairRecordedSandbox: input.repairRecordedSandbox, + createSandbox: input.createSandbox, + persistRegistryModelProvider: (name, patch) => { + // Persist model and provider after the sandbox entry exists in the registry. + // updateSandbox() silently no-ops when the entry is missing, so this must + // run after createSandbox() / registerSandbox() — not before. Fixes #1881. 
+ input.updateSandbox(name, patch); + }, + }, + runtime: { + run: runRuntimeSetupFlow, + handleAgentSetup: async ( + nextSandboxName, + nextModel, + nextProvider, + nextAgent, + nextResume, + nextSession, + ) => { + if (nextAgent === null) { + throw new Error("Agent runtime setup requested without an agent."); + } + await input.handleAgentSetup( + nextSandboxName, + nextModel, + nextProvider, + nextAgent, + nextResume, + nextSession, + { + step: input.step, + runCaptureOpenshell: input.runCaptureOpenshell, + openshellShellCommand: input.openshellShellCommand, + buildSandboxConfigSyncScript: input.buildSandboxConfigSyncScript, + writeSandboxConfigSyncFile: input.writeSandboxConfigSyncFile, + cleanupTempDir: input.cleanupTempDir, + startRecordedStep: (stepName, updates) => { + runContext.startStep(stepName as never, updates as never); + }, + skippedStepMessage: input.skippedStepMessage, + }, + ); + }, + isOpenclawReady: input.isOpenclawReady, + setupOpenclaw: input.setupOpenclaw, + }, + policy: { + run: runPolicySetupFlow, + waitForSandboxReady: input.waitForSandboxReady, + applyPermissivePolicy: input.applyPermissivePolicy, + arePolicyPresetsApplied: input.arePolicyPresetsApplied, + setupPoliciesWithSelection: input.setupPoliciesWithSelection, + }, + }; +} diff --git a/src/lib/onboard-orchestrator.test.ts b/src/lib/onboard-orchestrator.test.ts new file mode 100644 index 0000000000..44aa0fb4a3 --- /dev/null +++ b/src/lib/onboard-orchestrator.test.ts @@ -0,0 +1,432 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import { createRequire } from "node:module"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import type { InferenceLoopDeps, InferenceLoopState } from "./onboard-inference-loop"; +import type { Session } from "./onboard-session"; + +const require = createRequire(import.meta.url); +const bootstrapDistPath = require.resolve("../../dist/lib/onboard-bootstrap"); +const contextDistPath = require.resolve("../../dist/lib/onboard-run-context"); +const orchestratorDistPath = require.resolve("../../dist/lib/onboard-orchestrator"); +const sessionDistPath = require.resolve("../../dist/lib/onboard-session"); +const flowStateDistPath = require.resolve("../../dist/lib/onboard-flow-state"); +const driverDistPath = require.resolve("../../dist/lib/onboard-persistent-driver"); +const originalHome = process.env.HOME; +let tmpDir: string; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-orchestrator-")); + process.env.HOME = tmpDir; + delete require.cache[bootstrapDistPath]; + delete require.cache[contextDistPath]; + delete require.cache[orchestratorDistPath]; + delete require.cache[sessionDistPath]; + delete require.cache[flowStateDistPath]; + delete require.cache[driverDistPath]; +}); + +afterEach(() => { + delete require.cache[bootstrapDistPath]; + delete require.cache[contextDistPath]; + delete require.cache[orchestratorDistPath]; + delete require.cache[sessionDistPath]; + delete require.cache[flowStateDistPath]; + delete require.cache[driverDistPath]; + fs.rmSync(tmpDir, { recursive: true, force: true }); + if (originalHome === undefined) { + delete process.env.HOME; + } else { + process.env.HOME = originalHome; + } +}); + +describe("runOnboardingOrchestrator", () => { + it("coordinates the extracted helper flows and completes the session", async () => { + const { initializeOnboardRun } = 
require("../../dist/lib/onboard-bootstrap"); + const { createOnboardRunContext } = require("../../dist/lib/onboard-run-context"); + const { runOnboardingOrchestrator } = require("../../dist/lib/onboard-orchestrator"); + + const initializedRun = initializeOnboardRun({ + resume: false, + mode: "non-interactive", + requestedFromDockerfile: "./Dockerfile.custom", + requestedAgent: null, + }); + expect(initializedRun.ok).toBe(true); + if (!initializedRun.ok) { + throw new Error("expected onboarding initialization to succeed"); + } + + const runContext = createOnboardRunContext(initializedRun.value); + const hostRun = vi.fn(async () => ({ gpu: { kind: "spark" }, gatewayReuseState: "missing" })); + const inferenceRun = vi.fn(async () => ({ + sandboxName: null, + model: "gpt-5.4", + provider: "openai-api", + endpointUrl: "https://api.openai.com/v1", + credentialEnv: "OPENAI_API_KEY", + preferredInferenceApi: "responses", + nimContainer: null, + })); + const sandboxRun = vi.fn(async () => ({ + gpu: { kind: "spark" }, + sandboxName: "alpha", + model: "gpt-5.4", + provider: "openai-api", + preferredInferenceApi: "responses", + webSearchConfig: { fetchEnabled: true }, + selectedMessagingChannels: ["telegram"], + nimContainer: null, + fromDockerfile: path.resolve("./Dockerfile.custom"), + agent: null, + dangerouslySkipPermissions: false, + })); + const runtimeRun = vi.fn(async () => {}); + const policyRun = vi.fn(async () => ({ kind: "complete", policyPresets: ["npm"] })); + + const result = await runOnboardingOrchestrator(runContext, { + resume: false, + dangerouslySkipPermissions: false, + requestedAgent: null, + resolveAgent: () => null, + note: () => {}, + log: () => {}, + skippedStepMessage: () => {}, + showPolicyHeader: () => {}, + host: { + run: hostRun, + preflight: async () => ({ kind: "spark" }), + detectGpu: () => ({ kind: "cached" }), + getGatewayStatus: () => "status", + getNamedGatewayInfo: () => "named-info", + getActiveGatewayInfo: () => "active-info", + 
getGatewayReuseState: () => "missing", + verifyGatewayContainerRunning: () => "running", + stopDashboardForward: () => {}, + destroyGateway: () => {}, + clearRegistryAll: () => {}, + startGateway: async () => {}, + }, + inference: { + run: inferenceRun, + gpu: null, + setupNim: async () => { + throw new Error("unused in orchestrator test"); + }, + setupInference: async () => {}, + isInferenceRouteReady: () => false, + hydrateCredentialEnv: () => {}, + getOpenshellBinary: () => "/usr/bin/openshell", + setOpenshellBinary: () => {}, + clearSensitiveEnv: () => {}, + updateSandboxNimContainer: () => {}, + }, + sandbox: { + run: sandboxRun, + sessionMessagingChannels: null, + sessionWebSearchConfig: null, + hasCompletedMessaging: false, + hasCompletedSandbox: false, + setupMessagingChannels: async () => ["telegram"], + configureWebSearch: async () => ({ fetchEnabled: true }), + ensureValidatedBraveSearchCredential: async () => null, + getSandboxReuseState: () => "missing", + removeSandbox: () => {}, + repairRecordedSandbox: () => {}, + createSandbox: async () => "alpha", + persistRegistryModelProvider: () => {}, + }, + runtime: { + run: runtimeRun, + hasCompletedRuntimeSetup: false, + handleAgentSetup: async () => {}, + isOpenclawReady: () => false, + setupOpenclaw: async () => {}, + }, + policy: { + run: policyRun, + waitForSandboxReady: () => true, + applyPermissivePolicy: () => {}, + arePolicyPresetsApplied: () => false, + setupPoliciesWithSelection: async () => ["npm"], + }, + }); + + expect(result).toEqual({ + sandboxName: "alpha", + model: "gpt-5.4", + provider: "openai-api", + nimContainer: null, + agent: null, + policyResult: { kind: "complete", policyPresets: ["npm"] }, + }); + expect(runContext.session.status).toBe("complete"); + expect(runContext.session.policyPresets).toEqual(["npm"]); + expect(hostRun).toHaveBeenCalledTimes(1); + expect(inferenceRun).toHaveBeenCalledTimes(1); + expect(sandboxRun).toHaveBeenCalledTimes(1); + 
expect(runtimeRun).toHaveBeenCalledTimes(1); + expect(policyRun).toHaveBeenCalledTimes(1); + }); + + it("preserves explicit null session updates from extracted flow callbacks", async () => { + const { initializeOnboardRun } = require("../../dist/lib/onboard-bootstrap"); + const { createOnboardRunContext } = require("../../dist/lib/onboard-run-context"); + const { runOnboardingOrchestrator } = require("../../dist/lib/onboard-orchestrator"); + + const initializedRun = initializeOnboardRun({ + resume: false, + mode: "interactive", + requestedFromDockerfile: null, + requestedAgent: null, + }); + expect(initializedRun.ok).toBe(true); + if (!initializedRun.ok) { + throw new Error("expected onboarding initialization to succeed"); + } + + const runContext = createOnboardRunContext(initializedRun.value); + runContext.updateSession((session: Session) => { + session.endpointUrl = "https://old.example.com/v1"; + session.credentialEnv = "COMPATIBLE_API_KEY"; + session.preferredInferenceApi = "responses"; + session.nimContainer = "nim-stale"; + return session; + }); + + await runOnboardingOrchestrator(runContext, { + resume: false, + dangerouslySkipPermissions: false, + requestedAgent: null, + resolveAgent: () => null, + note: () => {}, + log: () => {}, + skippedStepMessage: () => {}, + showPolicyHeader: () => {}, + host: { + run: async () => ({ gpu: null, gatewayReuseState: "missing" }), + preflight: async () => null, + detectGpu: () => null, + getGatewayStatus: () => "status", + getNamedGatewayInfo: () => "named-info", + getActiveGatewayInfo: () => "active-info", + getGatewayReuseState: () => "missing", + verifyGatewayContainerRunning: () => "running", + stopDashboardForward: () => {}, + destroyGateway: () => {}, + clearRegistryAll: () => {}, + startGateway: async () => {}, + }, + inference: { + run: async ( + state: InferenceLoopState, + deps: Pick, + ) => { + deps.onCompleteStep("provider_selection", { + provider: "openai-api", + model: "gpt-5.4", + endpointUrl: null, + 
credentialEnv: null, + preferredInferenceApi: null, + nimContainer: null, + }); + return { + ...state, + model: "gpt-5.4", + provider: "openai-api", + endpointUrl: null, + credentialEnv: null, + preferredInferenceApi: null, + nimContainer: null, + }; + }, + gpu: null, + setupNim: async () => { + throw new Error("unused in orchestrator test"); + }, + setupInference: async () => {}, + isInferenceRouteReady: () => false, + hydrateCredentialEnv: () => {}, + getOpenshellBinary: () => "/usr/bin/openshell", + setOpenshellBinary: () => {}, + clearSensitiveEnv: () => {}, + updateSandboxNimContainer: () => {}, + }, + sandbox: { + run: async () => ({ + gpu: null, + sandboxName: "alpha", + model: "gpt-5.4", + provider: "openai-api", + preferredInferenceApi: null, + webSearchConfig: null, + selectedMessagingChannels: [], + nimContainer: null, + fromDockerfile: null, + agent: null, + dangerouslySkipPermissions: false, + }), + sessionMessagingChannels: null, + sessionWebSearchConfig: null, + hasCompletedMessaging: false, + hasCompletedSandbox: false, + setupMessagingChannels: async () => [], + configureWebSearch: async () => null, + ensureValidatedBraveSearchCredential: async () => null, + getSandboxReuseState: () => "missing", + removeSandbox: () => {}, + repairRecordedSandbox: () => {}, + createSandbox: async () => "alpha", + persistRegistryModelProvider: () => {}, + }, + runtime: { + run: async () => {}, + hasCompletedRuntimeSetup: false, + handleAgentSetup: async () => {}, + isOpenclawReady: () => false, + setupOpenclaw: async () => {}, + }, + policy: { + run: async () => ({ kind: "complete", policyPresets: [] }), + waitForSandboxReady: () => true, + applyPermissivePolicy: () => {}, + arePolicyPresetsApplied: () => false, + setupPoliciesWithSelection: async () => [], + }, + }); + + expect(runContext.session.endpointUrl).toBeNull(); + expect(runContext.session.credentialEnv).toBeNull(); + expect(runContext.session.preferredInferenceApi).toBeNull(); + 
expect(runContext.session.nimContainer).toBeNull(); + }); + + it("leaves the session in progress when policy setup returns sandbox_not_ready", async () => { + const { initializeOnboardRun } = require("../../dist/lib/onboard-bootstrap"); + const { createOnboardRunContext } = require("../../dist/lib/onboard-run-context"); + const { runOnboardingOrchestrator } = require("../../dist/lib/onboard-orchestrator"); + + const initializedRun = initializeOnboardRun({ + resume: false, + mode: "interactive", + requestedFromDockerfile: null, + requestedAgent: "hermes", + }); + expect(initializedRun.ok).toBe(true); + if (!initializedRun.ok) { + throw new Error("expected onboarding initialization to succeed"); + } + + const runContext = createOnboardRunContext(initializedRun.value); + const runtimeRun = vi.fn(async (state) => { + expect(state.agent).toEqual({ name: "hermes" }); + }); + + const result = await runOnboardingOrchestrator(runContext, { + resume: false, + dangerouslySkipPermissions: true, + requestedAgent: "hermes", + resolveAgent: () => ({ name: "hermes" }), + note: () => {}, + log: () => {}, + skippedStepMessage: () => {}, + showPolicyHeader: () => {}, + host: { + run: async () => ({ gpu: null, gatewayReuseState: "missing" }), + preflight: async () => null, + detectGpu: () => null, + getGatewayStatus: () => "status", + getNamedGatewayInfo: () => "named-info", + getActiveGatewayInfo: () => "active-info", + getGatewayReuseState: () => "missing", + verifyGatewayContainerRunning: () => "running", + stopDashboardForward: () => {}, + destroyGateway: () => {}, + clearRegistryAll: () => {}, + startGateway: async () => {}, + }, + inference: { + run: async () => ({ + sandboxName: null, + model: "meta/llama-3.3-70b-instruct", + provider: "nvidia-prod", + endpointUrl: "https://integrate.api.nvidia.com/v1", + credentialEnv: "NVIDIA_API_KEY", + preferredInferenceApi: "openai-completions", + nimContainer: null, + }), + gpu: null, + setupNim: async () => { + throw new Error("unused in 
orchestrator test"); + }, + setupInference: async () => {}, + isInferenceRouteReady: () => false, + hydrateCredentialEnv: () => {}, + getOpenshellBinary: () => "/usr/bin/openshell", + setOpenshellBinary: () => {}, + clearSensitiveEnv: () => {}, + updateSandboxNimContainer: () => {}, + }, + sandbox: { + run: async () => ({ + gpu: null, + sandboxName: "alpha", + model: "meta/llama-3.3-70b-instruct", + provider: "nvidia-prod", + preferredInferenceApi: "openai-completions", + webSearchConfig: null, + selectedMessagingChannels: [], + nimContainer: null, + fromDockerfile: null, + agent: { name: "hermes" }, + dangerouslySkipPermissions: true, + }), + sessionMessagingChannels: null, + sessionWebSearchConfig: null, + hasCompletedMessaging: false, + hasCompletedSandbox: false, + setupMessagingChannels: async () => [], + configureWebSearch: async () => null, + ensureValidatedBraveSearchCredential: async () => null, + getSandboxReuseState: () => "missing", + removeSandbox: () => {}, + repairRecordedSandbox: () => {}, + createSandbox: async () => "alpha", + persistRegistryModelProvider: () => {}, + }, + runtime: { + run: runtimeRun, + hasCompletedRuntimeSetup: false, + handleAgentSetup: async () => {}, + isOpenclawReady: () => false, + setupOpenclaw: async () => {}, + }, + policy: { + run: async () => ({ + kind: "sandbox_not_ready", + message: " ✗ Sandbox 'alpha' not ready after creation. Giving up.", + }), + waitForSandboxReady: () => false, + applyPermissivePolicy: () => {}, + arePolicyPresetsApplied: () => false, + setupPoliciesWithSelection: async () => [], + }, + }); + + expect(result.agent).toEqual({ name: "hermes" }); + expect(result.policyResult).toEqual({ + kind: "sandbox_not_ready", + message: " ✗ Sandbox 'alpha' not ready after creation. 
Giving up.", + }); + expect(runContext.session.status).toBe("in_progress"); + expect(runContext.session.agent).toBe("hermes"); + expect(runtimeRun).toHaveBeenCalledTimes(1); + }); +}); diff --git a/src/lib/onboard-orchestrator.ts b/src/lib/onboard-orchestrator.ts new file mode 100644 index 0000000000..b42952ba4e --- /dev/null +++ b/src/lib/onboard-orchestrator.ts @@ -0,0 +1,378 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { hasCompletedOnboardStep } from "./onboard-flow-state"; +import type { + HostPreparationDeps, + HostPreparationResult, +} from "./onboard-host-flow"; +import type { + InferenceLoopDeps, + InferenceLoopResult, + InferenceLoopState, +} from "./onboard-inference-loop"; +import type { OnboardStepName, OnboardVisibleStep } from "./onboard-fsm"; +import type { + PolicyFlowDeps, + PolicyFlowResult, + PolicyFlowState, +} from "./onboard-policy-flow"; +import type { OnboardRunContext } from "./onboard-run-context"; +import type { + RuntimeSetupDeps, + RuntimeSetupState, +} from "./onboard-runtime-flow"; +import type { + SandboxFlowDeps, + SandboxFlowResult, + SandboxFlowState, +} from "./onboard-sandbox-flow"; +import type { Session, SessionUpdates } from "./onboard-session"; + +export interface OnboardOrchestratorDeps< + TGpu = unknown, + TAgent extends { name: string } = { name: string }, +> { + resume: boolean; + dangerouslySkipPermissions: boolean; + requestedAgent: string | null; + resolveAgent: (options: { agentFlag?: string | null; session?: Session | null }) => TAgent | null; + note: (message: string) => void; + log: (message: string) => void; + skippedStepMessage: ( + stepName: string, + detail: string | null, + reason?: "resume" | "reuse", + ) => void; + showPolicyHeader: () => void; + host: Omit< + HostPreparationDeps, + | "resume" + | "hasCompletedPreflight" + | "hasCompletedGateway" + | "onNote" + | "onLog" + | "onSkip" + | 
"onStartStep" + | "onCompleteStep" + > & { + run: (deps: HostPreparationDeps) => Promise>; + }; + inference: Omit< + InferenceLoopDeps, + | "gpu" + | "resume" + | "hasCompletedProviderSelection" + | "hasCompletedInference" + | "onSkip" + | "onStartStep" + | "onCompleteStep" + > & { + run: ( + initialState: InferenceLoopState, + deps: InferenceLoopDeps, + ) => Promise; + }; + sandbox: Omit< + SandboxFlowDeps, + | "resume" + | "sessionMessagingChannels" + | "sessionWebSearchConfig" + | "hasCompletedMessaging" + | "hasCompletedSandbox" + | "onNote" + | "onSkip" + | "onStartStep" + | "onCompleteStep" + > & { + run: ( + initialState: SandboxFlowState, + deps: SandboxFlowDeps, + ) => Promise>; + }; + runtime: Omit< + RuntimeSetupDeps, + | "hasCompletedRuntimeSetup" + | "onSkip" + | "onStartStep" + | "onCompleteStep" + | "onSkipSiblingStep" + > & { + run: ( + state: RuntimeSetupState, + deps: RuntimeSetupDeps, + ) => Promise; + }; + policy: Omit< + PolicyFlowDeps, + | "resume" + | "dangerouslySkipPermissions" + | "hasCompletedPolicies" + | "onShowHeader" + | "onSkip" + | "onStartStep" + | "onCompleteStep" + | "onSelectionPersist" + > & { + run: (state: PolicyFlowState, deps: PolicyFlowDeps) => Promise; + }; +} + +export interface OnboardOrchestratorResult { + sandboxName: string; + model: string; + provider: string; + nimContainer: string | null; + agent: TAgent | null; + policyResult: PolicyFlowResult; +} + +function normalizeSessionUpdates( + updates: + | { + sandboxName?: string | null; + provider?: string | null; + model?: string | null; + endpointUrl?: string | null; + credentialEnv?: string | null; + preferredInferenceApi?: string | null; + nimContainer?: string | null; + messagingChannels?: string[]; + policyPresets?: string[]; + webSearchConfig?: SessionUpdates["webSearchConfig"]; + } + | undefined, +): SessionUpdates { + if (!updates) { + return {}; + } + const normalized: SessionUpdates = {}; + if (updates.sandboxName === null) normalized.sandboxName = null; + 
else if (typeof updates.sandboxName === "string") normalized.sandboxName = updates.sandboxName; + if (updates.provider === null) normalized.provider = null; + else if (typeof updates.provider === "string") normalized.provider = updates.provider; + if (updates.model === null) normalized.model = null; + else if (typeof updates.model === "string") normalized.model = updates.model; + if (updates.endpointUrl === null) normalized.endpointUrl = null; + else if (typeof updates.endpointUrl === "string") normalized.endpointUrl = updates.endpointUrl; + if (updates.credentialEnv === null) normalized.credentialEnv = null; + else if (typeof updates.credentialEnv === "string") normalized.credentialEnv = updates.credentialEnv; + if (updates.preferredInferenceApi === null) normalized.preferredInferenceApi = null; + else if (typeof updates.preferredInferenceApi === "string") { + normalized.preferredInferenceApi = updates.preferredInferenceApi; + } + if (updates.nimContainer === null) normalized.nimContainer = null; + else if (typeof updates.nimContainer === "string") normalized.nimContainer = updates.nimContainer; + if (Array.isArray(updates.messagingChannels)) { + normalized.messagingChannels = updates.messagingChannels; + } + if (Array.isArray(updates.policyPresets)) { + normalized.policyPresets = updates.policyPresets; + } + if (updates.webSearchConfig !== undefined) { + normalized.webSearchConfig = updates.webSearchConfig; + } + return normalized; +} + +function requireString(value: string | null, label: string): string { + if (typeof value !== "string") { + throw new Error(`${label} was not resolved during onboarding orchestration.`); + } + return value; +} + +export async function runOnboardingOrchestrator< + TGpu = unknown, + TAgent extends { name: string } = { name: string }, +>( + runContext: OnboardRunContext, + deps: OnboardOrchestratorDeps, +): Promise> { + const agent = deps.resolveAgent({ + agentFlag: deps.requestedAgent, + session: runContext.session, + }); + if 
/**
 * Drive the full onboarding sequence: host preparation, inference/provider
 * selection, sandbox creation, runtime setup, and policy configuration.
 *
 * Each phase is an injected sub-flow (`deps.host`, `deps.inference`, …) whose
 * step callbacks are bridged back into the run context so progress is
 * persisted as it happens. Values resolved by earlier phases (sandbox name,
 * model, provider, …) are threaded forward through local mutable state.
 *
 * @returns The resolved session values plus the policy-flow outcome.
 * @throws Error (via requireString) if a phase finishes without resolving a
 *   value a later phase depends on.
 */
export async function runOnboardingOrchestrator<
  TGpu = unknown,
  TAgent extends { name: string } = { name: string },
>(
  runContext: OnboardRunContext,
  deps: OnboardOrchestratorDeps,
): Promise<OnboardOrchestratorResult<TAgent>> {
  // Resolve the agent first (from CLI flag or stored session) and persist
  // the choice immediately so a resumed run sees it.
  const agent = deps.resolveAgent({
    agentFlag: deps.requestedAgent,
    session: runContext.session,
  });
  if (agent) {
    runContext.updateSession((session) => {
      session.agent = agent.name;
      return session;
    });
  }

  // On resume, completion checks are answered from the persisted flow state;
  // on a fresh run every step reports "not completed".
  const resumeFlowState = deps.resume ? runContext.driver.flowState : null;
  const hasCompleted = (stepName: OnboardVisibleStep): boolean =>
    !!resumeFlowState && hasCompletedOnboardStep(resumeFlowState, stepName);
  const startStep = (stepName: OnboardStepName, updates?: SessionUpdates): void => {
    runContext.startStep(stepName, updates);
  };
  const completeStep = (stepName: OnboardStepName, updates?: SessionUpdates): void => {
    runContext.completeStep(stepName, updates);
  };

  // Phase 1: host preparation (preflight + gateway). Yields the detected GPU
  // (shape defined by the host flow) used by inference and sandbox phases.
  const { gpu } = await deps.host.run({
    ...deps.host,
    resume: deps.resume,
    hasCompletedPreflight: hasCompleted("preflight"),
    hasCompletedGateway: hasCompleted("gateway"),
    onNote: deps.note,
    onLog: deps.log,
    onSkip: deps.skippedStepMessage,
    onStartStep: (stepName) => {
      startStep(stepName);
    },
    onCompleteStep: (stepName) => {
      completeStep(stepName);
    },
  });

  // Seed working state from whatever the (possibly resumed) session already
  // holds; empty strings are normalized to null via `|| null`.
  const currentSession = runContext.session;
  let sandboxName = currentSession.sandboxName || null;
  let model = currentSession.model || null;
  let provider = currentSession.provider || null;
  let endpointUrl = currentSession.endpointUrl || null;
  let credentialEnv = currentSession.credentialEnv || null;
  let preferredInferenceApi = currentSession.preferredInferenceApi || null;
  let nimContainer = currentSession.nimContainer || null;
  let webSearchConfig = currentSession.webSearchConfig || null;
  let selectedMessagingChannels = Array.isArray(currentSession.messagingChannels)
    ? [...currentSession.messagingChannels]
    : [];

  // Phase 2: inference loop (provider + model + endpoint selection). The
  // parenthesized destructuring assignment writes results back into the
  // locals declared above.
  ({
    sandboxName,
    model,
    provider,
    endpointUrl,
    credentialEnv,
    preferredInferenceApi,
    nimContainer,
  } = await deps.inference.run(
    {
      sandboxName,
      model,
      provider,
      endpointUrl,
      credentialEnv,
      preferredInferenceApi,
      nimContainer,
    },
    {
      ...deps.inference,
      gpu,
      resume: deps.resume,
      hasCompletedProviderSelection: hasCompleted("provider_selection"),
      hasCompletedInference: hasCompleted("inference"),
      onSkip: deps.skippedStepMessage,
      onStartStep: (stepName, updates) => startStep(stepName, normalizeSessionUpdates(updates)),
      onCompleteStep: (stepName, updates) =>
        completeStep(stepName, normalizeSessionUpdates(updates)),
    },
  ));

  // Later phases require a concrete model and provider; fail loudly here
  // rather than passing nulls downstream.
  model = requireString(model, "model");
  provider = requireString(provider, "provider");

  // Phase 3: sandbox + messaging flow. May create/reuse the sandbox and
  // finalize web-search and messaging-channel selections.
  ({ sandboxName, webSearchConfig, selectedMessagingChannels } = await deps.sandbox.run(
    {
      gpu,
      sandboxName,
      model,
      provider,
      preferredInferenceApi,
      webSearchConfig,
      selectedMessagingChannels,
      nimContainer,
      fromDockerfile: runContext.fromDockerfile,
      agent,
      dangerouslySkipPermissions: deps.dangerouslySkipPermissions,
    },
    {
      ...deps.sandbox,
      resume: deps.resume,
      // Copy so the flow cannot mutate the session's stored array in place.
      sessionMessagingChannels: Array.isArray(currentSession.messagingChannels)
        ? [...currentSession.messagingChannels]
        : null,
      sessionWebSearchConfig: currentSession.webSearchConfig || null,
      hasCompletedMessaging: hasCompleted("messaging"),
      hasCompletedSandbox: hasCompleted("sandbox"),
      onNote: deps.note,
      onSkip: deps.skippedStepMessage,
      onStartStep: (stepName, updates) => startStep(stepName, normalizeSessionUpdates(updates)),
      onCompleteStep: (stepName, updates) =>
        completeStep(stepName, normalizeSessionUpdates(updates)),
    },
  ));

  // Phase 4: runtime setup inside the (now required) sandbox.
  await deps.runtime.run(
    {
      sandboxName: requireString(sandboxName, "sandboxName"),
      model,
      provider,
      agent,
      resume: deps.resume,
      session: runContext.session,
    },
    {
      ...deps.runtime,
      hasCompletedRuntimeSetup: hasCompleted("runtime_setup"),
      onSkip: deps.skippedStepMessage,
      onStartStep: (stepName, updates) => startStep(stepName, normalizeSessionUpdates(updates)),
      onCompleteStep: (stepName, updates) =>
        completeStep(stepName, normalizeSessionUpdates(updates)),
      onSkipSiblingStep: (stepName) => {
        runContext.skipStep(stepName);
      },
    },
  );

  // Phase 5: policy setup. Re-read the session via the driver because the
  // runtime phase may have persisted policy presets in the meantime.
  const latestSession = runContext.driver.session;
  const recordedPolicyPresets = Array.isArray(latestSession?.policyPresets)
    ? latestSession.policyPresets
    : null;
  const policyResult = await deps.policy.run(
    {
      sandboxName: requireString(sandboxName, "sandboxName"),
      provider,
      model,
      webSearchConfig,
      enabledChannels: selectedMessagingChannels,
      recordedPolicyPresets,
    },
    {
      ...deps.policy,
      resume: deps.resume,
      dangerouslySkipPermissions: deps.dangerouslySkipPermissions,
      hasCompletedPolicies: hasCompleted("policies"),
      onShowHeader: deps.showPolicyHeader,
      onSkip: deps.skippedStepMessage,
      onStartStep: (stepName, updates) => startStep(stepName, normalizeSessionUpdates(updates)),
      onCompleteStep: (stepName, updates) =>
        completeStep(stepName, normalizeSessionUpdates(updates)),
      // Persist the operator's preset selection as soon as it is made so an
      // interrupted run can resume with it.
      onSelectionPersist: (policyPresets) => {
        runContext.updateSession((session) => {
          session.policyPresets = policyPresets;
          return session;
        });
      },
    },
  );

  // Only a "complete" policy outcome finalizes the session; a
  // sandbox_not_ready outcome leaves it resumable.
  if (policyResult.kind === "complete") {
    runContext.completeSession({
      sandboxName: requireString(sandboxName, "sandboxName"),
      provider,
      model,
      policyPresets: policyResult.policyPresets,
    });
  }

  return {
    sandboxName: requireString(sandboxName, "sandboxName"),
    model,
    provider,
    nimContainer,
    agent,
    policyResult,
  };
}
// SPDX-License-Identifier: Apache-2.0

import fs from "node:fs";
import { createRequire } from "node:module";
import os from "node:os";
import path from "node:path";

import { afterEach, beforeEach, describe, expect, it } from "vitest";

// The driver is exercised via its compiled dist/ output (CJS require) so
// coverage attribution and the published artifact are what get tested.
const require = createRequire(import.meta.url);
const driverDistPath = require.resolve("../../dist/lib/onboard-persistent-driver");
const sessionDistPath = require.resolve("../../dist/lib/onboard-session");
const originalHome = process.env.HOME;
let tmpDir: string;

beforeEach(() => {
  // Point HOME at a throwaway directory so session persistence (which
  // presumably writes under $HOME — see onboard-session) never touches the
  // real user profile.
  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-persistent-driver-"));
  process.env.HOME = tmpDir;
  // Purge the require cache so each test re-imports the modules with the
  // fresh HOME in effect.
  delete require.cache[driverDistPath];
  delete require.cache[sessionDistPath];
});

afterEach(() => {
  delete require.cache[driverDistPath];
  delete require.cache[sessionDistPath];
  fs.rmSync(tmpDir, { recursive: true, force: true });
  // Restore HOME exactly, distinguishing "was unset" from "was empty".
  if (originalHome === undefined) {
    delete process.env.HOME;
  } else {
    process.env.HOME = originalHome;
  }
});

describe("PersistentOnboardDriver", () => {
  it("tracks persisted step progress and canonical completion checks", () => {
    const onboardSession = require("../../dist/lib/onboard-session");
    const { PersistentOnboardDriver } = require("../../dist/lib/onboard-persistent-driver");

    // Seed a persisted session, then drive it through the first two steps.
    onboardSession.saveSession(onboardSession.createSession({ sandboxName: "alpha" }));
    const driver = new PersistentOnboardDriver({ resume: true, requestedSandboxName: "alpha" });

    expect(driver.flowState.phase).toBe("preflight");
    expect(driver.hasCompleted("preflight")).toBe(false);

    driver.startStep("preflight", { sandboxName: "alpha" });
    driver.completeStep("preflight");
    expect(driver.hasCompleted("preflight")).toBe(true);
    expect(driver.flowState.phase).toBe("gateway");

    driver.completeStep("gateway");
    expect(driver.hasCompleted("gateway")).toBe(true);
    expect(driver.flowState.phase).toBe("provider_selection");
  });

  it("persists messaging and runtime aliases through the shared reducers", () => {
    const onboardSession = require("../../dist/lib/onboard-session");
    const { PersistentOnboardDriver } = require("../../dist/lib/onboard-persistent-driver");

    onboardSession.saveSession(
      onboardSession.createSession({
        sandboxName: "alpha",
        provider: "openai-api",
        model: "gpt-5.4",
      }),
    );
    const driver = new PersistentOnboardDriver({ resume: true, requestedSandboxName: "alpha" });

    driver.completeStep("messaging", { messagingChannels: ["telegram"] });
    driver.completeStep("sandbox", { sandboxName: "alpha" });
    // "openclaw" is expected to alias to runtime_setup (and skip agent_setup)
    // inside the shared session reducers.
    driver.completeStep("openclaw", {
      sandboxName: "alpha",
      provider: "openai-api",
      model: "gpt-5.4",
    });

    const session = driver.requiredSession;
    expect(session.messagingChannels).toEqual(["telegram"]);
    expect(session.steps.runtime_setup.status).toBe("complete");
    expect(session.steps.openclaw.status).toBe("complete");
    expect(session.steps.agent_setup.status).toBe("skipped");
    expect(driver.hasCompleted("runtime_setup")).toBe(true);
  });

  it("records failures and final completion using persisted state", () => {
    const onboardSession = require("../../dist/lib/onboard-session");
    const { PersistentOnboardDriver } = require("../../dist/lib/onboard-persistent-driver");

    onboardSession.saveSession(onboardSession.createSession({ sandboxName: "alpha" }));
    const driver = new PersistentOnboardDriver({ resume: true, requestedSandboxName: "alpha" });

    // A failed step parks the flow in the "failed" phase...
    driver.startStep("sandbox", { sandboxName: "alpha" });
    driver.failStep("sandbox", "sandbox create failed");
    expect(driver.flowState.phase).toBe("failed");

    // ...but completing it (and the remaining steps) still reaches the
    // terminal, non-resumable state.
    driver.completeStep("sandbox", { sandboxName: "alpha" });
    driver.completeStep("openclaw", { sandboxName: "alpha" });
    driver.completeStep("policies", { sandboxName: "alpha", policyPresets: ["npm"] });
    driver.completeSession({ sandboxName: "alpha", policyPresets: ["npm"] });

    const session = driver.requiredSession;
    expect(session.status).toBe("complete");
    expect(session.resumable).toBe(false);
    expect(session.policyPresets).toEqual(["npm"]);
  });
});
null; + } + + get session(): Session | null { + return loadSession(); + } + + get requiredSession(): Session { + const session = this.session; + if (!session) { + throw new Error("No onboarding session is available."); + } + return session; + } + + get flowState(): OnboardFlowState { + return deriveOnboardFlowState(this.session, { + resume: this.#resume, + requestedSandboxName: this.#requestedSandboxName, + }); + } + + replaceSession(session: Session): Session { + return saveSession(session); + } + + hasCompleted(step: OnboardVisibleStep): boolean { + return hasCompletedOnboardStep(this.flowState, step); + } + + update(mutator: (session: Session) => Session | void): Session { + return updateSession(mutator); + } + + startStep(stepName: OnboardStepName, updates: SessionUpdates = {}): Session { + return updateSession((session) => { + applyStepStarted(session, stepName); + Object.assign(session, filterSafeUpdates(updates)); + return session; + }); + } + + completeStep(stepName: OnboardStepName, updates: SessionUpdates = {}): Session { + return updateSession((session) => applyStepComplete(session, stepName, updates)); + } + + skipStep(stepName: OnboardStepName): Session { + return updateSession((session) => applyStepSkipped(session, stepName)); + } + + failStep(stepName: OnboardStepName, message: string | null = null): Session { + return updateSession((session) => applyStepFailed(session, stepName, message)); + } + + completeSession(updates: SessionUpdates = {}): Session { + return updateSession((session) => applySessionComplete(session, updates)); + } +} diff --git a/src/lib/onboard-policy-flow.test.ts b/src/lib/onboard-policy-flow.test.ts new file mode 100644 index 0000000000..b05b02bca8 --- /dev/null +++ b/src/lib/onboard-policy-flow.test.ts @@ -0,0 +1,158 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0

import { describe, expect, it, vi } from "vitest";
// Import from compiled dist/ so coverage is attributed correctly.
import { runPolicySetupFlow } from "../../dist/lib/onboard-policy-flow";

describe("runPolicySetupFlow", () => {
  it("applies the permissive policy after the sandbox is ready", () => {
    // Events record the exact callback order the flow must follow.
    const events: string[] = [];

    const result = await runPolicySetupFlow(
      {
        sandboxName: "alpha",
        provider: "openai-api",
        model: "gpt-5.4",
        webSearchConfig: null,
        enabledChannels: [],
        recordedPolicyPresets: null,
      },
      {
        resume: false,
        dangerouslySkipPermissions: true,
        hasCompletedPolicies: false,
        waitForSandboxReady: () => true,
        applyPermissivePolicy: (sandboxName) => events.push(`apply:${sandboxName}`),
        arePolicyPresetsApplied: () => false,
        // Permissive mode must never reach the preset-selection path.
        setupPoliciesWithSelection: async () => {
          throw new Error("should not configure presets when using permissive mode");
        },
        onShowHeader: () => events.push("show-header"),
        onSkip: (step, detail) => events.push(`skip:${step}:${detail}`),
        onStartStep: (step) => events.push(`start:${step}`),
        onCompleteStep: (step) => events.push(`complete:${step}`),
        onSelectionPersist: (policyPresets) => events.push(`persist:${policyPresets.join(",")}`),
      },
    );

    expect(result).toEqual({ kind: "complete", policyPresets: [] });
    // Note: permissive mode completes without an onStartStep event.
    expect(events).toEqual(["show-header", "apply:alpha", "complete:policies"]);
  });

  it("returns a not-ready error in permissive mode without applying policy", async () => {
    const events: string[] = [];

    const result = await runPolicySetupFlow(
      {
        sandboxName: "alpha",
        provider: "openai-api",
        model: "gpt-5.4",
        webSearchConfig: null,
        enabledChannels: [],
        recordedPolicyPresets: null,
      },
      {
        resume: false,
        dangerouslySkipPermissions: true,
        hasCompletedPolicies: false,
        // Sandbox never becomes ready: flow must bail before applying policy.
        waitForSandboxReady: () => false,
        applyPermissivePolicy: () => events.push("apply"),
        arePolicyPresetsApplied: () => false,
        setupPoliciesWithSelection: async () => {
          throw new Error("should not configure presets when sandbox is not ready");
        },
        onShowHeader: () => events.push("show-header"),
        onSkip: (step, detail) => events.push(`skip:${step}:${detail}`),
        onStartStep: (step) => events.push(`start:${step}`),
        onCompleteStep: (step) => events.push(`complete:${step}`),
        onSelectionPersist: () => events.push("persist"),
      },
    );

    expect(result).toEqual({
      kind: "sandbox_not_ready",
      message: " ✗ Sandbox 'alpha' not ready after creation. Giving up.",
    });
    expect(events).toEqual(["show-header"]);
  });

  it("skips policies on resume when the selected presets are already applied", async () => {
    const events: string[] = [];

    const result = await runPolicySetupFlow(
      {
        sandboxName: "alpha",
        provider: "openai-api",
        model: "gpt-5.4",
        webSearchConfig: null,
        enabledChannels: ["telegram"],
        recordedPolicyPresets: ["npm", "telegram"],
      },
      {
        resume: true,
        dangerouslySkipPermissions: false,
        // Completed previously AND presets verified applied → skip path.
        hasCompletedPolicies: true,
        waitForSandboxReady: () => true,
        applyPermissivePolicy: () => events.push("apply"),
        arePolicyPresetsApplied: () => true,
        setupPoliciesWithSelection: async () => {
          throw new Error("should not rerun policy selection");
        },
        onShowHeader: () => events.push("show-header"),
        onSkip: (step, detail) => events.push(`skip:${step}:${detail}`),
        onStartStep: (step) => events.push(`start:${step}`),
        onCompleteStep: (step) => events.push(`complete:${step}`),
        onSelectionPersist: () => events.push("persist"),
      },
    );

    expect(result).toEqual({ kind: "complete", policyPresets: ["npm", "telegram"] });
    expect(events).toEqual(["skip:policies:npm, telegram", "complete:policies"]);
  });

  it("runs policy selection and persists operator choices when policies must be configured", async () => {
    const events: string[] = [];
    // Mock selection flow: reports the operator's picks via onSelection, then
    // returns the final applied list.
    const setupPoliciesWithSelection = vi.fn(async (_sandboxName, options) => {
      options.onSelection(["npm", "pypi"]);
      return ["npm", "pypi"];
    });

    const result = await runPolicySetupFlow(
      {
        sandboxName: "alpha",
        provider: "openai-api",
        model: "gpt-5.4",
        webSearchConfig: { fetchEnabled: true },
        enabledChannels: ["telegram"],
        recordedPolicyPresets: ["npm"],
      },
      {
        resume: true,
        dangerouslySkipPermissions: false,
        hasCompletedPolicies: false,
        waitForSandboxReady: () => true,
        applyPermissivePolicy: () => events.push("apply"),
        arePolicyPresetsApplied: () => false,
        setupPoliciesWithSelection,
        onShowHeader: () => events.push("show-header"),
        onSkip: (step, detail) => events.push(`skip:${step}:${detail}`),
        onStartStep: (step) => events.push(`start:${step}`),
        onCompleteStep: (step) => events.push(`complete:${step}`),
        onSelectionPersist: (policyPresets) => events.push(`persist:${policyPresets.join(",")}`),
      },
    );

    expect(result).toEqual({ kind: "complete", policyPresets: ["npm", "pypi"] });
    // Recorded presets are forwarded as the pre-selection seed.
    expect(setupPoliciesWithSelection).toHaveBeenCalledWith(
      "alpha",
      expect.objectContaining({
        selectedPresets: ["npm"],
        enabledChannels: ["telegram"],
        webSearchConfig: { fetchEnabled: true },
        provider: "openai-api",
      }),
    );
    expect(events).toEqual(["start:policies", "persist:npm,pypi", "complete:policies"]);
  });
});
+// SPDX-License-Identifier: Apache-2.0 + +import type { WebSearchConfig } from "./web-search"; + +export interface PolicyFlowState { + sandboxName: string; + provider: string; + model: string; + webSearchConfig: WebSearchConfig | null; + enabledChannels: string[]; + recordedPolicyPresets: string[] | null; +} + +export interface PolicyFlowDeps { + resume: boolean; + dangerouslySkipPermissions: boolean; + hasCompletedPolicies: boolean; + waitForSandboxReady: (sandboxName: string) => boolean; + applyPermissivePolicy: (sandboxName: string) => void; + arePolicyPresetsApplied: (sandboxName: string, selectedPresets: string[]) => boolean; + setupPoliciesWithSelection: ( + sandboxName: string, + options: { + selectedPresets: string[] | null; + enabledChannels: string[]; + webSearchConfig: WebSearchConfig | null; + provider: string; + onSelection: (policyPresets: string[]) => void; + }, + ) => Promise; + onShowHeader: () => void; + onSkip: (stepName: "policies", detail: string) => void; + onStartStep: ( + stepName: "policies", + updates?: { sandboxName?: string; provider?: string; model?: string; policyPresets?: string[] }, + ) => void; + onCompleteStep: ( + stepName: "policies", + updates?: { sandboxName?: string; provider?: string; model?: string; policyPresets?: string[] }, + ) => void; + onSelectionPersist: (policyPresets: string[]) => void; +} + +export type PolicyFlowResult = + | { kind: "complete"; policyPresets: string[] } + | { kind: "sandbox_not_ready"; message: string }; + +export async function runPolicySetupFlow( + state: PolicyFlowState, + deps: PolicyFlowDeps, +): Promise { + if (deps.dangerouslySkipPermissions) { + deps.onShowHeader(); + if (!deps.waitForSandboxReady(state.sandboxName)) { + return { + kind: "sandbox_not_ready", + message: ` ✗ Sandbox '${state.sandboxName}' not ready after creation. 
Giving up.`, + }; + } + deps.applyPermissivePolicy(state.sandboxName); + deps.onCompleteStep("policies", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + policyPresets: [], + }); + return { kind: "complete", policyPresets: [] }; + } + + const resumePolicies = + deps.hasCompletedPolicies && + deps.arePolicyPresetsApplied(state.sandboxName, state.recordedPolicyPresets || []); + if (resumePolicies) { + deps.onSkip("policies", (state.recordedPolicyPresets || []).join(", ")); + deps.onCompleteStep("policies", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + policyPresets: state.recordedPolicyPresets || [], + }); + return { kind: "complete", policyPresets: state.recordedPolicyPresets || [] }; + } + + deps.onStartStep("policies", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + policyPresets: state.recordedPolicyPresets || [], + }); + const appliedPolicyPresets = await deps.setupPoliciesWithSelection(state.sandboxName, { + selectedPresets: + Array.isArray(state.recordedPolicyPresets) && state.recordedPolicyPresets.length > 0 + ? state.recordedPolicyPresets + : null, + enabledChannels: state.enabledChannels, + webSearchConfig: state.webSearchConfig, + provider: state.provider, + onSelection: deps.onSelectionPersist, + }); + deps.onCompleteStep("policies", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + policyPresets: appliedPolicyPresets, + }); + return { kind: "complete", policyPresets: appliedPolicyPresets }; +} diff --git a/src/lib/onboard-policy-suggestions.test.ts b/src/lib/onboard-policy-suggestions.test.ts new file mode 100644 index 0000000000..c310ef37da --- /dev/null +++ b/src/lib/onboard-policy-suggestions.test.ts @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0

import { describe, expect, it, vi } from "vitest";
// Import from compiled dist/ so coverage is attributed correctly.
import {
  getSuggestedPolicyPresets,
  LOCAL_INFERENCE_PROVIDERS,
} from "../../dist/lib/onboard-policy-suggestions";

describe("onboard-policy-suggestions", () => {
  it("exports the local inference providers used for policy suggestions", () => {
    expect(LOCAL_INFERENCE_PROVIDERS).toEqual(["ollama-local", "vllm-local"]);
  });

  it("suggests baseline, messaging, brave, and local-inference presets as expected", () => {
    // Explicit channel selection + local provider + web search enabled:
    // expect baseline (pypi, npm), then local-inference, channel, brave.
    expect(
      getSuggestedPolicyPresets({
        enabledChannels: ["telegram"],
        webSearchConfig: { fetchEnabled: true },
        provider: "ollama-local",
        getCredential: () => null,
        env: {},
      }),
    ).toEqual(["pypi", "npm", "local-inference", "telegram", "brave"]);
  });

  it("auto-detects messaging presets from credentials/env in interactive tty mode", () => {
    const notes: string[] = [];
    // Slack token comes from the credential store, Discord from env; both
    // should be detected and announced (CI=false keeps notes enabled).
    const getCredential = vi.fn((envKey: string) =>
      envKey === "SLACK_BOT_TOKEN" ? "xoxb-token" : null,
    );
    const result = getSuggestedPolicyPresets({
      provider: "nvidia-prod",
      getCredential,
      env: { DISCORD_BOT_TOKEN: "discord-token", CI: "false" } as NodeJS.ProcessEnv,
      isInteractiveTty: true,
      isNonInteractive: false,
      note: (message) => notes.push(message),
    });

    expect(result).toEqual(["pypi", "npm", "slack", "discord"]);
    expect(notes).toEqual([
      " Auto-detected: SLACK_BOT_TOKEN -> suggesting slack preset",
      " Auto-detected: DISCORD_BOT_TOKEN -> suggesting discord preset",
    ]);
  });
});
+// SPDX-License-Identifier: Apache-2.0 + +import type { WebSearchConfig } from "./web-search"; + +// Providers that run on the host and need the local-inference policy preset. +export const LOCAL_INFERENCE_PROVIDERS = ["ollama-local", "vllm-local"] as const; + +export interface SuggestedPolicyPresetDeps { + enabledChannels?: string[] | null; + webSearchConfig?: WebSearchConfig | null; + provider?: string | null; + getCredential: (envKey: string) => string | null; + env?: NodeJS.ProcessEnv; + isInteractiveTty?: boolean; + isNonInteractive?: boolean; + note?: (message: string) => void; +} + +export interface TierPolicySuggestionDeps { + enabledChannels?: string[] | null; + webSearchConfig?: WebSearchConfig | null; + provider?: string | null; + knownPresetNames?: string[] | null; + resolveTierPresets: (tierName: string) => Array<{ name: string }>; +} + +export function getSuggestedPolicyPresets( + deps: SuggestedPolicyPresetDeps, +): string[] { + const env = deps.env ?? process.env; + const note = deps.note ?? 
(() => {}); + const suggestions = ["pypi", "npm"]; + + if (deps.provider && LOCAL_INFERENCE_PROVIDERS.includes(deps.provider as never)) { + suggestions.push("local-inference"); + } + const usesExplicitMessagingSelection = Array.isArray(deps.enabledChannels); + + const maybeSuggestMessagingPreset = (channel: string, envKey: string) => { + if (usesExplicitMessagingSelection) { + if (deps.enabledChannels?.includes(channel)) suggestions.push(channel); + return; + } + if (deps.getCredential(envKey) || env[envKey]) { + suggestions.push(channel); + if (deps.isInteractiveTty && !deps.isNonInteractive && env.CI !== "true") { + note(` Auto-detected: ${envKey} -> suggesting ${channel} preset`); + } + } + }; + + maybeSuggestMessagingPreset("telegram", "TELEGRAM_BOT_TOKEN"); + maybeSuggestMessagingPreset("slack", "SLACK_BOT_TOKEN"); + maybeSuggestMessagingPreset("discord", "DISCORD_BOT_TOKEN"); + + if (deps.webSearchConfig) suggestions.push("brave"); + + return suggestions; +} + +export function computeSetupPresetSuggestions( + tierName: string, + deps: TierPolicySuggestionDeps, +): string[] { + const { enabledChannels = null, webSearchConfig = null, provider = null } = deps; + const known = Array.isArray(deps.knownPresetNames) ? 
new Set(deps.knownPresetNames) : null; + const suggestions = deps.resolveTierPresets(tierName).map((preset) => preset.name); + const add = (name: string) => { + if (suggestions.includes(name)) return; + if (known && !known.has(name)) return; + suggestions.push(name); + }; + if (webSearchConfig) add("brave"); + if (provider && LOCAL_INFERENCE_PROVIDERS.includes(provider as never)) add("local-inference"); + if (Array.isArray(enabledChannels)) { + for (const channel of enabledChannels) add(channel); + } + return suggestions; +} diff --git a/src/lib/onboard-policy-ui.ts b/src/lib/onboard-policy-ui.ts new file mode 100644 index 0000000000..b0cbc8ba78 --- /dev/null +++ b/src/lib/onboard-policy-ui.ts @@ -0,0 +1,743 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +export interface PolicyUiDeps { + step: (current: number, total: number, message: string) => void; + prompt: (question: string) => Promise; + note: (message: string) => void; + sleep: (seconds: number) => void; + isNonInteractive: () => boolean; + parsePolicyPresetEnv: (raw: string | undefined) => string[]; + waitForSandboxReady: (sandboxName: string, attempts?: number, delaySeconds?: number) => boolean; + localInferenceProviders: string[]; + useColor: boolean; + policies: { + listPresets: () => Array<{ name: string; description: string }>; + getAppliedPresets: (sandboxName: string) => string[]; + applyPreset: (sandboxName: string, name: string, options?: { access?: string }) => void; + removePreset: (sandboxName: string, name: string) => boolean; + }; + tiers: { + listTiers: () => Array<{ name: string; label: string }>; + getTier: (name: string) => + | { name: string; label: string; presets: Array<{ name: string; access: string }> } + | null; + resolveTierPresets: (name: string) => Array<{ name: string; access: string }>; + }; + updateSandbox: (sandboxName: string, patch: Record) => void; +} + +export interface 
export interface LegacySetupPoliciesOptions {
  enabledChannels?: string[] | null;
  webSearchConfig?: unknown;
  provider?: string | null;
  /** Computes the suggested preset names for the current context. */
  getSuggestedPolicyPresets: (options?: {
    enabledChannels?: string[] | null;
    webSearchConfig?: unknown;
    provider?: string | null;
  }) => string[];
}

export interface SetupPoliciesWithSelectionOptions {
  selectedPresets?: string[] | null;
  onSelection?: ((presets: string[]) => void) | null;
  webSearchConfig?: unknown;
  enabledChannels?: string[] | null;
  provider?: string | null;
}

/**
 * Legacy "step 8" policy-preset setup.
 *
 * Non-interactive mode is driven by NEMOCLAW_POLICY_MODE
 * (suggested|custom|skip and their aliases) and NEMOCLAW_POLICY_PRESETS;
 * invalid configuration exits the process with status 1. Interactive mode
 * prints the preset catalog and prompts Y/n/list.
 */
// eslint-disable-next-line complexity
export async function setupPoliciesLegacy(
  sandboxName: string,
  options: LegacySetupPoliciesOptions,
  deps: PolicyUiDeps,
): Promise<void> {
  deps.step(8, 8, "Policy presets");
  const suggestions = options.getSuggestedPolicyPresets(options);

  const allPresets = deps.policies.listPresets();
  const applied = deps.policies.getAppliedPresets(sandboxName);

  if (deps.isNonInteractive()) {
    const policyMode = (process.env.NEMOCLAW_POLICY_MODE || "suggested").trim().toLowerCase();
    let selectedPresets = suggestions;

    if (policyMode === "skip" || policyMode === "none" || policyMode === "no") {
      deps.note(" [non-interactive] Skipping policy presets.");
      return;
    }

    if (policyMode === "custom" || policyMode === "list") {
      // Custom mode requires an explicit preset list.
      selectedPresets = deps.parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS);
      if (selectedPresets.length === 0) {
        console.error(" NEMOCLAW_POLICY_PRESETS is required when NEMOCLAW_POLICY_MODE=custom.");
        process.exit(1);
      }
    } else if (policyMode === "suggested" || policyMode === "default" || policyMode === "auto") {
      // Suggested mode still lets NEMOCLAW_POLICY_PRESETS override, when set.
      const envPresets = deps.parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS);
      if (envPresets.length > 0) {
        selectedPresets = envPresets;
      }
    } else {
      console.error(` Unsupported NEMOCLAW_POLICY_MODE: ${policyMode}`);
      console.error(" Valid values: suggested, custom, skip");
      process.exit(1);
    }

    // Reject unknown names before touching the sandbox.
    const knownPresets = new Set(allPresets.map((preset) => preset.name));
    const invalidPresets = selectedPresets.filter((name) => !knownPresets.has(name));
    if (invalidPresets.length > 0) {
      console.error(` Unknown policy preset(s): ${invalidPresets.join(", ")}`);
      process.exit(1);
    }

    if (!deps.waitForSandboxReady(sandboxName)) {
      console.error(` Sandbox '${sandboxName}' was not ready for policy application.`);
      process.exit(1);
    }
    deps.note(` [non-interactive] Applying policy presets: ${selectedPresets.join(", ")}`);
    for (const name of selectedPresets) {
      // Retry (up to 3 attempts, 2s apart) only when the sandbox is not yet
      // visible; any other failure is rethrown immediately.
      for (let attempt = 0; attempt < 3; attempt += 1) {
        try {
          deps.policies.applyPreset(sandboxName, name);
          break;
        } catch (err) {
          const message = err instanceof Error ? err.message : String(err);
          if (!message.includes("sandbox not found") || attempt === 2) {
            throw err;
          }
          deps.sleep(2);
        }
      }
    }
  } else {
    // Interactive: show the catalog with ●/○ markers for applied/suggested.
    console.log("");
    console.log(" Available policy presets:");
    allPresets.forEach((preset) => {
      const marker = applied.includes(preset.name) || suggestions.includes(preset.name) ? "●" : "○";
      const suggested = suggestions.includes(preset.name) ? " (suggested)" : "";
      console.log(` ${marker} ${preset.name} — ${preset.description}${suggested}`);
    });
    console.log("");

    const answer = await deps.prompt(
      ` Apply suggested presets (${suggestions.join(", ")})? [Y/n/list]: `,
    );

    if (answer.toLowerCase() === "n") {
      console.log(" Skipping policy presets.");
      return;
    }

    if (!deps.waitForSandboxReady(sandboxName)) {
      console.error(` Sandbox '${sandboxName}' was not ready for policy application.`);
      process.exit(1);
    }

    if (answer.toLowerCase() === "list") {
      // Manual pick: comma-separated names, no retry logic here.
      const picks = await deps.prompt(" Enter preset names (comma-separated): ");
      const selected = picks
        .split(",")
        .map((value) => value.trim())
        .filter(Boolean);
      for (const name of selected) {
        deps.policies.applyPreset(sandboxName, name);
      }
    } else {
      // Anything else (including bare Enter) means "yes, apply suggestions".
      for (const name of suggestions) {
        deps.policies.applyPreset(sandboxName, name);
      }
    }
  }

  console.log(" ✓ Policies applied");
}

/**
 * True when every preset in `selectedPresets` is already applied on the
 * sandbox. An empty or missing selection always reports false (there is
 * nothing verified as applied).
 * Note: the defaulted parameter precedes the required `deps` — callers must
 * pass all three arguments (or undefined for the default).
 */
export function arePolicyPresetsApplied(
  sandboxName: string,
  selectedPresets: string[] = [],
  deps: PolicyUiDeps,
): boolean {
  if (!Array.isArray(selectedPresets) || selectedPresets.length === 0) return false;
  const applied = new Set(deps.policies.getAppliedPresets(sandboxName));
  return selectedPresets.every((preset) => applied.has(preset));
}

/**
 * Prompt the user to select a policy tier (restricted / balanced / open).
 * Uses the same radio-style TUI as presetsCheckboxSelector (single-select).
 * In non-interactive mode reads NEMOCLAW_POLICY_TIER (default: balanced).
 * Returns the tier name string.
 */
export async function selectPolicyTier(deps: PolicyUiDeps): Promise<string> {
  const allTiers = deps.tiers.listTiers();
  // Default to "balanced" by name, falling back to the second listed tier.
  const defaultTier = (allTiers.find((tier) => tier.name === "balanced") || allTiers[1])!;

  if (deps.isNonInteractive()) {
    const name = (process.env.NEMOCLAW_POLICY_TIER || "balanced").trim().toLowerCase();
    if (!deps.tiers.getTier(name)) {
      console.error(
        ` Unknown policy tier: ${name}. Valid: ${allTiers.map((tier) => tier.name).join(", ")}`,
      );
      process.exit(1);
    }
    deps.note(` [non-interactive] Policy tier: ${name}`);
    return name;
  }

  // ANSI-decorated radio markers (green check / dimmed empty brackets).
  const RADIO_ON = deps.useColor ? "[\x1b[32m✓\x1b[0m]" : "[✓]";
  const RADIO_OFF = deps.useColor ? "\x1b[2m[ ]\x1b[0m" : "[ ]";

  // Interactive but not a real TTY (e.g. piped stdin): fall back to a plain
  // numbered prompt instead of the raw-mode selector.
  if (!process.stdin.isTTY || !process.stdout.isTTY) {
    console.log("");
    console.log(" Policy tier — controls which network presets are enabled:");
    allTiers.forEach((tier) => {
      const marker = tier.name === defaultTier.name ? RADIO_ON : RADIO_OFF;
      console.log(` ${marker} ${tier.label}`);
    });
    console.log("");
    const answer = await deps.prompt(
      ` Select tier [1-${allTiers.length}] (default: ${allTiers.indexOf(defaultTier) + 1} ${defaultTier.name}): `,
    );
    // Empty input keeps the default; out-of-range numbers also fall back.
    const idx =
      answer.trim() === "" ? allTiers.indexOf(defaultTier) : parseInt(answer.trim(), 10) - 1;
    const chosen = allTiers[idx] || defaultTier;
    console.log(` Tier: ${chosen.label}`);
    return chosen.name;
  }

  // Full raw-mode TUI: cursor tracks the highlighted row, selectedIdx the
  // chosen radio; both start at the default tier.
  let cursor = allTiers.indexOf(defaultTier);
  let selectedIdx = cursor;
  const n = allTiers.length;

  const G = deps.useColor ? "\x1b[32m" : "";
  const D = deps.useColor ? "\x1b[2m" : "";
  const R = deps.useColor ? "\x1b[0m" : "";
  const HINT = deps.useColor
    ? ` ${G}↑/↓ j/k${R} ${D}move${R} ${G}Space${R} ${D}select${R} ${G}Enter${R} ${D}confirm${R}`
    : " ↑/↓ j/k move Space select Enter confirm";

  const renderLines = () => {
    const lines = [" Policy tier — controls which network presets are enabled:"];
    allTiers.forEach((tier, index) => {
      const radio = index === selectedIdx ? RADIO_ON : RADIO_OFF;
      const arrow = index === cursor ? ">" : " ";
      lines.push(` ${arrow} ${radio} ${tier.label}`);
    });
    lines.push("");
    lines.push(HINT);
    return lines;
  };

  process.stdout.write("\n");
  const initial = renderLines();
  for (const line of initial) process.stdout.write(`${line}\n`);
  let lineCount = initial.length;

  // Redraw in place: cursor-up over the previous frame, then erase each line
  // (\x1b[2K) before rewriting it.
  const redraw = () => {
    process.stdout.write(`\x1b[${lineCount}A`);
    const lines = renderLines();
    for (const line of lines) process.stdout.write(`\r\x1b[2K${line}\n`);
    lineCount = lines.length;
  };

  process.stdin.setRawMode(true);
  process.stdin.resume();
  process.stdin.setEncoding("utf8");

  return new Promise((resolve) => {
    // Always restore the terminal (raw mode off, listeners removed) on every
    // exit path, including SIGTERM and Ctrl-C.
    const cleanup = () => {
      process.stdin.setRawMode(false);
      process.stdin.pause();
      process.stdin.removeListener("data", onData);
      process.removeListener("SIGTERM", onSigterm);
    };

    const onSigterm = () => {
      cleanup();
      process.exit(1);
    };
    process.once("SIGTERM", onSigterm);

    const onData = (key: string) => {
      if (key === "\r" || key === "\n") {
        // Enter confirms whatever is currently selected.
        cleanup();
        process.stdout.write("\n");
        resolve(allTiers[selectedIdx]!.name);
      } else if (key === " ") {
        // Space moves the selection to the cursor row.
        selectedIdx = cursor;
        redraw();
      } else if (key === "\x03") {
        // Ctrl-C aborts.
        cleanup();
        process.exit(1);
      } else if (key === "\x1b[A" || key === "k") {
        // Up arrow / k: move cursor up with wraparound.
        cursor = (cursor - 1 + n) % n;
        redraw();
      } else if (key === "\x1b[B" || key === "j") {
        // Down arrow / j: move cursor down with wraparound.
        cursor = (cursor + 1) % n;
        redraw();
      }
    };

    process.stdin.on("data", onData);
  });
}
+ */ +export async function selectTierPresetsAndAccess( + tierName: string, + allPresets: Array<{ name: string; description?: string }>, + extraSelected: string[] = [], + deps: PolicyUiDeps, +): Promise> { + const tierDef = deps.tiers.getTier(tierName); + const tierPresetMap: Record = {}; + if (tierDef) { + for (const preset of tierDef.presets) { + tierPresetMap[preset.name] = preset.access; + } + } + + const tierNames = tierDef ? tierDef.presets.map((preset) => preset.name) : []; + const tierSet = new Set(tierNames); + const ordered = [ + ...tierNames.map((name) => allPresets.find((preset) => preset.name === name)).filter(Boolean), + ...allPresets.filter((preset) => !tierSet.has(preset.name)), + ] as Array<{ name: string; description?: string }>; + + const included = new Set([ + ...tierNames, + ...extraSelected.filter((name) => ordered.find((preset) => preset.name === name)), + ]); + + const accessModes: Record = {}; + for (const preset of ordered) { + accessModes[preset.name] = tierPresetMap[preset.name] ?? "read-write"; + } + + const G = deps.useColor ? "\x1b[32m" : ""; + const O = deps.useColor ? "\x1b[38;5;208m" : ""; + const D = deps.useColor ? "\x1b[2m" : ""; + const R = deps.useColor ? "\x1b[0m" : ""; + const GREEN_CHECK = deps.useColor ? `[${G}✓${R}]` : "[✓]"; + const EMPTY_CHECK = deps.useColor ? `${D}[ ]${R}` : "[ ]"; + const TOGGLE_RW = deps.useColor ? `[${O}rw${R}]` : "[rw]"; + const TOGGLE_R = deps.useColor ? `${D}[ r]${R}` : "[ r]"; + + const label = tierDef ? ` Presets (${tierDef.label} defaults):` : " Presets:"; + const n = ordered.length; + + if (deps.isNonInteractive()) { + return ordered + .filter((preset) => included.has(preset.name)) + .map((preset) => ({ name: preset.name, access: accessModes[preset.name]! 
})); + } + + if (!process.stdin.isTTY || !process.stdout.isTTY) { + console.log(""); + console.log(label); + ordered.forEach((preset) => { + const isIncluded = included.has(preset.name); + const isRw = accessModes[preset.name] === "read-write"; + const check = isIncluded ? GREEN_CHECK : EMPTY_CHECK; + const badge = isIncluded ? (isRw ? "[rw]" : "[ r]") : " "; + console.log(` ${check} ${badge} ${preset.name}`); + }); + console.log(""); + const rawInclude = await deps.prompt( + " Include presets (comma-separated names, Enter to keep defaults): ", + ); + if (rawInclude.trim()) { + const knownNames = new Set(ordered.map((preset) => preset.name)); + included.clear(); + for (const name of rawInclude + .split(",") + .map((value) => value.trim()) + .filter(Boolean)) { + if (knownNames.has(name)) { + included.add(name); + } else { + console.error(` Unknown preset name ignored: ${name}`); + } + } + } + return ordered + .filter((preset) => included.has(preset.name)) + .map((preset) => ({ name: preset.name, access: accessModes[preset.name]! })); + } + + let cursor = 0; + + const HINT = deps.useColor + ? ` ${G}↑/↓ j/k${R} ${D}move${R} ${G}Space${R} ${D}include${R} ${G}r${R} ${D}toggle rw${R} ${G}Enter${R} ${D}confirm${R}` + : " ↑/↓ j/k move Space include r toggle rw Enter confirm"; + + const renderLines = () => { + const lines = [label]; + ordered.forEach((preset, index) => { + const isIncluded = included.has(preset.name); + const isRw = accessModes[preset.name] === "read-write"; + const check = isIncluded ? GREEN_CHECK : EMPTY_CHECK; + const badge = isIncluded ? (isRw ? `${TOGGLE_RW} ` : `${TOGGLE_R} `) : " "; + const arrow = index === cursor ? 
">" : " "; + lines.push(` ${arrow} ${check} ${badge}${preset.name}`); + }); + lines.push(""); + lines.push(HINT); + return lines; + }; + + process.stdout.write("\n"); + const initial = renderLines(); + for (const line of initial) process.stdout.write(`${line}\n`); + let lineCount = initial.length; + + const redraw = () => { + process.stdout.write(`\x1b[${lineCount}A`); + const lines = renderLines(); + for (const line of lines) process.stdout.write(`\r\x1b[2K${line}\n`); + lineCount = lines.length; + }; + + process.stdin.setRawMode(true); + process.stdin.resume(); + process.stdin.setEncoding("utf8"); + + return new Promise((resolve) => { + const cleanup = () => { + process.stdin.setRawMode(false); + process.stdin.pause(); + process.stdin.removeListener("data", onData); + process.removeListener("SIGTERM", onSigterm); + }; + + const onSigterm = () => { + cleanup(); + process.exit(1); + }; + process.once("SIGTERM", onSigterm); + + const onData = (key: string) => { + if (key === "\r" || key === "\n") { + cleanup(); + process.stdout.write("\n"); + resolve( + ordered + .filter((preset) => included.has(preset.name)) + .map((preset) => ({ name: preset.name, access: accessModes[preset.name]! })), + ); + } else if (key === "\x03") { + cleanup(); + process.exit(1); + } else if (key === "\x1b[A" || key === "k") { + cursor = (cursor - 1 + n) % n; + redraw(); + } else if (key === "\x1b[B" || key === "j") { + cursor = (cursor + 1) % n; + redraw(); + } else if (key === " ") { + const name = ordered[cursor]!.name; + if (included.has(name)) { + included.delete(name); + } else { + included.add(name); + } + redraw(); + } else if (key === "r" || key === "R") { + const name = ordered[cursor]!.name; + accessModes[name] = accessModes[name] === "read-write" ? "read" : "read-write"; + redraw(); + } + }; + + process.stdin.on("data", onData); + }); +} + +/** + * Raw-mode TUI preset selector. + * Keys: ↑/↓ or k/j to move, Space to toggle, a to select/unselect all, Enter to confirm. 
+ * Falls back to a simple line-based prompt when stdin is not a TTY. + */ +export async function presetsCheckboxSelector( + allPresets: Array<{ name: string; description: string }>, + initialSelected: string[], + deps: PolicyUiDeps, +): Promise { + const selected = new Set(initialSelected); + const n = allPresets.length; + + if (n === 0) { + console.log(" No policy presets are available."); + return []; + } + + const GREEN_CHECK = deps.useColor ? "[\x1b[32m✓\x1b[0m]" : "[✓]"; + + if (!process.stdin.isTTY || !process.stdout.isTTY) { + console.log(""); + console.log(" Available policy presets:"); + allPresets.forEach((preset) => { + const marker = selected.has(preset.name) ? GREEN_CHECK : "[ ]"; + console.log(` ${marker} ${preset.name.padEnd(14)} — ${preset.description}`); + }); + console.log(""); + const raw = await deps.prompt(" Select presets (comma-separated names, Enter to skip): "); + if (!raw.trim()) { + console.log(" Skipping policy presets."); + return []; + } + const knownNames = new Set(allPresets.map((preset) => preset.name)); + const chosen: string[] = []; + for (const name of raw + .split(",") + .map((value) => value.trim()) + .filter(Boolean)) { + if (knownNames.has(name)) { + chosen.push(name); + } else { + console.error(` Unknown preset name ignored: ${name}`); + } + } + return chosen; + } + + let cursor = 0; + + const G = deps.useColor ? "\x1b[32m" : ""; + const D = deps.useColor ? "\x1b[2m" : ""; + const R = deps.useColor ? "\x1b[0m" : ""; + const HINT = deps.useColor + ? ` ${G}↑/↓ j/k${R} ${D}move${R} ${G}Space${R} ${D}toggle${R} ${G}a${R} ${D}all/none${R} ${G}Enter${R} ${D}confirm${R}` + : " ↑/↓ j/k move Space toggle a all/none Enter confirm"; + + const renderLines = () => { + const lines = [" Available policy presets:"]; + allPresets.forEach((preset, index) => { + const check = selected.has(preset.name) ? GREEN_CHECK : "[ ]"; + const arrow = index === cursor ? 
">" : " "; + lines.push(` ${arrow} ${check} ${preset.name.padEnd(14)} — ${preset.description}`); + }); + lines.push(""); + lines.push(HINT); + return lines; + }; + + process.stdout.write("\n"); + const initial = renderLines(); + for (const line of initial) process.stdout.write(`${line}\n`); + let lineCount = initial.length; + + const redraw = () => { + process.stdout.write(`\x1b[${lineCount}A`); + const lines = renderLines(); + for (const line of lines) process.stdout.write(`\r\x1b[2K${line}\n`); + lineCount = lines.length; + }; + + process.stdin.setRawMode(true); + process.stdin.resume(); + process.stdin.setEncoding("utf8"); + + return new Promise((resolve) => { + const cleanup = () => { + process.stdin.setRawMode(false); + process.stdin.pause(); + process.stdin.removeListener("data", onData); + process.removeListener("SIGTERM", onSigterm); + }; + + const onSigterm = () => { + cleanup(); + process.exit(1); + }; + process.once("SIGTERM", onSigterm); + + const onData = (key: string) => { + if (key === "\r" || key === "\n") { + cleanup(); + process.stdout.write("\n"); + resolve([...selected]); + } else if (key === "\x03") { + cleanup(); + process.exit(1); + } else if (key === "\x1b[A" || key === "k") { + cursor = (cursor - 1 + n) % n; + redraw(); + } else if (key === "\x1b[B" || key === "j") { + cursor = (cursor + 1) % n; + redraw(); + } else if (key === " ") { + const name = allPresets[cursor]!.name; + if (selected.has(name)) selected.delete(name); + else selected.add(name); + redraw(); + } else if (key === "a") { + if (selected.size === n) selected.clear(); + else for (const preset of allPresets) selected.add(preset.name); + redraw(); + } + }; + + process.stdin.on("data", onData); + }); +} + +// eslint-disable-next-line complexity +export async function setupPoliciesWithSelection( + sandboxName: string, + options: SetupPoliciesWithSelectionOptions = {}, + deps: PolicyUiDeps, +): Promise { + const selectedPresets = Array.isArray(options.selectedPresets) ? 
options.selectedPresets : null; + const onSelection = typeof options.onSelection === "function" ? options.onSelection : null; + const webSearchConfig = options.webSearchConfig || null; + const provider = options.provider || null; + + deps.step(8, 8, "Policy presets"); + + const allPresets = deps.policies.listPresets(); + const applied = deps.policies.getAppliedPresets(sandboxName); + let chosen = selectedPresets; + + if (chosen && chosen.length > 0) { + if (onSelection) onSelection(chosen); + if (!deps.waitForSandboxReady(sandboxName)) { + console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); + process.exit(1); + } + deps.note(` [resume] Reapplying policy presets: ${chosen.join(", ")}`); + for (const name of chosen) { + if (applied.includes(name)) continue; + deps.policies.applyPreset(sandboxName, name); + } + return chosen; + } + + const tierName = await selectPolicyTier(deps); + deps.updateSandbox(sandboxName, { policyTier: tierName }); + const suggestions = deps.tiers.resolveTierPresets(tierName).map((preset) => preset.name); + if (webSearchConfig && !suggestions.includes("brave")) suggestions.push("brave"); + if ( + provider && + deps.localInferenceProviders.includes(provider) && + !suggestions.includes("local-inference") + ) { + suggestions.push("local-inference"); + } + + if (deps.isNonInteractive()) { + const policyMode = (process.env.NEMOCLAW_POLICY_MODE || "suggested").trim().toLowerCase(); + chosen = suggestions; + + if (policyMode === "skip" || policyMode === "none" || policyMode === "no") { + deps.note(" [non-interactive] Skipping policy presets."); + return []; + } + + if (policyMode === "custom" || policyMode === "list") { + chosen = deps.parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS); + if (chosen.length === 0) { + console.error(" NEMOCLAW_POLICY_PRESETS is required when NEMOCLAW_POLICY_MODE=custom."); + process.exit(1); + } + } else if (policyMode === "suggested" || policyMode === "default" || policyMode === 
"auto") { + const envPresets = deps.parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS); + if (envPresets.length > 0) chosen = envPresets; + } else { + console.error(` Unsupported NEMOCLAW_POLICY_MODE: ${policyMode}`); + console.error(" Valid values: suggested, custom, skip"); + process.exit(1); + } + + const knownPresets = new Set(allPresets.map((preset) => preset.name)); + const invalidPresets = chosen.filter((name) => !knownPresets.has(name)); + if (invalidPresets.length > 0) { + console.error(` Unknown policy preset(s): ${invalidPresets.join(", ")}`); + process.exit(1); + } + + if (onSelection) onSelection(chosen); + if (!deps.waitForSandboxReady(sandboxName)) { + console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); + process.exit(1); + } + deps.note(` [non-interactive] Applying policy presets: ${chosen.join(", ")}`); + for (const name of chosen) { + for (let attempt = 0; attempt < 3; attempt += 1) { + try { + deps.policies.applyPreset(sandboxName, name); + break; + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + if (!message.includes("sandbox not found") || attempt === 2) { + throw err; + } + deps.sleep(2); + } + } + } + return chosen; + } + + const knownNames = new Set(allPresets.map((preset) => preset.name)); + const extraSelected = [ + ...applied.filter((name) => knownNames.has(name)), + ...suggestions.filter((name) => knownNames.has(name) && !applied.includes(name)), + ]; + const resolvedPresets = await selectTierPresetsAndAccess( + tierName, + allPresets, + extraSelected, + deps, + ); + const interactiveChoice = resolvedPresets.map((preset) => preset.name); + + if (onSelection) onSelection(interactiveChoice); + if (!deps.waitForSandboxReady(sandboxName)) { + console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); + process.exit(1); + } + + const accessByName: Record = {}; + for (const preset of resolvedPresets) accessByName[preset.name] = preset.access; + const newlySelected = interactiveChoice.filter((name) => !applied.includes(name)); + const deselected = applied.filter((name) => !interactiveChoice.includes(name)); + + for (const name of deselected) { + for (let attempt = 0; attempt < 3; attempt += 1) { + try { + if (!deps.policies.removePreset(sandboxName, name)) { + throw new Error(`Failed to remove preset '${name}'.`); + } + break; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + if (!message.includes("sandbox not found") || attempt === 2) { + throw err; + } + deps.sleep(2); + } + } + } + + for (const name of newlySelected) { + for (let attempt = 0; attempt < 3; attempt += 1) { + try { + deps.policies.applyPreset(sandboxName, name, { access: accessByName[name] }); + break; + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + if (!message.includes("sandbox not found") || attempt === 2) { + throw err; + } + deps.sleep(2); + } + } + } + return interactiveChoice; +} diff --git a/src/lib/onboard-preflight-run.ts b/src/lib/onboard-preflight-run.ts new file mode 100644 index 0000000000..a641def51d --- /dev/null +++ b/src/lib/onboard-preflight-run.ts @@ -0,0 +1,316 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +export async function runOnboardPreflight(deps: any): Promise { + deps.step(1, 8, "Preflight checks"); + + const host = deps.assessHost(); + + // Docker / runtime + if (!host.dockerReachable) { + console.error(" Docker is not reachable. Please fix Docker and try again."); + deps.printRemediationActions(deps.planHostRemediation(host)); + process.exit(1); + } + console.log(" ✓ Docker is running"); + + if (host.runtime !== "unknown") { + console.log(` ✓ Container runtime: ${host.runtime}`); + } + // Podman is now supported — no unsupported runtime warning needed. + if (host.notes.includes("Running under WSL")) { + console.log(" ⓘ Running under WSL"); + } + + // OpenShell CLI — install if missing, upgrade if below minimum version. + // MIN_VERSION in install-openshell.sh handles the version gate; calling it + // when openshell already exists is safe (it exits early if version is OK). + let openshellInstall: { + installed?: boolean; + localBin: string | null; + futureShellPathHint: string | null; + } = { localBin: null, futureShellPathHint: null }; + if (!deps.isOpenshellInstalled()) { + console.log(" openshell CLI not found. 
Installing..."); + openshellInstall = deps.installOpenshell(); + if (!openshellInstall.installed) { + console.error(" Failed to install openshell CLI."); + console.error(" Install manually: https://github.com/NVIDIA/OpenShell/releases"); + process.exit(1); + } + } else { + const currentVersion = deps.getInstalledOpenshellVersion(); + if (!currentVersion) { + console.log(" openshell version could not be determined. Reinstalling..."); + openshellInstall = deps.installOpenshell(); + if (!openshellInstall.installed) { + console.error(" Failed to reinstall openshell CLI."); + console.error(" Install manually: https://github.com/NVIDIA/OpenShell/releases"); + process.exit(1); + } + } else { + const parts = currentVersion.split(".").map(Number); + const minParts = [0, 0, 24]; // must match MIN_VERSION in scripts/install-openshell.sh + const needsUpgrade = + parts[0] < minParts[0] || + (parts[0] === minParts[0] && parts[1] < minParts[1]) || + (parts[0] === minParts[0] && parts[1] === minParts[1] && parts[2] < minParts[2]); + if (needsUpgrade) { + console.log( + ` openshell ${currentVersion} is below minimum required version. 
Upgrading...`, + ); + openshellInstall = deps.installOpenshell(); + if (!openshellInstall.installed) { + console.error(" Failed to upgrade openshell CLI."); + console.error(" Install manually: https://github.com/NVIDIA/OpenShell/releases"); + process.exit(1); + } + } + } + } + const openshellVersionOutput = deps.runCaptureOpenshell(["--version"], { ignoreError: true }); + console.log(` ✓ openshell CLI: ${openshellVersionOutput || "unknown"}`); + const installedOpenshellVersion = deps.getInstalledOpenshellVersion(openshellVersionOutput); + const minOpenshellVersion = deps.getBlueprintMinOpenshellVersion(); + if ( + installedOpenshellVersion && + minOpenshellVersion && + !deps.versionGte(installedOpenshellVersion, minOpenshellVersion) + ) { + console.error(""); + console.error( + ` ✗ openshell ${installedOpenshellVersion} is below the minimum required by this NemoClaw release.`, + ); + console.error(` blueprint.yaml min_openshell_version: ${minOpenshellVersion}`); + console.error(""); + console.error(" Upgrade openshell and retry:"); + console.error(" https://github.com/NVIDIA/OpenShell/releases"); + console.error( + " Or remove the existing binary so the installer can re-fetch a current build:", + ); + console.error(' command -v openshell && rm -f "$(command -v openshell)"'); + console.error(""); + process.exit(1); + } + const maxOpenshellVersion = deps.getBlueprintMaxOpenshellVersion(); + if ( + installedOpenshellVersion && + maxOpenshellVersion && + !deps.versionGte(maxOpenshellVersion, installedOpenshellVersion) + ) { + console.error(""); + console.error( + ` ✗ openshell ${installedOpenshellVersion} is above the maximum supported by this NemoClaw release.`, + ); + console.error(` blueprint.yaml max_openshell_version: ${maxOpenshellVersion}`); + console.error(""); + console.error(" Upgrade NemoClaw to a version that supports your OpenShell release,"); + console.error(" or install a supported OpenShell version:"); + console.error(" 
https://github.com/NVIDIA/OpenShell/releases"); + console.error(""); + process.exit(1); + } + if (openshellInstall.futureShellPathHint) { + console.log( + ` Note: openshell was installed to ${openshellInstall.localBin} for this onboarding run.`, + ); + console.log(` Future shells may still need: ${openshellInstall.futureShellPathHint}`); + console.log( + " Add that export to your shell profile, or open a new terminal before running openshell directly.", + ); + } + + const gatewayStatus = deps.runCaptureOpenshell(["status"], { ignoreError: true }); + const gwInfo = deps.runCaptureOpenshell(["gateway", "info", "-g", deps.gatewayName], { + ignoreError: true, + }); + const activeGatewayInfo = deps.runCaptureOpenshell(["gateway", "info"], { + ignoreError: true, + }); + let gatewayReuseState = deps.getGatewayReuseState(gatewayStatus, gwInfo, activeGatewayInfo); + + if (gatewayReuseState === "healthy") { + const containerState = deps.verifyGatewayContainerRunning(); + if (containerState === "missing") { + console.log(" Gateway metadata is stale (container not running). Cleaning up..."); + deps.runOpenshell(["forward", "stop", String(deps.dashboardPort)], { ignoreError: true }); + deps.destroyGateway(); + deps.clearRegistryAll(); + gatewayReuseState = "missing"; + console.log(" ✓ Stale gateway metadata cleaned up"); + } else if (containerState === "unknown") { + console.log( + " Warning: could not verify gateway container state (Docker may be unavailable). 
Proceeding with cached health status.", + ); + } + } + + if (gatewayReuseState === "stale" || gatewayReuseState === "active-unnamed") { + console.log(" Cleaning up previous NemoClaw session..."); + deps.runOpenshell(["forward", "stop", String(deps.dashboardPort)], { ignoreError: true }); + const destroyResult = deps.runOpenshell(["gateway", "destroy", "-g", deps.gatewayName], { + ignoreError: true, + }); + if (destroyResult.status === 0) { + deps.clearRegistryAll(); + } + console.log(" ✓ Previous session cleaned up"); + } + + if (gatewayReuseState === "missing") { + const containerName = `openshell-cluster-${deps.gatewayName}`; + const inspectResult = deps.run( + `docker inspect --type container --format '{{.State.Status}}' ${containerName} 2>/dev/null`, + { ignoreError: true, suppressOutput: true }, + ); + if (inspectResult.status === 0) { + console.log(" Cleaning up orphaned gateway container..."); + deps.run(`docker stop ${containerName} >/dev/null 2>&1`, { + ignoreError: true, + suppressOutput: true, + }); + deps.run(`docker rm ${containerName} >/dev/null 2>&1`, { + ignoreError: true, + suppressOutput: true, + }); + const postInspectResult = deps.run( + `docker inspect --type container ${containerName} 2>/dev/null`, + { + ignoreError: true, + suppressOutput: true, + }, + ); + if (postInspectResult.status !== 0) { + deps.run( + `docker volume ls -q --filter "name=openshell-cluster-${deps.gatewayName}" | grep . && docker volume ls -q --filter "name=openshell-cluster-${deps.gatewayName}" | xargs docker volume rm 2>/dev/null || true`, + { ignoreError: true, suppressOutput: true }, + ); + deps.clearRegistryAll(); + console.log(" ✓ Orphaned gateway container removed"); + } else { + console.warn(" ! 
Found an orphaned gateway container, but automatic cleanup failed."); + } + } + } + + const requiredPorts = [ + { port: deps.gatewayPort, label: "OpenShell gateway" }, + { port: deps.dashboardPort, label: "NemoClaw dashboard" }, + ]; + for (const { port, label } of requiredPorts) { + let portCheck = await deps.checkPortAvailable(port); + if (!portCheck.ok) { + if ( + (port === deps.gatewayPort || port === deps.dashboardPort) && + gatewayReuseState === "healthy" + ) { + console.log(` ✓ Port ${port} already owned by healthy NemoClaw runtime (${label})`); + continue; + } + if (port === deps.dashboardPort && portCheck.process === "ssh" && portCheck.pid) { + const cmdline = deps.runCapture( + `ps -p ${portCheck.pid} -o args= 2>/dev/null`, + { ignoreError: true }, + ).trim(); + if (cmdline.includes("openshell")) { + console.log( + ` Cleaning up orphaned SSH port-forward on port ${port} (PID ${portCheck.pid})...`, + ); + deps.run(`kill ${portCheck.pid} 2>/dev/null || true`, { ignoreError: true }); + deps.sleep(1); + portCheck = await deps.checkPortAvailable(port); + if (portCheck.ok) { + console.log(` ✓ Port ${port} available after orphaned forward cleanup (${label})`); + continue; + } + } + } + console.error(""); + console.error(` !! Port ${port} is not available.`); + console.error(` ${label} needs this port.`); + console.error(""); + if (portCheck.process && portCheck.process !== "unknown") { + console.error( + ` Blocked by: ${portCheck.process}${portCheck.pid ? 
` (PID ${portCheck.pid})` : ""}`, + ); + console.error(""); + console.error(" To fix, stop the conflicting process:"); + console.error(""); + if (portCheck.pid) { + console.error(` sudo kill ${portCheck.pid}`); + } else { + console.error(` sudo lsof -i :${port} -sTCP:LISTEN -P -n`); + } + for (const hint of deps.getPortConflictServiceHints()) { + console.error(hint); + } + } else { + console.error(` Could not identify the process using port ${port}.`); + console.error(` Run: sudo lsof -i :${port} -sTCP:LISTEN`); + } + console.error(""); + console.error(` Detail: ${portCheck.reason}`); + process.exit(1); + } + console.log(` ✓ Port ${port} available (${label})`); + } + + const gpu = deps.nimDetectGpu(); + if (gpu && gpu.type === "nvidia") { + console.log(` ✓ NVIDIA GPU detected: ${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`); + if (!gpu.nimCapable) { + console.log(" ⓘ GPU VRAM too small for local NIM — will use cloud inference"); + } + } else if (gpu && gpu.type === "apple") { + console.log( + ` ✓ Apple GPU detected: ${gpu.name}${gpu.cores ? ` (${gpu.cores} cores)` : ""}, ${gpu.totalMemoryMB} MB unified memory`, + ); + console.log(" ⓘ NIM requires NVIDIA GPU — will use cloud inference"); + } else { + console.log(" ⓘ No GPU detected — will use cloud inference"); + } + + if ((deps.processPlatform ?? process.platform) === "linux") { + const mem = deps.getMemoryInfo(); + if (mem) { + if (mem.totalMB < 12000) { + console.log( + ` ⚠ Low memory detected (${mem.totalRamMB} MB RAM + ${mem.totalSwapMB} MB swap = ${mem.totalMB} MB total)`, + ); + + let proceedWithSwap = false; + if (!deps.isNonInteractive()) { + const answer = await deps.prompt( + " Create a 4 GB swap file to prevent OOM during sandbox build? (requires sudo) [y/N]: ", + ); + proceedWithSwap = answer && answer.toLowerCase().startsWith("y"); + } + + if (!proceedWithSwap) { + console.log( + " ⓘ Skipping swap creation. 
Sandbox build may fail with OOM on this system.", + ); + } else { + console.log(" Creating 4 GB swap file to prevent OOM during sandbox build..."); + const swapResult = deps.ensureSwap(12000); + if (swapResult.ok && swapResult.swapCreated) { + console.log(" ✓ Swap file created and activated"); + } else if (swapResult.ok) { + if (swapResult.reason) { + console.log(` ⓘ ${swapResult.reason} — existing swap should help prevent OOM`); + } else { + console.log(` ✓ Memory OK: ${mem.totalRamMB} MB RAM + ${mem.totalSwapMB} MB swap`); + } + } else { + console.log(` ⚠ Could not create swap: ${swapResult.reason}`); + console.log(" Sandbox creation may fail with OOM on low-memory systems."); + } + } + } else { + console.log(` ✓ Memory OK: ${mem.totalRamMB} MB RAM + ${mem.totalSwapMB} MB swap`); + } + } + } + + return gpu; +} diff --git a/src/lib/onboard-provider-management.ts b/src/lib/onboard-provider-management.ts new file mode 100644 index 0000000000..0c948efc58 --- /dev/null +++ b/src/lib/onboard-provider-management.ts @@ -0,0 +1,158 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import crypto from "node:crypto"; + +export interface ProviderManagementDeps { + runOpenshell: ( + args: string[], + opts?: { + ignoreError?: boolean; + env?: Record; + stdio?: [string, string, string]; + }, + ) => { status: number; stdout?: string; stderr?: string }; + compactText: (value: string) => string; + redact: (value: string) => string; + registry: { + getSandbox: (sandboxName: string) => any; + }; + runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string; +} + +/** + * Build the argument array for an `openshell provider create` or `update` command. + */ +export function buildProviderArgs( + action: "create" | "update", + name: string, + type: string, + credentialEnv: string, + baseUrl: string | null, +): string[] { + const args = + action === "create" + ? 
["provider", "create", "--name", name, "--type", type, "--credential", credentialEnv] + : ["provider", "update", name, "--credential", credentialEnv]; + if (baseUrl && type === "openai") { + args.push("--config", `OPENAI_BASE_URL=${baseUrl}`); + } else if (baseUrl && type === "anthropic") { + args.push("--config", `ANTHROPIC_BASE_URL=${baseUrl}`); + } + return args; +} + +/** + * Check whether an OpenShell provider exists in the gateway. + */ +export function providerExistsInGateway(name: string, deps: ProviderManagementDeps): boolean { + const result = deps.runOpenshell(["provider", "get", name], { + ignoreError: true, + stdio: ["ignore", "ignore", "ignore"], + }); + return result.status === 0; +} + +/** + * Create or update an OpenShell provider in the gateway. + */ +export function upsertProvider( + name: string, + type: string, + credentialEnv: string, + baseUrl: string | null, + env: Record = {}, + deps: ProviderManagementDeps, +): { ok: boolean; status?: number; message?: string } { + const exists = providerExistsInGateway(name, deps); + const action = exists ? "update" : "create"; + const args = buildProviderArgs(action, name, type, credentialEnv, baseUrl); + const runOpts = { ignoreError: true, env, stdio: ["ignore", "pipe", "pipe"] as [string, string, string] }; + const result = deps.runOpenshell(args, runOpts); + if (result.status !== 0) { + const output = + deps.compactText(deps.redact(`${result.stderr || ""}`)) || + deps.compactText(deps.redact(`${result.stdout || ""}`)) || + `Failed to ${action} provider '${name}'.`; + return { ok: false, status: result.status || 1, message: output }; + } + return { ok: true }; +} + +/** + * Upsert all messaging providers that have tokens configured. 
+ */ +export function upsertMessagingProviders( + tokenDefs: Array<{ name: string; envKey: string; token: string | null }>, + deps: ProviderManagementDeps, +): string[] { + const providers: string[] = []; + for (const { name, envKey, token } of tokenDefs) { + if (!token) continue; + const result = upsertProvider(name, "generic", envKey, null, { [envKey]: token }, deps); + if (!result.ok) { + console.error(`\n ✗ Failed to create messaging provider '${name}': ${result.message}`); + process.exit(1); + } + providers.push(name); + } + return providers; +} + +/** + * Compute a SHA-256 hash of a credential value for change detection. + */ +export function hashCredential(value: string | null | undefined): string | null { + if (!value) return null; + return crypto.createHash("sha256").update(String(value).trim()).digest("hex"); +} + +/** + * Detect whether any messaging provider credential has been rotated since + * the sandbox was created. + */ +export function detectMessagingCredentialRotation( + sandboxName: string, + tokenDefs: Array<{ name: string; envKey: string; token: string | null }>, + deps: ProviderManagementDeps, +): { changed: boolean; changedProviders: string[] } { + const sandboxEntry = deps.registry.getSandbox(sandboxName); + const storedHashes = sandboxEntry?.providerCredentialHashes || {}; + const changedProviders: string[] = []; + for (const { name, envKey, token } of tokenDefs) { + if (!token) continue; + const storedHash = storedHashes[envKey]; + if (!storedHash) continue; + if (storedHash !== hashCredential(token)) { + changedProviders.push(name); + } + } + return { changed: changedProviders.length > 0, changedProviders }; +} + +// Tri-state probe factory for messaging-conflict backfill. 
An upfront liveness +// check is necessary because `openshell provider get` exits non-zero for both +// "provider not attached" and "gateway unreachable"; without the liveness +// gate, a transient gateway failure would be recorded as "no providers" and +// permanently suppress future backfill retries. +export function makeConflictProbe(deps: ProviderManagementDeps): { + providerExists: (name: string) => "present" | "absent" | "error"; +} { + let gatewayAlive: boolean | null = null; + const isGatewayAlive = () => { + if (gatewayAlive === null) { + const result = deps.runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); + // runCaptureOpenshell returns stdout/stderr as a single string; treat + // any non-empty output as a sign openshell answered. Empty output with + // ignoreError typically means the binary failed to produce anything. + gatewayAlive = typeof result === "string" && result.length > 0; + } + return gatewayAlive; + }; + return { + providerExists: (name: string) => { + if (!isGatewayAlive()) return "error"; + return providerExistsInGateway(name, deps) ? "present" : "absent"; + }, + }; +} diff --git a/src/lib/onboard-recorders.test.ts b/src/lib/onboard-recorders.test.ts new file mode 100644 index 0000000000..4256f4e72a --- /dev/null +++ b/src/lib/onboard-recorders.test.ts @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import { createRequire } from "node:module"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +const require = createRequire(import.meta.url); +const recordersDistPath = require.resolve("../../dist/lib/onboard-recorders"); +const driverDistPath = require.resolve("../../dist/lib/onboard-persistent-driver"); +const sessionDistPath = require.resolve("../../dist/lib/onboard-session"); +const distModulePaths = [recordersDistPath, driverDistPath, sessionDistPath] as const; +const originalHome = process.env.HOME; +let tmpDir: string; + +const clearDistModuleCache = () => { + for (const modulePath of distModulePaths) { + delete require.cache[modulePath]; + } +}; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-recorders-")); + process.env.HOME = tmpDir; + clearDistModuleCache(); +}); + +afterEach(() => { + clearDistModuleCache(); + fs.rmSync(tmpDir, { recursive: true, force: true }); + if (originalHome === undefined) { + delete process.env.HOME; + } else { + process.env.HOME = originalHome; + } +}); + +describe("createTrackedOnboardRun", () => { + it("keeps the caller's session reference in sync with persisted driver updates", () => { + const onboardSession = require("../../dist/lib/onboard-session"); + const { PersistentOnboardDriver } = require("../../dist/lib/onboard-persistent-driver"); + const { createTrackedOnboardRun } = require("../../dist/lib/onboard-recorders"); + + const initialSession = onboardSession.saveSession( + onboardSession.createSession({ sandboxName: "alpha" }), + ); + const driver = new PersistentOnboardDriver({ resume: true, requestedSandboxName: "alpha" }); + const trackedRun = createTrackedOnboardRun(driver, initialSession); + + trackedRun.startStep("preflight"); + expect(trackedRun.session.lastStepStarted).toBe("preflight"); + + 
trackedRun.completeStep("preflight"); + trackedRun.completeStep("gateway"); + trackedRun.completeStep("provider_selection", { + provider: "openai-api", + model: "gpt-5.4", + }); + trackedRun.completeStep("inference", { + provider: "openai-api", + model: "gpt-5.4", + }); + trackedRun.completeStep("messaging", { + messagingChannels: ["telegram"], + }); + + expect(trackedRun.session.steps.messaging.status).toBe("complete"); + expect(trackedRun.session.messagingChannels).toEqual(["telegram"]); + expect(driver.requiredSession.messagingChannels).toEqual(["telegram"]); + }); +}); diff --git a/src/lib/onboard-recorders.ts b/src/lib/onboard-recorders.ts new file mode 100644 index 0000000000..7a376523b2 --- /dev/null +++ b/src/lib/onboard-recorders.ts @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { OnboardStepName } from "./onboard-fsm"; +import { PersistentOnboardDriver } from "./onboard-persistent-driver"; +import type { Session, SessionUpdates } from "./onboard-session"; + +export interface TrackedOnboardRun { + readonly driver: PersistentOnboardDriver; + readonly session: Session; + update(mutator: (session: Session) => Session | void): Session; + startStep(stepName: OnboardStepName, updates?: SessionUpdates): Session; + completeStep(stepName: OnboardStepName, updates?: SessionUpdates): Session; + skipStep(stepName: OnboardStepName): Session; + failStep(stepName: OnboardStepName, message?: string | null): Session; + completeSession(updates?: SessionUpdates): Session; +} + +export function createTrackedOnboardRun( + driver: PersistentOnboardDriver, + initialSession: Session, +): TrackedOnboardRun { + let session = initialSession; + + return { + driver, + get session(): Session { + return session; + }, + update(mutator): Session { + session = driver.update(mutator); + return session; + }, + startStep(stepName, updates = {}): Session { + session = 
driver.startStep(stepName, updates); + return session; + }, + completeStep(stepName, updates = {}): Session { + session = driver.completeStep(stepName, updates); + return session; + }, + skipStep(stepName): Session { + session = driver.skipStep(stepName); + return session; + }, + failStep(stepName, message = null): Session { + session = driver.failStep(stepName, message); + return session; + }, + completeSession(updates = {}): Session { + session = driver.completeSession(updates); + return session; + }, + }; +} diff --git a/src/lib/onboard-remediation.test.ts b/src/lib/onboard-remediation.test.ts new file mode 100644 index 0000000000..fbd7f7cb4c --- /dev/null +++ b/src/lib/onboard-remediation.test.ts @@ -0,0 +1,68 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. +import { + getContainerRuntime, + getFutureShellPathHint, + getPortConflictServiceHints, + printRemediationActions, +} from "../../dist/lib/onboard-remediation"; + +describe("onboard-remediation", () => { + it("formats remediation steps for the operator", () => { + const lines: string[] = []; + printRemediationActions( + [ + { + title: "Install Docker", + reason: "Docker is required.", + commands: ["sudo apt-get install docker-ce", "nemoclaw onboard"], + }, + ], + (message = "") => lines.push(message), + ); + + expect(lines).toEqual([ + "", + " Suggested fix:", + "", + " - Install Docker: Docker is required.", + " sudo apt-get install docker-ce", + " nemoclaw onboard", + ]); + }); + + it("returns a future-shell PATH hint only when the bin dir is not already present", () => { + expect(getFutureShellPathHint("/home/test/.local/bin", "/usr/local/bin:/usr/bin")).toBe( + 'export PATH="/home/test/.local/bin:$PATH"', + ); + expect( + getFutureShellPathHint( + "/home/test/.local/bin", + 
"/home/test/.local/bin:/usr/local/bin:/usr/bin", + ), + ).toBeNull(); + }); + + it("renders platform-specific port conflict service hints", () => { + expect(getPortConflictServiceHints("darwin", "/tmp/agent.plist").join("\n")).toContain( + "launchctl unload /tmp/agent.plist", + ); + expect(getPortConflictServiceHints("darwin", "/tmp/agent.plist").join("\n")).not.toContain( + "systemctl --user", + ); + expect(getPortConflictServiceHints("darwin").join("\n")).not.toContain("launchctl unload "); + expect(getPortConflictServiceHints("linux").join("\n")).toContain( + "systemctl --user stop openclaw-gateway.service", + ); + }); + + it("derives the container runtime from docker info output", () => { + const runCapture = vi.fn(() => "Docker Desktop 4.0"); + const inferContainerRuntime = vi.fn((info: string) => info.toLowerCase().includes("desktop") ? "docker-desktop" : "docker"); + expect(getContainerRuntime({ runCapture, inferContainerRuntime })).toBe("docker-desktop"); + expect(runCapture).toHaveBeenCalledWith("docker info 2>/dev/null", { ignoreError: true }); + }); +}); diff --git a/src/lib/onboard-remediation.ts b/src/lib/onboard-remediation.ts new file mode 100644 index 0000000000..1815b60b47 --- /dev/null +++ b/src/lib/onboard-remediation.ts @@ -0,0 +1,70 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import path from "node:path"; + +export interface RemediationActionLike { + title: string; + reason: string; + commands?: string[]; +} + +export interface ContainerRuntimeDeps { + runCapture: (command: string, options?: { ignoreError?: boolean }) => string; + inferContainerRuntime: (dockerInfo: string) => string; +} + +export function getContainerRuntime(deps: ContainerRuntimeDeps): string { + const info = deps.runCapture("docker info 2>/dev/null", { ignoreError: true }); + return deps.inferContainerRuntime(info); +} + +export function printRemediationActions( + actions: RemediationActionLike[] | null | undefined, + errorWriter: (message?: string) => void = console.error, +): void { + if (!Array.isArray(actions) || actions.length === 0) { + return; + } + + errorWriter(""); + errorWriter(" Suggested fix:"); + errorWriter(""); + for (const action of actions) { + errorWriter(` - ${action.title}: ${action.reason}`); + for (const command of action.commands || []) { + errorWriter(` ${command}`); + } + } +} + +export function getFutureShellPathHint( + binDir: string, + pathValue = process.env.PATH || "", +): string | null { + if (String(pathValue).split(path.delimiter).includes(binDir)) { + return null; + } + return `export PATH="${binDir}:$PATH"`; +} + +export function getPortConflictServiceHints( + platform = process.platform, + launchAgentPlist = "", +): string[] { + if (platform === "darwin") { + const hints = [ + " # or, if it's a launchctl service (macOS):", + " launchctl list | grep -i claw # columns: PID | ExitStatus | Label", + " # or: launchctl bootout gui/$(id -u)/ai.openclaw.gateway", + ]; + if (launchAgentPlist) { + hints.splice(2, 0, ` launchctl unload ${launchAgentPlist}`); + } + return hints; + } + return [ + " # or, if it's a systemd service:", + " systemctl --user stop openclaw-gateway.service", + ]; +} diff --git a/src/lib/onboard-remote-provider-config.ts b/src/lib/onboard-remote-provider-config.ts new file 
mode 100644 index 0000000000..b696bb01ac --- /dev/null +++ b/src/lib/onboard-remote-provider-config.ts @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { DEFAULT_CLOUD_MODEL } from "./inference-config"; + +export const BUILD_ENDPOINT_URL = "https://integrate.api.nvidia.com/v1"; +export const OPENAI_ENDPOINT_URL = "https://api.openai.com/v1"; +export const ANTHROPIC_ENDPOINT_URL = "https://api.anthropic.com"; +export const GEMINI_ENDPOINT_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"; + +export const REMOTE_PROVIDER_CONFIG = { + build: { + label: "NVIDIA Endpoints", + providerName: "nvidia-prod", + providerType: "nvidia", + credentialEnv: "NVIDIA_API_KEY", + endpointUrl: BUILD_ENDPOINT_URL, + helpUrl: "https://build.nvidia.com/settings/api-keys", + modelMode: "catalog", + defaultModel: DEFAULT_CLOUD_MODEL, + skipVerify: true, + }, + openai: { + label: "OpenAI", + providerName: "openai-api", + providerType: "openai", + credentialEnv: "OPENAI_API_KEY", + endpointUrl: OPENAI_ENDPOINT_URL, + helpUrl: "https://platform.openai.com/api-keys", + modelMode: "curated", + defaultModel: "gpt-5.4", + skipVerify: true, + }, + anthropic: { + label: "Anthropic", + providerName: "anthropic-prod", + providerType: "anthropic", + credentialEnv: "ANTHROPIC_API_KEY", + endpointUrl: ANTHROPIC_ENDPOINT_URL, + helpUrl: "https://console.anthropic.com/settings/keys", + modelMode: "curated", + defaultModel: "claude-sonnet-4-6", + }, + anthropicCompatible: { + label: "Other Anthropic-compatible endpoint", + providerName: "compatible-anthropic-endpoint", + providerType: "anthropic", + credentialEnv: "COMPATIBLE_ANTHROPIC_API_KEY", + endpointUrl: "", + helpUrl: null, + modelMode: "input", + defaultModel: "", + }, + gemini: { + label: "Google Gemini", + providerName: "gemini-api", + providerType: "openai", + credentialEnv: "GEMINI_API_KEY", + endpointUrl: 
GEMINI_ENDPOINT_URL, + helpUrl: "https://aistudio.google.com/app/apikey", + modelMode: "curated", + defaultModel: "gemini-2.5-flash", + skipVerify: true, + }, + custom: { + label: "Other OpenAI-compatible endpoint", + providerName: "compatible-endpoint", + providerType: "openai", + credentialEnv: "COMPATIBLE_API_KEY", + endpointUrl: "", + helpUrl: null, + modelMode: "input", + defaultModel: "", + skipVerify: true, + }, +}; diff --git a/src/lib/onboard-requests.test.ts b/src/lib/onboard-requests.test.ts new file mode 100644 index 0000000000..a683c9c6a7 --- /dev/null +++ b/src/lib/onboard-requests.test.ts @@ -0,0 +1,129 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. +import { + getEffectiveProviderName, + getNonInteractiveModel, + getNonInteractiveProvider, + getRequestedModelHint, + getRequestedProviderHint, + getRequestedSandboxNameHint, + getResumeConfigConflicts, + getResumeSandboxConflict, +} from "../../dist/lib/onboard-requests"; + +describe("onboard-requests", () => { + it("resolves requested sandbox hints and resume sandbox conflicts", () => { + const env = { NEMOCLAW_SANDBOX_NAME: "My-Assistant" } as NodeJS.ProcessEnv; + expect(getRequestedSandboxNameHint(env)).toBe("my-assistant"); + expect(getResumeSandboxConflict({ sandboxName: "my-assistant" }, env)).toBeNull(); + expect(getResumeSandboxConflict({ sandboxName: "other-sandbox" }, env)).toEqual({ + requestedSandboxName: "my-assistant", + recordedSandboxName: "other-sandbox", + }); + }); + + it("resolves and validates non-interactive provider/model inputs", () => { + const env = { + NEMOCLAW_PROVIDER: "cloud", + NEMOCLAW_MODEL: "nvidia/test-model", + } as NodeJS.ProcessEnv; + expect(getNonInteractiveProvider({ env })).toBe("build"); + expect(getRequestedProviderHint(true, { env 
})).toBe("build"); + expect(getRequestedProviderHint(false, { env })).toBeNull(); + expect( + getNonInteractiveModel("build", { + env, + isSafeModelId: (value) => value === "nvidia/test-model", + }), + ).toBe("nvidia/test-model"); + expect( + getRequestedModelHint(true, { + env, + isSafeModelId: (value) => value === "nvidia/test-model", + }), + ).toBe("nvidia/test-model"); + expect(getRequestedModelHint(false, { env })).toBeNull(); + }); + + it("reports invalid non-interactive provider and model inputs before onboarding begins", () => { + const error = vi.fn(); + const exit = vi.fn((code: number) => { + throw new Error(`exit:${code}`); + }) as never; + + expect(() => + getNonInteractiveProvider({ + env: { NEMOCLAW_PROVIDER: "bogus" } as NodeJS.ProcessEnv, + error, + exit, + }), + ).toThrow("exit:1"); + expect(error).toHaveBeenCalledWith(" Unsupported NEMOCLAW_PROVIDER: bogus"); + + const modelError = vi.fn(); + const modelExit = vi.fn((code: number) => { + throw new Error(`exit:${code}`); + }) as never; + expect(() => + getNonInteractiveModel("build", { + env: { NEMOCLAW_MODEL: "bad model" } as NodeJS.ProcessEnv, + error: modelError, + exit: modelExit, + isSafeModelId: () => false, + }), + ).toThrow("exit:1"); + expect(modelError).toHaveBeenCalledWith( + " Invalid NEMOCLAW_MODEL for provider 'build': bad model", + ); + }); + + it("maps requested providers to effective provider names and resume conflicts", () => { + const remoteProviderConfig = { + build: { providerName: "nvidia-prod" }, + openai: { providerName: "openai-api" }, + }; + expect(getEffectiveProviderName("build", remoteProviderConfig)).toBe("nvidia-prod"); + expect(getEffectiveProviderName("nim-local", remoteProviderConfig)).toBe("nvidia-nim"); + expect(getEffectiveProviderName("ollama", remoteProviderConfig)).toBe("ollama-local"); + expect(getEffectiveProviderName("vllm", remoteProviderConfig)).toBe("vllm-local"); + expect(getEffectiveProviderName("custom-provider", remoteProviderConfig)).toBe( + 
"custom-provider", + ); + + const env = { + NEMOCLAW_SANDBOX_NAME: "my-assistant", + NEMOCLAW_PROVIDER: "cloud", + NEMOCLAW_MODEL: "nvidia/other-model", + } as NodeJS.ProcessEnv; + expect( + getResumeConfigConflicts( + { + sandboxName: "my-assistant", + provider: "nvidia-nim", + model: "nvidia/nemotron-3-super-120b-a12b", + metadata: { fromDockerfile: null }, + } as never, + { + nonInteractive: true, + env, + remoteProviderConfig, + isSafeModelId: () => true, + }, + ), + ).toEqual([ + { + field: "provider", + requested: "nvidia-prod", + recorded: "nvidia-nim", + }, + { + field: "model", + requested: "nvidia/other-model", + recorded: "nvidia/nemotron-3-super-120b-a12b", + }, + ]); + }); +}); diff --git a/src/lib/onboard-requests.ts b/src/lib/onboard-requests.ts new file mode 100644 index 0000000000..29b3c9b09d --- /dev/null +++ b/src/lib/onboard-requests.ts @@ -0,0 +1,163 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { Session } from "./onboard-session"; +import { collectResumeConfigConflicts, detectResumeSandboxConflict } from "./onboard-resume"; + +const NON_INTERACTIVE_PROVIDER_ALIASES = { + cloud: "build", + nim: "nim-local", + vllm: "vllm", + anthropiccompatible: "anthropicCompatible", +} as const; + +const VALID_NON_INTERACTIVE_PROVIDERS = new Set([ + "build", + "openai", + "anthropic", + "anthropicCompatible", + "gemini", + "ollama", + "custom", + "nim-local", + "vllm", +]); + +export interface NonInteractiveRequestDeps { + env?: NodeJS.ProcessEnv; + error?: (message?: string) => void; + exit?: (code: number) => never; + isSafeModelId?: (value: string) => boolean; +} + +export function getRequestedSandboxNameHint(env: NodeJS.ProcessEnv = process.env): string | null { + const raw = env.NEMOCLAW_SANDBOX_NAME; + if (typeof raw !== "string") return null; + const normalized = raw.trim().toLowerCase(); + return normalized || null; +} + +export function 
getNonInteractiveProvider( + deps: NonInteractiveRequestDeps = {}, +): string | null { + const env = deps.env ?? process.env; + const error = deps.error ?? console.error; + const exit = deps.exit ?? ((code: number) => process.exit(code)); + const providerKey = String(env.NEMOCLAW_PROVIDER || "").trim().toLowerCase(); + if (!providerKey) return null; + + const normalized = + NON_INTERACTIVE_PROVIDER_ALIASES[ + providerKey as keyof typeof NON_INTERACTIVE_PROVIDER_ALIASES + ] ?? providerKey; + if (!VALID_NON_INTERACTIVE_PROVIDERS.has(normalized)) { + error(` Unsupported NEMOCLAW_PROVIDER: ${providerKey}`); + error( + " Valid values: build, openai, anthropic, anthropicCompatible, gemini, ollama, custom, nim-local, vllm", + ); + exit(1); + } + + return normalized; +} + +/** + * Resolve the requested non-interactive model id. + * + * NonInteractiveRequestDeps.isSafeModelId defaults to a permissive validator + * that always returns true, so getNonInteractiveModel performs no model-id + * validation unless the caller injects their own validator. Callers should pass + * a validator that enforces the allowed characters (letters, numbers, '.', '_', + * ':', '/', and '-') or delegate to a shared helper validator when available. + */ +export function getNonInteractiveModel( + providerKey: string, + deps: NonInteractiveRequestDeps = {}, +): string | null { + const env = deps.env ?? process.env; + const error = deps.error ?? console.error; + const exit = deps.exit ?? ((code: number) => process.exit(code)); + const isSafeModelId = deps.isSafeModelId ?? 
(() => true); + const model = String(env.NEMOCLAW_MODEL || "").trim(); + if (!model) return null; + if (!isSafeModelId(model)) { + error(` Invalid NEMOCLAW_MODEL for provider '${providerKey}': ${model}`); + error(" Model values may only contain letters, numbers, '.', '_', ':', '/', and '-'."); + exit(1); + } + return model; +} + +export function getRequestedProviderHint( + nonInteractive: boolean, + deps: NonInteractiveRequestDeps = {}, +): string | null { + return nonInteractive ? getNonInteractiveProvider(deps) : null; +} + +export function getRequestedModelHint( + nonInteractive: boolean, + deps: NonInteractiveRequestDeps = {}, +): string | null { + if (!nonInteractive) return null; + const providerKey = getRequestedProviderHint(nonInteractive, deps) || "cloud"; + return getNonInteractiveModel(providerKey, deps); +} + +export function getEffectiveProviderName( + providerKey: string | null, + remoteProviderConfig: Record, +): string | null { + if (!providerKey) return null; + if (remoteProviderConfig[providerKey]) { + return remoteProviderConfig[providerKey].providerName; + } + + switch (providerKey) { + case "nim-local": + return "nvidia-nim"; + case "ollama": + return "ollama-local"; + case "vllm": + return "vllm-local"; + default: + return providerKey; + } +} + +export function getResumeSandboxConflict( + session: Pick | null | undefined, + env: NodeJS.ProcessEnv = process.env, +) { + return detectResumeSandboxConflict(session, getRequestedSandboxNameHint(env)); +} + +export function getResumeConfigConflicts( + session: Session | null | undefined, + opts: { + nonInteractive: boolean; + fromDockerfile?: string | null; + agent?: string | null; + env?: NodeJS.ProcessEnv; + error?: (message?: string) => void; + exit?: (code: number) => never; + isSafeModelId?: (value: string) => boolean; + remoteProviderConfig: Record; + }, +) { + const env = opts.env ?? 
process.env; + const deps: NonInteractiveRequestDeps = { + env, + error: opts.error, + exit: opts.exit, + isSafeModelId: opts.isSafeModelId, + }; + const requestedProvider = getRequestedProviderHint(opts.nonInteractive, deps); + return collectResumeConfigConflicts(session, { + requestedSandboxName: getRequestedSandboxNameHint(env), + requestedProvider: getEffectiveProviderName(requestedProvider, opts.remoteProviderConfig), + requestedModel: getRequestedModelHint(opts.nonInteractive, deps), + requestedFromDockerfile: opts.fromDockerfile || null, + requestedAgent: opts.agent || env.NEMOCLAW_AGENT || null, + }); +} diff --git a/src/lib/onboard-resume.test.ts b/src/lib/onboard-resume.test.ts new file mode 100644 index 0000000000..968c9b6645 --- /dev/null +++ b/src/lib/onboard-resume.test.ts @@ -0,0 +1,86 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. 
+import { createSession } from "../../dist/lib/onboard-session"; +import { + buildResumeConflictLines, + collectResumeConfigConflicts, + detectResumeSandboxConflict, +} from "../../dist/lib/onboard-resume"; + +describe("onboard-resume", () => { + it("detects sandbox conflicts only when requested and recorded names differ", () => { + const session = createSession({ sandboxName: "alpha" }); + + expect(detectResumeSandboxConflict(session, null)).toBeNull(); + expect(detectResumeSandboxConflict(session, "alpha")).toBeNull(); + expect(detectResumeSandboxConflict(session, "beta")).toEqual({ + requestedSandboxName: "beta", + recordedSandboxName: "alpha", + }); + }); + + it("collects provider/model/from/agent resume conflicts", () => { + const session = createSession({ + sandboxName: "alpha", + provider: "nvidia-prod", + model: "meta/llama-3.3-70b-instruct", + agent: "hermes", + metadata: { gatewayName: "nemoclaw", fromDockerfile: "/tmp/Recorded.Dockerfile" }, + }); + + expect( + collectResumeConfigConflicts(session, { + requestedSandboxName: "beta", + requestedProvider: "openai-api", + requestedModel: "gpt-5.4", + requestedFromDockerfile: "/tmp/Requested.Dockerfile", + requestedAgent: "openclaw", + }), + ).toEqual([ + { field: "sandbox", requested: "beta", recorded: "alpha" }, + { field: "provider", requested: "openai-api", recorded: "nvidia-prod" }, + { + field: "model", + requested: "gpt-5.4", + recorded: "meta/llama-3.3-70b-instruct", + }, + { + field: "fromDockerfile", + requested: "/tmp/Requested.Dockerfile", + recorded: "/tmp/Recorded.Dockerfile", + }, + { field: "agent", requested: "openclaw", recorded: "hermes" }, + ]); + }); + + it("formats resume conflict guidance consistently", () => { + const lines = buildResumeConflictLines([ + { field: "sandbox", requested: "beta", recorded: "alpha" }, + { field: "fromDockerfile", requested: null, recorded: "/tmp/Recorded.Dockerfile" }, + { field: "provider", requested: "openai-api", recorded: "nvidia-prod" }, + ]); + + 
expect(lines).toEqual([ + " Resumable state belongs to sandbox 'alpha', not 'beta'.", + " Session was started with --from '/tmp/Recorded.Dockerfile'; rerun with that path to resume it.", + " Resumable state recorded provider 'nvidia-prod', not 'openai-api'.", + " Run: nemoclaw onboard # start a fresh onboarding session", + " Or rerun with the original settings to continue that session.", + ]); + }); + + it("tells users to resume without --from when the recorded session omitted it", () => { + const lines = buildResumeConflictLines([ + { field: "fromDockerfile", requested: "/tmp/Requested.Dockerfile", recorded: null }, + ]); + + expect(lines).toEqual([ + " Session was started without --from; rerun without --from to resume it.", + " Run: nemoclaw onboard # start a fresh onboarding session", + " Or rerun with the original settings to continue that session.", + ]); + }); +}); diff --git a/src/lib/onboard-resume.ts b/src/lib/onboard-resume.ts new file mode 100644 index 0000000000..e7300cf552 --- /dev/null +++ b/src/lib/onboard-resume.ts @@ -0,0 +1,132 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import path from "node:path"; + +import type { Session } from "./onboard-session"; + +export type ResumeConflictField = + | "sandbox" + | "provider" + | "model" + | "fromDockerfile" + | "agent"; + +export interface ResumeSandboxConflict { + requestedSandboxName: string; + recordedSandboxName: string; +} + +export interface ResumeConfigConflict { + field: ResumeConflictField; + requested: string | null; + recorded: string | null; +} + +export interface ResumeConfigConflictOptions { + requestedSandboxName?: string | null; + requestedProvider?: string | null; + requestedModel?: string | null; + requestedFromDockerfile?: string | null; + requestedAgent?: string | null; +} + +export function detectResumeSandboxConflict( + session: Pick | null | undefined, + requestedSandboxName: string | null, +): ResumeSandboxConflict | null { + if (!requestedSandboxName || !session?.sandboxName) { + return null; + } + return requestedSandboxName !== session.sandboxName + ? { requestedSandboxName, recordedSandboxName: session.sandboxName } + : null; +} + +export function collectResumeConfigConflicts( + session: Session | null | undefined, + options: ResumeConfigConflictOptions = {}, +): ResumeConfigConflict[] { + const conflicts: ResumeConfigConflict[] = []; + + const sandboxConflict = detectResumeSandboxConflict(session, options.requestedSandboxName ?? 
null); + if (sandboxConflict) { + conflicts.push({ + field: "sandbox", + requested: sandboxConflict.requestedSandboxName, + recorded: sandboxConflict.recordedSandboxName, + }); + } + + if (options.requestedProvider && session?.provider && options.requestedProvider !== session.provider) { + conflicts.push({ + field: "provider", + requested: options.requestedProvider, + recorded: session.provider, + }); + } + + if (options.requestedModel && session?.model && options.requestedModel !== session.model) { + conflicts.push({ + field: "model", + requested: options.requestedModel, + recorded: session.model, + }); + } + + const requestedFrom = options.requestedFromDockerfile + ? path.resolve(options.requestedFromDockerfile) + : null; + const recordedFrom = session?.metadata?.fromDockerfile + ? path.resolve(session.metadata.fromDockerfile) + : null; + if (requestedFrom !== recordedFrom) { + conflicts.push({ + field: "fromDockerfile", + requested: requestedFrom, + recorded: recordedFrom, + }); + } + + const requestedAgent = options.requestedAgent ?? null; + const recordedAgent = session?.agent ?? 
null; + if (requestedAgent && recordedAgent && requestedAgent !== recordedAgent) { + conflicts.push({ + field: "agent", + requested: requestedAgent, + recorded: recordedAgent, + }); + } + + return conflicts; +} + +function formatResumeConflictLine(conflict: ResumeConfigConflict): string { + if (conflict.field === "sandbox") { + return ` Resumable state belongs to sandbox '${conflict.recorded}', not '${conflict.requested}'.`; + } + if (conflict.field === "agent") { + return ` Session was started with agent '${conflict.recorded}', not '${conflict.requested}'.`; + } + if (conflict.field === "fromDockerfile") { + if (!conflict.recorded) { + return " Session was started without --from; rerun without --from to resume it."; + } + if (!conflict.requested) { + return ` Session was started with --from '${conflict.recorded}'; rerun with that path to resume it.`; + } + return ` Session was started with --from '${conflict.recorded}', not '${conflict.requested}'.`; + } + return ` Resumable state recorded ${conflict.field} '${conflict.recorded}', not '${conflict.requested}'.`; +} + +export function buildResumeConflictLines(conflicts: readonly ResumeConfigConflict[]): string[] { + if (conflicts.length === 0) { + return []; + } + return [ + ...conflicts.map(formatResumeConflictLine), + " Run: nemoclaw onboard # start a fresh onboarding session", + " Or rerun with the original settings to continue that session.", + ]; +} diff --git a/src/lib/onboard-run-context.test.ts b/src/lib/onboard-run-context.test.ts new file mode 100644 index 0000000000..70ebf968c3 --- /dev/null +++ b/src/lib/onboard-run-context.test.ts @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import { createRequire } from "node:module"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +const require = createRequire(import.meta.url); +const contextDistPath = require.resolve("../../dist/lib/onboard-run-context"); +const bootstrapDistPath = require.resolve("../../dist/lib/onboard-bootstrap"); +const sessionDistPath = require.resolve("../../dist/lib/onboard-session"); +const originalHome = process.env.HOME; +let tmpDir: string; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-run-context-")); + process.env.HOME = tmpDir; + delete require.cache[contextDistPath]; + delete require.cache[bootstrapDistPath]; + delete require.cache[sessionDistPath]; +}); + +afterEach(() => { + delete require.cache[contextDistPath]; + delete require.cache[bootstrapDistPath]; + delete require.cache[sessionDistPath]; + fs.rmSync(tmpDir, { recursive: true, force: true }); + if (originalHome === undefined) { + delete process.env.HOME; + } else { + process.env.HOME = originalHome; + } +}); + +describe("createOnboardRunContext", () => { + it("keeps session access and step mutations synchronized", () => { + const { initializeOnboardRun } = require("../../dist/lib/onboard-bootstrap"); + const { createOnboardRunContext } = require("../../dist/lib/onboard-run-context"); + + const initializedRun = initializeOnboardRun({ + resume: false, + mode: "non-interactive", + requestedFromDockerfile: "./Dockerfile.custom", + requestedAgent: "hermes", + }); + + expect(initializedRun.ok).toBe(true); + if (!initializedRun.ok) { + throw new Error("expected onboarding initialization to succeed"); + } + + const context = createOnboardRunContext(initializedRun.value); + expect(context.fromDockerfile).toBe(path.resolve("./Dockerfile.custom")); + expect(context.session.mode).toBe("non-interactive"); + 
expect(context.session.agent).toBe("hermes"); + + context.startStep("preflight"); + context.completeStep("preflight"); + context.completeStep("messaging", { messagingChannels: ["telegram"] }); + context.completeStep("sandbox", { sandboxName: "alpha" }); + + expect(context.session.steps.preflight.status).toBe("complete"); + expect(context.session.messagingChannels).toEqual(["telegram"]); + expect(context.session.sandboxName).toBe("alpha"); + }); +}); diff --git a/src/lib/onboard-run-context.ts b/src/lib/onboard-run-context.ts new file mode 100644 index 0000000000..4f0828a87a --- /dev/null +++ b/src/lib/onboard-run-context.ts @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { InitializedOnboardRun } from "./onboard-bootstrap"; +import type { OnboardStepName } from "./onboard-fsm"; +import { createTrackedOnboardRun } from "./onboard-recorders"; +import type { PersistentOnboardDriver } from "./onboard-persistent-driver"; +import type { Session, SessionUpdates } from "./onboard-session"; + +export interface OnboardRunContext { + readonly driver: PersistentOnboardDriver; + readonly fromDockerfile: string | null; + readonly session: Session; + updateSession(mutator: (session: Session) => Session | void): Session; + startStep(stepName: OnboardStepName, updates?: SessionUpdates): Session; + completeStep(stepName: OnboardStepName, updates?: SessionUpdates): Session; + skipStep(stepName: OnboardStepName): Session; + failStep(stepName: OnboardStepName, message?: string | null): Session; + completeSession(updates?: SessionUpdates): Session; +} + +export function createOnboardRunContext(initializedRun: InitializedOnboardRun): OnboardRunContext { + const trackedRun = createTrackedOnboardRun(initializedRun.driver, initializedRun.session); + + return { + driver: initializedRun.driver, + fromDockerfile: initializedRun.fromDockerfile, + get session(): Session { 
+ return trackedRun.session; + }, + updateSession(mutator): Session { + return trackedRun.update(mutator); + }, + startStep(stepName, updates = {}): Session { + return trackedRun.startStep(stepName, updates); + }, + completeStep(stepName, updates = {}): Session { + return trackedRun.completeStep(stepName, updates); + }, + skipStep(stepName): Session { + return trackedRun.skipStep(stepName); + }, + failStep(stepName, message = null): Session { + return trackedRun.failStep(stepName, message); + }, + completeSession(updates = {}): Session { + return trackedRun.completeSession(updates); + }, + }; +} diff --git a/src/lib/onboard-runtime-flow.test.ts b/src/lib/onboard-runtime-flow.test.ts new file mode 100644 index 0000000000..2728b97344 --- /dev/null +++ b/src/lib/onboard-runtime-flow.test.ts @@ -0,0 +1,112 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. 
+import { runRuntimeSetupFlow } from "../../dist/lib/onboard-runtime-flow"; + +describe("runRuntimeSetupFlow", () => { + it("delegates agent setup and skips the openclaw sibling step", async () => { + const events: string[] = []; + const handleAgentSetup = vi.fn(async () => { + events.push("agent-setup"); + }); + + await runRuntimeSetupFlow( + { + sandboxName: "alpha", + model: "meta/llama-3.3-70b-instruct", + provider: "nvidia-prod", + agent: { name: "hermes" }, + resume: true, + session: { id: "resume-session" }, + }, + { + hasCompletedRuntimeSetup: true, + handleAgentSetup, + isOpenclawReady: () => false, + setupOpenclaw: async () => { + throw new Error("should not run openclaw setup for agent flow"); + }, + onSkip: (step, detail) => events.push(`skip:${step}:${detail}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + onSkipSiblingStep: (step) => events.push(`skip-sibling:${step}`), + }, + ); + + expect(handleAgentSetup).toHaveBeenCalledWith( + "alpha", + "meta/llama-3.3-70b-instruct", + "nvidia-prod", + { name: "hermes" }, + true, + { id: "resume-session" }, + ); + expect(events).toEqual(["agent-setup", "skip-sibling:openclaw"]); + }); + + it("skips OpenClaw setup when runtime is already complete and ready", async () => { + const events: string[] = []; + + await runRuntimeSetupFlow( + { + sandboxName: "alpha", + model: "gpt-5.4", + provider: "openai-api", + agent: null, + resume: true, + session: null, + }, + { + hasCompletedRuntimeSetup: true, + handleAgentSetup: async () => { + throw new Error("should not enter agent path"); + }, + isOpenclawReady: () => true, + setupOpenclaw: async () => { + throw new Error("should not rerun openclaw setup"); + }, + onSkip: (step, detail) => events.push(`skip:${step}:${detail}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + onSkipSiblingStep: (step) => 
events.push(`skip-sibling:${step}`), + }, + ); + + expect(events).toEqual(["skip:openclaw:alpha", "complete:openclaw", "skip-sibling:agent_setup"]); + }); + + it("runs OpenClaw setup when runtime has not been completed", async () => { + const events: string[] = []; + const setupOpenclaw = vi.fn(async () => { + events.push("setup-openclaw"); + }); + + await runRuntimeSetupFlow( + { + sandboxName: "alpha", + model: "gpt-5.4", + provider: "openai-api", + agent: null, + resume: false, + session: null, + }, + { + hasCompletedRuntimeSetup: false, + handleAgentSetup: async () => { + throw new Error("should not enter agent path"); + }, + isOpenclawReady: () => false, + setupOpenclaw, + onSkip: (step, detail) => events.push(`skip:${step}:${detail}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + onSkipSiblingStep: (step) => events.push(`skip-sibling:${step}`), + }, + ); + + expect(setupOpenclaw).toHaveBeenCalledWith("alpha", "gpt-5.4", "openai-api"); + expect(events).toEqual(["start:openclaw", "setup-openclaw", "complete:openclaw", "skip-sibling:agent_setup"]); + }); +}); diff --git a/src/lib/onboard-runtime-flow.ts b/src/lib/onboard-runtime-flow.ts new file mode 100644 index 0000000000..787cf3d6fb --- /dev/null +++ b/src/lib/onboard-runtime-flow.ts @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0
+
+export interface RuntimeSetupState<TAgent> {
+  sandboxName: string;
+  model: string;
+  provider: string;
+  agent: TAgent | null;
+  resume: boolean;
+  session: unknown;
+}
+
+export interface RuntimeSetupDeps<TAgent> {
+  hasCompletedRuntimeSetup: boolean;
+  handleAgentSetup: (
+    sandboxName: string,
+    model: string,
+    provider: string,
+    agent: TAgent,
+    resume: boolean,
+    session: unknown,
+  ) => Promise<void>;
+  isOpenclawReady: (sandboxName: string) => boolean;
+  setupOpenclaw: (sandboxName: string, model: string, provider: string) => Promise<void>;
+  onSkip: (stepName: "openclaw", detail: string) => void;
+  onStartStep: (
+    stepName: "openclaw",
+    updates?: { sandboxName?: string; provider?: string; model?: string },
+  ) => void;
+  onCompleteStep: (
+    stepName: "openclaw",
+    updates?: { sandboxName?: string; provider?: string; model?: string },
+  ) => void;
+  onSkipSiblingStep: (stepName: "openclaw" | "agent_setup") => void;
+}
+
+export async function runRuntimeSetupFlow<TAgent>(
+  state: RuntimeSetupState<TAgent>,
+  deps: RuntimeSetupDeps<TAgent>,
+): Promise<void> {
+  if (state.agent) {
+    await deps.handleAgentSetup(
+      state.sandboxName,
+      state.model,
+      state.provider,
+      state.agent,
+      state.resume,
+      state.session,
+    );
+    deps.onSkipSiblingStep("openclaw");
+    return;
+  }
+
+  const resumeOpenclaw = deps.hasCompletedRuntimeSetup && deps.isOpenclawReady(state.sandboxName);
+  if (resumeOpenclaw) {
+    deps.onSkip("openclaw", state.sandboxName);
+    deps.onCompleteStep("openclaw", {
+      sandboxName: state.sandboxName,
+      provider: state.provider,
+      model: state.model,
+    });
+  } else {
+    deps.onStartStep("openclaw", {
+      sandboxName: state.sandboxName,
+      provider: state.provider,
+      model: state.model,
+    });
+    await deps.setupOpenclaw(state.sandboxName, state.model, state.provider);
+    deps.onCompleteStep("openclaw", {
+      sandboxName: state.sandboxName,
+      provider: state.provider,
+      model: state.model,
+    });
+  }
+  deps.onSkipSiblingStep("agent_setup");
+}
diff --git 
a/src/lib/onboard-runtime-helpers.ts b/src/lib/onboard-runtime-helpers.ts new file mode 100644 index 0000000000..8739e13c74 --- /dev/null +++ b/src/lib/onboard-runtime-helpers.ts @@ -0,0 +1,172 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import path from "node:path"; +import { spawnSync } from "node:child_process"; + +import { getSandboxStateFromOutputs } from "./gateway-state"; +import { parseGatewayInference } from "./inference-config"; +import { + installOpenshell as installOpenshellWithDeps, + isOpenshellInstalled as detectInstalledOpenshell, + waitForSandboxReady as waitForSandboxReadyWithDeps, +} from "./onboard-openshell"; +import { + getContainerRuntime as getContainerRuntimeWithDeps, + printRemediationActions as printRemediationActionsWithDeps, +} from "./onboard-remediation"; +import { inferContainerRuntime } from "./platform"; +import { resolveOpenshell } from "./resolve-openshell"; + +export interface SandboxRuntimeDeps { + runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string; + runOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => { status: number }; + note?: (message: string) => void; + dashboardPort?: number; + removeSandbox?: (sandboxName: string) => void; +} + +export interface GatewayDestroyDeps { + runOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => { status: number }; + clearRegistryAll: () => void; + run: (command: string | string[], opts?: { ignoreError?: boolean }) => unknown; +} + +export interface InstallOpenshellDeps { + scriptPath: string; + rootDir: string; + env: NodeJS.ProcessEnv; + getFutureShellPathHint: (binDir: string, pathValue: string) => string | null; + errorWriter?: (message?: string) => void; +} + +export function sleep(seconds: number): void { + spawnSync("sleep", [String(seconds)]); +} + +/** + * Remove known_hosts lines whose host field 
contains an openshell-* entry. + * Preserves blank lines and comments. Returns the cleaned string. + */ +export function pruneKnownHostsEntries(contents: string): string { + return contents + .split("\n") + .filter((line) => { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) return true; + const hostField = trimmed.split(/\s+/)[0]; + return !hostField.split(",").some((host) => host.startsWith("openshell-")); + }) + .join("\n"); +} + +export function getSandboxReuseState( + sandboxName: string | null, + deps: SandboxRuntimeDeps, +): string { + if (!sandboxName) return "missing"; + const getOutput = deps.runCaptureOpenshell(["sandbox", "get", sandboxName], { ignoreError: true }); + const listOutput = deps.runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); + return getSandboxStateFromOutputs(sandboxName, getOutput, listOutput); +} + +export function repairRecordedSandbox(sandboxName: string | null, deps: SandboxRuntimeDeps): void { + if (!sandboxName) return; + deps.note?.(` [resume] Cleaning up recorded sandbox '${sandboxName}' before recreating it.`); + deps.runOpenshell(["forward", "stop", String(deps.dashboardPort ?? 0)], { ignoreError: true }); + deps.runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true }); + deps.removeSandbox?.(sandboxName); +} + +export function destroyGateway(gatewayName: string, deps: GatewayDestroyDeps): void { + const destroyResult = deps.runOpenshell(["gateway", "destroy", "-g", gatewayName], { + ignoreError: true, + }); + // Clear the local registry so `nemoclaw list` stays consistent with OpenShell state. (#532) + if (destroyResult.status === 0) { + deps.clearRegistryAll(); + } + // openshell gateway destroy doesn't remove Docker volumes, which leaves + // corrupted cluster state that breaks the next gateway start. Clean them up. + deps.run( + `docker volume ls -q --filter "name=openshell-cluster-${gatewayName}" | grep . 
&& docker volume ls -q --filter "name=openshell-cluster-${gatewayName}" | xargs docker volume rm || true`, + { ignoreError: true }, + ); +} + +export function installOpenshell(deps: InstallOpenshellDeps): { + installed: boolean; + localBin: string | null; + futureShellPathHint: string | null; + updatedPathValue: string | null; + openshellBinary: string | null; +} { + return installOpenshellWithDeps({ + scriptPath: deps.scriptPath, + rootDir: deps.rootDir, + env: deps.env, + spawnSync, + existsSync: fs.existsSync, + resolveOpenshell, + getFutureShellPathHint: deps.getFutureShellPathHint, + errorWriter: deps.errorWriter, + }); +} + +export function isOpenshellInstalled(): boolean { + return detectInstalledOpenshell(resolveOpenshell); +} + +export function getContainerRuntime( + runCapture: (command: string, options?: { ignoreError?: boolean }) => string, +): string { + return getContainerRuntimeWithDeps({ runCapture, inferContainerRuntime }); +} + +export function printRemediationActions( + actions: unknown, + errorWriter: (message?: string) => void = console.error, +): void { + return printRemediationActionsWithDeps(actions as never, errorWriter); +} + +export function waitForSandboxReady( + sandboxName: string, + deps: { runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string }, + attempts = 10, + delaySeconds = 2, +): boolean { + return waitForSandboxReadyWithDeps( + sandboxName, + { + runCaptureOpenshell: deps.runCaptureOpenshell, + sleep, + }, + attempts, + delaySeconds, + ); +} + +export function verifyInferenceRoute( + _provider: string, + _model: string, + runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string, +): void { + const output = runCaptureOpenshell(["inference", "get"], { ignoreError: true }); + if (!output || /Gateway inference:\s*[\r\n]+\s*Not configured/i.test(output)) { + console.error(" OpenShell inference route was not configured."); + process.exit(1); + } +} + +export function 
isInferenceRouteReady( + provider: string, + model: string, + runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string, +): boolean { + const live = parseGatewayInference( + runCaptureOpenshell(["inference", "get"], { ignoreError: true }), + ); + return Boolean(live && live.provider === provider && live.model === model); +} diff --git a/src/lib/onboard-sandbox-build-config.ts b/src/lib/onboard-sandbox-build-config.ts new file mode 100644 index 0000000000..d6c68bd64c --- /dev/null +++ b/src/lib/onboard-sandbox-build-config.ts @@ -0,0 +1,261 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; + +import type { WebSearchConfig } from "./web-search"; + +export const SANDBOX_BASE_IMAGE = "ghcr.io/nvidia/nemoclaw/sandbox-base"; +export const SANDBOX_BASE_TAG = "latest"; + +export interface SandboxBaseImageDeps { + run: ( + command: string | string[], + opts?: { suppressOutput?: boolean }, + ) => { status: number; stdout?: string; stderr?: string }; + runCapture: ( + command: string | string[], + opts?: { ignoreError?: boolean }, + ) => string; +} + +export interface SandboxBuildConfigDeps { + sandboxBaseImage: string; +} + +function encodeDockerJsonArg(value: unknown): string { + return Buffer.from(JSON.stringify(value || {}), "utf8").toString("base64"); +} + +/** + * Pull sandbox-base:latest from GHCR and resolve its repo digest. + * Returns { digest, ref } on success, or null when the pull or + * inspect fails (offline, GHCR outage, local-only build). 
+ */ +export function pullAndResolveBaseImageDigest( + deps: SandboxBaseImageDeps, +): { digest: string; ref: string } | null { + const imageWithTag = `${SANDBOX_BASE_IMAGE}:${SANDBOX_BASE_TAG}`; + try { + deps.run(["docker", "pull", imageWithTag], { suppressOutput: true }); + } catch { + // Pull failed — caller should fall back to unpin :latest + return null; + } + + let inspectOutput; + try { + inspectOutput = deps.runCapture( + ["docker", "inspect", "--format", "{{json .RepoDigests}}", imageWithTag], + { ignoreError: false }, + ); + } catch { + return null; + } + + // RepoDigests is a JSON array like ["ghcr.io/nvidia/nemoclaw/sandbox-base@sha256:abc..."]. + // Filter to the entry matching our registry — index ordering is not guaranteed. + let repoDigests; + try { + repoDigests = JSON.parse(inspectOutput || "[]"); + } catch { + return null; + } + const repoDigest = Array.isArray(repoDigests) + ? repoDigests.find((entry) => entry.startsWith(`${SANDBOX_BASE_IMAGE}@sha256:`)) + : null; + if (!repoDigest) return null; + + const digest = repoDigest.slice(repoDigest.indexOf("@") + 1); + const ref = `${SANDBOX_BASE_IMAGE}@${digest}`; + return { digest, ref }; +} + +export function getSandboxInferenceConfig( + model: string, + provider: string | null = null, + preferredInferenceApi: string | null = null, +): { + providerKey: string; + primaryModelRef: string; + inferenceBaseUrl: string; + inferenceApi: string; + inferenceCompat: { supportsStore: boolean } | null; +} { + let providerKey; + let primaryModelRef; + let inferenceBaseUrl = "https://inference.local/v1"; + let inferenceApi = preferredInferenceApi || "openai-completions"; + let inferenceCompat = null; + + switch (provider) { + case "openai-api": + providerKey = "openai"; + primaryModelRef = `openai/${model}`; + break; + case "anthropic-prod": + case "compatible-anthropic-endpoint": + providerKey = "anthropic"; + primaryModelRef = `anthropic/${model}`; + inferenceBaseUrl = "https://inference.local"; + inferenceApi 
= "anthropic-messages";
+      break;
+    case "gemini-api":
+      providerKey = "inference";
+      primaryModelRef = `inference/${model}`;
+      inferenceCompat = {
+        supportsStore: false,
+      };
+      break;
+    case "compatible-endpoint":
+      providerKey = "inference";
+      primaryModelRef = `inference/${model}`;
+      inferenceCompat = {
+        supportsStore: false,
+      };
+      break;
+    case "nvidia-prod":
+    case "nvidia-nim":
+    default:
+      providerKey = "inference";
+      primaryModelRef = `inference/${model}`;
+      break;
+  }
+
+  return { providerKey, primaryModelRef, inferenceBaseUrl, inferenceApi, inferenceCompat };
+}
+
+export function patchStagedDockerfile(
+  dockerfilePath: string,
+  model: string,
+  chatUiUrl: string,
+  buildId = String(Date.now()),
+  provider: string | null = null,
+  preferredInferenceApi: string | null = null,
+  webSearchConfig: WebSearchConfig | null = null,
+  messagingChannels: string[] = [],
+  messagingAllowedIds: Record<string, unknown> = {},
+  discordGuilds: Record<string, unknown> = {},
+  baseImageRef: string | null = null,
+  deps: SandboxBuildConfigDeps,
+): void {
+  const { providerKey, primaryModelRef, inferenceBaseUrl, inferenceApi, inferenceCompat } =
+    getSandboxInferenceConfig(model, provider, preferredInferenceApi);
+  let dockerfile = fs.readFileSync(dockerfilePath, "utf8");
+  // Pin the base image to a specific digest when available (#1904).
+  // The ref must come from pullAndResolveBaseImageDigest() — never from
+  // blueprint.yaml, whose digest belongs to a different registry.
+  // Only rewrite when the current value already points at our sandbox-base
+  // image — custom --from Dockerfiles may use a different base.
+ if (baseImageRef) { + dockerfile = dockerfile.replace(/^ARG BASE_IMAGE=(.*)$/m, (line, currentValue) => { + const trimmed = String(currentValue).trim(); + if ( + trimmed.startsWith(`${deps.sandboxBaseImage}:`) || + trimmed.startsWith(`${deps.sandboxBaseImage}@`) + ) { + return `ARG BASE_IMAGE=${baseImageRef}`; + } + return line; + }); + } + dockerfile = dockerfile.replace(/^ARG NEMOCLAW_MODEL=.*$/m, `ARG NEMOCLAW_MODEL=${model}`); + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_PROVIDER_KEY=.*$/m, + `ARG NEMOCLAW_PROVIDER_KEY=${providerKey}`, + ); + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_PRIMARY_MODEL_REF=.*$/m, + `ARG NEMOCLAW_PRIMARY_MODEL_REF=${primaryModelRef}`, + ); + dockerfile = dockerfile.replace(/^ARG CHAT_UI_URL=.*$/m, `ARG CHAT_UI_URL=${chatUiUrl}`); + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_INFERENCE_BASE_URL=.*$/m, + `ARG NEMOCLAW_INFERENCE_BASE_URL=${inferenceBaseUrl}`, + ); + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_INFERENCE_API=.*$/m, + `ARG NEMOCLAW_INFERENCE_API=${inferenceApi}`, + ); + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_INFERENCE_COMPAT_B64=.*$/m, + `ARG NEMOCLAW_INFERENCE_COMPAT_B64=${encodeDockerJsonArg(inferenceCompat)}`, + ); + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_BUILD_ID=.*$/m, + `ARG NEMOCLAW_BUILD_ID=${buildId}`, + ); + // Honor NEMOCLAW_CONTEXT_WINDOW / NEMOCLAW_MAX_TOKENS / NEMOCLAW_REASONING + // so the user can tune model metadata without editing the Dockerfile. 
+ const POSITIVE_INT_RE = /^[1-9][0-9]*$/; + const contextWindow = process.env.NEMOCLAW_CONTEXT_WINDOW; + if (contextWindow && POSITIVE_INT_RE.test(contextWindow)) { + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_CONTEXT_WINDOW=.*$/m, + `ARG NEMOCLAW_CONTEXT_WINDOW=${contextWindow}`, + ); + } + const maxTokens = process.env.NEMOCLAW_MAX_TOKENS; + if (maxTokens && POSITIVE_INT_RE.test(maxTokens)) { + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_MAX_TOKENS=.*$/m, + `ARG NEMOCLAW_MAX_TOKENS=${maxTokens}`, + ); + } + const reasoning = process.env.NEMOCLAW_REASONING; + if (reasoning === "true" || reasoning === "false") { + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_REASONING=.*$/m, + `ARG NEMOCLAW_REASONING=${reasoning}`, + ); + } + // Honor NEMOCLAW_PROXY_HOST / NEMOCLAW_PROXY_PORT exported in the host + // shell so the sandbox-side nemoclaw-start.sh sees them via $ENV at runtime. + // Without this, the host export is silently dropped at image build time and + // the sandbox falls back to the default 10.200.0.1:3128 proxy. See #1409. + const PROXY_HOST_RE = /^[A-Za-z0-9._:-]+$/; + const PROXY_PORT_RE = /^[0-9]{1,5}$/; + const proxyHostEnv = process.env.NEMOCLAW_PROXY_HOST; + if (proxyHostEnv && PROXY_HOST_RE.test(proxyHostEnv)) { + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_PROXY_HOST=.*$/m, + `ARG NEMOCLAW_PROXY_HOST=${proxyHostEnv}`, + ); + } + const proxyPortEnv = process.env.NEMOCLAW_PROXY_PORT; + if (proxyPortEnv && PROXY_PORT_RE.test(proxyPortEnv)) { + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_PROXY_PORT=.*$/m, + `ARG NEMOCLAW_PROXY_PORT=${proxyPortEnv}`, + ); + } + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_WEB_SEARCH_ENABLED=.*$/m, + `ARG NEMOCLAW_WEB_SEARCH_ENABLED=${webSearchConfig ? "1" : "0"}`, + ); + // Onboard flow expects immediate dashboard access without device pairing, + // so disable device auth for images built during onboard (see #1217). 
+ dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_DISABLE_DEVICE_AUTH=.*$/m, + `ARG NEMOCLAW_DISABLE_DEVICE_AUTH=1`, + ); + if (messagingChannels.length > 0) { + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_MESSAGING_CHANNELS_B64=.*$/m, + `ARG NEMOCLAW_MESSAGING_CHANNELS_B64=${encodeDockerJsonArg(messagingChannels)}`, + ); + } + if (Object.keys(messagingAllowedIds).length > 0) { + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_MESSAGING_ALLOWED_IDS_B64=.*$/m, + `ARG NEMOCLAW_MESSAGING_ALLOWED_IDS_B64=${encodeDockerJsonArg(messagingAllowedIds)}`, + ); + } + if (Object.keys(discordGuilds).length > 0) { + dockerfile = dockerfile.replace( + /^ARG NEMOCLAW_DISCORD_GUILDS_B64=.*$/m, + `ARG NEMOCLAW_DISCORD_GUILDS_B64=${encodeDockerJsonArg(discordGuilds)}`, + ); + } + fs.writeFileSync(dockerfilePath, dockerfile); +} diff --git a/src/lib/onboard-sandbox-create.ts b/src/lib/onboard-sandbox-create.ts new file mode 100644 index 0000000000..d9f95f08dc --- /dev/null +++ b/src/lib/onboard-sandbox-create.ts @@ -0,0 +1,830 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0
+
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+
+export interface SandboxCreateWebSearchConfig {
+  fetchEnabled?: boolean | null;
+}
+
+export interface SandboxCreateAgent {
+  name?: string | null;
+  forwardPort?: number;
+  expectedVersion?: string | null;
+}
+
+export interface SandboxCreateDeps {
+  step: (current: number, total: number, message: string) => void;
+  validateName: (value: string, label: string) => string;
+  promptValidatedSandboxName: () => Promise<string>;
+  controlUiPort: number;
+  dashboardPort: number;
+  getCredential: (envKey: string) => string | null;
+  normalizeCredentialValue: (value: string | null | undefined) => string | null;
+  messagingChannels: Array<{
+    name: string;
+    envKey: string;
+    appTokenEnvKey?: string | null;
+    allowIdsMode?: string | null;
+    userIdEnvKey?: string | null;
+  }>;
+  registry: {
+    getSandbox: (sandboxName: string) => any;
+    updateSandbox: (sandboxName: string, patch: Record<string, unknown>) => void;
+    removeSandbox: (sandboxName: string) => void;
+    registerSandbox: (entry: Record<string, unknown>) => void;
+  };
+  makeConflictProbe: () => any;
+  isNonInteractive: () => boolean;
+  promptOrDefault: (
+    question: string,
+    fallback?: string | null,
+    defaultValue?: string | null,
+  ) => Promise<string>;
+  getSandboxReuseState: (sandboxName: string) => string;
+  providerExistsInGateway: (name: string) => boolean;
+  detectMessagingCredentialRotation: (
+    sandboxName: string,
+    tokenDefs: Array<{ name: string; envKey: string; token: string | null }>,
+  ) => { changed: boolean; changedProviders: string[] };
+  isRecreateSandbox: () => boolean;
+  upsertMessagingProviders: (
+    tokenDefs: Array<{ name: string; envKey: string; token: string | null }>,
+  ) => string[];
+  note: (message: string) => void;
+  ensureDashboardForward: (sandboxName: string, chatUiUrl: string) => void;
+  sandboxState: {
+    backupSandboxState: (sandboxName: string) => any;
+    restoreSandboxState: (sandboxName: string, 
backupPath: string) => any;
+  };
+  hashCredential: (value: string) => string | null;
+  onboardSession: {
+    updateSession: (updater: (current: any) => any) => void;
+  };
+  runOpenshell: (
+    args: string[],
+    opts?: { ignoreError?: boolean },
+  ) => { status: number; stdout?: string; stderr?: string };
+  agentOnboard: {
+    createAgentSandbox: (agent: SandboxCreateAgent) => { buildCtx: string; stagedDockerfile: string };
+    getAgentPermissivePolicyPath: (agent: SandboxCreateAgent) => string | null;
+    getAgentPolicyPath: (agent: SandboxCreateAgent) => string | null;
+  };
+  stageOptimizedSandboxBuildContext: (root: string) => { buildCtx: string; stagedDockerfile: string };
+  root: string;
+  webSearchBraveApiKeyEnv: string;
+  buildSubprocessEnv: () => NodeJS.ProcessEnv;
+  formatEnvAssignment: (name: string, value: string) => string;
+  runCapture: (command: string | string[], opts?: { ignoreError?: boolean }) => string;
+  sandboxBaseImage: string;
+  sandboxBaseTag: string;
+  pullAndResolveBaseImageDigest: () => { digest: string; ref: string } | null;
+  patchStagedDockerfile: (
+    dockerfilePath: string,
+    model: string,
+    chatUiUrl: string,
+    buildMeta: string,
+    provider: string,
+    preferredInferenceApi: string | null,
+    webSearchConfig: SandboxCreateWebSearchConfig | null,
+    activeMessagingChannels: string[],
+    messagingAllowedIds: Record<string, unknown>,
+    discordGuilds: Record<string, unknown>,
+    baseImageRef?: string | null,
+  ) => void;
+  openshellShellCommand: (args: string[], options?: { openshellBinary?: string }) => string;
+  streamSandboxCreate: (
+    command: string,
+    env: NodeJS.ProcessEnv,
+    options: { readyCheck: () => boolean },
+  ) => Promise<{ status: number; output: string }>;
+  run: (
+    command: string | string[],
+    opts?: { ignoreError?: boolean; suppressOutput?: boolean },
+  ) => { status: number; stdout?: string; stderr?: string };
+  runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string;
+  isSandboxReady: (output: string, sandboxName: string) => boolean;
+  
sleep: (seconds: number) => void;
+  classifySandboxCreateFailure: (output: string) => { kind: string };
+  printSandboxCreateRecoveryHints: (output: string) => void;
+  agentDefs: {
+    loadAgent: (name: string) => { expectedVersion?: string | null };
+  };
+  runFile: (
+    file: string,
+    args: string[],
+    opts?: { ignoreError?: boolean },
+  ) => { status?: number } | void;
+  scriptsDir: string;
+  gatewayName: string;
+  discordSnowflakeRe: RegExp;
+}
+
+// eslint-disable-next-line complexity
+export async function runCreateSandbox(
+  gpu: unknown,
+  model: string,
+  provider: string,
+  preferredInferenceApi: string | null = null,
+  sandboxNameOverride: string | null = null,
+  webSearchConfig: SandboxCreateWebSearchConfig | null = null,
+  enabledChannels: string[] | null = null,
+  fromDockerfile: string | null = null,
+  agent: SandboxCreateAgent | null = null,
+  dangerouslySkipPermissions = false,
+  deps: SandboxCreateDeps,
+): Promise<void> {
+  deps.step(6, 8, "Creating sandbox");
+
+  const sandboxName = deps.validateName(
+    sandboxNameOverride ?? (await deps.promptValidatedSandboxName()),
+    "sandbox name",
+  );
+  const effectivePort = agent ? agent.forwardPort : deps.controlUiPort;
+  const chatUiUrl = process.env.CHAT_UI_URL || `http://127.0.0.1:${effectivePort}`;
+
+  // Check whether messaging providers will be needed — this must happen before
+  // the sandbox reuse decision so we can detect stale sandboxes that were created
+  // without provider attachments (security: prevents legacy raw-env-var leaks).
+  const getMessagingToken = (envKey: string): string | null =>
+    deps.getCredential(envKey) || deps.normalizeCredentialValue(process.env[envKey]) || null;
+
+  // The UI toggle list can include channels the user toggled on but then
+  // skipped the token prompt for. Only channels with a real token will have a
+  // provider attached, so the conflict check must filter out the skipped ones
+  // (otherwise we warn about phantom channels that will never poll).
+ const conflictCheckChannels: string[] = Array.isArray(enabledChannels) + ? enabledChannels.filter((name) => { + const def = deps.messagingChannels.find((channel) => channel.name === name); + return def ? !!getMessagingToken(def.envKey) : false; + }) + : []; + + // Messaging channels like Telegram (getUpdates), Discord (gateway), and Slack + // (Socket Mode) enforce one consumer per bot token. Two sandboxes sharing + // a token silently break both bridges (see #1953). Warn before we commit. + if (conflictCheckChannels.length > 0) { + const { backfillMessagingChannels, findChannelConflicts } = require("./messaging-conflict"); + backfillMessagingChannels(deps.registry, deps.makeConflictProbe()); + const conflicts = findChannelConflicts(sandboxName, conflictCheckChannels, deps.registry); + if (conflicts.length > 0) { + for (const { channel, sandbox } of conflicts) { + console.log( + ` ⚠ Sandbox '${sandbox}' already has ${channel} enabled. Bot tokens only allow one sandbox to poll — continuing will break both bridges.`, + ); + } + if (deps.isNonInteractive()) { + console.error( + " Aborting: resolve the messaging channel conflict above or run `nemoclaw destroy` on the other sandbox.", + ); + process.exit(1); + } + const answer = (await deps.promptOrDefault(" Continue anyway? [y/N]: ", null, "n")) + .trim() + .toLowerCase(); + if (answer !== "y" && answer !== "yes") { + console.log(" Aborting sandbox creation."); + process.exit(1); + } + } + } + + // When enabledChannels is provided (from the toggle picker), only include + // channels the user selected. When null (backward compat), include all. + const enabledEnvKeys = + enabledChannels != null + ? new Set( + deps.messagingChannels + .filter((channel) => enabledChannels.includes(channel.name)) + .flatMap((channel) => + channel.appTokenEnvKey + ? 
[channel.envKey, channel.appTokenEnvKey] + : [channel.envKey], + ), + ) + : null; + + const messagingTokenDefs: Array<{ name: string; envKey: string; token: string | null }> = [ + { + name: `${sandboxName}-discord-bridge`, + envKey: "DISCORD_BOT_TOKEN", + token: getMessagingToken("DISCORD_BOT_TOKEN"), + }, + { + name: `${sandboxName}-slack-bridge`, + envKey: "SLACK_BOT_TOKEN", + token: getMessagingToken("SLACK_BOT_TOKEN"), + }, + { + name: `${sandboxName}-slack-app`, + envKey: "SLACK_APP_TOKEN", + token: getMessagingToken("SLACK_APP_TOKEN"), + }, + { + name: `${sandboxName}-telegram-bridge`, + envKey: "TELEGRAM_BOT_TOKEN", + token: getMessagingToken("TELEGRAM_BOT_TOKEN"), + }, + ].filter(({ envKey }) => !enabledEnvKeys || enabledEnvKeys.has(envKey)); + + if (webSearchConfig) { + messagingTokenDefs.push({ + name: `${sandboxName}-brave-search`, + envKey: deps.webSearchBraveApiKeyEnv, + token: deps.getCredential(deps.webSearchBraveApiKeyEnv), + }); + } + const hasMessagingTokens = messagingTokenDefs.some(({ token }) => !!token); + + // Reconcile local registry state with the live OpenShell gateway state. + const existing = deps.registry.getSandbox(sandboxName); + const liveExists = Boolean(deps.runCaptureOpenshell(["sandbox", "get", sandboxName], { ignoreError: true })); + if (existing && !liveExists) { + deps.registry.removeSandbox(sandboxName); + } + + // Declared outside the liveExists block so it is accessible during + // post-creation restore (the sandbox create path runs after the block). + let pendingStateRestore: any = null; + + if (liveExists) { + const existingSandboxState = deps.getSandboxReuseState(sandboxName); + + // Check whether messaging providers are missing from the gateway. Only + // force recreation when at least one required provider doesn't exist yet — + // this avoids destroying sandboxes already created with provider attachments. 
+ const needsProviderMigration = + hasMessagingTokens && + messagingTokenDefs.some(({ name, token }) => token && !deps.providerExistsInGateway(name)); + + // Detect whether any messaging credential has been rotated since the + // sandbox was created. Provider credentials are resolved once at sandbox + // startup, so a rotated token requires a rebuild to take effect. + const credentialRotation = hasMessagingTokens + ? deps.detectMessagingCredentialRotation(sandboxName, messagingTokenDefs) + : { changed: false, changedProviders: [] }; + + if (!deps.isRecreateSandbox() && !needsProviderMigration && !credentialRotation.changed) { + if (deps.isNonInteractive()) { + if (existingSandboxState === "ready") { + // Upsert messaging providers even on reuse so credential changes take + // effect without requiring a full sandbox recreation. + deps.upsertMessagingProviders(messagingTokenDefs); + deps.note(` [non-interactive] Sandbox '${sandboxName}' exists and is ready — reusing it`); + deps.note( + " Pass --recreate-sandbox or set NEMOCLAW_RECREATE_SANDBOX=1 to force recreation.", + ); + deps.ensureDashboardForward(sandboxName, chatUiUrl); + return sandboxName; + } + console.error(` Sandbox '${sandboxName}' already exists but is not ready.`); + console.error( + " Pass --recreate-sandbox or set NEMOCLAW_RECREATE_SANDBOX=1 to overwrite.", + ); + process.exit(1); + } + + if (existingSandboxState === "ready") { + console.log(` Sandbox '${sandboxName}' already exists.`); + console.log(" Choosing 'n' will delete the existing sandbox and create a new one."); + const answer = await deps.promptOrDefault(" Reuse existing sandbox? 
[Y/n]: ", null, "y"); + const normalizedAnswer = answer.trim().toLowerCase(); + if (normalizedAnswer !== "n" && normalizedAnswer !== "no") { + deps.upsertMessagingProviders(messagingTokenDefs); + deps.ensureDashboardForward(sandboxName, chatUiUrl); + return sandboxName; + } + } else { + console.log(` Sandbox '${sandboxName}' exists but is not ready.`); + console.log(" Selecting 'n' will abort onboarding."); + const answer = await deps.promptOrDefault( + " Delete it and create a new one? [Y/n]: ", + null, + "y", + ); + const normalizedAnswer = answer.trim().toLowerCase(); + if (normalizedAnswer === "n" || normalizedAnswer === "no") { + console.log(" Aborting onboarding."); + process.exit(1); + } + } + } + + // Back up workspace state before destroying the sandbox when triggered + // by credential rotation, so files can be restored after recreation. + if (credentialRotation.changed && existingSandboxState === "ready") { + const rotatedNames = credentialRotation.changedProviders.join(", "); + console.log(` Messaging credential(s) rotated: ${rotatedNames}`); + console.log(" Rebuilding sandbox to propagate new credentials to the L7 proxy..."); + try { + const backup = deps.sandboxState.backupSandboxState(sandboxName); + if (backup.success) { + deps.note(` ✓ State backed up (${backup.backedUpDirs.length} directories)`); + pendingStateRestore = backup; + } else { + console.error(" State backup failed — aborting rebuild to prevent data loss."); + console.error(" Pass --recreate-sandbox to force recreation without backup."); + deps.upsertMessagingProviders(messagingTokenDefs); + // Update stored hashes so the next onboard doesn't re-detect rotation. 
+ const abortHashes: Record<string, string> = {}; + for (const { envKey, token } of messagingTokenDefs) { + if (token) { + const hash = deps.hashCredential(token); + if (hash) abortHashes[envKey] = hash; + } + } + if (Object.keys(abortHashes).length > 0) { + deps.registry.updateSandbox(sandboxName, { providerCredentialHashes: abortHashes }); + } + deps.ensureDashboardForward(sandboxName, chatUiUrl); + return sandboxName; + } + } catch (err: any) { + console.error(` State backup threw: ${err.message} — aborting rebuild.`); + console.error(" Pass --recreate-sandbox to force recreation without backup."); + deps.upsertMessagingProviders(messagingTokenDefs); + const abortHashes: Record<string, string> = {}; + for (const { envKey, token } of messagingTokenDefs) { + if (token) { + const hash = deps.hashCredential(token); + if (hash) abortHashes[envKey] = hash; + } + } + if (Object.keys(abortHashes).length > 0) { + deps.registry.updateSandbox(sandboxName, { providerCredentialHashes: abortHashes }); + } + deps.ensureDashboardForward(sandboxName, chatUiUrl); + return sandboxName; + } + } + + if (needsProviderMigration) { + console.log(` Sandbox '${sandboxName}' exists but messaging providers are not attached.`); + console.log(" Recreating to ensure credentials flow through the provider pipeline."); + } else if (credentialRotation.changed) { + // Message already printed above during backup. 
+ } else if (existingSandboxState === "ready") { + deps.note(` Sandbox '${sandboxName}' exists and is ready — recreating by explicit request.`); + } else { + deps.note(` Sandbox '${sandboxName}' exists but is not ready — recreating it.`); + } + + const previousEntry = deps.registry.getSandbox(sandboxName); + if (previousEntry?.policies?.length > 0) { + deps.onboardSession.updateSession((current) => { + current.policyPresets = previousEntry.policies; + return current; + }); + } + + deps.note(` Deleting and recreating sandbox '${sandboxName}'...`); + + // Destroy old sandbox + deps.runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true }); + deps.registry.removeSandbox(sandboxName); + } + + // Stage build context — use the custom Dockerfile path when provided, + // otherwise use the optimised default that only sends what the build needs. + let buildCtx: string; + let stagedDockerfile: string; + if (fromDockerfile) { + const fromResolved = path.resolve(fromDockerfile); + if (!fs.existsSync(fromResolved)) { + console.error(` Custom Dockerfile not found: ${fromResolved}`); + process.exit(1); + } + buildCtx = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-")); + stagedDockerfile = path.join(buildCtx, "Dockerfile"); + // Copy the entire parent directory as build context. 
+ try { + fs.cpSync(path.dirname(fromResolved), buildCtx, { + recursive: true, + filter: (src) => { + const base = path.basename(src); + return !["node_modules", ".git", ".venv", "__pycache__"].includes(base); + }, + }); + } catch (err: any) { + if (err.code === "EACCES") { + console.error( + ` Permission denied while copying build context from: ${path.dirname(fromResolved)}`, + ); + console.error( + " The --from flag uses the Dockerfile's parent directory as the Docker build context.", + ); + console.error(" Move your Dockerfile to a dedicated directory and retry."); + process.exit(1); + } + throw err; + } + // If the caller pointed at a file not named "Dockerfile", copy it to the + // location openshell expects (buildCtx/Dockerfile). + if (path.basename(fromResolved) !== "Dockerfile") { + fs.copyFileSync(fromResolved, stagedDockerfile); + } + console.log(` Using custom Dockerfile: ${fromResolved}`); + } else if (agent) { + const agentBuild = deps.agentOnboard.createAgentSandbox(agent); + buildCtx = agentBuild.buildCtx; + stagedDockerfile = agentBuild.stagedDockerfile; + } else { + ({ buildCtx, stagedDockerfile } = deps.stageOptimizedSandboxBuildContext(deps.root)); + } + + // Create sandbox (use -- echo to avoid dropping into interactive shell) + // Pass the base policy so sandbox starts in proxy mode (required for policy updates later) + const globalPermissivePath = path.join( + deps.root, + "nemoclaw-blueprint", + "policies", + "openclaw-sandbox-permissive.yaml", + ); + let basePolicyPath: string; + if (dangerouslySkipPermissions) { + // Permissive mode: use agent-specific permissive policy if available, + // otherwise fall back to the global permissive policy. 
+ const agentPermissive = agent && deps.agentOnboard.getAgentPermissivePolicyPath(agent); + basePolicyPath = agentPermissive || globalPermissivePath; + } else { + const defaultPolicyPath = path.join( + deps.root, + "nemoclaw-blueprint", + "policies", + "openclaw-sandbox.yaml", + ); + basePolicyPath = (agent && deps.agentOnboard.getAgentPolicyPath(agent)) || defaultPolicyPath; + } + const createArgs = [ + "--from", + `${buildCtx}/Dockerfile`, + "--name", + sandboxName, + "--policy", + basePolicyPath, + ]; + // --gpu is intentionally omitted. See comment in startGateway(). + + // Create OpenShell providers for messaging credentials so they flow through + // the provider/placeholder system instead of raw env vars. The L7 proxy + // rewrites Authorization headers (Bearer/Bot) and URL-path segments + // (/bot{TOKEN}/) with real secrets at egress (OpenShell ≥ 0.0.20). + const messagingProviders = deps.upsertMessagingProviders(messagingTokenDefs); + for (const providerName of messagingProviders) { + createArgs.push("--provider", providerName); + } + + console.log(` Creating sandbox '${sandboxName}' (this takes a few minutes on first run)...`); + if (webSearchConfig && !deps.getCredential(deps.webSearchBraveApiKeyEnv)) { + console.error(" Brave Search is enabled, but BRAVE_API_KEY is not available in this process."); + console.error( + " Re-run with BRAVE_API_KEY set, or disable Brave Search before recreating the sandbox.", + ); + process.exit(1); + } + const tokensByEnvKey = Object.fromEntries( + messagingTokenDefs.map(({ envKey, token }) => [envKey, token]), + ) as Record; + const activeMessagingChannels = [ + ...new Set( + messagingTokenDefs.flatMap(({ envKey, token }) => { + if (!token) return []; + if (envKey === "DISCORD_BOT_TOKEN") return ["discord"]; + if (envKey === "SLACK_BOT_TOKEN") return ["slack"]; + // SLACK_APP_TOKEN alone does not enable slack; bot token is required. + if (envKey === "SLACK_APP_TOKEN") { + return tokensByEnvKey["SLACK_BOT_TOKEN"] ? 
["slack"] : []; + } + if (envKey === "TELEGRAM_BOT_TOKEN") return ["telegram"]; + return []; + }), + ), + ]; + // Build allowed sender IDs map from env vars set during the messaging prompt. + // Each channel with a userIdEnvKey in MESSAGING_CHANNELS may have a + // comma-separated list of IDs (e.g. TELEGRAM_ALLOWED_IDS="123,456"). + const messagingAllowedIds: Record<string, string[]> = {}; + const enabledTokenEnvKeys = new Set(messagingTokenDefs.map(({ envKey }) => envKey)); + for (const channel of deps.messagingChannels) { + const rawIds = channel.userIdEnvKey ? process.env[channel.userIdEnvKey] : null; + if ( + enabledTokenEnvKeys.has(channel.envKey) && + channel.allowIdsMode === "dm" && + channel.userIdEnvKey && + rawIds + ) { + const ids = rawIds + .split(",") + .map((value) => value.trim()) + .filter(Boolean); + if (ids.length > 0) messagingAllowedIds[channel.name] = ids; + } + } + const discordGuilds: Record<string, { requireMention: boolean; users?: string[] }> = {}; + if (enabledTokenEnvKeys.has("DISCORD_BOT_TOKEN")) { + const serverIds = (process.env.DISCORD_SERVER_IDS || process.env.DISCORD_SERVER_ID || "") + .split(",") + .map((value) => value.trim()) + .filter(Boolean); + const userIds = (process.env.DISCORD_ALLOWED_IDS || process.env.DISCORD_USER_ID || "") + .split(",") + .map((value) => value.trim()) + .filter(Boolean); + for (const serverId of serverIds) { + if (!deps.discordSnowflakeRe.test(serverId)) { + console.warn(` Warning: Discord server ID '${serverId}' does not look like a snowflake.`); + } + } + for (const userId of userIds) { + if (!deps.discordSnowflakeRe.test(userId)) { + console.warn(` Warning: Discord user ID '${userId}' does not look like a snowflake.`); + } + } + const requireMention = process.env.DISCORD_REQUIRE_MENTION !== "0"; + for (const serverId of serverIds) { + discordGuilds[serverId] = { + requireMention, + ...(userIds.length > 0 ? { users: userIds } : {}), + }; + } + } + // Pull the base image and resolve its digest so the Dockerfile is pinned to + // exactly what we just fetched. 
This prevents stale :latest tags from + // silently reusing a cached old image after NemoClaw upgrades (#1904). + const resolved = deps.pullAndResolveBaseImageDigest(); + if (resolved) { + console.log(` Pinning base image to ${resolved.digest.slice(0, 19)}...`); + } else { + // Check if the image exists locally before falling back to unpinned :latest. + // On a first-time install behind a firewall with no cached image, warn early + // so the user knows the build will likely fail. + const localCheck = deps.runCapture( + ["docker", "image", "inspect", `${deps.sandboxBaseImage}:${deps.sandboxBaseTag}`], + { ignoreError: true }, + ); + if (localCheck) { + console.warn(" Warning: could not pull base image from registry; using cached :latest."); + } else { + console.warn( + ` Warning: base image ${deps.sandboxBaseImage}:${deps.sandboxBaseTag} is not available locally.`, + ); + console.warn(" The build will fail unless Docker can pull the image during build."); + console.warn(" If offline, pull the image manually first:"); + console.warn(` docker pull ${deps.sandboxBaseImage}:${deps.sandboxBaseTag}`); + } + } + deps.patchStagedDockerfile( + stagedDockerfile, + model, + chatUiUrl, + String(Date.now()), + provider, + preferredInferenceApi, + webSearchConfig, + activeMessagingChannels, + messagingAllowedIds, + discordGuilds, + resolved ? resolved.ref : null, + ); + // Only pass non-sensitive env vars to the sandbox. Credentials flow through + // OpenShell providers — the gateway injects them as placeholders and the L7 + // proxy rewrites Authorization headers with real secrets at egress. + // See: crates/openshell-sandbox/src/secrets.rs (placeholder rewriting), + // crates/openshell-router/src/backend.rs (inference auth injection). + // + // Use the shared allowlist (subprocess-env.ts) instead of the old + // blocklist. 
The blocklist only blocked 12 specific credential names + // and passed EVERYTHING else — including GITHUB_TOKEN, + // AWS_SECRET_ACCESS_KEY, SSH_AUTH_SOCK, KUBECONFIG, NPM_TOKEN, and + // any CI/CD secrets that happened to be in the host environment. + // The allowlist inverts the default: only known-safe env vars are + // forwarded, everything else is dropped. + // + // For the sandbox specifically, we also strip KUBECONFIG and + // SSH_AUTH_SOCK — the generic allowlist includes these for host-side + // subprocesses (gateway start, openshell CLI) but the sandbox should + // never have access to the host's Kubernetes cluster or SSH agent. + const envArgs = [deps.formatEnvAssignment("CHAT_UI_URL", chatUiUrl)]; + // Pass the configured dashboard port into the sandbox so nemoclaw-start.sh + // can unconditionally override CHAT_UI_URL even when the Docker image was + // built with a different default. Without this, the baked-in Docker ENV + // value takes precedence and the gateway starts on the wrong port. (#1925) + if (process.env.NEMOCLAW_DASHBOARD_PORT) { + envArgs.push( + deps.formatEnvAssignment("NEMOCLAW_DASHBOARD_PORT", String(deps.dashboardPort)), + ); + } + if (webSearchConfig?.fetchEnabled) { + const braveKey = + deps.getCredential(deps.webSearchBraveApiKeyEnv) || process.env[deps.webSearchBraveApiKeyEnv]; + if (braveKey) { + envArgs.push(deps.formatEnvAssignment(deps.webSearchBraveApiKeyEnv, braveKey)); + } + } + const sandboxEnv = deps.buildSubprocessEnv(); + // Remove host-infrastructure credentials that the generic allowlist + // permits for host-side processes but that must not enter the sandbox. + delete sandboxEnv.KUBECONFIG; + delete sandboxEnv.SSH_AUTH_SOCK; + // Run without piping through awk — the pipe masked non-zero exit codes + // from openshell because bash returns the status of the last pipeline + // command (awk, always 0) unless pipefail is set. Removing the pipe + // lets the real exit code flow through to run(). 
+ const createCommand = `${deps.openshellShellCommand([ + "sandbox", + "create", + ...createArgs, + "--", + "env", + ...envArgs, + "nemoclaw-start", + ])} 2>&1`; + const createResult = await deps.streamSandboxCreate(createCommand, sandboxEnv, { + readyCheck: () => { + const list = deps.runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); + return deps.isSandboxReady(list, sandboxName); + }, + }); + + // Clean up build context regardless of outcome + deps.run(`rm -rf "${buildCtx}"`, { ignoreError: true }); + + if (createResult.status !== 0) { + const failure = deps.classifySandboxCreateFailure(createResult.output); + if (failure.kind === "sandbox_create_incomplete") { + // The sandbox was created in the gateway but the create stream exited + // with a non-zero code (e.g. SSH 255). Fall through to the ready-wait + // loop — the sandbox may still reach Ready on its own. + console.warn(""); + console.warn( + ` Create stream exited with code ${createResult.status} after sandbox was created.`, + ); + console.warn(" Checking whether the sandbox reaches Ready state..."); + } else { + console.error(""); + console.error(` Sandbox creation failed (exit ${createResult.status}).`); + if (createResult.output) { + console.error(""); + console.error(createResult.output); + } + console.error(" Try: openshell sandbox list # check gateway state"); + deps.printSandboxCreateRecoveryHints(createResult.output); + process.exit(createResult.status || 1); + } + } + + // Wait for sandbox to reach Ready state in k3s before registering. + // On WSL2 + Docker Desktop the pod can take longer to initialize; + // without this gate, NemoClaw registers a phantom sandbox that + // causes "sandbox not found" on every subsequent connect/status call. 
+ console.log(" Waiting for sandbox to become ready..."); + let ready = false; + for (let i = 0; i < 30; i++) { + const list = deps.runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); + if (deps.isSandboxReady(list, sandboxName)) { + ready = true; + break; + } + deps.sleep(2); + } + + if (!ready) { + // Clean up the orphaned sandbox so the next onboard retry with the same + // name doesn't fail on "sandbox already exists". + const delResult = deps.runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true }); + console.error(""); + console.error(` Sandbox '${sandboxName}' was created but did not become ready within 60s.`); + if (delResult.status === 0) { + console.error(" The orphaned sandbox has been removed — you can safely retry."); + } else { + console.error(` Could not remove the orphaned sandbox. Manual cleanup:`); + console.error(` openshell sandbox delete "${sandboxName}"`); + } + console.error(" Retry: nemoclaw onboard"); + process.exit(1); + } + + // Wait for NemoClaw dashboard to become fully ready (web server live) + // This prevents port forwards from connecting to a non-existent port + // or seeing 502/503 errors during initial load. + console.log(" Waiting for NemoClaw dashboard to become ready..."); + for (let i = 0; i < 15; i++) { + const readyMatch = deps.runCaptureOpenshell( + ["sandbox", "exec", sandboxName, "curl", "-sf", `http://localhost:${deps.controlUiPort}/`], + { ignoreError: true }, + ); + if (readyMatch) { + console.log(" ✓ Dashboard is live"); + break; + } + if (i === 14) { + console.warn(" Dashboard taking longer than expected to start. Continuing..."); + } else { + deps.sleep(2); + } + } + + // Release any stale forward on the dashboard port before claiming it for the new sandbox. + // A previous onboard run may have left the port forwarded to a different sandbox, + // which would silently prevent the new sandbox's dashboard from being reachable. 
+ deps.ensureDashboardForward(sandboxName, chatUiUrl); + + // Register only after confirmed ready — prevents phantom entries + const effectiveAgent = agent || deps.agentDefs.loadAgent("openclaw"); + const providerCredentialHashes: Record<string, string> = {}; + for (const { envKey, token } of messagingTokenDefs) { + if (token) { + const hash = deps.hashCredential(token); + if (hash) { + providerCredentialHashes[envKey] = hash; + } + } + } + deps.registry.registerSandbox({ + name: sandboxName, + model: model || null, + provider: provider || null, + gpuEnabled: !!gpu, + agent: agent ? agent.name : null, + agentVersion: fromDockerfile ? null : effectiveAgent.expectedVersion || null, + dangerouslySkipPermissions: dangerouslySkipPermissions || undefined, + providerCredentialHashes: + Object.keys(providerCredentialHashes).length > 0 ? providerCredentialHashes : undefined, + messagingChannels: activeMessagingChannels, + }); + + // Restore workspace state if we backed it up during credential rotation. + if (pendingStateRestore?.success) { + deps.note(" Restoring workspace state after credential rotation..."); + const restore = deps.sandboxState.restoreSandboxState( + sandboxName, + pendingStateRestore.manifest.backupPath, + ); + if (restore.success) { + deps.note(` ✓ State restored (${restore.restoredDirs.length} directories)`); + } else { + console.error( + ` Warning: partial restore. Manual recovery: ${pendingStateRestore.manifest.backupPath}`, + ); + } + } + + // DNS proxy — run a forwarder in the sandbox pod so the isolated + // sandbox namespace can resolve hostnames (fixes #626). + console.log(" Setting up sandbox DNS proxy..."); + deps.runFile("bash", [path.join(deps.scriptsDir, "setup-dns-proxy.sh"), deps.gatewayName, sandboxName], { + ignoreError: true, + }); + + // Check that messaging providers exist in the gateway (sandbox attachment + // cannot be verified via CLI yet — only gateway-level existence is checked). 
+ for (const providerName of messagingProviders) { + if (!deps.providerExistsInGateway(providerName)) { + console.error(` ⚠ Messaging provider '${providerName}' was not found in the gateway.`); + console.error(` The credential may not be available inside the sandbox.`); + console.error( + ` To fix: openshell provider create --name ${providerName} --type generic --credential `, + ); + } + } + + console.log(` ✓ Sandbox '${sandboxName}' created`); + + try { + if (process.platform === "darwin") { + const vmKernel = deps.runCapture("docker info --format '{{.KernelVersion}}'", { + ignoreError: true, + }).trim(); + if (vmKernel) { + const parts = vmKernel.split("."); + const major = parseInt(parts[0], 10); + const minor = parseInt(parts[1], 10); + if (!Number.isNaN(major) && !Number.isNaN(minor) && (major < 5 || (major === 5 && minor < 13))) { + console.warn( + ` ⚠ Landlock: Docker VM kernel ${vmKernel} does not support Landlock (requires ≥5.13).`, + ); + console.warn( + " Sandbox filesystem restrictions will silently degrade (best_effort mode).", + ); + } + } + } else if (process.platform === "linux") { + const uname = deps.runCapture("uname -r", { ignoreError: true }).trim(); + if (uname) { + const parts = uname.split("."); + const major = parseInt(parts[0], 10); + const minor = parseInt(parts[1], 10); + if (!Number.isNaN(major) && !Number.isNaN(minor) && (major < 5 || (major === 5 && minor < 13))) { + console.warn(` ⚠ Landlock: Kernel ${uname} does not support Landlock (requires ≥5.13).`); + console.warn( + " Sandbox filesystem restrictions will silently degrade (best_effort mode).", + ); + } + } + } + } catch {} + + return sandboxName; +} diff --git a/src/lib/onboard-sandbox-flow.test.ts b/src/lib/onboard-sandbox-flow.test.ts new file mode 100644 index 0000000000..fd8de67000 --- /dev/null +++ b/src/lib/onboard-sandbox-flow.test.ts @@ -0,0 +1,190 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. +import { runSandboxProvisioningFlow } from "../../dist/lib/onboard-sandbox-flow"; + +describe("runSandboxProvisioningFlow", () => { + it("configures messaging and creates a sandbox on a fresh flow", async () => { + const events: string[] = []; + const createSandbox = vi.fn(async () => "alpha"); + + const result = await runSandboxProvisioningFlow( + { + gpu: null, + sandboxName: null, + model: "gpt-5.4", + provider: "openai-api", + preferredInferenceApi: "responses", + webSearchConfig: null, + selectedMessagingChannels: [], + nimContainer: null, + fromDockerfile: null, + agent: null, + dangerouslySkipPermissions: false, + }, + { + resume: false, + sessionMessagingChannels: null, + sessionWebSearchConfig: null, + hasCompletedMessaging: false, + hasCompletedSandbox: false, + setupMessagingChannels: async () => ["telegram"], + configureWebSearch: async () => ({ fetchEnabled: true }), + ensureValidatedBraveSearchCredential: async () => null, + getSandboxReuseState: () => "missing", + removeSandbox: () => events.push("remove-sandbox"), + repairRecordedSandbox: () => events.push("repair-sandbox"), + createSandbox, + persistRegistryModelProvider: (sandboxName, patch) => + events.push(`persist:${sandboxName}:${patch.provider}:${patch.model}`), + onNote: (message) => events.push(`note:${message}`), + onSkip: (step, detail) => events.push(`skip:${step}:${detail}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + }, + ); + + expect(result.sandboxName).toBe("alpha"); + expect(result.selectedMessagingChannels).toEqual(["telegram"]); + expect(result.webSearchConfig).toEqual({ fetchEnabled: true }); + expect(createSandbox).toHaveBeenCalledWith( + null, + "gpt-5.4", + "openai-api", + "responses", + null, + { fetchEnabled: true }, + ["telegram"], + null, 
+ null, + false, + ); + expect(events).toEqual([ + "start:messaging", + "complete:messaging", + "start:sandbox", + "persist:alpha:openai-api:gpt-5.4", + "complete:sandbox", + ]); + }); + + it("reuses a completed sandbox without rerunning messaging or sandbox creation", async () => { + const events: string[] = []; + + const result = await runSandboxProvisioningFlow( + { + gpu: null, + sandboxName: "alpha", + model: "gpt-5.4", + provider: "openai-api", + preferredInferenceApi: "responses", + webSearchConfig: { fetchEnabled: true }, + selectedMessagingChannels: ["telegram"], + nimContainer: null, + fromDockerfile: null, + agent: null, + dangerouslySkipPermissions: false, + }, + { + resume: true, + sessionMessagingChannels: ["telegram"], + sessionWebSearchConfig: { fetchEnabled: true }, + hasCompletedMessaging: true, + hasCompletedSandbox: true, + setupMessagingChannels: async () => { + throw new Error("should not rerun messaging"); + }, + configureWebSearch: async () => { + throw new Error("should not rerun web search config"); + }, + ensureValidatedBraveSearchCredential: async () => null, + getSandboxReuseState: () => "ready", + removeSandbox: () => events.push("remove-sandbox"), + repairRecordedSandbox: () => events.push("repair-sandbox"), + createSandbox: async () => { + throw new Error("should not recreate sandbox"); + }, + persistRegistryModelProvider: () => events.push("persist"), + onNote: (message) => events.push(`note:${message}`), + onSkip: (step, detail) => events.push(`skip:${step}:${detail}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + }, + ); + + expect(result.sandboxName).toBe("alpha"); + expect(events).toEqual([ + "note: [resume] Reusing Brave Search configuration already baked into the sandbox.", + "skip:sandbox:alpha", + ]); + }); + + it("reuses recorded messaging channels when rebuilding a completed sandbox", async () => { + const events: string[] = []; + const createSandbox 
= vi.fn(async () => "alpha"); + + const result = await runSandboxProvisioningFlow( + { + gpu: null, + sandboxName: "alpha", + model: "gpt-5.4", + provider: "openai-api", + preferredInferenceApi: "responses", + webSearchConfig: { fetchEnabled: true }, + selectedMessagingChannels: [], + nimContainer: null, + fromDockerfile: null, + agent: null, + dangerouslySkipPermissions: false, + }, + { + resume: true, + sessionMessagingChannels: ["telegram", "slack"], + sessionWebSearchConfig: { fetchEnabled: true }, + hasCompletedMessaging: true, + hasCompletedSandbox: true, + setupMessagingChannels: async () => { + throw new Error("should not rerun messaging"); + }, + configureWebSearch: async () => null, + ensureValidatedBraveSearchCredential: async () => "brave-key", + getSandboxReuseState: () => "not_ready", + removeSandbox: () => events.push("remove-sandbox"), + repairRecordedSandbox: (sandboxName) => events.push(`repair:${sandboxName}`), + createSandbox, + persistRegistryModelProvider: () => events.push("persist"), + onNote: (message) => events.push(`note:${message}`), + onSkip: (step, detail) => events.push(`skip:${step}:${detail}`), + onStartStep: (step) => events.push(`start:${step}`), + onCompleteStep: (step) => events.push(`complete:${step}`), + }, + ); + + expect(result.selectedMessagingChannels).toEqual(["telegram", "slack"]); + expect(result.webSearchConfig).toEqual({ fetchEnabled: true }); + expect(createSandbox).toHaveBeenCalledWith( + null, + "gpt-5.4", + "openai-api", + "responses", + "alpha", + { fetchEnabled: true }, + ["telegram", "slack"], + null, + null, + false, + ); + expect(events).toEqual([ + "note: [resume] Recorded sandbox 'alpha' exists but is not ready; recreating it.", + "repair:alpha", + "note: [resume] Revalidating Brave Search configuration for sandbox recreation.", + "note: [resume] Reusing Brave Search configuration.", + "skip:messaging:telegram, slack", + "start:sandbox", + "persist", + "complete:sandbox", + ]); + }); +}); diff --git 
a/src/lib/onboard-sandbox-flow.ts b/src/lib/onboard-sandbox-flow.ts new file mode 100644 index 0000000000..5df0d13017 --- /dev/null +++ b/src/lib/onboard-sandbox-flow.ts @@ -0,0 +1,177 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { WebSearchConfig } from "./web-search"; + +export interface SandboxFlowState<TGpu, TAgent> { + gpu: TGpu; + sandboxName: string | null; + model: string; + provider: string; + preferredInferenceApi: string | null; + webSearchConfig: WebSearchConfig | null; + selectedMessagingChannels: string[]; + nimContainer: string | null; + fromDockerfile: string | null; + agent: TAgent; + dangerouslySkipPermissions: boolean; +} + +export interface SandboxFlowDeps<TGpu, TAgent> { + resume: boolean; + sessionMessagingChannels: string[] | null; + sessionWebSearchConfig: WebSearchConfig | null; + hasCompletedMessaging: boolean; + hasCompletedSandbox: boolean; + setupMessagingChannels: () => Promise<string[]>; + configureWebSearch: (_existing: null) => Promise<WebSearchConfig | null>; + ensureValidatedBraveSearchCredential: () => Promise<string | null>; + getSandboxReuseState: (sandboxName: string | null) => string; + removeSandbox: (sandboxName: string) => void; + repairRecordedSandbox: (sandboxName: string) => void; + createSandbox: ( + gpu: TGpu, + model: string, + provider: string, + preferredInferenceApi: string | null, + sandboxName: string | null, + webSearchConfig: WebSearchConfig | null, + messagingChannels: string[], + fromDockerfile: string | null, + agent: TAgent, + dangerouslySkipPermissions: boolean, + ) => Promise<string>; + persistRegistryModelProvider: (sandboxName: string, patch: { model: string; provider: string }) => void; + onNote: (message: string) => void; + onSkip: (stepName: "messaging" | "sandbox", detail: string | null) => void; + onStartStep: ( + stepName: "messaging" | "sandbox", + updates?: { sandboxName?: string | null; provider?: string | null; model?: string | null }, + ) => void; + 
 onCompleteStep: ( + stepName: "messaging" | "sandbox", + updates?: { + sandboxName?: string | null; + provider?: string | null; + model?: string | null; + messagingChannels?: string[]; + nimContainer?: string | null; + webSearchConfig?: WebSearchConfig | null; + }, + ) => void; +} + +export interface SandboxFlowResult<TGpu, TAgent> + extends SandboxFlowState<TGpu, TAgent> {} + +export async function runSandboxProvisioningFlow<TGpu, TAgent>( + initialState: SandboxFlowState<TGpu, TAgent>, + deps: SandboxFlowDeps<TGpu, TAgent>, +): Promise<SandboxFlowResult<TGpu, TAgent>> { + const state: SandboxFlowState<TGpu, TAgent> = { + ...initialState, + selectedMessagingChannels: [...initialState.selectedMessagingChannels], + }; + + const sandboxReuseState = deps.getSandboxReuseState(state.sandboxName); + const webSearchConfigChanged = + Boolean(deps.sessionWebSearchConfig) !== Boolean(state.webSearchConfig); + const resumeSandbox = + deps.hasCompletedSandbox && !webSearchConfigChanged && sandboxReuseState === "ready"; + + if (resumeSandbox) { + if (state.webSearchConfig) { + deps.onNote(" [resume] Reusing Brave Search configuration already baked into the sandbox."); + } + deps.onSkip("sandbox", state.sandboxName); + return state; + } + + if (deps.hasCompletedSandbox) { + if (webSearchConfigChanged) { + deps.onNote(" [resume] Web Search configuration changed; recreating sandbox."); + if (state.sandboxName) { + deps.removeSandbox(state.sandboxName); + } + } else if (sandboxReuseState === "not_ready") { + deps.onNote( + ` [resume] Recorded sandbox '${state.sandboxName}' exists but is not ready; recreating it.`, + ); + if (state.sandboxName) { + deps.repairRecordedSandbox(state.sandboxName); + } + } else { + deps.onNote(" [resume] Recorded sandbox state is unavailable; recreating it."); + if (state.sandboxName) { + deps.removeSandbox(state.sandboxName); + } + } + } + + let nextWebSearchConfig = state.webSearchConfig; + if (nextWebSearchConfig) { + deps.onNote(" [resume] Revalidating Brave Search configuration for sandbox recreation."); + const braveApiKey = await 
deps.ensureValidatedBraveSearchCredential(); + nextWebSearchConfig = braveApiKey ? { fetchEnabled: true } : null; + if (nextWebSearchConfig) { + deps.onNote(" [resume] Reusing Brave Search configuration."); + } + } else { + nextWebSearchConfig = await deps.configureWebSearch(null); + } + + const resumeMessaging = + deps.resume && Array.isArray(deps.sessionMessagingChannels) && deps.hasCompletedMessaging; + if (resumeMessaging && Array.isArray(deps.sessionMessagingChannels)) { + state.selectedMessagingChannels = [...deps.sessionMessagingChannels]; + deps.onSkip("messaging", state.selectedMessagingChannels.join(", ")); + } else { + deps.onStartStep("messaging", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + }); + state.selectedMessagingChannels = await deps.setupMessagingChannels(); + deps.onCompleteStep("messaging", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + messagingChannels: state.selectedMessagingChannels, + }); + } + + deps.onStartStep("sandbox", { + sandboxName: state.sandboxName, + provider: state.provider, + model: state.model, + }); + const nextSandboxName = await deps.createSandbox( + state.gpu, + state.model, + state.provider, + state.preferredInferenceApi, + state.sandboxName, + nextWebSearchConfig, + state.selectedMessagingChannels, + state.fromDockerfile, + state.agent, + state.dangerouslySkipPermissions, + ); + deps.persistRegistryModelProvider(nextSandboxName, { + model: state.model, + provider: state.provider, + }); + deps.onCompleteStep("sandbox", { + sandboxName: nextSandboxName, + provider: state.provider, + model: state.model, + nimContainer: state.nimContainer, + webSearchConfig: nextWebSearchConfig, + }); + + return { + ...state, + sandboxName: nextSandboxName, + webSearchConfig: nextWebSearchConfig, + }; +} diff --git a/src/lib/onboard-sandbox-name.test.ts b/src/lib/onboard-sandbox-name.test.ts new file mode 100644 index 0000000000..6abc6c6d45 --- /dev/null 
+++ b/src/lib/onboard-sandbox-name.test.ts @@ -0,0 +1,87 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. +import { promptValidatedSandboxName } from "../../dist/lib/onboard-sandbox-name"; + +describe("promptValidatedSandboxName", () => { + it("re-prompts in interactive mode until a valid non-reserved name is provided", async () => { + const errorWriter = vi.fn(); + const answers = ["status", "9bad", "my-assistant"]; + const result = await promptValidatedSandboxName({ + promptOrDefault: async () => answers.shift() ?? "my-assistant", + validateName: (value) => { + if (/^[0-9]/.test(value)) { + throw new Error("invalid sandbox name"); + } + return value; + }, + isNonInteractive: () => false, + errorWriter, + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + }); + + expect(result).toBe("my-assistant"); + expect(errorWriter).toHaveBeenCalledWith( + " Reserved name: 'status' is a NemoClaw CLI command.", + ); + expect(errorWriter).toHaveBeenCalledWith(" Names must start with a letter, not a digit."); + }); + + it("checks reserved names after validation canonicalizes the input", async () => { + const errorWriter = vi.fn(); + const answers = ["Status", "my-assistant"]; + const result = await promptValidatedSandboxName({ + promptOrDefault: async () => answers.shift() ?? 
"my-assistant", + validateName: (value) => value.toLowerCase(), + isNonInteractive: () => false, + errorWriter, + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + }); + + expect(result).toBe("my-assistant"); + expect(errorWriter).toHaveBeenCalledWith( + " Reserved name: 'status' is a NemoClaw CLI command.", + ); + }); + + it("exits immediately in non-interactive mode when the name is invalid", async () => { + await expect( + promptValidatedSandboxName({ + promptOrDefault: async () => "9bad", + validateName: () => { + throw new Error("invalid sandbox name"); + }, + isNonInteractive: () => true, + errorWriter: vi.fn(), + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + }), + ).rejects.toThrow("exit:1"); + }); + + it("exits after too many invalid interactive attempts", async () => { + const errorWriter = vi.fn(); + await expect( + promptValidatedSandboxName({ + promptOrDefault: async () => "9bad", + validateName: () => { + throw new Error("invalid sandbox name"); + }, + isNonInteractive: () => false, + errorWriter, + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + }), + ).rejects.toThrow("exit:1"); + + expect(errorWriter).toHaveBeenCalledWith(" Too many invalid attempts."); + }); +}); diff --git a/src/lib/onboard-sandbox-name.ts b/src/lib/onboard-sandbox-name.ts new file mode 100644 index 0000000000..1257269199 --- /dev/null +++ b/src/lib/onboard-sandbox-name.ts @@ -0,0 +1,85 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +const RESERVED_SANDBOX_NAMES = new Set([ + "onboard", + "list", + "deploy", + "setup", + "setup-spark", + "start", + "stop", + "status", + "debug", + "uninstall", + "credentials", + "help", +]); + +export interface PromptValidatedSandboxNameDeps { + promptOrDefault: ( + question: string, + envVar: string | null, + defaultValue: string, + ) => Promise<string>; + validateName: (value: string, label: string) => string; + isNonInteractive: () => boolean; + errorWriter?: (message?: string) => void; + exit?: (code: number) => never; +} + +export async function promptValidatedSandboxName( + deps: PromptValidatedSandboxNameDeps, +): Promise<string> { + const errorWriter = deps.errorWriter ?? console.error; + const exit = deps.exit ?? ((code: number) => process.exit(code)); + const MAX_ATTEMPTS = 3; + + for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { + const nameAnswer = await deps.promptOrDefault( + " Sandbox name (lowercase, starts with letter, hyphens ok) [my-assistant]: ", + "NEMOCLAW_SANDBOX_NAME", + "my-assistant", + ); + const sandboxName = (nameAnswer || "my-assistant").trim(); + + try { + const validatedSandboxName = deps.validateName(sandboxName, "sandbox name"); + if (RESERVED_SANDBOX_NAMES.has(validatedSandboxName)) { + errorWriter( + ` Reserved name: '${validatedSandboxName}' is a NemoClaw CLI command.`, + ); + errorWriter(" Choose a different name to avoid routing conflicts."); + if (deps.isNonInteractive()) { + exit(1); + } + if (attempt < MAX_ATTEMPTS - 1) { + errorWriter(" Please try again.\n"); + } + continue; + } + return validatedSandboxName; + } catch (error: unknown) { + errorWriter(` ${(error as Error).message}`); + } + + if (/^[0-9]/.test(sandboxName)) { + errorWriter(" Names must start with a letter, not a digit."); + } else { + errorWriter(" Names must be lowercase, contain only letters, numbers, and hyphens,"); + errorWriter(" must start with a letter, and end with a letter or number."); + } + + if 
(deps.isNonInteractive()) { + exit(1); + } + + if (attempt < MAX_ATTEMPTS - 1) { + errorWriter(" Please try again.\n"); + } + } + + errorWriter(" Too many invalid attempts."); + exit(1); + throw new Error("unreachable"); +} diff --git a/src/lib/onboard-session.test.ts b/src/lib/onboard-session.test.ts index 7589cedda2..5148c73392 100644 --- a/src/lib/onboard-session.test.ts +++ b/src/lib/onboard-session.test.ts @@ -48,7 +48,12 @@ describe("onboard session", () => { const stat = fs.statSync(session.SESSION_FILE); const dirStat = fs.statSync(path.dirname(session.SESSION_FILE)); + expect(saved.version).toBe(2); expect(saved.mode).toBe("non-interactive"); + expect(saved.steps.messaging.status).toBe("pending"); + expect(saved.steps.runtime_setup.status).toBe("pending"); + expect(saved.steps.openclaw.status).toBe("pending"); + expect(saved.steps.agent_setup.status).toBe("pending"); expect(fs.existsSync(session.SESSION_FILE)).toBe(true); expect(stat.mode & 0o777).toBe(0o600); expect(dirStat.mode & 0o777).toBe(0o700); @@ -122,6 +127,37 @@ describe("onboard session", () => { expect(loaded.metadata.token).toBeUndefined(); }); + it("clears provider-specific metadata when a later selection omits it", () => { + session.saveSession( + session.createSession({ + sandboxName: "alpha", + provider: "compatible-openai", + model: "stale-model", + endpointUrl: "https://old.example.com/v1", + credentialEnv: "COMPATIBLE_API_KEY", + preferredInferenceApi: "responses", + nimContainer: "nim-stale", + }), + ); + + session.markStepComplete("provider_selection", { + provider: "openai-api", + model: "gpt-5.4", + endpointUrl: null, + credentialEnv: null, + preferredInferenceApi: null, + nimContainer: null, + }); + + const loaded = session.loadSession(); + expect(loaded.provider).toBe("openai-api"); + expect(loaded.model).toBe("gpt-5.4"); + expect(loaded.endpointUrl).toBeNull(); + expect(loaded.credentialEnv).toBeNull(); + expect(loaded.preferredInferenceApi).toBeNull(); + 
expect(loaded.nimContainer).toBeNull(); + }); + it("persists messagingChannels across save/load roundtrips", () => { const created = session.createSession(); created.messagingChannels = ["telegram", "slack"]; @@ -159,6 +195,20 @@ describe("onboard session", () => { expect(loaded.webSearchConfig).toBeNull(); }); + it("persists messaging channel selections through safe session updates", () => { + session.saveSession(session.createSession()); + session.markStepComplete("provider_selection", { + messagingChannels: ["telegram", "slack"], + }); + + let loaded = session.loadSession(); + expect(loaded.messagingChannels).toEqual(["telegram", "slack"]); + + session.completeSession({ messagingChannels: null }); + loaded = session.loadSession(); + expect(loaded.messagingChannels).toBeNull(); + }); + it("does not clear existing metadata when updates omit whitelisted metadata fields", () => { session.saveSession(session.createSession({ metadata: { gatewayName: "nemoclaw" } })); session.markStepComplete("provider_selection", { @@ -172,6 +222,60 @@ describe("onboard session", () => { expect(loaded.metadata.token).toBeUndefined(); }); + it("migrates legacy v1 sessions into the typed v2 schema", () => { + fs.mkdirSync(path.dirname(session.SESSION_FILE), { recursive: true }); + fs.writeFileSync( + session.SESSION_FILE, + JSON.stringify({ + version: 1, + sessionId: "legacy-session", + resumable: true, + status: "in_progress", + mode: "interactive", + startedAt: "2026-04-17T00:00:00.000Z", + updatedAt: "2026-04-17T00:00:01.000Z", + lastStepStarted: "openclaw", + lastCompletedStep: "sandbox", + failure: null, + agent: null, + sandboxName: "legacy-box", + provider: "openai-api", + model: "gpt-5.4", + endpointUrl: "https://api.openai.com/v1", + credentialEnv: "OPENAI_API_KEY", + preferredInferenceApi: "responses", + nimContainer: null, + webSearchConfig: { fetchEnabled: true }, + policyPresets: ["npm"], + metadata: { gatewayName: "nemoclaw", fromDockerfile: null }, + steps: { + preflight: 
{ status: "complete", startedAt: null, completedAt: null, error: null }, + gateway: { status: "complete", startedAt: null, completedAt: null, error: null }, + sandbox: { status: "complete", startedAt: null, completedAt: null, error: null }, + provider_selection: { + status: "complete", + startedAt: null, + completedAt: null, + error: null, + }, + inference: { status: "complete", startedAt: null, completedAt: null, error: null }, + openclaw: { status: "complete", startedAt: null, completedAt: null, error: null }, + agent_setup: { status: "skipped", startedAt: null, completedAt: null, error: null }, + policies: { status: "pending", startedAt: null, completedAt: null, error: null }, + }, + }), + ); + + const loaded = session.loadSession(); + expect(loaded.version).toBe(2); + expect(loaded.sandboxName).toBe("legacy-box"); + expect(loaded.steps.messaging.status).toBe("pending"); + expect(loaded.steps.runtime_setup.status).toBe("complete"); + expect(loaded.steps.openclaw.status).toBe("complete"); + expect(loaded.steps.agent_setup.status).toBe("skipped"); + expect(loaded.lastStepStarted).toBe("openclaw"); + }); + it("returns null for corrupt session data", () => { fs.mkdirSync(path.dirname(session.SESSION_FILE), { recursive: true }); fs.writeFileSync(session.SESSION_FILE, "not-json"); diff --git a/src/lib/onboard-session.ts b/src/lib/onboard-session.ts index d2936407cd..f8d3a719a5 100644 --- a/src/lib/onboard-session.ts +++ b/src/lib/onboard-session.ts @@ -10,25 +10,37 @@ import fs from "node:fs"; import path from "node:path"; +import { + createEmptyStepLedger, + isOnboardStepName, + type OnboardMode, + type OnboardRunStatus, + type OnboardStepLedger, + type OnboardStepName, + type OnboardStepState, + type OnboardStepStatus, +} from "./onboard-fsm"; import type { WebSearchConfig } from "./web-search"; -export const SESSION_VERSION = 1; +const LEGACY_SESSION_VERSION = 1; +export const SESSION_VERSION = 2; export const SESSION_DIR = path.join(process.env.HOME || "/tmp", 
".nemoclaw"); export const SESSION_FILE = path.join(SESSION_DIR, "onboard-session.json"); export const LOCK_FILE = path.join(SESSION_DIR, "onboard.lock"); -const VALID_STEP_STATES = new Set(["pending", "in_progress", "complete", "failed", "skipped"]); +const VALID_STEP_STATES = new Set([ + "pending", + "in_progress", + "complete", + "failed", + "skipped", +]); // ── Types ──────────────────────────────────────────────────────── -export interface StepState { - status: string; - startedAt: string | null; - completedAt: string | null; - error: string | null; -} +export type StepState = OnboardStepState; export interface SessionFailure { - step: string | null; + step: OnboardStepName | null; message: string | null; recordedAt: string; } @@ -42,12 +54,12 @@ export interface Session { version: number; sessionId: string; resumable: boolean; - status: string; - mode: string; + status: OnboardRunStatus; + mode: OnboardMode; startedAt: string; updatedAt: string; - lastStepStarted: string | null; - lastCompletedStep: string | null; + lastStepStarted: OnboardStepName | null; + lastCompletedStep: OnboardStepName | null; failure: SessionFailure | null; agent: string | null; sandboxName: string | null; @@ -58,10 +70,10 @@ export interface Session { preferredInferenceApi: string | null; nimContainer: string | null; webSearchConfig: WebSearchConfig | null; - policyPresets: string[] | null; messagingChannels: string[] | null; + policyPresets: string[] | null; metadata: SessionMetadata; - steps: Record; + steps: OnboardStepLedger; } export interface LockInfo { @@ -80,16 +92,16 @@ export interface LockResult { } export interface SessionUpdates { - sandboxName?: string; - provider?: string; - model?: string; - endpointUrl?: string; - credentialEnv?: string; - preferredInferenceApi?: string; - nimContainer?: string; + sandboxName?: string | null; + provider?: string | null; + model?: string | null; + endpointUrl?: string | null; + credentialEnv?: string | null; + preferredInferenceApi?: 
string | null; + nimContainer?: string | null; webSearchConfig?: WebSearchConfig | null; + messagingChannels?: string[] | null; policyPresets?: string[]; - messagingChannels?: string[]; metadata?: { gatewayName?: string; fromDockerfile?: string | null }; } @@ -107,17 +119,8 @@ export function lockPath(): string { return LOCK_FILE; } -function defaultSteps(): Record { - return { - preflight: { status: "pending", startedAt: null, completedAt: null, error: null }, - gateway: { status: "pending", startedAt: null, completedAt: null, error: null }, - sandbox: { status: "pending", startedAt: null, completedAt: null, error: null }, - provider_selection: { status: "pending", startedAt: null, completedAt: null, error: null }, - inference: { status: "pending", startedAt: null, completedAt: null, error: null }, - openclaw: { status: "pending", startedAt: null, completedAt: null, error: null }, - agent_setup: { status: "pending", startedAt: null, completedAt: null, error: null }, - policies: { status: "pending", startedAt: null, completedAt: null, error: null }, - }; +function defaultSteps(): OnboardStepLedger { + return createEmptyStepLedger(); } export function isObject(value: unknown): value is Record { @@ -142,19 +145,92 @@ export function sanitizeFailure( input: { step?: unknown; message?: unknown; recordedAt?: unknown } | null | undefined, ): SessionFailure | null { if (!input) return null; - const step = typeof input.step === "string" ? input.step : null; + const step = isOnboardStepName(input.step) ? input.step : null; const message = redactSensitiveText(input.message); const recordedAt = typeof input.recordedAt === "string" ? input.recordedAt : new Date().toISOString(); return step || message ? 
{ step, message, recordedAt } : null; } -export function validateStep(step: unknown): boolean { +export function validateStep(step: unknown): step is OnboardStepState { if (!isObject(step)) return false; - if (!VALID_STEP_STATES.has(step.status as string)) return false; + if (!VALID_STEP_STATES.has(step.status as OnboardStepStatus)) return false; return true; } +function isOnboardMode(value: unknown): value is OnboardMode { + return value === "interactive" || value === "non-interactive"; +} + +function isOnboardRunStatus(value: unknown): value is OnboardRunStatus { + return value === "in_progress" || value === "complete" || value === "failed"; +} + +function normalizeStepName(value: unknown): OnboardStepName | null { + return isOnboardStepName(value) ? value : null; +} + +function cloneStepState(step: OnboardStepState): OnboardStepState { + return { + status: step.status, + startedAt: step.startedAt, + completedAt: step.completedAt, + error: step.error, + }; +} + +function pickAggregateStepState(states: readonly OnboardStepState[]): OnboardStepState { + const failed = states.find((state) => state.status === "failed"); + if (failed) return cloneStepState(failed); + + const inProgress = states.find((state) => state.status === "in_progress"); + if (inProgress) return cloneStepState(inProgress); + + const complete = states.find((state) => state.status === "complete"); + if (complete) return cloneStepState(complete); + + if (states.every((state) => state.status === "skipped")) { + return cloneStepState(states[0]); + } + + const skipped = states.find((state) => state.status === "skipped"); + if (skipped) return cloneStepState(skipped); + + return cloneStepState(states[0]); +} + +function synchronizeRuntimeSteps(session: Session): void { + const runtimeStates = [ + session.steps.runtime_setup, + session.steps.openclaw, + session.steps.agent_setup, + ] as const; + session.steps.runtime_setup = pickAggregateStepState(runtimeStates); + + if 
(session.steps.runtime_setup.status === "pending") { + return; + } + + const selectedLegacyStep = session.agent ? "agent_setup" : "openclaw"; + const siblingLegacyStep = session.agent ? "openclaw" : "agent_setup"; + + if (session.steps[selectedLegacyStep].status === "pending") { + session.steps[selectedLegacyStep] = cloneStepState(session.steps.runtime_setup); + } + + if ( + session.steps.runtime_setup.status === "complete" && + session.steps[siblingLegacyStep].status === "pending" + ) { + session.steps[siblingLegacyStep] = { + status: "skipped", + startedAt: null, + completedAt: null, + error: null, + }; + } +} + export function redactUrl(value: unknown): string | null { if (typeof value !== "string" || value.length === 0) return null; try { @@ -179,12 +255,12 @@ export function redactUrl(value: unknown): string | null { export function createSession(overrides: Partial = {}): Session { const now = new Date().toISOString(); - return { + const session: Session = { version: SESSION_VERSION, sessionId: overrides.sessionId || `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`, resumable: true, - status: "in_progress", - mode: overrides.mode || "interactive", + status: overrides.status && isOnboardRunStatus(overrides.status) ? overrides.status : "in_progress", + mode: overrides.mode && isOnboardMode(overrides.mode) ? overrides.mode : "interactive", startedAt: overrides.startedAt || now, updatedAt: overrides.updatedAt || now, lastStepStarted: overrides.lastStepStarted || null, @@ -202,12 +278,12 @@ export function createSession(overrides: Partial = {}): Session { overrides.webSearchConfig && overrides.webSearchConfig.fetchEnabled === true ? { fetchEnabled: true } : null, - policyPresets: Array.isArray(overrides.policyPresets) - ? overrides.policyPresets.filter((value) => typeof value === "string") - : null, messagingChannels: Array.isArray(overrides.messagingChannels) ? 
overrides.messagingChannels.filter((value) => typeof value === "string") : null, + policyPresets: Array.isArray(overrides.policyPresets) + ? overrides.policyPresets.filter((value) => typeof value === "string") + : null, metadata: { gatewayName: overrides.metadata?.gatewayName || "nemoclaw", fromDockerfile: overrides.metadata?.fromDockerfile || null, @@ -217,15 +293,22 @@ export function createSession(overrides: Partial = {}): Session { ...(overrides.steps || {}), }, }; + synchronizeRuntimeSteps(session); + return session; } // eslint-disable-next-line complexity export function normalizeSession(data: unknown): Session | null { - if (!isObject(data) || (data as Record).version !== SESSION_VERSION) return null; + if (!isObject(data)) return null; const d = data as Record; + if (d.version !== SESSION_VERSION && d.version !== LEGACY_SESSION_VERSION) { + return null; + } + const normalized = createSession({ sessionId: typeof d.sessionId === "string" ? d.sessionId : undefined, - mode: typeof d.mode === "string" ? d.mode : undefined, + mode: isOnboardMode(d.mode) ? d.mode : undefined, + status: isOnboardRunStatus(d.status) ? d.status : undefined, startedAt: typeof d.startedAt === "string" ? d.startedAt : undefined, updatedAt: typeof d.updatedAt === "string" ? d.updatedAt : undefined, agent: typeof d.agent === "string" ? d.agent : null, @@ -242,14 +325,14 @@ export function normalizeSession(data: unknown): Session | null { (d.webSearchConfig as Record).fetchEnabled === true ? { fetchEnabled: true } : null, - policyPresets: Array.isArray(d.policyPresets) - ? (d.policyPresets as unknown[]).filter((value) => typeof value === "string") as string[] - : null, messagingChannels: Array.isArray(d.messagingChannels) ? (d.messagingChannels as unknown[]).filter((value) => typeof value === "string") as string[] : null, - lastStepStarted: typeof d.lastStepStarted === "string" ? d.lastStepStarted : null, - lastCompletedStep: typeof d.lastCompletedStep === "string" ? 
d.lastCompletedStep : null, + policyPresets: Array.isArray(d.policyPresets) + ? (d.policyPresets as unknown[]).filter((value) => typeof value === "string") as string[] + : null, + lastStepStarted: normalizeStepName(d.lastStepStarted), + lastCompletedStep: normalizeStepName(d.lastCompletedStep), failure: sanitizeFailure(d.failure as Record | null), metadata: isObject(d.metadata) ? ({ @@ -259,25 +342,24 @@ export function normalizeSession(data: unknown): Session | null { : undefined, } as Partial); normalized.resumable = d.resumable !== false; - normalized.status = typeof d.status === "string" ? d.status : normalized.status; + normalized.version = SESSION_VERSION; if (isObject(d.steps)) { - for (const [name, step] of Object.entries(d.steps as Record)) { - if ( - Object.prototype.hasOwnProperty.call(normalized.steps, name) && - validateStep(step) - ) { - const s = step as Record; - normalized.steps[name] = { - status: s.status as string, - startedAt: typeof s.startedAt === "string" ? s.startedAt : null, - completedAt: typeof s.completedAt === "string" ? s.completedAt : null, - error: redactSensitiveText(s.error), - }; + for (const [rawName, step] of Object.entries(d.steps as Record)) { + const name = normalizeStepName(rawName); + if (!name || !validateStep(step)) { + continue; } + normalized.steps[name] = { + status: step.status, + startedAt: typeof step.startedAt === "string" ? step.startedAt : null, + completedAt: typeof step.completedAt === "string" ? 
step.completedAt : null, + error: redactSensitiveText(step.error), + }; } } + synchronizeRuntimeSteps(normalized); return normalized; } @@ -542,24 +624,33 @@ export function releaseOnboardLock(): void { export function filterSafeUpdates(updates: SessionUpdates): Partial { const safe: Partial = {}; if (!isObject(updates)) return safe; - if (typeof updates.sandboxName === "string") safe.sandboxName = updates.sandboxName; - if (typeof updates.provider === "string") safe.provider = updates.provider; - if (typeof updates.model === "string") safe.model = updates.model; - if (typeof updates.endpointUrl === "string") safe.endpointUrl = redactUrl(updates.endpointUrl); - if (typeof updates.credentialEnv === "string") safe.credentialEnv = updates.credentialEnv; - if (typeof updates.preferredInferenceApi === "string") + if (updates.sandboxName === null) safe.sandboxName = null; + else if (typeof updates.sandboxName === "string") safe.sandboxName = updates.sandboxName; + if (updates.provider === null) safe.provider = null; + else if (typeof updates.provider === "string") safe.provider = updates.provider; + if (updates.model === null) safe.model = null; + else if (typeof updates.model === "string") safe.model = updates.model; + if (updates.endpointUrl === null) safe.endpointUrl = null; + else if (typeof updates.endpointUrl === "string") safe.endpointUrl = redactUrl(updates.endpointUrl); + if (updates.credentialEnv === null) safe.credentialEnv = null; + else if (typeof updates.credentialEnv === "string") safe.credentialEnv = updates.credentialEnv; + if (updates.preferredInferenceApi === null) safe.preferredInferenceApi = null; + else if (typeof updates.preferredInferenceApi === "string") safe.preferredInferenceApi = updates.preferredInferenceApi; - if (typeof updates.nimContainer === "string") safe.nimContainer = updates.nimContainer; + if (updates.nimContainer === null) safe.nimContainer = null; + else if (typeof updates.nimContainer === "string") safe.nimContainer = 
updates.nimContainer; if (isObject(updates.webSearchConfig) && updates.webSearchConfig.fetchEnabled === true) { safe.webSearchConfig = { fetchEnabled: true }; } else if (updates.webSearchConfig === null) { safe.webSearchConfig = null; } - if (Array.isArray(updates.policyPresets)) { - safe.policyPresets = updates.policyPresets.filter((value) => typeof value === "string"); - } if (Array.isArray(updates.messagingChannels)) { safe.messagingChannels = updates.messagingChannels.filter((value) => typeof value === "string"); + } else if (updates.messagingChannels === null) { + safe.messagingChannels = null; + } + if (Array.isArray(updates.policyPresets)) { + safe.policyPresets = updates.policyPresets.filter((value) => typeof value === "string"); } if (isObject(updates.metadata) && typeof updates.metadata.gatewayName === "string") { safe.metadata = { @@ -576,73 +667,96 @@ export function updateSession(mutator: (session: Session) => Session | void): Se return saveSession(next); } -export function markStepStarted(stepName: string): Session { - return updateSession((session) => { - const step = session.steps[stepName]; - if (!step) return session; - step.status = "in_progress"; - step.startedAt = new Date().toISOString(); - step.completedAt = null; - step.error = null; - session.lastStepStarted = stepName; - session.failure = null; - session.status = "in_progress"; - return session; +export function applyStepStarted(session: Session, stepName: OnboardStepName): Session { + const step = session.steps[stepName]; + if (!step) return session; + step.status = "in_progress"; + step.startedAt = new Date().toISOString(); + step.completedAt = null; + step.error = null; + session.lastStepStarted = stepName; + session.failure = null; + session.status = "in_progress"; + synchronizeRuntimeSteps(session); + return session; +} + +export function applyStepComplete( + session: Session, + stepName: OnboardStepName, + updates: SessionUpdates = {}, +): Session { + const step = 
session.steps[stepName]; + if (!step) return session; + step.status = "complete"; + step.completedAt = new Date().toISOString(); + step.error = null; + session.lastCompletedStep = stepName; + session.failure = null; + Object.assign(session, filterSafeUpdates(updates)); + synchronizeRuntimeSteps(session); + return session; +} + +export function applyStepSkipped(session: Session, stepName: OnboardStepName): Session { + const step = session.steps[stepName]; + if (!step) return session; + if (step.status === "complete" || step.status === "failed") return session; + step.status = "skipped"; + step.startedAt = null; + step.completedAt = null; + step.error = null; + synchronizeRuntimeSteps(session); + return session; +} + +export function applyStepFailed( + session: Session, + stepName: OnboardStepName, + message: string | null = null, +): Session { + const step = session.steps[stepName]; + if (!step) return session; + step.status = "failed"; + step.completedAt = null; + step.error = redactSensitiveText(message); + session.failure = sanitizeFailure({ + step: stepName, + message, + recordedAt: new Date().toISOString(), }); + session.status = "failed"; + synchronizeRuntimeSteps(session); + return session; } -export function markStepComplete(stepName: string, updates: SessionUpdates = {}): Session { - return updateSession((session) => { - const step = session.steps[stepName]; - if (!step) return session; - step.status = "complete"; - step.completedAt = new Date().toISOString(); - step.error = null; - session.lastCompletedStep = stepName; - session.failure = null; - Object.assign(session, filterSafeUpdates(updates)); - return session; - }); +export function applySessionComplete(session: Session, updates: SessionUpdates = {}): Session { + Object.assign(session, filterSafeUpdates(updates)); + session.status = "complete"; + session.resumable = false; + session.failure = null; + synchronizeRuntimeSteps(session); + return session; } -export function markStepSkipped(stepName: 
string): Session { - return updateSession((session) => { - const step = session.steps[stepName]; - if (!step) return session; - if (step.status === "complete" || step.status === "failed") return session; - step.status = "skipped"; - step.startedAt = null; - step.completedAt = null; - step.error = null; - return session; - }); +export function markStepStarted(stepName: OnboardStepName): Session { + return updateSession((session) => applyStepStarted(session, stepName)); } -export function markStepFailed(stepName: string, message: string | null = null): Session { - return updateSession((session) => { - const step = session.steps[stepName]; - if (!step) return session; - step.status = "failed"; - step.completedAt = null; - step.error = redactSensitiveText(message); - session.failure = sanitizeFailure({ - step: stepName, - message, - recordedAt: new Date().toISOString(), - }); - session.status = "failed"; - return session; - }); +export function markStepComplete(stepName: OnboardStepName, updates: SessionUpdates = {}): Session { + return updateSession((session) => applyStepComplete(session, stepName, updates)); +} + +export function markStepSkipped(stepName: OnboardStepName): Session { + return updateSession((session) => applyStepSkipped(session, stepName)); +} + +export function markStepFailed(stepName: OnboardStepName, message: string | null = null): Session { + return updateSession((session) => applyStepFailed(session, stepName, message)); } export function completeSession(updates: SessionUpdates = {}): Session { - return updateSession((session) => { - Object.assign(session, filterSafeUpdates(updates)); - session.status = "complete"; - session.resumable = false; - session.failure = null; - return session; - }); + return updateSession((session) => applySessionComplete(session, updates)); } export function summarizeForDebug(session: Session | null = loadSession()): Record< @@ -665,6 +779,7 @@ export function summarizeForDebug(session: Session | null = loadSession()): 
Reco credentialEnv: session.credentialEnv, preferredInferenceApi: session.preferredInferenceApi, nimContainer: session.nimContainer, + messagingChannels: session.messagingChannels, policyPresets: session.policyPresets, lastStepStarted: session.lastStepStarted, lastCompletedStep: session.lastCompletedStep, diff --git a/src/lib/onboard-shell.test.ts b/src/lib/onboard-shell.test.ts new file mode 100644 index 0000000000..e78c9d8253 --- /dev/null +++ b/src/lib/onboard-shell.test.ts @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. +import { + buildOnboardLockCommand, + getDangerouslySkipPermissionsWarningLines, + getOnboardBannerLines, + getOnboardLockConflictLines, + resolveOnboardShellState, +} from "../../dist/lib/onboard-shell"; + +describe("onboard-shell", () => { + it("resolves shell state from opts and env", () => { + expect( + resolveOnboardShellState( + { resume: true, fromDockerfile: null }, + { + NEMOCLAW_NON_INTERACTIVE: "1", + NEMOCLAW_RECREATE_SANDBOX: "1", + NEMOCLAW_DANGEROUSLY_SKIP_PERMISSIONS: "1", + NEMOCLAW_FROM_DOCKERFILE: "/tmp/Custom.Dockerfile", + }, + ), + ).toEqual({ + nonInteractive: true, + recreateSandbox: true, + resume: true, + dangerouslySkipPermissions: true, + requestedFromDockerfile: "/tmp/Custom.Dockerfile", + }); + }); + + it("formats the lock command line consistently", () => { + expect( + buildOnboardLockCommand({ + resume: true, + nonInteractive: true, + requestedFromDockerfile: "/tmp/Custom.Dockerfile", + }), + ).toBe("nemoclaw onboard --resume --non-interactive --from '/tmp/Custom.Dockerfile'"); + + expect( + buildOnboardLockCommand({ + resume: false, + nonInteractive: false, + requestedFromDockerfile: "/tmp/agent's Dockerfile", + }), + ).toBe("nemoclaw onboard --from '/tmp/agent'\\''s Dockerfile'"); + }); 
+ + it("renders banner and warning lines for the shell", () => { + expect(getOnboardBannerLines({ nonInteractive: true, resume: true })).toEqual([ + "", + " NemoClaw Onboarding", + " (non-interactive mode)", + " (resume mode)", + " ===================", + ]); + expect(getDangerouslySkipPermissionsWarningLines()).toEqual([ + "", + " ⚠ --dangerously-skip-permissions: sandbox security restrictions disabled.", + " Network: all known endpoints open (no method/path filtering)", + " Filesystem: sandbox home directory is writable", + " Use for development/testing only.", + "", + ]); + }); + + it("formats lock conflict guidance including holder metadata when present", () => { + expect( + getOnboardLockConflictLines({ + acquired: false, + lockFile: "/tmp/onboard.lock", + stale: false, + holderPid: 4242, + holderStartedAt: "2026-04-17T00:00:00.000Z", + }), + ).toEqual([ + " Another NemoClaw onboarding run is already in progress.", + " Lock holder PID: 4242", + " Started: 2026-04-17T00:00:00.000Z", + " Wait for it to finish, or remove the stale lock if the previous run crashed:", + ' rm -f "/tmp/onboard.lock"', + ]); + }); +}); diff --git a/src/lib/onboard-shell.ts b/src/lib/onboard-shell.ts new file mode 100644 index 0000000000..6e8c33f928 --- /dev/null +++ b/src/lib/onboard-shell.ts @@ -0,0 +1,90 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import type { LockResult } from "./onboard-session"; + +export interface OnboardShellInput { + nonInteractive?: boolean; + recreateSandbox?: boolean; + resume?: boolean; + dangerouslySkipPermissions?: boolean; + fromDockerfile?: string | null; +} + +export interface OnboardShellState { + nonInteractive: boolean; + recreateSandbox: boolean; + resume: boolean; + dangerouslySkipPermissions: boolean; + requestedFromDockerfile: string | null; +} + +export function resolveOnboardShellState( + opts: OnboardShellInput = {}, + env: NodeJS.ProcessEnv = process.env, +): OnboardShellState { + const nonInteractive = + opts.nonInteractive === true || env.NEMOCLAW_NON_INTERACTIVE === "1"; + return { + nonInteractive, + recreateSandbox: + opts.recreateSandbox === true || env.NEMOCLAW_RECREATE_SANDBOX === "1", + resume: opts.resume === true, + dangerouslySkipPermissions: + opts.dangerouslySkipPermissions === true || + env.NEMOCLAW_DANGEROUSLY_SKIP_PERMISSIONS === "1", + requestedFromDockerfile: + opts.fromDockerfile || (nonInteractive ? env.NEMOCLAW_FROM_DOCKERFILE || null : null), + }; +} + +function quoteShellArg(value: string): string { + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +export function buildOnboardLockCommand( + state: Pick, +): string { + const fromArg = state.requestedFromDockerfile + ? ` --from ${quoteShellArg(state.requestedFromDockerfile)}` + : ""; + return `nemoclaw onboard${state.resume ? " --resume" : ""}${state.nonInteractive ? " --non-interactive" : ""}${fromArg}`; +} + +export function getOnboardBannerLines( + state: Pick, +): string[] { + return [ + "", + " NemoClaw Onboarding", + ...(state.nonInteractive ? [" (non-interactive mode)"] : []), + ...(state.resume ? 
[" (resume mode)"] : []), + " ===================", + ]; +} + +export function getDangerouslySkipPermissionsWarningLines(): string[] { + return [ + "", + " ⚠ --dangerously-skip-permissions: sandbox security restrictions disabled.", + " Network: all known endpoints open (no method/path filtering)", + " Filesystem: sandbox home directory is writable", + " Use for development/testing only.", + "", + ]; +} + +export function getOnboardLockConflictLines(lockResult: LockResult): string[] { + const lines = [" Another NemoClaw onboarding run is already in progress."]; + if (lockResult.holderPid) { + lines.push(` Lock holder PID: ${lockResult.holderPid}`); + } + if (lockResult.holderStartedAt) { + lines.push(` Started: ${lockResult.holderStartedAt}`); + } + lines.push( + " Wait for it to finish, or remove the stale lock if the previous run crashed:", + ` rm -f \"${lockResult.lockFile}\"`, + ); + return lines; +} diff --git a/src/lib/onboard-step-api.ts b/src/lib/onboard-step-api.ts new file mode 100644 index 0000000000..c7af335101 --- /dev/null +++ b/src/lib/onboard-step-api.ts @@ -0,0 +1,275 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { runSetupInference } from "./onboard-inference-provider"; +import { + getGatewayStartEnv as buildGatewayStartEnv, + recoverGatewayRuntime as recoverGatewayRuntimeWithDeps, + startGatewayWithOptions as startGatewayWithOptionsWithDeps, +} from "./onboard-gateway-runtime"; +import { setupMessagingChannels as setupMessagingChannelsWithDeps } from "./onboard-messaging"; +import { runSetupNim as setupNimWithDeps } from "./onboard-nim-setup"; +import { setupOpenclaw as setupOpenclawWithDeps } from "./onboard-openclaw-setup"; +import { getSuggestedPolicyPresets as getSuggestedPolicyPresetsWithDeps } from "./onboard-policy-suggestions"; +import { runOnboardPreflight } from "./onboard-preflight-run"; +import { checkTelegramReachability as checkTelegramReachabilityWithDeps } from "./onboard-telegram"; + +export function createHostGatewayApi(input: any) { + const preflight = async () => + runOnboardPreflight({ + step: input.step, + assessHost: input.assessHost, + planHostRemediation: input.planHostRemediation, + printRemediationActions: input.printRemediationActions, + isOpenshellInstalled: input.isOpenshellInstalled, + installOpenshell: input.installOpenshell, + getInstalledOpenshellVersion: input.getInstalledOpenshellVersion, + runCaptureOpenshell: input.runCaptureOpenshell, + getBlueprintMinOpenshellVersion: input.getBlueprintMinOpenshellVersion, + getBlueprintMaxOpenshellVersion: input.getBlueprintMaxOpenshellVersion, + versionGte: input.versionGte, + getGatewayReuseState: input.getGatewayReuseState, + verifyGatewayContainerRunning: input.verifyGatewayContainerRunning, + runOpenshell: input.runOpenshell, + destroyGateway: input.destroyGateway, + clearRegistryAll: input.clearRegistryAll, + run: input.run, + runCapture: input.runCapture, + checkPortAvailable: input.checkPortAvailable, + sleep: input.sleep, + getPortConflictServiceHints: input.getPortConflictServiceHints, + getMemoryInfo: input.getMemoryInfo, + ensureSwap: 
input.ensureSwap, + isNonInteractive: input.isNonInteractive, + prompt: input.prompt, + nimDetectGpu: input.nimDetectGpu, + processPlatform: input.processPlatform, + gatewayName: input.gatewayName, + dashboardPort: input.dashboardPort, + gatewayPort: input.gatewayPort, + }); + + /** Start the OpenShell gateway with retry logic and post-start health polling. */ + const startGatewayWithOptions = async (_gpu: unknown, { exitOnFailure = true } = {}) => + startGatewayWithOptionsWithDeps( + _gpu, + { + gatewayName: input.gatewayName, + gatewayPort: input.gatewayPort, + scriptsDir: input.scriptsDir, + processEnv: input.processEnv, + processArch: input.processArch, + showHeader: () => { + input.step(2, 8, "Starting OpenShell gateway"); + }, + log: input.log, + error: input.error, + exit: input.exit, + openshellShellCommand: (args: string[]) => input.openshellShellCommand(args), + streamGatewayStart: input.streamGatewayStart, + runCaptureOpenshell: input.runCaptureOpenshell, + runOpenshell: input.runOpenshell, + isGatewayHealthy: input.isGatewayHealthy, + hasStaleGateway: input.hasStaleGateway, + redact: input.redact, + compactText: input.compactText, + envInt: input.envInt, + sleep: input.sleep, + getInstalledOpenshellVersion: () => input.getInstalledOpenshellVersion(), + getContainerRuntime: input.getContainerRuntime, + shouldPatchCoredns: input.shouldPatchCoredns, + run: input.run, + destroyGateway: input.destroyGateway, + pruneKnownHostsEntries: input.pruneKnownHostsEntries, + }, + { exitOnFailure }, + ); + + const startGateway = async (_gpu: unknown) => + startGatewayWithOptions(_gpu, { exitOnFailure: true }); + + const startGatewayForRecovery = async (_gpu: unknown) => + startGatewayWithOptions(_gpu, { exitOnFailure: false }); + + const getGatewayStartEnv = () => buildGatewayStartEnv(input.getInstalledOpenshellVersion()); + + const recoverGatewayRuntime = async () => + recoverGatewayRuntimeWithDeps({ + gatewayName: input.gatewayName, + gatewayPort: input.gatewayPort, 
+ processEnv: input.processEnv, + runCaptureOpenshell: input.runCaptureOpenshell, + runOpenshell: input.runOpenshell, + isSelectedGateway: input.isSelectedGateway, + getGatewayStartEnv, + envInt: input.envInt, + sleep: input.sleep, + redact: input.redact, + compactText: input.compactText, + getContainerRuntime: input.getContainerRuntime, + shouldPatchCoredns: input.shouldPatchCoredns, + run: input.run, + scriptsDir: input.scriptsDir, + error: input.error, + }); + + return { + preflight, + startGatewayWithOptions, + startGateway, + startGatewayForRecovery, + getGatewayStartEnv, + recoverGatewayRuntime, + }; +} + +export function createInferenceRuntimeApi(input: any) { + const setupNim = async (gpu: unknown) => + setupNimWithDeps(gpu, { + step: input.step, + remoteProviderConfig: input.remoteProviderConfig, + runCapture: input.runCapture, + ollamaPort: input.ollamaPort, + vllmPort: input.vllmPort, + ollamaProxyPort: input.ollamaProxyPort, + experimental: input.experimental, + isNonInteractive: input.isNonInteractive, + getNonInteractiveProvider: input.getNonInteractiveProvider, + getNonInteractiveModel: input.getNonInteractiveModel, + note: input.note, + prompt: input.prompt, + getNavigationChoice: input.getNavigationChoice, + exitOnboardFromPrompt: input.exitOnboardFromPrompt, + normalizeProviderBaseUrl: input.normalizeProviderBaseUrl, + validateNvidiaApiKeyValue: input.validateNvidiaApiKeyValue, + ensureApiKey: input.ensureApiKey, + defaultCloudModel: input.defaultCloudModel, + promptCloudModel: input.promptCloudModel, + ensureNamedCredential: input.ensureNamedCredential, + getProbeAuthMode: input.getProbeAuthMode, + validateOpenAiLikeModel: input.validateOpenAiLikeModel, + getCredential: input.getCredential, + validateAnthropicModel: input.validateAnthropicModel, + anthropicEndpointUrl: input.anthropicEndpointUrl, + promptRemoteModel: input.promptRemoteModel, + promptInputModel: input.promptInputModel, + backToSelection: input.backToSelection, + 
validateCustomOpenAiLikeSelection: input.validateCustomOpenAiLikeSelection, + validateCustomAnthropicSelection: input.validateCustomAnthropicSelection, + validateAnthropicSelectionWithRetryMessage: input.validateAnthropicSelectionWithRetryMessage, + validateOpenAiLikeSelection: input.validateOpenAiLikeSelection, + shouldRequireResponsesToolCalling: input.shouldRequireResponsesToolCalling, + shouldSkipResponsesProbe: input.shouldSkipResponsesProbe, + nim: input.nim, + gatewayName: input.gatewayName, + getLocalProviderBaseUrl: input.getLocalProviderBaseUrl, + getLocalProviderValidationBaseUrl: input.getLocalProviderValidationBaseUrl, + processPlatform: input.processPlatform, + validateLocalProvider: input.validateLocalProvider, + isWsl: input.isWsl, + run: input.run, + sleep: input.sleep, + printOllamaExposureWarning: input.printOllamaExposureWarning, + startOllamaAuthProxy: input.startOllamaAuthProxy, + getOllamaModelOptions: input.getOllamaModelOptions, + getDefaultOllamaModel: input.getDefaultOllamaModel, + promptOllamaModel: input.promptOllamaModel, + prepareOllamaModel: input.prepareOllamaModel, + isSafeModelId: input.isSafeModelId, + }); + + const setupInference = async ( + sandboxName: string, + model: string, + provider: string, + endpointUrl: string | null = null, + credentialEnv: string | null = null, + ) => + runSetupInference(sandboxName, model, provider, endpointUrl, credentialEnv, { + step: input.step, + runOpenshell: input.runOpenshell, + gatewayName: input.gatewayName, + remoteProviderConfig: input.remoteProviderConfig, + hydrateCredentialEnv: input.hydrateCredentialEnv, + upsertProvider: input.upsertProvider, + isNonInteractive: input.isNonInteractive, + promptValidationRecovery: input.promptValidationRecovery, + classifyApplyFailure: input.classifyApplyFailure, + compactText: input.compactText, + redact: input.redact, + validateLocalProvider: input.validateLocalProvider, + getLocalProviderBaseUrl: input.getLocalProviderBaseUrl, + 
localInferenceTimeoutSecs: input.localInferenceTimeoutSecs, + ensureOllamaAuthProxy: input.ensureOllamaAuthProxy, + getOllamaProxyToken: input.getOllamaProxyToken, + persistProxyToken: input.persistProxyToken, + isWsl: input.isWsl, + getOllamaWarmupCommand: input.getOllamaWarmupCommand, + validateOllamaModel: input.validateOllamaModel, + verifyInferenceRoute: input.verifyInferenceRoute, + updateSandbox: input.updateSandbox, + processPlatform: input.processPlatform, + run: input.run, + }); + + const checkTelegramReachability = async (token: string) => + checkTelegramReachabilityWithDeps(token, { + runCurlProbe: input.runCurlProbe, + isNonInteractive: input.isNonInteractive, + promptOrDefault: input.promptOrDefault, + log: input.log, + error: input.error, + exit: input.exit, + }); + + const setupMessagingChannels = async () => + setupMessagingChannelsWithDeps({ + step: input.step, + isNonInteractive: input.isNonInteractive, + note: input.note, + getCredential: input.getCredential, + normalizeCredentialValue: input.normalizeCredentialValue, + prompt: input.prompt, + promptOrDefault: input.promptOrDefault, + saveCredential: input.saveCredential, + checkTelegramReachability, + env: input.env, + input: input.stdin, + output: input.stderr, + }); + + const getSuggestedPolicyPresets = (options: any = {}) => + getSuggestedPolicyPresetsWithDeps({ + enabledChannels: options.enabledChannels ?? null, + webSearchConfig: options.webSearchConfig ?? null, + provider: options.provider ?? 
null, + getCredential: input.getCredential, + env: input.env, + isInteractiveTty: input.isInteractiveTty, + isNonInteractive: input.isNonInteractive(), + note: input.noteLog, + }); + + const setupOpenclaw = async (sandboxName: string, model: string, provider: string) => + setupOpenclawWithDeps(sandboxName, model, provider, { + step: input.step, + getProviderSelectionConfig: input.getProviderSelectionConfig, + writeSandboxConfigSyncFile: input.writeSandboxConfigSyncFile, + openshellShellCommand: input.openshellShellCommand, + shellQuote: input.shellQuote, + run: input.run, + cleanupTempDir: input.cleanupTempDir, + fetchGatewayAuthTokenFromSandbox: input.fetchGatewayAuthTokenFromSandbox, + log: input.log, + secureTempFile: input.secureTempFile, + }); + + return { + setupNim, + setupInference, + checkTelegramReachability, + setupMessagingChannels, + getSuggestedPolicyPresets, + setupOpenclaw, + }; +} diff --git a/src/lib/onboard-telegram.test.ts b/src/lib/onboard-telegram.test.ts new file mode 100644 index 0000000000..a5cab0cb9c --- /dev/null +++ b/src/lib/onboard-telegram.test.ts @@ -0,0 +1,79 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. 
+import { + checkTelegramReachability, + TELEGRAM_NETWORK_CURL_CODES, +} from "../../dist/lib/onboard-telegram"; + +describe("onboard-telegram", () => { + it("defines the expected curl codes for network-level Telegram failures", () => { + expect([...TELEGRAM_NETWORK_CURL_CODES]).toEqual([6, 7, 28, 35, 52, 56]); + }); + + it("aborts in non-interactive mode on network failures", async () => { + await expect( + checkTelegramReachability("fake-token", { + runCurlProbe: () => ({ + ok: false, + httpStatus: 0, + curlStatus: 52, + body: "", + stderr: "Empty reply from server", + message: "curl failed (exit 52): Empty reply from server", + }), + isNonInteractive: () => true, + promptOrDefault: async () => "n", + log: vi.fn(), + error: vi.fn(), + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + }), + ).rejects.toThrow("exit:1"); + }); + + it("warns on HTTP token rejection and succeeds silently on HTTP 200", async () => { + const logs: string[] = []; + await checkTelegramReachability("bad-token", { + runCurlProbe: () => ({ + ok: false, + httpStatus: 401, + curlStatus: 0, + body: "", + stderr: "", + message: "HTTP 401", + }), + isNonInteractive: () => true, + promptOrDefault: async () => "n", + log: (message = "") => logs.push(message), + error: vi.fn(), + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + }); + expect(logs).toContain(" ⚠ Bot token was rejected by Telegram — verify the token is correct."); + + const successLogs: string[] = []; + await checkTelegramReachability("valid-token", { + runCurlProbe: () => ({ + ok: true, + httpStatus: 200, + curlStatus: 0, + body: '{"ok":true}', + stderr: "", + message: "", + }), + isNonInteractive: () => true, + promptOrDefault: async () => "n", + log: (message = "") => successLogs.push(message), + error: vi.fn(), + exit: ((code: number) => { + throw new Error(`exit:${code}`); + }) as never, + }); + expect(successLogs).toEqual([]); + }); +}); diff --git 
a/src/lib/onboard-telegram.ts b/src/lib/onboard-telegram.ts new file mode 100644 index 0000000000..91e151f852 --- /dev/null +++ b/src/lib/onboard-telegram.ts @@ -0,0 +1,82 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +export interface TelegramProbeResult { + ok: boolean; + httpStatus: number; + curlStatus: number; + body: string; + stderr: string; + message: string; +} + +// Curl exit codes that indicate a network-level failure (not a token problem). +// 35 (TLS handshake failure) covers corporate proxies that MITM HTTPS. +export const TELEGRAM_NETWORK_CURL_CODES = new Set([6, 7, 28, 35, 52, 56]); + +export interface CheckTelegramReachabilityDeps { + runCurlProbe: (args: string[]) => TelegramProbeResult; + isNonInteractive: () => boolean; + promptOrDefault: ( + question: string, + envVar: string | null, + defaultValue: string, + ) => Promise; + log?: (message?: string) => void; + error?: (message?: string) => void; + exit?: (code: number) => never; +} + +export async function checkTelegramReachability( + token: string, + deps: CheckTelegramReachabilityDeps, +): Promise { + const log = deps.log ?? console.log; + const error = deps.error ?? console.error; + const exit = deps.exit ?? 
((code: number) => process.exit(code)); + + const result = deps.runCurlProbe([ + "-sS", + "--connect-timeout", + "5", + "--max-time", + "10", + `https://api.telegram.org/bot${token}/getMe`, + ]); + + if (result.ok) return; + + if (result.httpStatus === 401 || result.httpStatus === 404) { + log(" ⚠ Bot token was rejected by Telegram — verify the token is correct."); + return; + } + + if (result.curlStatus && TELEGRAM_NETWORK_CURL_CODES.has(result.curlStatus)) { + log(""); + log(" ⚠ api.telegram.org is not reachable from this host."); + log(" Telegram integration requires outbound HTTPS access to api.telegram.org."); + log(" This is commonly blocked by corporate network proxies."); + + if (deps.isNonInteractive()) { + error( + " Aborting onboarding in non-interactive mode due to Telegram network reachability failure.", + ); + exit(1); + } else { + const answer = (await deps.promptOrDefault(" Continue anyway? [y/N]: ", null, "n")) + .trim() + .toLowerCase(); + if (answer !== "y" && answer !== "yes") { + log(" Aborting onboarding."); + exit(1); + } + } + return; + } + + if (!result.ok && result.httpStatus > 0) { + log(` ⚠ Telegram API returned HTTP ${result.httpStatus} — the bot may not work correctly.`); + } else if (!result.ok) { + log(` ⚠ Telegram reachability probe failed: ${result.message}`); + } +} diff --git a/src/lib/onboard-ui-api.ts b/src/lib/onboard-ui-api.ts new file mode 100644 index 0000000000..0fd4c4c3a7 --- /dev/null +++ b/src/lib/onboard-ui-api.ts @@ -0,0 +1,133 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { + arePolicyPresetsApplied as arePolicyPresetsAppliedWithDeps, + presetsCheckboxSelector as presetsCheckboxSelectorWithDeps, + selectPolicyTier as selectPolicyTierWithDeps, + selectTierPresetsAndAccess as selectTierPresetsAndAccessWithDeps, + setupPoliciesLegacy as setupPoliciesLegacyWithDeps, + setupPoliciesWithSelection as setupPoliciesWithSelectionWithDeps, +} from "./onboard-policy-ui"; +import { + buildAuthenticatedDashboardUrl, + ensureDashboardForward as ensureDashboardForwardWithDeps, + fetchGatewayAuthTokenFromSandbox as fetchGatewayAuthTokenFromSandboxWithDeps, + getDashboardAccessInfo as getDashboardAccessInfoWithDeps, + getDashboardForwardStartCommand as getDashboardForwardStartCommandWithDeps, + getDashboardGuidanceLines, + getWslHostAddress, +} from "./onboard-dashboard"; +import { printOnboardDashboard } from "./onboard-dashboard-print"; + +export function createPolicyUiApi(input: any) { + const deps = { + step: input.step, + prompt: input.prompt, + note: input.note, + sleep: input.sleep, + isNonInteractive: input.isNonInteractive, + parsePolicyPresetEnv: input.parsePolicyPresetEnv, + waitForSandboxReady: input.waitForSandboxReady, + localInferenceProviders: input.localInferenceProviders, + useColor: input.useColor, + policies: input.policies, + tiers: input.tiers, + updateSandbox: input.updateSandbox, + }; + + return { + async setupPoliciesLegacy(sandboxName: string, options: any = {}) { + return setupPoliciesLegacyWithDeps( + sandboxName, + { + ...options, + getSuggestedPolicyPresets: input.getSuggestedPolicyPresets, + }, + deps, + ); + }, + arePolicyPresetsApplied(sandboxName: string, selectedPresets: string[] = []) { + return arePolicyPresetsAppliedWithDeps(sandboxName, selectedPresets, deps); + }, + async selectPolicyTier() { + return selectPolicyTierWithDeps(deps); + }, + async selectTierPresetsAndAccess( + tierName: string, + allPresets: Array<{ name: string; description?: string }>, + 
extraSelected: string[] = [], + ) { + return selectTierPresetsAndAccessWithDeps(tierName, allPresets, extraSelected, deps); + }, + async presetsCheckboxSelector( + allPresets: Array<{ name: string; description: string }>, + initialSelected: string[], + ) { + return presetsCheckboxSelectorWithDeps(allPresets, initialSelected, deps); + }, + async setupPoliciesWithSelection(sandboxName: string, options: any = {}) { + return setupPoliciesWithSelectionWithDeps(sandboxName, options, deps); + }, + }; +} + +export function createDashboardApi(input: any) { + const ensureDashboardForward = ( + sandboxName: string, + chatUiUrl = `http://127.0.0.1:${input.controlUiPort}`, + ) => + ensureDashboardForwardWithDeps(sandboxName, { + chatUiUrl, + runOpenshell: input.runOpenshell, + warningWriter: input.warningWriter, + }); + + const fetchGatewayAuthTokenFromSandbox = (sandboxName: string) => + fetchGatewayAuthTokenFromSandboxWithDeps(sandboxName, { runOpenshell: input.runOpenshell }); + + const getDashboardForwardStartCommand = (sandboxName: string, options: any = {}) => + getDashboardForwardStartCommandWithDeps(sandboxName, { + ...options, + openshellShellCommand: input.openshellShellCommand, + }); + + const getDashboardAccessInfo = (sandboxName: string, options: any = {}) => + getDashboardAccessInfoWithDeps(sandboxName, { + ...options, + fetchToken: (name: string) => fetchGatewayAuthTokenFromSandbox(name), + runCapture: options.runCapture || input.runCapture, + }); + + const printDashboard = ( + sandboxName: string, + model: string, + provider: string, + nimContainer: string | null = null, + agent: unknown = null, + ) => + printOnboardDashboard(sandboxName, model, provider, nimContainer, agent, { + getNimStatus: (targetSandboxName: string, targetNimContainer: string | null) => + targetNimContainer + ? 
input.nimStatusByName(targetNimContainer) + : input.nimStatus(targetSandboxName), + fetchGatewayAuthTokenFromSandbox, + getDashboardAccessInfo: (targetSandboxName: string, options: any) => + getDashboardAccessInfo(targetSandboxName, options), + getDashboardGuidanceLines, + note: input.note, + log: input.log, + printAgentDashboardUi: input.printAgentDashboardUi, + buildControlUiUrls: input.buildControlUiUrls, + getWslHostAddress, + buildAuthenticatedDashboardUrl, + }); + + return { + ensureDashboardForward, + fetchGatewayAuthTokenFromSandbox, + getDashboardForwardStartCommand, + getDashboardAccessInfo, + printDashboard, + }; +} diff --git a/src/lib/onboard-web-search-config.ts b/src/lib/onboard-web-search-config.ts new file mode 100644 index 0000000000..ae70710389 --- /dev/null +++ b/src/lib/onboard-web-search-config.ts @@ -0,0 +1,183 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { WebSearchConfig } from "./web-search"; + +export interface WebSearchConfigDeps { + isNonInteractive: () => boolean; + prompt: (question: string, options?: { secret?: boolean }) => Promise; + normalizeCredentialValue: (value: string | null | undefined) => string | null; + getCredential: (envKey: string) => string | null; + saveCredential: (envKey: string, value: string) => void; + runCurlProbe: (argv: string[]) => { + ok: boolean; + message?: string; + }; + classifyValidationFailure: (validation: unknown) => { kind: string }; + getTransportRecoveryMessage: (validation: unknown) => string; + exitOnboardFromPrompt: () => never; + note: (message: string) => void; + braveApiKeyEnv: string; + braveSearchHelpUrl: string; +} + +function isAffirmativeAnswer(value: string): boolean { + return ["y", "yes"].includes( + String(value || "") + .trim() + .toLowerCase(), + ); +} + +function validateBraveSearchApiKey(apiKey: string, deps: WebSearchConfigDeps) { + return deps.runCurlProbe([ + 
"-sS", + "--compressed", + "-H", + "Accept: application/json", + "-H", + "Accept-Encoding: gzip", + "-H", + `X-Subscription-Token: ${apiKey}`, + "--get", + "--data-urlencode", + "q=ping", + "--data-urlencode", + "count=1", + "https://api.search.brave.com/res/v1/web/search", + ]); +} + +async function promptBraveSearchRecovery( + validation: unknown, + deps: WebSearchConfigDeps, +): Promise<"retry" | "skip"> { + const recovery = deps.classifyValidationFailure(validation); + + if (recovery.kind === "credential") { + console.log(" Brave Search rejected that API key."); + } else if (recovery.kind === "transport") { + console.log(deps.getTransportRecoveryMessage(validation)); + } else { + console.log(" Brave Search validation did not succeed."); + } + + const answer = (await deps.prompt(" Type 'retry', 'skip', or 'exit' [retry]: ")) + .trim() + .toLowerCase(); + if (answer === "skip") return "skip"; + if (answer === "exit" || answer === "quit") { + deps.exitOnboardFromPrompt(); + } + return "retry"; +} + +async function promptBraveSearchApiKey(deps: WebSearchConfigDeps): Promise { + console.log(""); + console.log(` Get your Brave Search API key from: ${deps.braveSearchHelpUrl}`); + console.log(""); + + while (true) { + const key = deps.normalizeCredentialValue( + await deps.prompt(" Brave Search API key: ", { secret: true }), + ); + if (!key) { + console.error(" Brave Search API key is required."); + continue; + } + return key; + } +} + +export async function ensureValidatedBraveSearchCredential( + nonInteractive = false, + deps: WebSearchConfigDeps, +): Promise { + const savedApiKey = deps.getCredential(deps.braveApiKeyEnv); + let apiKey = savedApiKey || deps.normalizeCredentialValue(process.env[deps.braveApiKeyEnv]); + let usingSavedKey = Boolean(savedApiKey); + + while (true) { + if (!apiKey) { + if (nonInteractive) { + throw new Error( + "Brave Search requires BRAVE_API_KEY or a saved Brave Search credential in non-interactive mode.", + ); + } + apiKey = await 
promptBraveSearchApiKey(deps); + usingSavedKey = false; + } + + const validation = validateBraveSearchApiKey(apiKey, deps); + if (validation.ok) { + deps.saveCredential(deps.braveApiKeyEnv, apiKey); + process.env[deps.braveApiKeyEnv] = apiKey; + return apiKey; + } + + const prefix = usingSavedKey + ? "   Saved Brave Search API key validation failed." + : "   Brave Search API key validation failed."; + console.error(prefix); + if (validation.message) { + console.error(`   ${validation.message}`); + } + + if (nonInteractive) { + throw new Error( + validation.message || "Brave Search API key validation failed in non-interactive mode.", + ); + } + + const action = await promptBraveSearchRecovery(validation, deps); + if (action === "skip") { + console.log("   Skipping Brave Web Search setup."); + console.log(""); + return null; + } + + apiKey = null; + usingSavedKey = false; + } +} + +export async function configureWebSearch( + existingConfig: WebSearchConfig | null = null, + deps: WebSearchConfigDeps, +): Promise<WebSearchConfig | null> { + if (existingConfig) { + return { fetchEnabled: true }; + } + + if (deps.isNonInteractive()) { + const braveApiKey = deps.normalizeCredentialValue(process.env[deps.braveApiKeyEnv]); + if (!braveApiKey) { + return null; + } + deps.note("   [non-interactive] Brave Web Search requested."); + const validation = validateBraveSearchApiKey(braveApiKey, deps); + if (!validation.ok) { + console.error("   Brave Search API key validation failed."); + if (validation.message) { + console.error(`   ${validation.message}`); + } + process.exit(1); + } + deps.saveCredential(deps.braveApiKeyEnv, braveApiKey); + process.env[deps.braveApiKeyEnv] = braveApiKey; + return { fetchEnabled: true }; + } + const enableAnswer = await deps.prompt("   Enable Brave Web Search? 
[y/N]: "); + if (!isAffirmativeAnswer(enableAnswer)) { + return null; + } + + const braveApiKey = await ensureValidatedBraveSearchCredential(deps.isNonInteractive(), deps); + if (!braveApiKey) { + return null; + } + + console.log(" ✓ Enabled Brave Web Search"); + console.log(""); + return { fetchEnabled: true }; +} diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index ccb6f3392a..d1617fc859 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -12,6 +12,7 @@ const os = require("os"); const path = require("path"); const { spawn, spawnSync } = require("child_process"); const pRetry = require("p-retry"); +const { ANSI_RE } = require("./ansi-utils"); /** Parse a numeric env var, returning `fallback` when unset or non-finite. */ function envInt(name, fallback) { @@ -23,9 +24,6 @@ function envInt(name, fallback) { /** Inference timeout (seconds) for local providers (Ollama, vLLM, NIM). */ const LOCAL_INFERENCE_TIMEOUT_SECS = envInt("NEMOCLAW_LOCAL_INFERENCE_TIMEOUT", 180); -/** Strip ANSI escape sequences before printing process output to the terminal. - * Covers CSI (color, erase, cursor), OSC, and C1 two-byte escapes per ECMA-48. */ -const ANSI_RE = /\x1B(?:\[[0-?]*[ -/]*[@-~]|\][^\x07]*(?:\x07|\x1B\\)|[@-_])/g; const { ROOT, SCRIPTS, redact, run, runCapture, runFile, shellQuote, validateName } = require("./runner"); const { stageOptimizedSandboxBuildContext } = require("./sandbox-build-context"); const { buildSubprocessEnv } = require("./subprocess-env"); @@ -46,10 +44,14 @@ const { parseGatewayInference, } = require("./inference-config"); -// Providers that run on the host and need the local-inference policy preset. -// Shared constant so getSuggestedPolicyPresets() and setupPoliciesWithSelection() -// stay in sync. 
-const LOCAL_INFERENCE_PROVIDERS = ["ollama-local", "vllm-local"]; +const { + ANTHROPIC_ENDPOINT_URL, + REMOTE_PROVIDER_CONFIG, +} = require("./onboard-remote-provider-config"); +const { + computeSetupPresetSuggestions: computeSetupPresetSuggestionsWithDeps, + LOCAL_INFERENCE_PROVIDERS, +} = require("./onboard-policy-suggestions"); const { inferContainerRuntime, isWsl, shouldPatchCoredns } = require("./platform"); const { resolveOpenshell } = require("./resolve-openshell"); const { @@ -62,6 +64,123 @@ const { const registry = require("./registry"); const nim = require("./nim"); const onboardSession = require("./onboard-session"); +const { ONBOARD_STEP_META, isOnboardStepName, toVisibleStepName } = require("./onboard-fsm"); +const { initializeOnboardRun } = require("./onboard-bootstrap"); +const { verifyGatewayContainerRunning: verifyGatewayContainerRunningWithDeps } = require("./onboard-gateway-liveness"); +const { streamGatewayStart: streamGatewayStartWithDeps } = require("./onboard-gateway-start-stream"); +const { + destroyGateway: destroyGatewayWithDeps, + getContainerRuntime: getContainerRuntimeWithDeps, + getSandboxReuseState: getSandboxReuseStateWithDeps, + installOpenshell: installOpenshellWithDepsRuntime, + isInferenceRouteReady: isInferenceRouteReadyWithDeps, + isOpenshellInstalled: isOpenshellInstalledWithDepsRuntime, + printRemediationActions: printRemediationActionsWithDeps, + pruneKnownHostsEntries: pruneKnownHostsEntriesWithDeps, + repairRecordedSandbox: repairRecordedSandboxWithDeps, + sleep: sleepWithDeps, + verifyInferenceRoute: verifyInferenceRouteWithDeps, + waitForSandboxReady: waitForSandboxReadyWithDepsRuntime, +} = require("./onboard-runtime-helpers"); +const { + getBlueprintMaxOpenshellVersion: getBlueprintMaxOpenshellVersionWithDeps, + getBlueprintMinOpenshellVersion: getBlueprintMinOpenshellVersionWithDeps, + getInstalledOpenshellVersion: getInstalledOpenshellVersionWithDeps, + getStableGatewayImageRef: getStableGatewayImageRefWithDeps, + 
versionGte: versionGteWithDeps, +} = require("./onboard-openshell-version"); +const { createHostGatewayApi, createInferenceRuntimeApi } = require("./onboard-step-api"); +const { + getProbeAuthMode: getProbeAuthModeWithDeps, + getValidationProbeCurlArgs: getValidationProbeCurlArgsWithDeps, + hasResponsesToolCall: hasResponsesToolCallWithDeps, + promptValidationRecovery: promptValidationRecoveryWithDeps, + shouldRequireResponsesToolCalling: shouldRequireResponsesToolCallingWithDeps, + validateAnthropicSelectionWithRetryMessage: validateAnthropicSelectionWithRetryMessageWithDeps, + validateCustomAnthropicSelection: validateCustomAnthropicSelectionWithDeps, + validateCustomOpenAiLikeSelection: validateCustomOpenAiLikeSelectionWithDeps, + validateOpenAiLikeSelection: validateOpenAiLikeSelectionWithDeps, +} = require("./onboard-inference-validation"); +const { + buildProviderArgs: buildProviderArgsWithDeps, + detectMessagingCredentialRotation: detectMessagingCredentialRotationWithDeps, + hashCredential: hashCredentialWithDeps, + makeConflictProbe: makeConflictProbeWithDeps, + providerExistsInGateway: providerExistsInGatewayWithDeps, + upsertMessagingProviders: upsertMessagingProvidersWithDeps, + upsertProvider: upsertProviderWithDeps, +} = require("./onboard-provider-management"); +const { + SANDBOX_BASE_IMAGE, + SANDBOX_BASE_TAG, + getSandboxInferenceConfig: getSandboxInferenceConfigWithDeps, + patchStagedDockerfile: patchStagedDockerfileWithDeps, + pullAndResolveBaseImageDigest: pullAndResolveBaseImageDigestWithDeps, +} = require("./onboard-sandbox-build-config"); +const { + ensureOllamaAuthProxy: ensureOllamaAuthProxyWithDeps, + getOllamaProxyToken: getOllamaProxyTokenWithDeps, + persistProxyToken: persistProxyTokenWithDeps, + startOllamaAuthProxy: startOllamaAuthProxyWithDeps, +} = require("./onboard-ollama-proxy"); +const { + buildSandboxConfigSyncScript: buildSandboxConfigSyncScriptWithDeps, + isOpenclawReady: isOpenclawReadyWithDeps, + setupOpenclaw: 
setupOpenclawWithDeps, + writeSandboxConfigSyncFile: writeSandboxConfigSyncFileWithDeps, +} = require("./onboard-openclaw-setup"); +const { + prepareOllamaModel: prepareOllamaModelWithDeps, + printOllamaExposureWarning: printOllamaExposureWarningWithDeps, + promptOllamaModel: promptOllamaModelWithDeps, +} = require("./onboard-ollama-models"); +const { runCreateSandbox } = require("./onboard-sandbox-create"); +const { + configureWebSearch: configureWebSearchWithDeps, + ensureValidatedBraveSearchCredential: ensureValidatedBraveSearchCredentialWithDeps, +} = require("./onboard-web-search-config"); +const { MESSAGING_CHANNELS } = require("./onboard-messaging"); +const { promptValidatedSandboxName: promptValidatedSandboxNameWithDeps } = require("./onboard-sandbox-name"); +const { + buildAuthenticatedDashboardUrl, + fetchGatewayAuthTokenFromSandbox: fetchGatewayAuthTokenFromSandboxWithDeps, + getDashboardForwardPort, + getDashboardForwardTarget, + getDashboardGuidanceLines, +} = require("./onboard-dashboard"); +const { createDashboardApi, createPolicyUiApi } = require("./onboard-ui-api"); +const { runOnboardingEntry } = require("./onboard-entry"); +const { createOnboardingOrchestratorDeps } = require("./onboard-orchestrator-deps"); +const { runOnboardingOrchestrator } = require("./onboard-orchestrator"); +const { createOnboardRunContext } = require("./onboard-run-context"); +const { + buildOnboardLockCommand, + getDangerouslySkipPermissionsWarningLines, + getOnboardBannerLines, + getOnboardLockConflictLines, + resolveOnboardShellState, +} = require("./onboard-shell"); +const { + getEffectiveProviderName: resolveEffectiveProviderName, + getNonInteractiveModel: resolveNonInteractiveModel, + getNonInteractiveProvider: resolveNonInteractiveProvider, + getRequestedModelHint: resolveRequestedModelHint, + getRequestedProviderHint: resolveRequestedProviderHint, + getRequestedSandboxNameHint: resolveRequestedSandboxNameHint, + getResumeConfigConflicts: 
collectRequestedResumeConfigConflicts, + getResumeSandboxConflict: detectRequestedResumeSandboxConflict, +} = require("./onboard-requests"); +const { + installOpenshell: installOpenshellWithDeps, + isOpenshellInstalled: detectInstalledOpenshell, + waitForSandboxReady: waitForSandboxReadyWithDeps, +} = require("./onboard-openshell"); +const { + getContainerRuntime: resolveContainerRuntime, + getFutureShellPathHint: resolveFutureShellPathHint, + getPortConflictServiceHints: resolvePortConflictServiceHints, + printRemediationActions: renderRemediationActions, +} = require("./onboard-remediation"); const policies = require("./policies"); const shields = require("./shields"); const tiers = require("./tiers"); @@ -119,114 +238,13 @@ let OPENSHELL_BIN = null; const GATEWAY_NAME = "nemoclaw"; const BACK_TO_SELECTION = "__NEMOCLAW_BACK_TO_SELECTION__"; -/** - * Probe whether the gateway Docker container is actually running. - * openshell CLI metadata can be stale after a manual `docker rm`, so this - * verifies the container is live before trusting a "healthy" reuse state. - * - * Returns "running" | "missing" | "unknown". - * - "running" — container exists and State.Running is true - * - "missing" — container was removed or exists but is stopped (not reusable) - * - "unknown" — any other failure (daemon down, timeout, etc.) - * - * Callers should only trigger stale-metadata cleanup on "missing", not on - * "unknown", to avoid destroying a healthy gateway when Docker is temporarily - * unavailable. See #2020. 
- */ function verifyGatewayContainerRunning() { - const containerName = `openshell-cluster-${GATEWAY_NAME}`; - const result = run( - `docker inspect --type container --format '{{.State.Running}}' ${containerName}`, - { ignoreError: true, suppressOutput: true }, - ); - if (result.status === 0 && String(result.stdout || "").trim() === "true") { - return "running"; - } - // Container exists but is stopped (exit 0, Running !== "true") - if (result.status === 0) { - return "missing"; - } - const stderr = (result.stderr || "").toString(); - if (stderr.includes("No such object") || stderr.includes("No such container")) { - return "missing"; - } - return "unknown"; + return verifyGatewayContainerRunningWithDeps(GATEWAY_NAME, { run }); } const OPENCLAW_LAUNCH_AGENT_PLIST = "~/Library/LaunchAgents/ai.openclaw.gateway.plist"; -const BUILD_ENDPOINT_URL = "https://integrate.api.nvidia.com/v1"; -const OPENAI_ENDPOINT_URL = "https://api.openai.com/v1"; -const ANTHROPIC_ENDPOINT_URL = "https://api.anthropic.com"; -const GEMINI_ENDPOINT_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"; const BRAVE_SEARCH_HELP_URL = "https://brave.com/search/api/"; -const REMOTE_PROVIDER_CONFIG = { - build: { - label: "NVIDIA Endpoints", - providerName: "nvidia-prod", - providerType: "nvidia", - credentialEnv: "NVIDIA_API_KEY", - endpointUrl: BUILD_ENDPOINT_URL, - helpUrl: "https://build.nvidia.com/settings/api-keys", - modelMode: "catalog", - defaultModel: DEFAULT_CLOUD_MODEL, - skipVerify: true, - }, - openai: { - label: "OpenAI", - providerName: "openai-api", - providerType: "openai", - credentialEnv: "OPENAI_API_KEY", - endpointUrl: OPENAI_ENDPOINT_URL, - helpUrl: "https://platform.openai.com/api-keys", - modelMode: "curated", - defaultModel: "gpt-5.4", - skipVerify: true, - }, - anthropic: { - label: "Anthropic", - providerName: "anthropic-prod", - providerType: "anthropic", - credentialEnv: "ANTHROPIC_API_KEY", - endpointUrl: ANTHROPIC_ENDPOINT_URL, - helpUrl: 
"https://console.anthropic.com/settings/keys", - modelMode: "curated", - defaultModel: "claude-sonnet-4-6", - }, - anthropicCompatible: { - label: "Other Anthropic-compatible endpoint", - providerName: "compatible-anthropic-endpoint", - providerType: "anthropic", - credentialEnv: "COMPATIBLE_ANTHROPIC_API_KEY", - endpointUrl: "", - helpUrl: null, - modelMode: "input", - defaultModel: "", - }, - gemini: { - label: "Google Gemini", - providerName: "gemini-api", - providerType: "openai", - credentialEnv: "GEMINI_API_KEY", - endpointUrl: GEMINI_ENDPOINT_URL, - helpUrl: "https://aistudio.google.com/app/apikey", - modelMode: "curated", - defaultModel: "gemini-2.5-flash", - skipVerify: true, - }, - custom: { - label: "Other OpenAI-compatible endpoint", - providerName: "compatible-endpoint", - providerType: "openai", - credentialEnv: "COMPATIBLE_API_KEY", - endpointUrl: "", - helpUrl: null, - modelMode: "input", - defaultModel: "", - skipVerify: true, - }, -}; - const DISCORD_SNOWFLAKE_RE = /^[0-9]{17,19}$/; // Non-interactive mode: set by --non-interactive flag or env var. @@ -275,180 +293,36 @@ const { * Preserves blank lines and comments. Returns the cleaned string. 
*/ function pruneKnownHostsEntries(contents) { - return contents - .split("\n") - .filter((l) => { - const trimmed = l.trim(); - if (!trimmed || trimmed.startsWith("#")) return true; - const hostField = trimmed.split(/\s+/)[0]; - return !hostField.split(",").some((h) => h.startsWith("openshell-")); - }) - .join("\n"); + return pruneKnownHostsEntriesWithDeps(contents); } function getSandboxReuseState(sandboxName) { - if (!sandboxName) return "missing"; - const getOutput = runCaptureOpenshell(["sandbox", "get", sandboxName], { ignoreError: true }); - const listOutput = runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); - return getSandboxStateFromOutputs(sandboxName, getOutput, listOutput); + return getSandboxReuseStateWithDeps(sandboxName, { runCaptureOpenshell }); } function repairRecordedSandbox(sandboxName) { - if (!sandboxName) return; - note(` [resume] Cleaning up recorded sandbox '${sandboxName}' before recreating it.`); - runOpenshell(["forward", "stop", String(DASHBOARD_PORT)], { ignoreError: true }); - runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true }); - registry.removeSandbox(sandboxName); + return repairRecordedSandboxWithDeps(sandboxName, { + note, + dashboardPort: DASHBOARD_PORT, + runOpenshell, + removeSandbox: (name) => { + registry.removeSandbox(name); + }, + }); } const { streamSandboxCreate } = sandboxCreateStream; /** Spawn `openshell gateway start` and stream its output with progress heartbeats. 
*/ function streamGatewayStart(command, env = process.env) { - const child = spawn("bash", ["-lc", command], { - cwd: ROOT, - env, - stdio: ["ignore", "pipe", "pipe"], - }); - - const lines = []; - let pending = ""; - let settled = false; - let resolvePromise; - let lastPrintedLine = ""; - let currentPhase = "cluster"; - let lastHeartbeatBucket = -1; - let lastOutputAt = Date.now(); - const startedAt = Date.now(); - - function getDisplayWidth() { - return Math.max(60, Number(process.stdout.columns || 100)); - } - - function trimDisplayLine(line) { - const width = getDisplayWidth(); - const maxLen = Math.max(40, width - 4); - if (line.length <= maxLen) return line; - return `${line.slice(0, Math.max(0, maxLen - 3))}...`; - } - - function printProgressLine(line) { - const display = trimDisplayLine(line); - if (display !== lastPrintedLine) { - console.log(display); - lastPrintedLine = display; - } - } - - function elapsedSeconds() { - return Math.max(0, Math.floor((Date.now() - startedAt) / 1000)); - } - - function setPhase(nextPhase) { - if (!nextPhase || nextPhase === currentPhase) return; - currentPhase = nextPhase; - const phaseLine = - nextPhase === "install" - ? " Installing OpenShell components..." - : nextPhase === "pod" - ? " Starting OpenShell gateway pod..." - : nextPhase === "health" - ? " Waiting for gateway health..." 
- : " Starting gateway cluster..."; - printProgressLine(phaseLine); - } - - function classifyLine(line) { - if (/ApplyJob|helm-install-openshell|Applying HelmChart/i.test(line)) return "install"; - if ( - /openshell-0|Observed pod startup duration|MountVolume\.MountDevice succeeded/i.test(line) - ) { - return "pod"; - } - if (/Gateway .* ready\.?$/i.test(line)) return "health"; - return null; - } - - function flushLine(rawLine) { - const line = rawLine.replace(/\r/g, "").trimEnd(); - if (!line) return; - lines.push(line); - lastOutputAt = Date.now(); - const nextPhase = classifyLine(line); - if (nextPhase) setPhase(nextPhase); - } - - function onChunk(chunk) { - pending += chunk.toString(); - const parts = pending.split("\n"); - pending = parts.pop(); - parts.forEach(flushLine); - } - - function finish(result) { - if (settled) return; - settled = true; - if (pending) flushLine(pending); - clearInterval(heartbeatTimer); - resolvePromise(result); - } - - child.stdout.on("data", onChunk); - child.stderr.on("data", onChunk); - - printProgressLine(" Starting gateway cluster..."); - const heartbeatTimer = setInterval(() => { - if (settled) return; - const elapsed = elapsedSeconds(); - const bucket = Math.floor(elapsed / 10); - if (bucket === lastHeartbeatBucket) return; - if (Date.now() - lastOutputAt < 3000 && elapsed < 10) return; - const heartbeatLine = - currentPhase === "install" - ? ` Still installing OpenShell components... (${elapsed}s elapsed)` - : currentPhase === "pod" - ? ` Still starting OpenShell gateway pod... (${elapsed}s elapsed)` - : currentPhase === "health" - ? ` Still waiting for gateway health... (${elapsed}s elapsed)` - : ` Still starting gateway cluster... (${elapsed}s elapsed)`; - printProgressLine(heartbeatLine); - lastHeartbeatBucket = bucket; - }, 5000); - heartbeatTimer.unref?.(); - - // Hard timeout to prevent indefinite hangs if the openshell process - // never exits (e.g. Docker daemon unresponsive, k3s restart loop). 
(#1830) - // On timeout, send SIGTERM and let the `close` event resolve the promise - // so the child has actually exited before the caller proceeds to retry. - const GATEWAY_START_TIMEOUT = envInt("NEMOCLAW_GATEWAY_START_TIMEOUT", 600) * 1000; - let killedByTimeout = false; - const killTimer = setTimeout(() => { - killedByTimeout = true; - lines.push("[NemoClaw] Gateway start timed out — killing process."); - child.kill("SIGTERM"); - // If SIGTERM is ignored, force-kill after 10s. - setTimeout(() => { - if (!settled) child.kill("SIGKILL"); - }, 10_000).unref?.(); - }, GATEWAY_START_TIMEOUT); - killTimer.unref?.(); - - return new Promise((resolve) => { - resolvePromise = resolve; - child.on("error", (error) => { - clearTimeout(killTimer); - const detail = error?.message || String(error); - lines.push(detail); - finish({ status: 1, output: lines.join("\n") }); - }); - child.on("close", (code) => { - clearTimeout(killTimer); - const exitCode = killedByTimeout ? 1 : (code ?? 1); - finish({ status: exitCode, output: lines.join("\n") }); - }); + return streamGatewayStartWithDeps(command, env, { + spawn, + root: ROOT, + envInt, }); } + function step(n, total, msg) { console.log(""); console.log(` [${n}/${total}] ${msg}`); @@ -456,65 +330,19 @@ function step(n, total, msg) { } function getInstalledOpenshellVersion(versionOutput = null) { - const output = String(versionOutput ?? runCapture("openshell -V", { ignoreError: true })).trim(); - const match = output.match(/openshell\s+([0-9]+\.[0-9]+\.[0-9]+)/i); - if (!match) return null; - return match[1]; + return getInstalledOpenshellVersionWithDeps(versionOutput, { runCapture }); } -/** - * Compare two semver-like x.y.z strings. Returns true iff `left >= right`. - * Non-numeric or missing components are treated as 0. 
- */ function versionGte(left = "0.0.0", right = "0.0.0") { - const lhs = String(left) - .split(".") - .map((part) => Number.parseInt(part, 10) || 0); - const rhs = String(right) - .split(".") - .map((part) => Number.parseInt(part, 10) || 0); - const length = Math.max(lhs.length, rhs.length); - for (let index = 0; index < length; index += 1) { - const a = lhs[index] || 0; - const b = rhs[index] || 0; - if (a > b) return true; - if (a < b) return false; - } - return true; -} - -/** - * Read a semver field from nemoclaw-blueprint/blueprint.yaml. Returns null if - * the blueprint or field is missing or unparseable — callers must treat null - * as "no constraint configured" so a malformed install does not become a hard - * onboard blocker. See #1317. - */ -function getBlueprintVersionField(field, rootDir = ROOT) { - try { - // Lazy require: yaml is already a dependency via the policy helpers but - // pulling it at module load would slow down `nemoclaw --help` for users - // who never reach the preflight path. 
- const YAML = require("yaml"); - const blueprintPath = path.join(rootDir, "nemoclaw-blueprint", "blueprint.yaml"); - if (!fs.existsSync(blueprintPath)) return null; - const raw = fs.readFileSync(blueprintPath, "utf8"); - const parsed = YAML.parse(raw); - const value = parsed && parsed[field]; - if (typeof value !== "string") return null; - const trimmed = value.trim(); - if (!/^[0-9]+\.[0-9]+\.[0-9]+/.test(trimmed)) return null; - return trimmed; - } catch { - return null; - } + return versionGteWithDeps(left, right); } function getBlueprintMinOpenshellVersion(rootDir = ROOT) { - return getBlueprintVersionField("min_openshell_version", rootDir); + return getBlueprintMinOpenshellVersionWithDeps(rootDir); } function getBlueprintMaxOpenshellVersion(rootDir = ROOT) { - return getBlueprintVersionField("max_openshell_version", rootDir); + return getBlueprintMaxOpenshellVersionWithDeps(rootDir); } // ── Base image digest resolution ──────────────────────────────── @@ -523,57 +351,18 @@ function getBlueprintMaxOpenshellVersion(rootDir = ROOT) { // e2e tests in #1937 — the digest always comes from the same registry // we're pinning to. See #1904. -const SANDBOX_BASE_IMAGE = "ghcr.io/nvidia/nemoclaw/sandbox-base"; -const SANDBOX_BASE_TAG = "latest"; - -/** - * Pull sandbox-base:latest from GHCR and resolve its repo digest. - * Returns { digest, ref } on success, or null when the pull or - * inspect fails (offline, GHCR outage, local-only build). 
- */ function pullAndResolveBaseImageDigest() { - const imageWithTag = `${SANDBOX_BASE_IMAGE}:${SANDBOX_BASE_TAG}`; - try { - run(["docker", "pull", imageWithTag], { suppressOutput: true }); - } catch { - // Pull failed — caller should fall back to unpin :latest - return null; - } - - let inspectOutput; - try { - inspectOutput = runCapture( - ["docker", "inspect", "--format", "{{json .RepoDigests}}", imageWithTag], - { ignoreError: false }, - ); - } catch { - return null; - } - - // RepoDigests is a JSON array like ["ghcr.io/nvidia/nemoclaw/sandbox-base@sha256:abc..."]. - // Filter to the entry matching our registry — index ordering is not guaranteed. - let repoDigests; - try { - repoDigests = JSON.parse(inspectOutput || "[]"); - } catch { - return null; - } - const repoDigest = Array.isArray(repoDigests) - ? repoDigests.find((entry) => entry.startsWith(`${SANDBOX_BASE_IMAGE}@sha256:`)) - : null; - if (!repoDigest) return null; - - const digest = repoDigest.slice(repoDigest.indexOf("@") + 1); - const ref = `${SANDBOX_BASE_IMAGE}@${digest}`; - return { digest, ref }; + return pullAndResolveBaseImageDigestWithDeps({ + run, + runCapture, + }); } function getStableGatewayImageRef(versionOutput = null) { - const version = getInstalledOpenshellVersion(versionOutput); - if (!version) return null; - return `ghcr.io/nvidia/openshell/cluster:${version}`; + return getStableGatewayImageRefWithDeps(versionOutput, { runCapture }); } + function getOpenshellBinary() { if (OPENSHELL_BIN) return OPENSHELL_BIN; const resolved = resolveOpenshell(); @@ -656,281 +445,81 @@ const { // validateNvidiaApiKeyValue — see validation import above -async function replaceNamedCredential(envName, label, helpUrl = null, validator = null) { - if (helpUrl) { - console.log(""); - console.log(` Get your ${label} from: ${helpUrl}`); - console.log(""); - } - - while (true) { - const key = normalizeCredentialValue(await prompt(` ${label}: `, { secret: true })); - if (!key) { - console.error(` ${label} is 
required.`); - continue; - } - const validationError = typeof validator === "function" ? validator(key) : null; - if (validationError) { - console.error(validationError); - continue; - } - saveCredential(envName, key); - process.env[envName] = key; - console.log(""); - console.log(` Key saved to ~/.nemoclaw/credentials.json (mode 600)`); - console.log(""); - return key; - } +function getInferenceValidationDeps() { + return { + isNonInteractive, + prompt, + normalizeCredentialValue, + saveCredential, + validateNvidiaApiKeyValue, + getTransportRecoveryMessage, + exitOnboardFromPrompt, + runCurlProbe, + runStreamingEventProbe, + getCurlTimingArgs, + isWsl, + getCredential, + getProbeRecovery, + isNvcfFunctionNotFoundForAccount, + nvcfFunctionNotFoundMessage, + shouldForceCompletionsApi, + }; } async function promptValidationRecovery(label, recovery, credentialEnv = null, helpUrl = null) { - if (isNonInteractive()) { - process.exit(1); - } - - if (recovery.kind === "credential" && credentialEnv) { - console.log( - ` ${label} authorization failed. Re-enter the API key or choose a different provider/model.`, - ); - console.log(" ⚠️ Do NOT paste your API key here — use the options below:"); - const choice = ( - await prompt(" Options: retry (re-enter key), back (change provider), exit [retry]: ", { - secret: true, - }) - ) - .trim() - .toLowerCase(); - // Guard against the user accidentally pasting an API key at this prompt. - // Tokens don't contain spaces; human sentences do — the no-space + length check - // avoids false-positives on long typed sentences. - const API_KEY_PREFIXES = ["nvapi-", "ghp_", "gcm-", "sk-", "gpt-", "gemini-", "nvcf-"]; - const looksLikeToken = - API_KEY_PREFIXES.some((p) => choice.startsWith(p)) || - (!choice.includes(" ") && choice.length > 40) || - // Regex fallback: base64-safe token pattern (20+ chars, no spaces, mixed alphanum) - /^[A-Za-z0-9_\-\.]{20,}$/.test(choice); - const validator = credentialEnv === "NVIDIA_API_KEY" ? 
validateNvidiaApiKeyValue : null; - if (looksLikeToken) { - console.log(" ⚠️ That looks like an API key — do not paste credentials here."); - console.log(" Treating as 'retry'. You will be prompted to enter the key securely."); - await replaceNamedCredential(credentialEnv, `${label} API key`, helpUrl, validator); - return "credential"; - } - if (choice === "back") { - console.log(" Returning to provider selection."); - console.log(""); - return "selection"; - } - if (choice === "exit" || choice === "quit") { - exitOnboardFromPrompt(); - } - if (choice === "" || choice === "retry") { - await replaceNamedCredential(credentialEnv, `${label} API key`, helpUrl, validator); - return "credential"; - } - console.log(" Please choose a provider/model again."); - console.log(""); - return "selection"; - } - - if (recovery.kind === "transport") { - console.log(getTransportRecoveryMessage(recovery.failure || {})); - const choice = (await prompt(" Type 'retry', 'back', or 'exit' [retry]: ")) - .trim() - .toLowerCase(); - if (choice === "back") { - console.log(" Returning to provider selection."); - console.log(""); - return "selection"; - } - if (choice === "exit" || choice === "quit") { - exitOnboardFromPrompt(); - } - if (choice === "" || choice === "retry") { - console.log(""); - return "retry"; - } - console.log(" Please choose a provider/model again."); - console.log(""); - return "selection"; - } - - if (recovery.kind === "model") { - console.log(` Please enter a different ${label} model name.`); - console.log(""); - return "model"; - } + return promptValidationRecoveryWithDeps( + label, + recovery, + credentialEnv, + helpUrl, + getInferenceValidationDeps(), + ); +} - console.log(" Please choose a provider/model again."); - console.log(""); - return "selection"; +function getProviderManagementDeps() { + return { + runOpenshell, + compactText, + redact, + registry, + runCaptureOpenshell, + }; } -/** - * Build the argument array for an `openshell provider create` or `update` 
command. - * @param {"create"|"update"} action - Whether to create or update. - * @param {string} name - Provider name. - * @param {string} type - Provider type (e.g. "openai", "anthropic", "generic"). - * @param {string} credentialEnv - Credential environment variable name. - * @param {string|null} baseUrl - Optional base URL for API-compatible endpoints. - * @returns {string[]} Argument array for runOpenshell(). - */ function buildProviderArgs(action, name, type, credentialEnv, baseUrl) { - const args = - action === "create" - ? ["provider", "create", "--name", name, "--type", type, "--credential", credentialEnv] - : ["provider", "update", name, "--credential", credentialEnv]; - if (baseUrl && type === "openai") { - args.push("--config", `OPENAI_BASE_URL=${baseUrl}`); - } else if (baseUrl && type === "anthropic") { - args.push("--config", `ANTHROPIC_BASE_URL=${baseUrl}`); - } - return args; + return buildProviderArgsWithDeps(action, name, type, credentialEnv, baseUrl); } -/** - * Create or update an OpenShell provider in the gateway. - * - * Checks whether the provider already exists via `openshell provider get`; - * uses `create` for new providers and `update` for existing ones. - * @param {string} name - Provider name (e.g. "discord-bridge", "inference"). - * @param {string} type - Provider type ("openai", "anthropic", "generic"). - * @param {string} credentialEnv - Environment variable name for the credential. - * @param {string|null} baseUrl - Optional base URL for the provider endpoint. - * @param {Record} [env={}] - Environment variables for the openshell command. - * @returns {{ ok: boolean, status?: number, message?: string }} - */ function upsertProvider(name, type, credentialEnv, baseUrl, env = {}) { - const exists = providerExistsInGateway(name); - const action = exists ? 
"update" : "create"; - const args = buildProviderArgs(action, name, type, credentialEnv, baseUrl); - const runOpts = { ignoreError: true, env, stdio: ["ignore", "pipe", "pipe"] }; - const result = runOpenshell(args, runOpts); - if (result.status !== 0) { - const output = - compactText(redact(`${result.stderr || ""}`)) || - compactText(redact(`${result.stdout || ""}`)) || - `Failed to ${action} provider '${name}'.`; - return { ok: false, status: result.status || 1, message: output }; - } - return { ok: true }; + return upsertProviderWithDeps(name, type, credentialEnv, baseUrl, env, getProviderManagementDeps()); } -/** - * Upsert all messaging providers that have tokens configured. - * Returns the list of provider names that were successfully created/updated. - * Exits the process if any upsert fails. - * @param {Array<{name: string, envKey: string, token: string|null}>} tokenDefs - * @returns {string[]} Provider names that were upserted. - */ function upsertMessagingProviders(tokenDefs) { - const providers = []; - for (const { name, envKey, token } of tokenDefs) { - if (!token) continue; - const result = upsertProvider(name, "generic", envKey, null, { [envKey]: token }); - if (!result.ok) { - console.error(`\n ✗ Failed to create messaging provider '${name}': ${result.message}`); - process.exit(1); - } - providers.push(name); - } - return providers; + return upsertMessagingProvidersWithDeps(tokenDefs, getProviderManagementDeps()); } -/** - * Check whether an OpenShell provider exists in the gateway. - * - * Queries the gateway-level provider registry via `openshell provider get`. - * Does NOT verify that the provider is attached to a specific sandbox — - * OpenShell CLI does not currently expose a sandbox-scoped provider query. - * @param {string} name - Provider name to look up (e.g. "discord-bridge"). - * @returns {boolean} True if the provider exists in the gateway. 
- */ function providerExistsInGateway(name) { - const result = runOpenshell(["provider", "get", name], { - ignoreError: true, - stdio: ["ignore", "ignore", "ignore"], - }); - return result.status === 0; + return providerExistsInGatewayWithDeps(name, getProviderManagementDeps()); } -/** - * Compute a SHA-256 hash of a credential value for change detection. - * Stored in the sandbox registry so we can detect rotation on reuse - * without needing to read the credential back from OpenShell. - * @param {string} value - Credential value to hash. - * @returns {string|null} Hex-encoded SHA-256 hash, or null if value is falsy. - */ function hashCredential(value) { - if (!value) return null; - return crypto.createHash("sha256").update(String(value).trim()).digest("hex"); + return hashCredentialWithDeps(value); } -/** - * Detect whether any messaging provider credential has been rotated since - * the sandbox was created, by comparing SHA-256 hashes of the current - * token values against hashes stored in the sandbox registry. - * - * Returns `changed: false` for legacy sandboxes that have no stored hashes - * (conservative — avoids unnecessary rebuilds after upgrade). - * - * @param {string} sandboxName - Name of the sandbox to check. 
- * @param {Array<{name: string, envKey: string, token: string|null}>} tokenDefs - * @returns {{ changed: boolean, changedProviders: string[] }} - */ function detectMessagingCredentialRotation(sandboxName, tokenDefs) { - const sb = registry.getSandbox(sandboxName); - const storedHashes = sb?.providerCredentialHashes || {}; - const changedProviders = []; - for (const { name, envKey, token } of tokenDefs) { - if (!token) continue; - const storedHash = storedHashes[envKey]; - if (!storedHash) continue; - if (storedHash !== hashCredential(token)) { - changedProviders.push(name); - } - } - return { changed: changedProviders.length > 0, changedProviders }; + return detectMessagingCredentialRotationWithDeps(sandboxName, tokenDefs, getProviderManagementDeps()); } -// Tri-state probe factory for messaging-conflict backfill. An upfront liveness -// check is necessary because `openshell provider get` exits non-zero for both -// "provider not attached" and "gateway unreachable"; without the liveness -// gate, a transient gateway failure would be recorded as "no providers" and -// permanently suppress future backfill retries. function makeConflictProbe() { - let gatewayAlive = null; - const isGatewayAlive = () => { - if (gatewayAlive === null) { - const result = runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); - // runCaptureOpenshell returns stdout/stderr as a single string; treat - // any non-empty output as a sign openshell answered. Empty output with - // ignoreError typically means the binary failed to produce anything. - gatewayAlive = typeof result === "string" && result.length > 0; - } - return gatewayAlive; - }; - return { - providerExists: (name) => { - if (!isGatewayAlive()) return "error"; - return providerExistsInGateway(name) ? 
"present" : "absent"; - }, - }; + return makeConflictProbeWithDeps(getProviderManagementDeps()); } function verifyInferenceRoute(_provider, _model) { - const output = runCaptureOpenshell(["inference", "get"], { ignoreError: true }); - if (!output || /Gateway inference:\s*[\r\n]+\s*Not configured/i.test(output)) { - console.error(" OpenShell inference route was not configured."); - process.exit(1); - } + return verifyInferenceRouteWithDeps(_provider, _model, runCaptureOpenshell); } function isInferenceRouteReady(provider, model) { - const live = parseGatewayInference( - runCaptureOpenshell(["inference", "get"], { ignoreError: true }), - ); - return Boolean(live && live.provider === provider && live.model === model); + return isInferenceRouteReadyWithDeps(provider, model, runCaptureOpenshell); } function sandboxExistsInGateway(sandboxName) { @@ -948,227 +537,50 @@ function pruneStaleSandboxEntry(sandboxName) { } function buildSandboxConfigSyncScript(selectionConfig) { - // openclaw.json is immutable (root:root 444, Landlock read-only) — never - // write to it at runtime. Model routing is handled by the host-side - // gateway (`openshell inference set` in Step 5), not from inside the - // sandbox. We only write the NemoClaw selection config (~/.nemoclaw/). 
- return ` -set -euo pipefail -mkdir -p ~/.nemoclaw -cat > ~/.nemoclaw/config.json <<'EOF_NEMOCLAW_CFG' -${JSON.stringify(selectionConfig, null, 2)} -EOF_NEMOCLAW_CFG -exit -`.trim(); + return buildSandboxConfigSyncScriptWithDeps(selectionConfig); } function isOpenclawReady(sandboxName) { - return Boolean(fetchGatewayAuthTokenFromSandbox(sandboxName)); + return isOpenclawReadyWithDeps(sandboxName, { fetchGatewayAuthTokenFromSandbox }); } function writeSandboxConfigSyncFile(script) { - const scriptFile = secureTempFile("nemoclaw-sync", ".sh"); - fs.writeFileSync(scriptFile, `${script}\n`, { mode: 0o600 }); - return scriptFile; -} - -function encodeDockerJsonArg(value) { - return Buffer.from(JSON.stringify(value || {}), "utf8").toString("base64"); -} - -function isAffirmativeAnswer(value) { - return ["y", "yes"].includes( - String(value || "") - .trim() - .toLowerCase(), - ); -} - -function validateBraveSearchApiKey(apiKey) { - return runCurlProbe([ - "-sS", - "--compressed", - "-H", - "Accept: application/json", - "-H", - "Accept-Encoding: gzip", - "-H", - `X-Subscription-Token: ${apiKey}`, - "--get", - "--data-urlencode", - "q=ping", - "--data-urlencode", - "count=1", - "https://api.search.brave.com/res/v1/web/search", - ]); -} - -async function promptBraveSearchRecovery(validation) { - const recovery = classifyValidationFailure(validation); - - if (recovery.kind === "credential") { - console.log(" Brave Search rejected that API key."); - } else if (recovery.kind === "transport") { - console.log(getTransportRecoveryMessage(validation)); - } else { - console.log(" Brave Search validation did not succeed."); - } - - const answer = (await prompt(" Type 'retry', 'skip', or 'exit' [retry]: ")).trim().toLowerCase(); - if (answer === "skip") return "skip"; - if (answer === "exit" || answer === "quit") { - exitOnboardFromPrompt(); - } - return "retry"; + return writeSandboxConfigSyncFileWithDeps(script, { secureTempFile }); } -async function promptBraveSearchApiKey() { - 
console.log(""); - console.log(` Get your Brave Search API key from: ${BRAVE_SEARCH_HELP_URL}`); - console.log(""); - - while (true) { - const key = normalizeCredentialValue( - await prompt(" Brave Search API key: ", { secret: true }), - ); - if (!key) { - console.error(" Brave Search API key is required."); - continue; - } - return key; - } +function getWebSearchConfigDeps() { + return { + isNonInteractive, + prompt, + normalizeCredentialValue, + getCredential, + saveCredential, + runCurlProbe, + classifyValidationFailure, + getTransportRecoveryMessage, + exitOnboardFromPrompt, + note, + braveApiKeyEnv: webSearch.BRAVE_API_KEY_ENV, + braveSearchHelpUrl: BRAVE_SEARCH_HELP_URL, + }; } async function ensureValidatedBraveSearchCredential(nonInteractive = isNonInteractive()) { - const savedApiKey = getCredential(webSearch.BRAVE_API_KEY_ENV); - let apiKey = savedApiKey || normalizeCredentialValue(process.env[webSearch.BRAVE_API_KEY_ENV]); - let usingSavedKey = Boolean(savedApiKey); - - while (true) { - if (!apiKey) { - if (nonInteractive) { - throw new Error( - "Brave Search requires BRAVE_API_KEY or a saved Brave Search credential in non-interactive mode.", - ); - } - apiKey = await promptBraveSearchApiKey(); - usingSavedKey = false; - } - - const validation = validateBraveSearchApiKey(apiKey); - if (validation.ok) { - saveCredential(webSearch.BRAVE_API_KEY_ENV, apiKey); - process.env[webSearch.BRAVE_API_KEY_ENV] = apiKey; - return apiKey; - } - - const prefix = usingSavedKey - ? " Saved Brave Search API key validation failed." 
- : " Brave Search API key validation failed."; - console.error(prefix); - if (validation.message) { - console.error(` ${validation.message}`); - } - - if (nonInteractive) { - throw new Error( - validation.message || - "Brave Search API key validation failed in non-interactive mode.", - ); - } - - const action = await promptBraveSearchRecovery(validation); - if (action === "skip") { - console.log(" Skipping Brave Web Search setup."); - console.log(""); - return null; - } - - apiKey = null; - usingSavedKey = false; - } + return ensureValidatedBraveSearchCredentialWithDeps(nonInteractive, getWebSearchConfigDeps()); } async function configureWebSearch(existingConfig = null) { - if (existingConfig) { - return { fetchEnabled: true }; - } - - if (isNonInteractive()) { - const braveApiKey = normalizeCredentialValue(process.env[webSearch.BRAVE_API_KEY_ENV]); - if (!braveApiKey) { - return null; - } - note(" [non-interactive] Brave Web Search requested."); - const validation = validateBraveSearchApiKey(braveApiKey); - if (!validation.ok) { - console.error(" Brave Search API key validation failed."); - if (validation.message) { - console.error(` ${validation.message}`); - } - process.exit(1); - } - saveCredential(webSearch.BRAVE_API_KEY_ENV, braveApiKey); - process.env[webSearch.BRAVE_API_KEY_ENV] = braveApiKey; - return { fetchEnabled: true }; - } - const enableAnswer = await prompt(" Enable Brave Web Search? 
[y/N]: "); - if (!isAffirmativeAnswer(enableAnswer)) { - return null; - } - - const braveApiKey = await ensureValidatedBraveSearchCredential(); - if (!braveApiKey) { - return null; - } + return configureWebSearchWithDeps(existingConfig, getWebSearchConfigDeps()); +} - console.log(" ✓ Enabled Brave Web Search"); - console.log(""); - return { fetchEnabled: true }; +function getSandboxBuildConfigDeps() { + return { + sandboxBaseImage: SANDBOX_BASE_IMAGE, + }; } function getSandboxInferenceConfig(model, provider = null, preferredInferenceApi = null) { - let providerKey; - let primaryModelRef; - let inferenceBaseUrl = "https://inference.local/v1"; - let inferenceApi = preferredInferenceApi || "openai-completions"; - let inferenceCompat = null; - - switch (provider) { - case "openai-api": - providerKey = "openai"; - primaryModelRef = `openai/${model}`; - break; - case "anthropic-prod": - case "compatible-anthropic-endpoint": - providerKey = "anthropic"; - primaryModelRef = `anthropic/${model}`; - inferenceBaseUrl = "https://inference.local"; - inferenceApi = "anthropic-messages"; - break; - case "gemini-api": - providerKey = "inference"; - primaryModelRef = `inference/${model}`; - inferenceCompat = { - supportsStore: false, - }; - break; - case "compatible-endpoint": - providerKey = "inference"; - primaryModelRef = `inference/${model}`; - inferenceCompat = { - supportsStore: false, - }; - break; - case "nvidia-prod": - case "nvidia-nim": - default: - providerKey = "inference"; - primaryModelRef = `inference/${model}`; - break; - } - - return { providerKey, primaryModelRef, inferenceBaseUrl, inferenceApi, inferenceCompat }; + return getSandboxInferenceConfigWithDeps(model, provider, preferredInferenceApi); } function patchStagedDockerfile( @@ -1184,455 +596,36 @@ function patchStagedDockerfile( discordGuilds = {}, baseImageRef = null, ) { - const { providerKey, primaryModelRef, inferenceBaseUrl, inferenceApi, inferenceCompat } = - getSandboxInferenceConfig(model, 
provider, preferredInferenceApi); - let dockerfile = fs.readFileSync(dockerfilePath, "utf8"); - // Pin the base image to a specific digest when available (#1904). - // The ref must come from pullAndResolveBaseImageDigest() — never from - // blueprint.yaml, whose digest belongs to a different registry. - // Only rewrite when the current value already points at our sandbox-base - // image — custom --from Dockerfiles may use a different base. - if (baseImageRef) { - dockerfile = dockerfile.replace(/^ARG BASE_IMAGE=(.*)$/m, (line, currentValue) => { - const trimmed = String(currentValue).trim(); - if (trimmed.startsWith(`${SANDBOX_BASE_IMAGE}:`) || trimmed.startsWith(`${SANDBOX_BASE_IMAGE}@`)) { - return `ARG BASE_IMAGE=${baseImageRef}`; - } - return line; - }); - } - dockerfile = dockerfile.replace(/^ARG NEMOCLAW_MODEL=.*$/m, `ARG NEMOCLAW_MODEL=${model}`); - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_PROVIDER_KEY=.*$/m, - `ARG NEMOCLAW_PROVIDER_KEY=${providerKey}`, - ); - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_PRIMARY_MODEL_REF=.*$/m, - `ARG NEMOCLAW_PRIMARY_MODEL_REF=${primaryModelRef}`, - ); - dockerfile = dockerfile.replace(/^ARG CHAT_UI_URL=.*$/m, `ARG CHAT_UI_URL=${chatUiUrl}`); - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_INFERENCE_BASE_URL=.*$/m, - `ARG NEMOCLAW_INFERENCE_BASE_URL=${inferenceBaseUrl}`, - ); - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_INFERENCE_API=.*$/m, - `ARG NEMOCLAW_INFERENCE_API=${inferenceApi}`, - ); - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_INFERENCE_COMPAT_B64=.*$/m, - `ARG NEMOCLAW_INFERENCE_COMPAT_B64=${encodeDockerJsonArg(inferenceCompat)}`, - ); - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_BUILD_ID=.*$/m, - `ARG NEMOCLAW_BUILD_ID=${buildId}`, - ); - // Honor NEMOCLAW_CONTEXT_WINDOW / NEMOCLAW_MAX_TOKENS / NEMOCLAW_REASONING - // so the user can tune model metadata without editing the Dockerfile. 
- const POSITIVE_INT_RE = /^[1-9][0-9]*$/; - const contextWindow = process.env.NEMOCLAW_CONTEXT_WINDOW; - if (contextWindow && POSITIVE_INT_RE.test(contextWindow)) { - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_CONTEXT_WINDOW=.*$/m, - `ARG NEMOCLAW_CONTEXT_WINDOW=${contextWindow}`, - ); - } - const maxTokens = process.env.NEMOCLAW_MAX_TOKENS; - if (maxTokens && POSITIVE_INT_RE.test(maxTokens)) { - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_MAX_TOKENS=.*$/m, - `ARG NEMOCLAW_MAX_TOKENS=${maxTokens}`, - ); - } - const reasoning = process.env.NEMOCLAW_REASONING; - if (reasoning === "true" || reasoning === "false") { - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_REASONING=.*$/m, - `ARG NEMOCLAW_REASONING=${reasoning}`, - ); - } - // Honor NEMOCLAW_PROXY_HOST / NEMOCLAW_PROXY_PORT exported in the host - // shell so the sandbox-side nemoclaw-start.sh sees them via $ENV at runtime. - // Without this, the host export is silently dropped at image build time and - // the sandbox falls back to the default 10.200.0.1:3128 proxy. See #1409. - const PROXY_HOST_RE = /^[A-Za-z0-9._:-]+$/; - const PROXY_PORT_RE = /^[0-9]{1,5}$/; - const proxyHostEnv = process.env.NEMOCLAW_PROXY_HOST; - if (proxyHostEnv && PROXY_HOST_RE.test(proxyHostEnv)) { - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_PROXY_HOST=.*$/m, - `ARG NEMOCLAW_PROXY_HOST=${proxyHostEnv}`, - ); - } - const proxyPortEnv = process.env.NEMOCLAW_PROXY_PORT; - if (proxyPortEnv && PROXY_PORT_RE.test(proxyPortEnv)) { - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_PROXY_PORT=.*$/m, - `ARG NEMOCLAW_PROXY_PORT=${proxyPortEnv}`, - ); - } - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_WEB_SEARCH_ENABLED=.*$/m, - `ARG NEMOCLAW_WEB_SEARCH_ENABLED=${webSearchConfig ? "1" : "0"}`, - ); - // Onboard flow expects immediate dashboard access without device pairing, - // so disable device auth for images built during onboard (see #1217). 
- dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_DISABLE_DEVICE_AUTH=.*$/m, - `ARG NEMOCLAW_DISABLE_DEVICE_AUTH=1`, + return patchStagedDockerfileWithDeps( + dockerfilePath, + model, + chatUiUrl, + buildId, + provider, + preferredInferenceApi, + webSearchConfig, + messagingChannels, + messagingAllowedIds, + discordGuilds, + baseImageRef, + getSandboxBuildConfigDeps(), ); - if (messagingChannels.length > 0) { - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_MESSAGING_CHANNELS_B64=.*$/m, - `ARG NEMOCLAW_MESSAGING_CHANNELS_B64=${encodeDockerJsonArg(messagingChannels)}`, - ); - } - if (Object.keys(messagingAllowedIds).length > 0) { - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_MESSAGING_ALLOWED_IDS_B64=.*$/m, - `ARG NEMOCLAW_MESSAGING_ALLOWED_IDS_B64=${encodeDockerJsonArg(messagingAllowedIds)}`, - ); - } - if (Object.keys(discordGuilds).length > 0) { - dockerfile = dockerfile.replace( - /^ARG NEMOCLAW_DISCORD_GUILDS_B64=.*$/m, - `ARG NEMOCLAW_DISCORD_GUILDS_B64=${encodeDockerJsonArg(discordGuilds)}`, - ); - } - fs.writeFileSync(dockerfilePath, dockerfile); -} - -function parseJsonObject(body) { - if (!body) return null; - try { - return JSON.parse(body); - } catch { - return null; - } } function hasResponsesToolCall(body) { - const parsed = parseJsonObject(body); - if (!parsed || !Array.isArray(parsed.output)) return false; - - const stack = [...parsed.output]; - while (stack.length > 0) { - const item = stack.pop(); - if (!item || typeof item !== "object") continue; - if (item.type === "function_call" || item.type === "tool_call") return true; - if (Array.isArray(item.content)) { - stack.push(...item.content); - } - } - - return false; + return hasResponsesToolCallWithDeps(body); } function shouldRequireResponsesToolCalling(provider) { - return ( - provider === "nvidia-prod" || provider === "gemini-api" || provider === "compatible-endpoint" - ); + return shouldRequireResponsesToolCallingWithDeps(provider); } -// Google Gemini rejects requests that carry 
both an Authorization: Bearer -// header and a ?key= query parameter ("Multiple authentication credentials -// received"). Send the API key as ?key= only for Gemini. See issue #1960. function getProbeAuthMode(provider) { - return provider === "gemini-api" ? "query-param" : undefined; + return getProbeAuthModeWithDeps(provider); } -// shouldSkipResponsesProbe and isNvcfFunctionNotFoundForAccount / -// nvcfFunctionNotFoundMessage — see validation import above. They live in -// src/lib/validation.ts so they can be unit-tested independently. - -// Per-validation-probe curl timing. Tighter than the default 60s in -// getCurlTimingArgs() because validation must not hang the wizard for a -// minute on a misbehaving model. See issue #1601 (Bug 3). function getValidationProbeCurlArgs(opts) { - if (isWsl(opts)) { - return ["--connect-timeout", "20", "--max-time", "30"]; - } - return ["--connect-timeout", "10", "--max-time", "15"]; -} - -function probeResponsesToolCalling(endpointUrl, model, apiKey, options = {}) { - const useQueryParam = options.authMode === "query-param"; - const normalizedKey = apiKey ? normalizeCredentialValue(apiKey) : ""; - const baseUrl = String(endpointUrl).replace(/\/+$/, ""); - const authHeader = !useQueryParam && normalizedKey - ? ["-H", `Authorization: Bearer ${normalizedKey}`] - : []; - const url = useQueryParam && normalizedKey - ? `${baseUrl}/responses?key=${encodeURIComponent(normalizedKey)}` - : `${baseUrl}/responses`; - const result = runCurlProbe([ - "-sS", - ...getValidationProbeCurlArgs(), - "-H", - "Content-Type: application/json", - ...authHeader, - "-d", - JSON.stringify({ - model, - input: "Call the emit_ok function with value OK. 
Do not answer with plain text.", - tool_choice: "required", - tools: [ - { - type: "function", - name: "emit_ok", - description: "Returns the probe value for validation.", - parameters: { - type: "object", - properties: { - value: { type: "string" }, - }, - required: ["value"], - additionalProperties: false, - }, - }, - ], - }), - url, - ]); - - if (!result.ok) { - return result; - } - if (hasResponsesToolCall(result.body)) { - return result; - } - return { - ok: false, - httpStatus: result.httpStatus, - curlStatus: result.curlStatus, - body: result.body, - stderr: result.stderr, - message: `HTTP ${result.httpStatus}: Responses API did not return a tool call`, - }; -} - -function probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, options = {}) { - const useQueryParam = options.authMode === "query-param"; - const normalizedKey = apiKey ? normalizeCredentialValue(apiKey) : ""; - const baseUrl = String(endpointUrl).replace(/\/+$/, ""); - const authHeader = !useQueryParam && normalizedKey - ? ["-H", `Authorization: Bearer ${normalizedKey}`] - : []; - const appendKey = (path) => - useQueryParam && normalizedKey ? `${baseUrl}${path}?key=${encodeURIComponent(normalizedKey)}` : `${baseUrl}${path}`; - - const responsesProbe = - options.requireResponsesToolCalling === true - ? 
{ - name: "Responses API with tool calling", - api: "openai-responses", - execute: () => probeResponsesToolCalling(endpointUrl, model, apiKey, { authMode: options.authMode }), - } - : { - name: "Responses API", - api: "openai-responses", - execute: () => - runCurlProbe([ - "-sS", - ...getValidationProbeCurlArgs(), - "-H", - "Content-Type: application/json", - ...authHeader, - "-d", - JSON.stringify({ - model, - input: "Reply with exactly: OK", - }), - appendKey("/responses"), - ]), - }; - - const chatCompletionsProbe = { - name: "Chat Completions API", - api: "openai-completions", - execute: () => - runCurlProbe([ - "-sS", - ...getValidationProbeCurlArgs(), - "-H", - "Content-Type: application/json", - ...authHeader, - "-d", - JSON.stringify({ - model, - messages: [{ role: "user", content: "Reply with exactly: OK" }], - }), - appendKey("/chat/completions"), - ]), - }; - - // NVIDIA Build does not expose /v1/responses; probing it always returns - // "404 page not found" and only adds noise to error messages. Skip it - // entirely for that provider. See issue #1601. - const probes = options.skipResponsesProbe - ? [chatCompletionsProbe] - : [responsesProbe, chatCompletionsProbe]; - - const failures = []; - for (const probe of probes) { - const result = probe.execute(); - if (result.ok) { - // Streaming event validation — catch backends like SGLang that return - // valid non-streaming responses but emit incomplete SSE events in - // streaming mode. Only run for /responses probes on custom endpoints - // where probeStreaming was requested. 
- if (probe.api === "openai-responses" && options.probeStreaming === true) { - const streamResult = runStreamingEventProbe([ - "-sS", - ...getValidationProbeCurlArgs(), - "-H", - "Content-Type: application/json", - ...authHeader, - "-d", - JSON.stringify({ - model, - input: "Reply with exactly: OK", - stream: true, - }), - appendKey("/responses"), - ]); - if (!streamResult.ok && streamResult.missingEvents.length > 0) { - // Backend responds but lacks required streaming events — fall back - // to /chat/completions silently. - console.log(` ℹ ${streamResult.message}`); - failures.push({ - name: probe.name + " (streaming)", - httpStatus: 0, - curlStatus: 0, - message: streamResult.message, - body: "", - }); - continue; - } - if (!streamResult.ok) { - // Transport or execution failure — surface as a hard error instead - // of silently switching APIs. - return { - ok: false, - message: `${probe.name} (streaming): ${streamResult.message}`, - failures: [ - { - name: probe.name + " (streaming)", - httpStatus: 0, - curlStatus: 0, - message: streamResult.message, - body: "", - }, - ], - }; - } - } - return { ok: true, api: probe.api, label: probe.name }; - } - // Preserve the raw response body alongside the summarized message so the - // NVCF "Function not found for account" detector below can fall back to - // the raw body if summarizeProbeError ever stops surfacing the marker - // through `message`. - failures.push({ - name: probe.name, - httpStatus: result.httpStatus, - curlStatus: result.curlStatus, - message: result.message, - body: result.body, - }); - } - - // Single retry with doubled timeouts on timeout/connection failure. - // WSL2's virtualized network stack can cause the initial probe to time out - // before the TLS handshake completes. See issue #987. 
- const isTimeoutOrConnFailure = (cs) => cs === 28 || cs === 6 || cs === 7; - let retriedAfterTimeout = false; - if (failures.length > 0 && isTimeoutOrConnFailure(failures[0].curlStatus)) { - retriedAfterTimeout = true; - const baseArgs = getValidationProbeCurlArgs(); - const doubledArgs = baseArgs.map((arg) => - /^\d+$/.test(arg) ? String(Number(arg) * 2) : arg, - ); - const retryResult = runCurlProbe([ - "-sS", - ...doubledArgs, - "-H", - "Content-Type: application/json", - ...(apiKey ? ["-H", `Authorization: Bearer ${normalizeCredentialValue(apiKey)}`] : []), - "-d", - JSON.stringify({ - model, - messages: [{ role: "user", content: "Reply with exactly: OK" }], - }), - `${String(endpointUrl).replace(/\/+$/, "")}/chat/completions`, - ]); - if (retryResult.ok) { - return { ok: true, api: "openai-completions", label: "Chat Completions API" }; - } - } - - // Detect the NVCF "Function not found for account" error and reframe it - // with an actionable next step instead of dumping the raw NVCF body. - // See issue #1601 (Bug 2). - const accountFailure = failures.find( - (failure) => - isNvcfFunctionNotFoundForAccount(failure.message) || - isNvcfFunctionNotFoundForAccount(failure.body), - ); - if (accountFailure) { - return { - ok: false, - message: nvcfFunctionNotFoundMessage(model), - failures, - }; - } - - const baseMessage = failures.map((failure) => `${failure.name}: ${failure.message}`).join(" | "); - const wslHint = - isWsl() && retriedAfterTimeout - ? " · WSL2 detected \u2014 network verification may be slower than expected. " + - "Run `nemoclaw onboard` with the `--skip-verify` flag if this endpoint is known to be reachable." 
- : ""; - return { - ok: false, - message: baseMessage + wslHint, - failures, - }; -} - -function probeAnthropicEndpoint(endpointUrl, model, apiKey) { - const result = runCurlProbe([ - "-sS", - ...getCurlTimingArgs(), - "-H", - `x-api-key: ${normalizeCredentialValue(apiKey)}`, - "-H", - "anthropic-version: 2023-06-01", - "-H", - "content-type: application/json", - "-d", - JSON.stringify({ - model, - max_tokens: 16, - messages: [{ role: "user", content: "Reply with exactly: OK" }], - }), - `${String(endpointUrl).replace(/\/+$/, "")}/v1/messages`, - ]); - if (result.ok) { - return { ok: true, api: "anthropic-messages", label: "Anthropic Messages API" }; - } - return { - ok: false, - message: result.message, - failures: [ - { - name: "Anthropic Messages API", - httpStatus: result.httpStatus, - curlStatus: result.curlStatus, - message: result.message, - }, - ], - }; + return getValidationProbeCurlArgsWithDeps(opts, getInferenceValidationDeps()); } async function validateOpenAiLikeSelection( @@ -1644,28 +637,16 @@ async function validateOpenAiLikeSelection( helpUrl = null, options = {}, ) { - const apiKey = credentialEnv ? 
getCredential(credentialEnv) : ""; - const probe = probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, options); - if (!probe.ok) { - console.error(` ${label} endpoint validation failed.`); - console.error(` ${probe.message}`); - if (isNonInteractive()) { - process.exit(1); - } - const retry = await promptValidationRecovery( - label, - getProbeRecovery(probe), - credentialEnv, - helpUrl, - ); - if (retry === "selection") { - console.log(` ${retryMessage}`); - console.log(""); - } - return { ok: false, retry }; - } - console.log(` ${probe.label} available — OpenClaw will use ${probe.api}.`); - return { ok: true, api: probe.api }; + return validateOpenAiLikeSelectionWithDeps( + label, + endpointUrl, + model, + credentialEnv, + retryMessage, + helpUrl, + options, + getInferenceValidationDeps(), + ); } async function validateAnthropicSelectionWithRetryMessage( @@ -1676,28 +657,15 @@ async function validateAnthropicSelectionWithRetryMessage( retryMessage = "Please choose a provider/model again.", helpUrl = null, ) { - const apiKey = getCredential(credentialEnv); - const probe = probeAnthropicEndpoint(endpointUrl, model, apiKey); - if (!probe.ok) { - console.error(` ${label} endpoint validation failed.`); - console.error(` ${probe.message}`); - if (isNonInteractive()) { - process.exit(1); - } - const retry = await promptValidationRecovery( - label, - getProbeRecovery(probe), - credentialEnv, - helpUrl, - ); - if (retry === "selection") { - console.log(` ${retryMessage}`); - console.log(""); - } - return { ok: false, retry }; - } - console.log(` ${probe.label} available — OpenClaw will use ${probe.api}.`); - return { ok: true, api: probe.api }; + return validateAnthropicSelectionWithRetryMessageWithDeps( + label, + endpointUrl, + model, + credentialEnv, + retryMessage, + helpUrl, + getInferenceValidationDeps(), + ); } async function validateCustomOpenAiLikeSelection( @@ -1707,32 +675,14 @@ async function validateCustomOpenAiLikeSelection( credentialEnv, helpUrl = null, ) 
{ - const apiKey = getCredential(credentialEnv); - const probe = probeOpenAiLikeEndpoint(endpointUrl, model, apiKey, { - requireResponsesToolCalling: true, - skipResponsesProbe: shouldForceCompletionsApi(process.env.NEMOCLAW_PREFERRED_API), - probeStreaming: true, - }); - if (probe.ok) { - console.log(` ${probe.label} available — OpenClaw will use ${probe.api}.`); - return { ok: true, api: probe.api }; - } - console.error(` ${label} endpoint validation failed.`); - console.error(` ${probe.message}`); - if (isNonInteractive()) { - process.exit(1); - } - const retry = await promptValidationRecovery( + return validateCustomOpenAiLikeSelectionWithDeps( label, - getProbeRecovery(probe, { allowModelRetry: true }), + endpointUrl, + model, credentialEnv, helpUrl, + getInferenceValidationDeps(), ); - if (retry === "selection") { - console.log(" Please choose a provider/model again."); - console.log(""); - } - return { ok: false, retry }; } async function validateCustomAnthropicSelection( @@ -1742,30 +692,17 @@ async function validateCustomAnthropicSelection( credentialEnv, helpUrl = null, ) { - const apiKey = getCredential(credentialEnv); - const probe = probeAnthropicEndpoint(endpointUrl, model, apiKey); - if (probe.ok) { - console.log(` ${probe.label} available — OpenClaw will use ${probe.api}.`); - return { ok: true, api: probe.api }; - } - console.error(` ${label} endpoint validation failed.`); - console.error(` ${probe.message}`); - if (isNonInteractive()) { - process.exit(1); - } - const retry = await promptValidationRecovery( + return validateCustomAnthropicSelectionWithDeps( label, - getProbeRecovery(probe, { allowModelRetry: true }), + endpointUrl, + model, credentialEnv, helpUrl, + getInferenceValidationDeps(), ); - if (retry === "selection") { - console.log(" Please choose a provider/model again."); - console.log(""); - } - return { ok: false, retry }; } + const { promptManualModelId, promptCloudModel, promptRemoteModel, promptInputModel } = modelPrompts; const { 
validateAnthropicModel, validateOpenAiLikeModel } = providerModels; @@ -1774,132 +711,24 @@ const { shouldIncludeBuildContextPath, copyBuildContextDir, printSandboxCreateRe buildContext; // classifySandboxCreateFailure — see validation import above -// --------------------------------------------------------------------------- -// Ollama auth proxy — keeps Ollama on localhost, exposes a token-gated proxy -// on 0.0.0.0 so containers can reach it without exposing Ollama to the network. -// Token is persisted to ~/.nemoclaw/ollama-proxy-token so the proxy can be -// restarted after a host reboot without re-running onboard. -// --------------------------------------------------------------------------- - -const PROXY_STATE_DIR = path.join(os.homedir(), ".nemoclaw"); -const PROXY_TOKEN_PATH = path.join(PROXY_STATE_DIR, "ollama-proxy-token"); -const PROXY_PID_PATH = path.join(PROXY_STATE_DIR, "ollama-auth-proxy.pid"); - -let ollamaProxyToken: string | null = null; - -function ensureProxyStateDir(): void { - if (!fs.existsSync(PROXY_STATE_DIR)) { - fs.mkdirSync(PROXY_STATE_DIR, { recursive: true }); - } +function getOllamaProxyDeps() { + return { + runCapture, + run, + spawn, + sleep, + scriptsDir: SCRIPTS, + ollamaProxyPort: OLLAMA_PROXY_PORT, + ollamaPort: OLLAMA_PORT, + }; } function persistProxyToken(token: string): void { - ensureProxyStateDir(); - fs.writeFileSync(PROXY_TOKEN_PATH, token, { mode: 0o600 }); - // mode only applies on creation; ensure permissions on existing files too - fs.chmodSync(PROXY_TOKEN_PATH, 0o600); + return persistProxyTokenWithDeps(token); } -function loadPersistedProxyToken(): string | null { - try { - if (fs.existsSync(PROXY_TOKEN_PATH)) { - const token = fs.readFileSync(PROXY_TOKEN_PATH, "utf-8").trim(); - return token || null; - } - } catch { - /* ignore */ - } - return null; -} - -function persistProxyPid(pid: number | null | undefined): void { - if (!Number.isInteger(pid) || pid <= 0) return; - ensureProxyStateDir(); - 
fs.writeFileSync(PROXY_PID_PATH, `${pid}\n`, { mode: 0o600 }); - fs.chmodSync(PROXY_PID_PATH, 0o600); -} - -function loadPersistedProxyPid(): number | null { - try { - if (!fs.existsSync(PROXY_PID_PATH)) return null; - const raw = fs.readFileSync(PROXY_PID_PATH, "utf-8").trim(); - const pid = Number.parseInt(raw, 10); - return Number.isInteger(pid) && pid > 0 ? pid : null; - } catch { - return null; - } -} - -function clearPersistedProxyPid(): void { - try { - if (fs.existsSync(PROXY_PID_PATH)) { - fs.unlinkSync(PROXY_PID_PATH); - } - } catch { - /* ignore */ - } -} - -function isOllamaProxyProcess(pid: number | null | undefined): boolean { - if (!Number.isInteger(pid) || pid <= 0) return false; - const cmdline = runCapture(["ps", "-p", String(pid), "-o", "args="], { ignoreError: true }); - return Boolean(cmdline && cmdline.includes("ollama-auth-proxy.js")); -} - -function spawnOllamaAuthProxy(token: string): number | null { - const child = spawn(process.execPath, [path.join(SCRIPTS, "ollama-auth-proxy.js")], { - detached: true, - stdio: "ignore", - env: { - ...process.env, - OLLAMA_PROXY_TOKEN: token, - OLLAMA_PROXY_PORT: String(OLLAMA_PROXY_PORT), - OLLAMA_BACKEND_PORT: String(OLLAMA_PORT), - }, - }); - child.unref(); - persistProxyPid(child.pid); - return child.pid ?? null; -} - -function killStaleProxy(): void { - try { - const persistedPid = loadPersistedProxyPid(); - if (isOllamaProxyProcess(persistedPid)) { - run(["kill", String(persistedPid)], { ignoreError: true, suppressOutput: true }); - } - clearPersistedProxyPid(); - - // Best-effort cleanup for older proxy processes created before the PID file - // existed. Only kill processes that are actually the auth proxy, not - // unrelated services that happen to use the same port. 
- const pidOutput = runCapture(["lsof", "-ti", `:${OLLAMA_PROXY_PORT}`], { ignoreError: true }); - if (pidOutput && pidOutput.trim()) { - for (const pid of pidOutput.trim().split(/\s+/)) { - if (isOllamaProxyProcess(Number.parseInt(pid, 10))) { - run(["kill", pid], { ignoreError: true, suppressOutput: true }); - } - } - sleep(1); - } - } catch { - /* ignore */ - } -} - -function startOllamaAuthProxy(): void { - const crypto = require("crypto"); - killStaleProxy(); - - ollamaProxyToken = crypto.randomBytes(24).toString("hex"); - // Don't persist yet — wait until provider is confirmed in setupInference. - // If the user backs out to a different provider, the token stays in memory - // only and is discarded. - const pid = spawnOllamaAuthProxy(ollamaProxyToken); - sleep(1); - if (!isOllamaProxyProcess(pid)) { - console.error(` Warning: Ollama auth proxy did not start on :${OLLAMA_PROXY_PORT}`); - } +function startOllamaAuthProxy(): void { + return startOllamaAuthProxyWithDeps(getOllamaProxyDeps()); } /** @@ -1907,3915 +736,508 @@ function startOllamaAuthProxy(): void { * from host reboots where the background proxy process was lost. */ function ensureOllamaAuthProxy(): void { - // Try to load persisted token first — if none, this isn't an Ollama setup. - const token = loadPersistedProxyToken(); - if (!token) return; - - const pid = loadPersistedProxyPid(); - if (isOllamaProxyProcess(pid)) { - ollamaProxyToken = token; - return; - } - - // Proxy not running — restart it with the persisted token. - killStaleProxy(); - ollamaProxyToken = token; - spawnOllamaAuthProxy(token); - sleep(1); -} - -function getOllamaProxyToken(): string | null { - if (ollamaProxyToken) return ollamaProxyToken; - // Fall back to persisted token (resume / reconnect scenario) - ollamaProxyToken = loadPersistedProxyToken(); - return ollamaProxyToken; -} - -async function promptOllamaModel(gpu = null) { - const installed = getOllamaModelOptions(); - const options = installed.length > 0 ? 
installed : getBootstrapOllamaModelOptions(gpu); - const defaultModel = getDefaultOllamaModel(gpu); - const defaultIndex = Math.max(0, options.indexOf(defaultModel)); - - console.log(""); - console.log(installed.length > 0 ? " Ollama models:" : " Ollama starter models:"); - options.forEach((option, index) => { - console.log(` ${index + 1}) ${option}`); - }); - console.log(` ${options.length + 1}) Other...`); - if (installed.length === 0) { - console.log(""); - console.log(" No local Ollama models are installed yet. Choose one to pull and load now."); - } - console.log(""); - - const choice = await prompt(` Choose model [${defaultIndex + 1}]: `); - const index = parseInt(choice || String(defaultIndex + 1), 10) - 1; - if (index >= 0 && index < options.length) { - return options[index]; - } - return promptManualModelId(" Ollama model id: ", "Ollama"); -} - -function printOllamaExposureWarning() { - console.log(""); - console.log(" ⚠ Ollama is binding to 0.0.0.0 so the sandbox can reach it via Docker."); - console.log(" This exposes the Ollama API to your local network (no auth required)."); - console.log(" On public WiFi, any device on the same network can send prompts to your GPU."); - console.log(" See: CNVD-2025-04094, CVE-2024-37032"); - console.log(""); -} - -function pullOllamaModel(model) { - const result = spawnSync("bash", ["-c", `ollama pull ${shellQuote(model)}`], { - cwd: ROOT, - encoding: "utf8", - stdio: "inherit", - timeout: 600_000, - env: { ...process.env }, - }); - if (result.signal === "SIGTERM") { - console.error( - ` Model pull timed out after 10 minutes. 
Try a smaller model or check your network connection.`, - ); - return false; - } - return result.status === 0; -} - -function prepareOllamaModel(model, installedModels = []) { - const alreadyInstalled = installedModels.includes(model); - if (!alreadyInstalled) { - console.log(` Pulling Ollama model: ${model}`); - if (!pullOllamaModel(model)) { - return { - ok: false, - message: - `Failed to pull Ollama model '${model}'. ` + - "Check the model name and that Ollama can access the registry, then try another model.", - }; - } - } - - console.log(` Loading Ollama model: ${model}`); - run(getOllamaWarmupCommand(model), { ignoreError: true }); - return validateOllamaModel(model); -} - -function getRequestedSandboxNameHint() { - const raw = process.env.NEMOCLAW_SANDBOX_NAME; - if (typeof raw !== "string") return null; - const normalized = raw.trim().toLowerCase(); - return normalized || null; -} - -function getResumeSandboxConflict(session) { - const requestedSandboxName = getRequestedSandboxNameHint(); - if (!requestedSandboxName || !session?.sandboxName) { - return null; - } - return requestedSandboxName !== session.sandboxName - ? { requestedSandboxName, recordedSandboxName: session.sandboxName } - : null; -} - -function getRequestedProviderHint(nonInteractive = isNonInteractive()) { - return nonInteractive ? 
getNonInteractiveProvider() : null; -} - -function getRequestedModelHint(nonInteractive = isNonInteractive()) { - if (!nonInteractive) return null; - const providerKey = getRequestedProviderHint(nonInteractive) || "cloud"; - return getNonInteractiveModel(providerKey); -} - -function getEffectiveProviderName(providerKey) { - if (!providerKey) return null; - if (REMOTE_PROVIDER_CONFIG[providerKey]) { - return REMOTE_PROVIDER_CONFIG[providerKey].providerName; - } - - switch (providerKey) { - case "nim-local": - return "nvidia-nim"; - case "ollama": - return "ollama-local"; - case "vllm": - return "vllm-local"; - default: - return providerKey; - } -} - -function getResumeConfigConflicts(session, opts = {}) { - const conflicts = []; - const nonInteractive = opts.nonInteractive ?? isNonInteractive(); - - const sandboxConflict = getResumeSandboxConflict(session); - if (sandboxConflict) { - conflicts.push({ - field: "sandbox", - requested: sandboxConflict.requestedSandboxName, - recorded: sandboxConflict.recordedSandboxName, - }); - } - - const requestedProvider = getRequestedProviderHint(nonInteractive); - const effectiveRequestedProvider = getEffectiveProviderName(requestedProvider); - if ( - effectiveRequestedProvider && - session?.provider && - effectiveRequestedProvider !== session.provider - ) { - conflicts.push({ - field: "provider", - requested: effectiveRequestedProvider, - recorded: session.provider, - }); - } - - const requestedModel = getRequestedModelHint(nonInteractive); - if (requestedModel && session?.model && requestedModel !== session.model) { - conflicts.push({ - field: "model", - requested: requestedModel, - recorded: session.model, - }); - } - - const requestedFrom = opts.fromDockerfile ? path.resolve(opts.fromDockerfile) : null; - const recordedFrom = session?.metadata?.fromDockerfile - ? 
path.resolve(session.metadata.fromDockerfile) - : null; - if (requestedFrom !== recordedFrom) { - conflicts.push({ - field: "fromDockerfile", - requested: requestedFrom, - recorded: recordedFrom, - }); - } - - const requestedAgent = opts.agent || process.env.NEMOCLAW_AGENT || null; - const recordedAgent = session?.agent || null; - if (requestedAgent && recordedAgent && requestedAgent !== recordedAgent) { - conflicts.push({ - field: "agent", - requested: requestedAgent, - recorded: recordedAgent, - }); - } - - return conflicts; -} - -function getContainerRuntime() { - const info = runCapture("docker info 2>/dev/null", { ignoreError: true }); - return inferContainerRuntime(info); -} - -function printRemediationActions(actions) { - if (!Array.isArray(actions) || actions.length === 0) { - return; - } - - console.error(""); - console.error(" Suggested fix:"); - console.error(""); - for (const action of actions) { - console.error(` - ${action.title}: ${action.reason}`); - for (const command of action.commands || []) { - console.error(` ${command}`); - } - } -} - -function isOpenshellInstalled() { - return resolveOpenshell() !== null; -} - -function getFutureShellPathHint(binDir, pathValue = process.env.PATH || "") { - if (String(pathValue).split(path.delimiter).includes(binDir)) { - return null; - } - return `export PATH="${binDir}:$PATH"`; -} - -function getPortConflictServiceHints(platform = process.platform) { - if (platform === "darwin") { - return [ - " # or, if it's a launchctl service (macOS):", - " launchctl list | grep -i claw # columns: PID | ExitStatus | Label", - ` launchctl unload ${OPENCLAW_LAUNCH_AGENT_PLIST}`, - " # or: launchctl bootout gui/$(id -u)/ai.openclaw.gateway", - ]; - } - return [ - " # or, if it's a systemd service:", - " systemctl --user stop openclaw-gateway.service", - ]; -} - -function installOpenshell() { - const result = spawnSync("bash", [path.join(SCRIPTS, "install-openshell.sh")], { - cwd: ROOT, - env: process.env, - stdio: ["ignore", 
"pipe", "pipe"], - encoding: "utf-8", - timeout: 300_000, - }); - if (result.status !== 0) { - const output = `${result.stdout || ""}${result.stderr || ""}`.trim(); - if (output) { - console.error(output); - } - return { installed: false, localBin: null, futureShellPathHint: null }; - } - const localBin = process.env.XDG_BIN_HOME || path.join(process.env.HOME || "", ".local", "bin"); - const openshellPath = path.join(localBin, "openshell"); - const futureShellPathHint = fs.existsSync(openshellPath) - ? getFutureShellPathHint(localBin, process.env.PATH) - : null; - if (fs.existsSync(openshellPath) && futureShellPathHint) { - process.env.PATH = `${localBin}${path.delimiter}${process.env.PATH}`; - } - OPENSHELL_BIN = resolveOpenshell(); - return { - installed: OPENSHELL_BIN !== null, - localBin, - futureShellPathHint, - }; -} - -function sleep(seconds) { - require("child_process").spawnSync("sleep", [String(seconds)]); -} - -function destroyGateway() { - const destroyResult = runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { - ignoreError: true, - }); - // Clear the local registry so `nemoclaw list` stays consistent with OpenShell state. (#532) - if (destroyResult.status === 0) { - registry.clearAll(); - } - // openshell gateway destroy doesn't remove Docker volumes, which leaves - // corrupted cluster state that breaks the next gateway start. Clean them up. - run( - `docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | grep . 
&& docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | xargs docker volume rm || true`, - { ignoreError: true }, - ); -} - -async function ensureNamedCredential(envName, label, helpUrl = null) { - let key = getCredential(envName); - if (key) { - process.env[envName] = key; - return key; - } - return replaceNamedCredential(envName, label, helpUrl); -} - -function waitForSandboxReady(sandboxName, attempts = 10, delaySeconds = 2) { - for (let i = 0; i < attempts; i += 1) { - const podPhase = runCaptureOpenshell( - [ - "doctor", - "exec", - "--", - "kubectl", - "-n", - "openshell", - "get", - "pod", - sandboxName, - "-o", - "jsonpath={.status.phase}", - ], - { ignoreError: true }, - ); - if (podPhase === "Running") return true; - sleep(delaySeconds); - } - return false; -} - -// parsePolicyPresetEnv — see urlUtils import above -// isSafeModelId — see validation import above - -function getNonInteractiveProvider() { - const providerKey = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase(); - if (!providerKey) return null; - const aliases = { - cloud: "build", - nim: "nim-local", - vllm: "vllm", - anthropiccompatible: "anthropicCompatible", - }; - const normalized = aliases[providerKey] || providerKey; - const validProviders = new Set([ - "build", - "openai", - "anthropic", - "anthropicCompatible", - "gemini", - "ollama", - "custom", - "nim-local", - "vllm", - ]); - if (!validProviders.has(normalized)) { - console.error(` Unsupported NEMOCLAW_PROVIDER: ${providerKey}`); - console.error( - " Valid values: build, openai, anthropic, anthropicCompatible, gemini, ollama, custom, nim-local, vllm", - ); - process.exit(1); - } - - return normalized; -} - -function getNonInteractiveModel(providerKey) { - const model = (process.env.NEMOCLAW_MODEL || "").trim(); - if (!model) return null; - if (!isSafeModelId(model)) { - console.error(` Invalid NEMOCLAW_MODEL for provider '${providerKey}': ${model}`); - console.error(" Model values may only contain 
letters, numbers, '.', '_', ':', '/', and '-'."); - process.exit(1); - } - return model; -} - -// ── Step 1: Preflight ──────────────────────────────────────────── - -// eslint-disable-next-line complexity -async function preflight() { - step(1, 8, "Preflight checks"); - - const host = assessHost(); - - // Docker / runtime - if (!host.dockerReachable) { - console.error(" Docker is not reachable. Please fix Docker and try again."); - printRemediationActions(planHostRemediation(host)); - process.exit(1); - } - console.log(" ✓ Docker is running"); - - if (host.runtime !== "unknown") { - console.log(` ✓ Container runtime: ${host.runtime}`); - } - // Podman is now supported — no unsupported runtime warning needed. - if (host.notes.includes("Running under WSL")) { - console.log(" ⓘ Running under WSL"); - } - - // OpenShell CLI — install if missing, upgrade if below minimum version. - // MIN_VERSION in install-openshell.sh handles the version gate; calling it - // when openshell already exists is safe (it exits early if version is OK). - let openshellInstall = { localBin: null, futureShellPathHint: null }; - if (!isOpenshellInstalled()) { - console.log(" openshell CLI not found. Installing..."); - openshellInstall = installOpenshell(); - if (!openshellInstall.installed) { - console.error(" Failed to install openshell CLI."); - console.error(" Install manually: https://github.com/NVIDIA/OpenShell/releases"); - process.exit(1); - } - } else { - // Ensure the installed version meets the minimum required by install-openshell.sh. - // The script itself is idempotent — it exits early if the version is already sufficient. - const currentVersion = getInstalledOpenshellVersion(); - if (!currentVersion) { - console.log(" openshell version could not be determined. 
Reinstalling..."); - openshellInstall = installOpenshell(); - if (!openshellInstall.installed) { - console.error(" Failed to reinstall openshell CLI."); - console.error(" Install manually: https://github.com/NVIDIA/OpenShell/releases"); - process.exit(1); - } - } else { - const parts = currentVersion.split(".").map(Number); - const minParts = [0, 0, 24]; // must match MIN_VERSION in scripts/install-openshell.sh - const needsUpgrade = - parts[0] < minParts[0] || - (parts[0] === minParts[0] && parts[1] < minParts[1]) || - (parts[0] === minParts[0] && parts[1] === minParts[1] && parts[2] < minParts[2]); - if (needsUpgrade) { - console.log( - ` openshell ${currentVersion} is below minimum required version. Upgrading...`, - ); - openshellInstall = installOpenshell(); - if (!openshellInstall.installed) { - console.error(" Failed to upgrade openshell CLI."); - console.error(" Install manually: https://github.com/NVIDIA/OpenShell/releases"); - process.exit(1); - } - } - } - } - const openshellVersionOutput = runCaptureOpenshell(["--version"], { ignoreError: true }); - console.log(` ✓ openshell CLI: ${openshellVersionOutput || "unknown"}`); - // Enforce nemoclaw-blueprint/blueprint.yaml's min_openshell_version. Without - // this check, users can complete a full onboard against an OpenShell that - // pre-dates required CLI surface (e.g. `sandbox exec`, `--upload`) and hit - // silent failures inside the sandbox at runtime. See #1317. 
- const installedOpenshellVersion = getInstalledOpenshellVersion(openshellVersionOutput); - const minOpenshellVersion = getBlueprintMinOpenshellVersion(); - if ( - installedOpenshellVersion && - minOpenshellVersion && - !versionGte(installedOpenshellVersion, minOpenshellVersion) - ) { - console.error(""); - console.error( - ` ✗ openshell ${installedOpenshellVersion} is below the minimum required by this NemoClaw release.`, - ); - console.error(` blueprint.yaml min_openshell_version: ${minOpenshellVersion}`); - console.error(""); - console.error(" Upgrade openshell and retry:"); - console.error(" https://github.com/NVIDIA/OpenShell/releases"); - console.error( - " Or remove the existing binary so the installer can re-fetch a current build:", - ); - console.error(' command -v openshell && rm -f "$(command -v openshell)"'); - console.error(""); - process.exit(1); - } - // Enforce nemoclaw-blueprint/blueprint.yaml's max_openshell_version. Newer - // OpenShell releases may change sandbox semantics that this NemoClaw version - // has not been validated against. Blocking early avoids silent runtime - // breakage. Users should upgrade NemoClaw to pick up support for newer - // OpenShell releases. 
- const maxOpenshellVersion = getBlueprintMaxOpenshellVersion(); - if ( - installedOpenshellVersion && - maxOpenshellVersion && - !versionGte(maxOpenshellVersion, installedOpenshellVersion) - ) { - console.error(""); - console.error( - ` ✗ openshell ${installedOpenshellVersion} is above the maximum supported by this NemoClaw release.`, - ); - console.error(` blueprint.yaml max_openshell_version: ${maxOpenshellVersion}`); - console.error(""); - console.error(" Upgrade NemoClaw to a version that supports your OpenShell release,"); - console.error(" or install a supported OpenShell version:"); - console.error(" https://github.com/NVIDIA/OpenShell/releases"); - console.error(""); - process.exit(1); - } - if (openshellInstall.futureShellPathHint) { - console.log( - ` Note: openshell was installed to ${openshellInstall.localBin} for this onboarding run.`, - ); - console.log(` Future shells may still need: ${openshellInstall.futureShellPathHint}`); - console.log( - " Add that export to your shell profile, or open a new terminal before running openshell directly.", - ); - } - - // Clean up stale or unnamed NemoClaw gateway state before checking ports. - // A healthy named gateway can be reused later in onboarding, so avoid - // tearing it down here. If some other gateway is active, do not treat it - // as NemoClaw state; let the port checks surface the conflict instead. - const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); - const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { - ignoreError: true, - }); - const activeGatewayInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); - let gatewayReuseState = getGatewayReuseState(gatewayStatus, gwInfo, activeGatewayInfo); - - // Verify the gateway container is actually running — openshell CLI metadata - // can be stale after a manual `docker rm`. See #2020. 
- if (gatewayReuseState === "healthy") { - const containerState = verifyGatewayContainerRunning(); - if (containerState === "missing") { - console.log(" Gateway metadata is stale (container not running). Cleaning up..."); - runOpenshell(["forward", "stop", String(DASHBOARD_PORT)], { ignoreError: true }); - destroyGateway(); - registry.clearAll(); - gatewayReuseState = "missing"; - console.log(" ✓ Stale gateway metadata cleaned up"); - } else if (containerState === "unknown") { - console.log(" Warning: could not verify gateway container state (Docker may be unavailable). Proceeding with cached health status."); - } - } - - if (gatewayReuseState === "stale" || gatewayReuseState === "active-unnamed") { - console.log(" Cleaning up previous NemoClaw session..."); - runOpenshell(["forward", "stop", String(DASHBOARD_PORT)], { ignoreError: true }); - const destroyResult = runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { - ignoreError: true, - }); - // Sandboxes under the destroyed gateway no longer exist in OpenShell — - // clear the local registry so `nemoclaw list` stays consistent. (#532) - if (destroyResult.status === 0) { - registry.clearAll(); - } - console.log(" ✓ Previous session cleaned up"); - } - - // Clean up orphaned Docker containers from interrupted onboard (e.g. Ctrl+C - // during gateway start). The container may still be running even though - // OpenShell has no metadata for it (gatewayReuseState === "missing"). 
- if (gatewayReuseState === "missing") { - const containerName = `openshell-cluster-${GATEWAY_NAME}`; - const inspectResult = run( - `docker inspect --type container --format '{{.State.Status}}' ${containerName} 2>/dev/null`, - { ignoreError: true, suppressOutput: true }, - ); - if (inspectResult.status === 0) { - console.log(" Cleaning up orphaned gateway container..."); - run(`docker stop ${containerName} >/dev/null 2>&1`, { - ignoreError: true, - suppressOutput: true, - }); - run(`docker rm ${containerName} >/dev/null 2>&1`, { - ignoreError: true, - suppressOutput: true, - }); - const postInspectResult = run( - `docker inspect --type container ${containerName} 2>/dev/null`, - { - ignoreError: true, - suppressOutput: true, - }, - ); - if (postInspectResult.status !== 0) { - run( - `docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | grep . && docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | xargs docker volume rm 2>/dev/null || true`, - { ignoreError: true, suppressOutput: true }, - ); - registry.clearAll(); - console.log(" ✓ Orphaned gateway container removed"); - } else { - console.warn(" ! Found an orphaned gateway container, but automatic cleanup failed."); - } - } - } - - // Required ports — gateway and the dashboard port - const requiredPorts = [ - { port: GATEWAY_PORT, label: "OpenShell gateway" }, - { port: DASHBOARD_PORT, label: "NemoClaw dashboard" }, - ]; - for (const { port, label } of requiredPorts) { - let portCheck = await checkPortAvailable(port); - if (!portCheck.ok) { - if ((port === GATEWAY_PORT || port === DASHBOARD_PORT) && gatewayReuseState === "healthy") { - console.log(` ✓ Port ${port} already owned by healthy NemoClaw runtime (${label})`); - continue; - } - // Auto-cleanup orphaned SSH port-forward from a previous NemoClaw session - // (e.g. dashboard forward left behind after destroy). 
Only kill the process - // if its command line contains "openshell" to avoid killing unrelated SSH - // tunnels the user may have set up on the same port. (#1950) - if (port === DASHBOARD_PORT && portCheck.process === "ssh" && portCheck.pid) { - // Use `ps` to get the command line — works on Linux, macOS, and WSL. - const cmdline = runCapture( - `ps -p ${portCheck.pid} -o args= 2>/dev/null`, - { ignoreError: true }, - ).trim(); - if (cmdline.includes("openshell")) { - console.log(` Cleaning up orphaned SSH port-forward on port ${port} (PID ${portCheck.pid})...`); - run(`kill ${portCheck.pid} 2>/dev/null || true`, { ignoreError: true }); - sleep(1); - portCheck = await checkPortAvailable(port); - if (portCheck.ok) { - console.log(` ✓ Port ${port} available after orphaned forward cleanup (${label})`); - continue; - } - } - } - console.error(""); - console.error(` !! Port ${port} is not available.`); - console.error(` ${label} needs this port.`); - console.error(""); - if (portCheck.process && portCheck.process !== "unknown") { - console.error( - ` Blocked by: ${portCheck.process}${portCheck.pid ? 
` (PID ${portCheck.pid})` : ""}`, - ); - console.error(""); - console.error(" To fix, stop the conflicting process:"); - console.error(""); - if (portCheck.pid) { - console.error(` sudo kill ${portCheck.pid}`); - } else { - console.error(` sudo lsof -i :${port} -sTCP:LISTEN -P -n`); - } - for (const hint of getPortConflictServiceHints()) { - console.error(hint); - } - } else { - console.error(` Could not identify the process using port ${port}.`); - console.error(` Run: sudo lsof -i :${port} -sTCP:LISTEN`); - } - console.error(""); - console.error(` Detail: ${portCheck.reason}`); - process.exit(1); - } - console.log(` ✓ Port ${port} available (${label})`); - } - - // GPU - const gpu = nim.detectGpu(); - if (gpu && gpu.type === "nvidia") { - console.log(` ✓ NVIDIA GPU detected: ${gpu.count} GPU(s), ${gpu.totalMemoryMB} MB VRAM`); - if (!gpu.nimCapable) { - console.log(" ⓘ GPU VRAM too small for local NIM — will use cloud inference"); - } - } else if (gpu && gpu.type === "apple") { - console.log( - ` ✓ Apple GPU detected: ${gpu.name}${gpu.cores ? ` (${gpu.cores} cores)` : ""}, ${gpu.totalMemoryMB} MB unified memory`, - ); - console.log(" ⓘ NIM requires NVIDIA GPU — will use cloud inference"); - } else { - console.log(" ⓘ No GPU detected — will use cloud inference"); - } - - // Memory / swap check (Linux only) - if (process.platform === "linux") { - const mem = getMemoryInfo(); - if (mem) { - if (mem.totalMB < 12000) { - console.log( - ` ⚠ Low memory detected (${mem.totalRamMB} MB RAM + ${mem.totalSwapMB} MB swap = ${mem.totalMB} MB total)`, - ); - - let proceedWithSwap = false; - if (!isNonInteractive()) { - const answer = await prompt( - " Create a 4 GB swap file to prevent OOM during sandbox build? (requires sudo) [y/N]: ", - ); - proceedWithSwap = answer && answer.toLowerCase().startsWith("y"); - } - - if (!proceedWithSwap) { - console.log( - " ⓘ Skipping swap creation. 
Sandbox build may fail with OOM on this system.", - ); - } else { - console.log(" Creating 4 GB swap file to prevent OOM during sandbox build..."); - const swapResult = ensureSwap(12000); - if (swapResult.ok && swapResult.swapCreated) { - console.log(" ✓ Swap file created and activated"); - } else if (swapResult.ok) { - if (swapResult.reason) { - console.log(` ⓘ ${swapResult.reason} — existing swap should help prevent OOM`); - } else { - console.log(` ✓ Memory OK: ${mem.totalRamMB} MB RAM + ${mem.totalSwapMB} MB swap`); - } - } else { - console.log(` ⚠ Could not create swap: ${swapResult.reason}`); - console.log(" Sandbox creation may fail with OOM on low-memory systems."); - } - } - } else { - console.log(` ✓ Memory OK: ${mem.totalRamMB} MB RAM + ${mem.totalSwapMB} MB swap`); - } - } - } - - return gpu; -} - -// ── Step 2: Gateway ────────────────────────────────────────────── - -/** Start the OpenShell gateway with retry logic and post-start health polling. */ -async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { - step(2, 8, "Starting OpenShell gateway"); - - const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); - const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { - ignoreError: true, - }); - const activeGatewayInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); - if (isGatewayHealthy(gatewayStatus, gwInfo, activeGatewayInfo)) { - console.log(" ✓ Reusing existing gateway"); - runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); - process.env.OPENSHELL_GATEWAY = GATEWAY_NAME; - return; - } - - // When a stale gateway is detected (metadata exists but container is gone, - // e.g. after a Docker/Colima restart), skip the destroy — `gateway start` - // can recover the container without wiping metadata and mTLS certs. - // The retry loop below will destroy only if start genuinely fails. 
- if (hasStaleGateway(gwInfo)) { - console.log(" Stale gateway detected — attempting restart without destroy..."); - } - - // Clear stale SSH host keys from previous gateway (fixes #768) - try { - const { execFileSync } = require("child_process"); - execFileSync("ssh-keygen", ["-R", `openshell-${GATEWAY_NAME}`], { stdio: "ignore" }); - } catch { - /* ssh-keygen -R may fail if entry doesn't exist — safe to ignore */ - } - // Also purge any known_hosts entries matching the gateway hostname pattern - const knownHostsPath = path.join(os.homedir(), ".ssh", "known_hosts"); - if (fs.existsSync(knownHostsPath)) { - try { - const kh = fs.readFileSync(knownHostsPath, "utf8"); - const cleaned = pruneKnownHostsEntries(kh); - if (cleaned !== kh) fs.writeFileSync(knownHostsPath, cleaned); - } catch { - /* best-effort cleanup — ignore read/write errors */ - } - } - - const gwArgs = ["--name", GATEWAY_NAME, "--port", String(GATEWAY_PORT)]; - // Do NOT pass --gpu here. On DGX Spark (and most GPU hosts), inference is - // routed through a host-side provider (Ollama, vLLM, or cloud API) — the - // sandbox itself does not need direct GPU access. Passing --gpu causes - // FailedPrecondition errors when the gateway's k3s device plugin cannot - // allocate GPUs. See: https://build.nvidia.com/spark/nemoclaw/instructions - const gatewayEnv = getGatewayStartEnv(); - if (gatewayEnv.OPENSHELL_CLUSTER_IMAGE) { - console.log(` Using pinned OpenShell gateway image: ${gatewayEnv.OPENSHELL_CLUSTER_IMAGE}`); - } - - // Retry gateway start with exponential backoff. On some hosts (Horde VMs, - // first-run environments) the embedded k3s needs more time than OpenShell's - // internal health-check window allows. Retrying after a clean destroy lets - // the second attempt benefit from cached images and cleaner cgroup state. - // See: https://github.com/NVIDIA/OpenShell/issues/433 - const retries = exitOnFailure ? 
2 : 0; - try { - await pRetry( - async () => { - const startResult = await streamGatewayStart( - openshellShellCommand(["gateway", "start", ...gwArgs]), - { - ...process.env, - ...gatewayEnv, - }, - ); - if (startResult.status !== 0) { - const lines = String(redact(startResult.output || "")) - .split("\n") - .map((l) => compactText(l.replace(ANSI_RE, ""))) - .filter(Boolean) - .map((l) => ` ${l}`); - if (lines.length > 0) { - console.log(` Gateway start returned before healthy:\n${lines.join("\n")}`); - } - } - console.log(" Waiting for gateway health..."); - - // ARM64 (e.g. Raspberry Pi) needs more time: k3s takes 90-180s to init - const isArm64 = process.arch === "arm64"; - // After openshell gateway start returns (container HEALTHY at Layer 1), - // poll application-layer connectivity (Layer 2: gRPC, TLS, port mapping). - // 60s default gives enough buffer for gRPC init and TLS handshake. (#1830) - const healthPollCount = envInt("NEMOCLAW_HEALTH_POLL_COUNT", isArm64 ? 30 : 12); - const healthPollInterval = envInt("NEMOCLAW_HEALTH_POLL_INTERVAL", isArm64 ? 10 : 5); - for (let i = 0; i < healthPollCount; i++) { - // Ensure the gateway is selected before each probe (non-TTY environments - // like ARM64 may not have it selected automatically) - runCaptureOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); - const status = runCaptureOpenshell(["status"], { ignoreError: true }); - const namedInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { - ignoreError: true, - }); - const currentInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); - if (isGatewayHealthy(status, namedInfo, currentInfo)) { - return; // success - } - if (i < healthPollCount - 1) sleep(healthPollInterval); - } - - throw new Error("Gateway failed to start"); - }, - { - retries, - minTimeout: 10_000, - factor: 3, - onFailedAttempt: (err) => { - console.log( - ` Gateway start attempt ${err.attemptNumber} failed. 
${err.retriesLeft} retries left...`, - ); - if (err.retriesLeft > 0 && exitOnFailure) { - destroyGateway(); - } - }, - }, - ); - } catch { - if (exitOnFailure) { - console.error(` Gateway failed to start after ${retries + 1} attempts.`); - console.error(" Gateway state preserved for diagnostics."); - console.error(""); - try { - const logs = redact( - runCaptureOpenshell(["doctor", "logs", "--name", GATEWAY_NAME], { - ignoreError: true, - }), - ); - if (logs) { - console.error(" Gateway logs:"); - for (const line of String(logs) - .split("\n") - .map((l) => l.replace(/\r/g, "").replace(ANSI_RE, "")) - .filter(Boolean)) { - console.error(` ${line}`); - } - console.error(""); - } - } catch { - // doctor logs unavailable — fall through to manual instructions - } - console.error(" Troubleshooting:"); - console.error(" openshell doctor logs --name nemoclaw"); - console.error(" openshell doctor check"); - process.exit(1); - } - throw new Error("Gateway failed to start"); - } - - console.log(" ✓ Gateway is healthy"); - - // CoreDNS fix — k3s-inside-Docker has broken DNS forwarding on all platforms. - const runtime = getContainerRuntime(); - if (shouldPatchCoredns(runtime)) { - console.log(" Patching CoreDNS DNS forwarding..."); - run(`bash "${path.join(SCRIPTS, "fix-coredns.sh")}" ${GATEWAY_NAME} 2>&1 || true`, { - ignoreError: true, - }); - } - sleep(5); - runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); - process.env.OPENSHELL_GATEWAY = GATEWAY_NAME; -} - -async function startGateway(_gpu) { - return startGatewayWithOptions(_gpu, { exitOnFailure: true }); -} - -async function startGatewayForRecovery(_gpu) { - return startGatewayWithOptions(_gpu, { exitOnFailure: false }); + return ensureOllamaAuthProxyWithDeps(getOllamaProxyDeps()); } -function getGatewayStartEnv() { - const gatewayEnv = {}; - const openshellVersion = getInstalledOpenshellVersion(); - const stableGatewayImage = openshellVersion - ? 
`ghcr.io/nvidia/openshell/cluster:${openshellVersion}` - : null; - if (stableGatewayImage && openshellVersion) { - gatewayEnv.OPENSHELL_CLUSTER_IMAGE = stableGatewayImage; - gatewayEnv.IMAGE_TAG = openshellVersion; - } - return gatewayEnv; -} - -async function recoverGatewayRuntime() { - runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); - let status = runCaptureOpenshell(["status"], { ignoreError: true }); - if (status.includes("Connected") && isSelectedGateway(status)) { - process.env.OPENSHELL_GATEWAY = GATEWAY_NAME; - return true; - } - - const startResult = runOpenshell( - ["gateway", "start", "--name", GATEWAY_NAME, "--port", String(GATEWAY_PORT)], - { - ignoreError: true, - env: getGatewayStartEnv(), - suppressOutput: true, - }, - ); - if (startResult.status !== 0) { - const diagnostic = compactText( - redact(`${startResult.stderr || ""} ${startResult.stdout || ""}`), - ); - console.error(` Gateway restart failed (exit ${startResult.status}).`); - if (diagnostic) { - console.error(` ${diagnostic.slice(0, 240)}`); - } - } - runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); - - const recoveryPollCount = envInt("NEMOCLAW_HEALTH_POLL_COUNT", 10); - const recoveryPollInterval = envInt("NEMOCLAW_HEALTH_POLL_INTERVAL", 2); - for (let i = 0; i < recoveryPollCount; i++) { - status = runCaptureOpenshell(["status"], { ignoreError: true }); - if (status.includes("Connected") && isSelectedGateway(status)) { - process.env.OPENSHELL_GATEWAY = GATEWAY_NAME; - const runtime = getContainerRuntime(); - if (shouldPatchCoredns(runtime)) { - run(`bash "${path.join(SCRIPTS, "fix-coredns.sh")}" ${GATEWAY_NAME} 2>&1 || true`, { - ignoreError: true, - }); - } - return true; - } - if (i < recoveryPollCount - 1) sleep(recoveryPollInterval); - } - - return false; -} - -// ── Step 3: Sandbox ────────────────────────────────────────────── - -async function promptValidatedSandboxName() { - const MAX_ATTEMPTS = 3; - for (let attempt = 0; 
attempt < MAX_ATTEMPTS; attempt++) { - const nameAnswer = await promptOrDefault( - " Sandbox name (lowercase, starts with letter, hyphens ok) [my-assistant]: ", - "NEMOCLAW_SANDBOX_NAME", - "my-assistant", - ); - const sandboxName = (nameAnswer || "my-assistant").trim(); - - try { - const validatedSandboxName = validateName(sandboxName, "sandbox name"); - // Reject names that collide with global CLI commands. - // A sandbox named 'status' makes 'nemoclaw status connect' route to - // the global status command instead of the sandbox. - const RESERVED_NAMES = new Set([ - "onboard", - "list", - "deploy", - "setup", - "setup-spark", - "start", - "stop", - "status", - "debug", - "uninstall", - "credentials", - "help", - ]); - if (RESERVED_NAMES.has(sandboxName)) { - console.error(` Reserved name: '${sandboxName}' is a NemoClaw CLI command.`); - console.error(" Choose a different name to avoid routing conflicts."); - if (isNonInteractive()) { - process.exit(1); - } - if (attempt < MAX_ATTEMPTS - 1) { - console.error(" Please try again.\n"); - } - continue; - } - return validatedSandboxName; - } catch (error) { - console.error(` ${error.message}`); - } - - if (/^[0-9]/.test(sandboxName)) { - console.error(" Names must start with a letter, not a digit."); - } else { - console.error(" Names must be lowercase, contain only letters, numbers, and hyphens,"); - console.error(" must start with a letter, and end with a letter or number."); - } - - // Non-interactive runs cannot re-prompt — abort so the caller can fix the - // NEMOCLAW_SANDBOX_NAME env var and retry. 
- if (isNonInteractive()) { - process.exit(1); - } - - if (attempt < MAX_ATTEMPTS - 1) { - console.error(" Please try again.\n"); - } - } - - console.error(" Too many invalid attempts."); - process.exit(1); -} - -// ── Step 5: Sandbox ────────────────────────────────────────────── - -// eslint-disable-next-line complexity -async function createSandbox( - gpu, - model, - provider, - preferredInferenceApi = null, - sandboxNameOverride = null, - webSearchConfig = null, - enabledChannels = null, - fromDockerfile = null, - agent = null, - dangerouslySkipPermissions = false, -) { - step(6, 8, "Creating sandbox"); - - const sandboxName = validateName( - sandboxNameOverride ?? (await promptValidatedSandboxName()), - "sandbox name", - ); - const effectivePort = agent ? agent.forwardPort : CONTROL_UI_PORT; - const chatUiUrl = process.env.CHAT_UI_URL || `http://127.0.0.1:${effectivePort}`; - - // Check whether messaging providers will be needed — this must happen before - // the sandbox reuse decision so we can detect stale sandboxes that were created - // without provider attachments (security: prevents legacy raw-env-var leaks). - const getMessagingToken = (envKey) => - getCredential(envKey) || normalizeCredentialValue(process.env[envKey]) || null; - - // The UI toggle list can include channels the user toggled on but then - // skipped the token prompt for. Only channels with a real token will have a - // provider attached, so the conflict check must filter out the skipped ones - // (otherwise we warn about phantom channels that will never poll). - const conflictCheckChannels: string[] = Array.isArray(enabledChannels) - ? enabledChannels.filter((name) => { - const def = MESSAGING_CHANNELS.find((c) => c.name === name); - return def ? !!getMessagingToken(def.envKey) : false; - }) - : []; - - // Messaging channels like Telegram (getUpdates), Discord (gateway), and Slack - // (Socket Mode) enforce one consumer per bot token. 
Two sandboxes sharing - // a token silently break both bridges (see #1953). Warn before we commit. - if (conflictCheckChannels.length > 0) { - const { - backfillMessagingChannels, - findChannelConflicts, - } = require("./messaging-conflict"); - backfillMessagingChannels(registry, makeConflictProbe()); - const conflicts = findChannelConflicts(sandboxName, conflictCheckChannels, registry); - if (conflicts.length > 0) { - for (const { channel, sandbox } of conflicts) { - console.log( - ` ⚠ Sandbox '${sandbox}' already has ${channel} enabled. Bot tokens only allow one sandbox to poll — continuing will break both bridges.`, - ); - } - if (isNonInteractive()) { - console.error( - " Aborting: resolve the messaging channel conflict above or run `nemoclaw destroy` on the other sandbox.", - ); - process.exit(1); - } - const answer = (await promptOrDefault(" Continue anyway? [y/N]: ", null, "n")) - .trim() - .toLowerCase(); - if (answer !== "y" && answer !== "yes") { - console.log(" Aborting sandbox creation."); - process.exit(1); - } - } - } - - // When enabledChannels is provided (from the toggle picker), only include - // channels the user selected. When null (backward compat), include all. - const enabledEnvKeys = - enabledChannels != null - ? new Set( - MESSAGING_CHANNELS.filter((c) => enabledChannels.includes(c.name)).flatMap((c) => - c.appTokenEnvKey ? 
[c.envKey, c.appTokenEnvKey] : [c.envKey], - ), - ) - : null; - - const messagingTokenDefs = [ - { - name: `${sandboxName}-discord-bridge`, - envKey: "DISCORD_BOT_TOKEN", - token: getMessagingToken("DISCORD_BOT_TOKEN"), - }, - { - name: `${sandboxName}-slack-bridge`, - envKey: "SLACK_BOT_TOKEN", - token: getMessagingToken("SLACK_BOT_TOKEN"), - }, - { - name: `${sandboxName}-slack-app`, - envKey: "SLACK_APP_TOKEN", - token: getMessagingToken("SLACK_APP_TOKEN"), - }, - { - name: `${sandboxName}-telegram-bridge`, - envKey: "TELEGRAM_BOT_TOKEN", - token: getMessagingToken("TELEGRAM_BOT_TOKEN"), - }, - ].filter(({ envKey }) => !enabledEnvKeys || enabledEnvKeys.has(envKey)); - - if (webSearchConfig) { - messagingTokenDefs.push({ - name: `${sandboxName}-brave-search`, - envKey: webSearch.BRAVE_API_KEY_ENV, - token: getCredential(webSearch.BRAVE_API_KEY_ENV), - }); - } - const hasMessagingTokens = messagingTokenDefs.some(({ token }) => !!token); - - // Reconcile local registry state with the live OpenShell gateway state. - const liveExists = pruneStaleSandboxEntry(sandboxName); - - // Declared outside the liveExists block so it is accessible during - // post-creation restore (the sandbox create path runs after the block). - let pendingStateRestore = null; - - if (liveExists) { - const existingSandboxState = getSandboxReuseState(sandboxName); - - // Check whether messaging providers are missing from the gateway. Only - // force recreation when at least one required provider doesn't exist yet — - // this avoids destroying sandboxes already created with provider attachments. - const needsProviderMigration = - hasMessagingTokens && - messagingTokenDefs.some(({ name, token }) => token && !providerExistsInGateway(name)); - - // Detect whether any messaging credential has been rotated since the - // sandbox was created. Provider credentials are resolved once at sandbox - // startup, so a rotated token requires a rebuild to take effect. 
- const credentialRotation = hasMessagingTokens - ? detectMessagingCredentialRotation(sandboxName, messagingTokenDefs) - : { changed: false, changedProviders: [] }; - - if (!isRecreateSandbox() && !needsProviderMigration && !credentialRotation.changed) { - if (isNonInteractive()) { - if (existingSandboxState === "ready") { - // Upsert messaging providers even on reuse so credential changes take - // effect without requiring a full sandbox recreation. - upsertMessagingProviders(messagingTokenDefs); - note(` [non-interactive] Sandbox '${sandboxName}' exists and is ready — reusing it`); - note(" Pass --recreate-sandbox or set NEMOCLAW_RECREATE_SANDBOX=1 to force recreation."); - ensureDashboardForward(sandboxName, chatUiUrl); - return sandboxName; - } - console.error(` Sandbox '${sandboxName}' already exists but is not ready.`); - console.error(" Pass --recreate-sandbox or set NEMOCLAW_RECREATE_SANDBOX=1 to overwrite."); - process.exit(1); - } - - if (existingSandboxState === "ready") { - console.log(` Sandbox '${sandboxName}' already exists.`); - console.log(" Choosing 'n' will delete the existing sandbox and create a new one."); - const answer = await promptOrDefault(" Reuse existing sandbox? [Y/n]: ", null, "y"); - const normalizedAnswer = answer.trim().toLowerCase(); - if (normalizedAnswer !== "n" && normalizedAnswer !== "no") { - upsertMessagingProviders(messagingTokenDefs); - ensureDashboardForward(sandboxName, chatUiUrl); - return sandboxName; - } - } else { - console.log(` Sandbox '${sandboxName}' exists but is not ready.`); - console.log(" Selecting 'n' will abort onboarding."); - const answer = await promptOrDefault( - " Delete it and create a new one? 
[Y/n]: ", - null, - "y", - ); - const normalizedAnswer = answer.trim().toLowerCase(); - if (normalizedAnswer === "n" || normalizedAnswer === "no") { - console.log(" Aborting onboarding."); - process.exit(1); - } - } - } - - // Back up workspace state before destroying the sandbox when triggered - // by credential rotation, so files can be restored after recreation. - if (credentialRotation.changed && existingSandboxState === "ready") { - const rotatedNames = credentialRotation.changedProviders.join(", "); - console.log(` Messaging credential(s) rotated: ${rotatedNames}`); - console.log(" Rebuilding sandbox to propagate new credentials to the L7 proxy..."); - try { - const backup = sandboxState.backupSandboxState(sandboxName); - if (backup.success) { - note(` ✓ State backed up (${backup.backedUpDirs.length} directories)`); - pendingStateRestore = backup; - } else { - console.error(" State backup failed — aborting rebuild to prevent data loss."); - console.error(" Pass --recreate-sandbox to force recreation without backup."); - upsertMessagingProviders(messagingTokenDefs); - // Update stored hashes so the next onboard doesn't re-detect rotation. 
- const abortHashes = {}; - for (const { envKey, token } of messagingTokenDefs) { - if (token) abortHashes[envKey] = hashCredential(token); - } - if (Object.keys(abortHashes).length > 0) { - registry.updateSandbox(sandboxName, { providerCredentialHashes: abortHashes }); - } - ensureDashboardForward(sandboxName, chatUiUrl); - return sandboxName; - } - } catch (err) { - console.error(` State backup threw: ${err.message} — aborting rebuild.`); - console.error(" Pass --recreate-sandbox to force recreation without backup."); - upsertMessagingProviders(messagingTokenDefs); - const abortHashes = {}; - for (const { envKey, token } of messagingTokenDefs) { - if (token) abortHashes[envKey] = hashCredential(token); - } - if (Object.keys(abortHashes).length > 0) { - registry.updateSandbox(sandboxName, { providerCredentialHashes: abortHashes }); - } - ensureDashboardForward(sandboxName, chatUiUrl); - return sandboxName; - } - } - - if (needsProviderMigration) { - console.log(` Sandbox '${sandboxName}' exists but messaging providers are not attached.`); - console.log(" Recreating to ensure credentials flow through the provider pipeline."); - } else if (credentialRotation.changed) { - // Message already printed above during backup. 
- } else if (existingSandboxState === "ready") { - note(` Sandbox '${sandboxName}' exists and is ready — recreating by explicit request.`); - } else { - note(` Sandbox '${sandboxName}' exists but is not ready — recreating it.`); - } - - const previousEntry = registry.getSandbox(sandboxName); - if (previousEntry?.policies?.length > 0) { - onboardSession.updateSession((current) => { - current.policyPresets = previousEntry.policies; - return current; - }); - } - - note(` Deleting and recreating sandbox '${sandboxName}'...`); - - // Destroy old sandbox - runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true }); - registry.removeSandbox(sandboxName); - } - - // Stage build context — use the custom Dockerfile path when provided, - // otherwise use the optimised default that only sends what the build needs. - let buildCtx, stagedDockerfile; - if (fromDockerfile) { - const fromResolved = path.resolve(fromDockerfile); - if (!fs.existsSync(fromResolved)) { - console.error(` Custom Dockerfile not found: ${fromResolved}`); - process.exit(1); - } - buildCtx = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-")); - stagedDockerfile = path.join(buildCtx, "Dockerfile"); - // Copy the entire parent directory as build context. - try { - fs.cpSync(path.dirname(fromResolved), buildCtx, { - recursive: true, - filter: (src) => { - const base = path.basename(src); - return !["node_modules", ".git", ".venv", "__pycache__"].includes(base); - }, - }); - } catch (err) { - if (err.code === "EACCES") { - console.error( - ` Permission denied while copying build context from: ${path.dirname(fromResolved)}`, - ); - console.error( - " The --from flag uses the Dockerfile's parent directory as the Docker build context.", - ); - console.error(" Move your Dockerfile to a dedicated directory and retry."); - process.exit(1); - } - throw err; - } - // If the caller pointed at a file not named "Dockerfile", copy it to the - // location openshell expects (buildCtx/Dockerfile). 
- if (path.basename(fromResolved) !== "Dockerfile") { - fs.copyFileSync(fromResolved, stagedDockerfile); - } - console.log(` Using custom Dockerfile: ${fromResolved}`); - } else if (agent) { - const agentBuild = agentOnboard.createAgentSandbox(agent); - buildCtx = agentBuild.buildCtx; - stagedDockerfile = agentBuild.stagedDockerfile; - } else { - ({ buildCtx, stagedDockerfile } = stageOptimizedSandboxBuildContext(ROOT)); - } - - // Create sandbox (use -- echo to avoid dropping into interactive shell) - // Pass the base policy so sandbox starts in proxy mode (required for policy updates later) - const globalPermissivePath = path.join( - ROOT, - "nemoclaw-blueprint", - "policies", - "openclaw-sandbox-permissive.yaml", - ); - let basePolicyPath; - if (dangerouslySkipPermissions) { - // Permissive mode: use agent-specific permissive policy if available, - // otherwise fall back to the global permissive policy. - const agentPermissive = agent && agentOnboard.getAgentPermissivePolicyPath(agent); - basePolicyPath = agentPermissive || globalPermissivePath; - } else { - const defaultPolicyPath = path.join( - ROOT, - "nemoclaw-blueprint", - "policies", - "openclaw-sandbox.yaml", - ); - basePolicyPath = (agent && agentOnboard.getAgentPolicyPath(agent)) || defaultPolicyPath; - } - const createArgs = [ - "--from", - `${buildCtx}/Dockerfile`, - "--name", - sandboxName, - "--policy", - basePolicyPath, - ]; - // --gpu is intentionally omitted. See comment in startGateway(). - - // Create OpenShell providers for messaging credentials so they flow through - // the provider/placeholder system instead of raw env vars. The L7 proxy - // rewrites Authorization headers (Bearer/Bot) and URL-path segments - // (/bot{TOKEN}/) with real secrets at egress (OpenShell ≥ 0.0.20). 
- const messagingProviders = upsertMessagingProviders(messagingTokenDefs); - for (const p of messagingProviders) { - createArgs.push("--provider", p); - } - - console.log(` Creating sandbox '${sandboxName}' (this takes a few minutes on first run)...`); - if (webSearchConfig && !getCredential(webSearch.BRAVE_API_KEY_ENV)) { - console.error(" Brave Search is enabled, but BRAVE_API_KEY is not available in this process."); - console.error( - " Re-run with BRAVE_API_KEY set, or disable Brave Search before recreating the sandbox.", - ); - process.exit(1); - } - const tokensByEnvKey = Object.fromEntries( - messagingTokenDefs.map(({ envKey, token }) => [envKey, token]), - ); - const activeMessagingChannels = [ - ...new Set( - messagingTokenDefs - .filter(({ token }) => !!token) - .map(({ envKey }) => { - if (envKey === "DISCORD_BOT_TOKEN") return "discord"; - if (envKey === "SLACK_BOT_TOKEN") return "slack"; - // SLACK_APP_TOKEN alone does not enable slack; bot token is required. - if (envKey === "SLACK_APP_TOKEN") - return tokensByEnvKey["SLACK_BOT_TOKEN"] ? "slack" : null; - if (envKey === "TELEGRAM_BOT_TOKEN") return "telegram"; - return null; - }) - .filter(Boolean), - ), - ]; - // Build allowed sender IDs map from env vars set during the messaging prompt. - // Each channel with a userIdEnvKey in MESSAGING_CHANNELS may have a - // comma-separated list of IDs (e.g. TELEGRAM_ALLOWED_IDS="123,456"). 
- const messagingAllowedIds = {}; - const enabledTokenEnvKeys = new Set(messagingTokenDefs.map(({ envKey }) => envKey)); - for (const ch of MESSAGING_CHANNELS) { - if ( - enabledTokenEnvKeys.has(ch.envKey) && - ch.allowIdsMode === "dm" && - ch.userIdEnvKey && - process.env[ch.userIdEnvKey] - ) { - const ids = process.env[ch.userIdEnvKey] - .split(",") - .map((s) => s.trim()) - .filter(Boolean); - if (ids.length > 0) messagingAllowedIds[ch.name] = ids; - } - } - const discordGuilds = {}; - if (enabledTokenEnvKeys.has("DISCORD_BOT_TOKEN")) { - const serverIds = (process.env.DISCORD_SERVER_IDS || process.env.DISCORD_SERVER_ID || "") - .split(",") - .map((s) => s.trim()) - .filter(Boolean); - const userIds = (process.env.DISCORD_ALLOWED_IDS || process.env.DISCORD_USER_ID || "") - .split(",") - .map((s) => s.trim()) - .filter(Boolean); - for (const serverId of serverIds) { - if (!DISCORD_SNOWFLAKE_RE.test(serverId)) { - console.warn(` Warning: Discord server ID '${serverId}' does not look like a snowflake.`); - } - } - for (const userId of userIds) { - if (!DISCORD_SNOWFLAKE_RE.test(userId)) { - console.warn(` Warning: Discord user ID '${userId}' does not look like a snowflake.`); - } - } - const requireMention = process.env.DISCORD_REQUIRE_MENTION !== "0"; - for (const serverId of serverIds) { - discordGuilds[serverId] = { - requireMention, - ...(userIds.length > 0 ? { users: userIds } : {}), - }; - } - } - // Pull the base image and resolve its digest so the Dockerfile is pinned to - // exactly what we just fetched. This prevents stale :latest tags from - // silently reusing a cached old image after NemoClaw upgrades (#1904). - const resolved = pullAndResolveBaseImageDigest(); - if (resolved) { - console.log(` Pinning base image to ${resolved.digest.slice(0, 19)}...`); - } else { - // Check if the image exists locally before falling back to unpinned :latest. 
- // On a first-time install behind a firewall with no cached image, warn early - // so the user knows the build will likely fail. - const localCheck = runCapture( - ["docker", "image", "inspect", `${SANDBOX_BASE_IMAGE}:${SANDBOX_BASE_TAG}`], - { ignoreError: true }, - ); - if (localCheck) { - console.warn(" Warning: could not pull base image from registry; using cached :latest."); - } else { - console.warn(` Warning: base image ${SANDBOX_BASE_IMAGE}:${SANDBOX_BASE_TAG} is not available locally.`); - console.warn(" The build will fail unless Docker can pull the image during build."); - console.warn(" If offline, pull the image manually first:"); - console.warn(` docker pull ${SANDBOX_BASE_IMAGE}:${SANDBOX_BASE_TAG}`); - } - } - patchStagedDockerfile( - stagedDockerfile, - model, - chatUiUrl, - String(Date.now()), - provider, - preferredInferenceApi, - webSearchConfig, - activeMessagingChannels, - messagingAllowedIds, - discordGuilds, - resolved ? resolved.ref : null, - ); - // Only pass non-sensitive env vars to the sandbox. Credentials flow through - // OpenShell providers — the gateway injects them as placeholders and the L7 - // proxy rewrites Authorization headers with real secrets at egress. - // See: crates/openshell-sandbox/src/secrets.rs (placeholder rewriting), - // crates/openshell-router/src/backend.rs (inference auth injection). - // - // Use the shared allowlist (subprocess-env.ts) instead of the old - // blocklist. The blocklist only blocked 12 specific credential names - // and passed EVERYTHING else — including GITHUB_TOKEN, - // AWS_SECRET_ACCESS_KEY, SSH_AUTH_SOCK, KUBECONFIG, NPM_TOKEN, and - // any CI/CD secrets that happened to be in the host environment. - // The allowlist inverts the default: only known-safe env vars are - // forwarded, everything else is dropped. 
- // - // For the sandbox specifically, we also strip KUBECONFIG and - // SSH_AUTH_SOCK — the generic allowlist includes these for host-side - // subprocesses (gateway start, openshell CLI) but the sandbox should - // never have access to the host's Kubernetes cluster or SSH agent. - const envArgs = [formatEnvAssignment("CHAT_UI_URL", chatUiUrl)]; - // Pass the configured dashboard port into the sandbox so nemoclaw-start.sh - // can unconditionally override CHAT_UI_URL even when the Docker image was - // built with a different default. Without this, the baked-in Docker ENV - // value takes precedence and the gateway starts on the wrong port. (#1925) - if (process.env.NEMOCLAW_DASHBOARD_PORT) { - envArgs.push(formatEnvAssignment("NEMOCLAW_DASHBOARD_PORT", String(DASHBOARD_PORT))); - } - if (webSearchConfig?.fetchEnabled) { - const braveKey = - getCredential(webSearch.BRAVE_API_KEY_ENV) || process.env[webSearch.BRAVE_API_KEY_ENV]; - if (braveKey) { - envArgs.push(formatEnvAssignment(webSearch.BRAVE_API_KEY_ENV, braveKey)); - } - } - const sandboxEnv = buildSubprocessEnv(); - // Remove host-infrastructure credentials that the generic allowlist - // permits for host-side processes but that must not enter the sandbox. - delete sandboxEnv.KUBECONFIG; - delete sandboxEnv.SSH_AUTH_SOCK; - // Run without piping through awk — the pipe masked non-zero exit codes - // from openshell because bash returns the status of the last pipeline - // command (awk, always 0) unless pipefail is set. Removing the pipe - // lets the real exit code flow through to run(). 
- const createCommand = `${openshellShellCommand([ - "sandbox", - "create", - ...createArgs, - "--", - "env", - ...envArgs, - "nemoclaw-start", - ])} 2>&1`; - const createResult = await streamSandboxCreate(createCommand, sandboxEnv, { - readyCheck: () => { - const list = runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); - return isSandboxReady(list, sandboxName); - }, - }); - - // Clean up build context regardless of outcome - run(`rm -rf "${buildCtx}"`, { ignoreError: true }); - - if (createResult.status !== 0) { - const failure = classifySandboxCreateFailure(createResult.output); - if (failure.kind === "sandbox_create_incomplete") { - // The sandbox was created in the gateway but the create stream exited - // with a non-zero code (e.g. SSH 255). Fall through to the ready-wait - // loop — the sandbox may still reach Ready on its own. - console.warn(""); - console.warn( - ` Create stream exited with code ${createResult.status} after sandbox was created.`, - ); - console.warn(" Checking whether the sandbox reaches Ready state..."); - } else { - console.error(""); - console.error(` Sandbox creation failed (exit ${createResult.status}).`); - if (createResult.output) { - console.error(""); - console.error(createResult.output); - } - console.error(" Try: openshell sandbox list # check gateway state"); - printSandboxCreateRecoveryHints(createResult.output); - process.exit(createResult.status || 1); - } - } - - // Wait for sandbox to reach Ready state in k3s before registering. - // On WSL2 + Docker Desktop the pod can take longer to initialize; - // without this gate, NemoClaw registers a phantom sandbox that - // causes "sandbox not found" on every subsequent connect/status call. 
- console.log(" Waiting for sandbox to become ready..."); - let ready = false; - for (let i = 0; i < 30; i++) { - const list = runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); - if (isSandboxReady(list, sandboxName)) { - ready = true; - break; - } - sleep(2); - } - - if (!ready) { - // Clean up the orphaned sandbox so the next onboard retry with the same - // name doesn't fail on "sandbox already exists". - const delResult = runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true }); - console.error(""); - console.error(` Sandbox '${sandboxName}' was created but did not become ready within 60s.`); - if (delResult.status === 0) { - console.error(" The orphaned sandbox has been removed — you can safely retry."); - } else { - console.error(` Could not remove the orphaned sandbox. Manual cleanup:`); - console.error(` openshell sandbox delete "${sandboxName}"`); - } - console.error(" Retry: nemoclaw onboard"); - process.exit(1); - } - - // Wait for NemoClaw dashboard to become fully ready (web server live) - // This prevents port forwards from connecting to a non-existent port - // or seeing 502/503 errors during initial load. - console.log(" Waiting for NemoClaw dashboard to become ready..."); - for (let i = 0; i < 15; i++) { - const readyMatch = runCaptureOpenshell( - ["sandbox", "exec", sandboxName, "curl", "-sf", `http://localhost:${CONTROL_UI_PORT}/`], - { ignoreError: true }, - ); - if (readyMatch) { - console.log(" ✓ Dashboard is live"); - break; - } - if (i === 14) { - console.warn(" Dashboard taking longer than expected to start. Continuing..."); - } else { - sleep(2); - } - } - - // Release any stale forward on the dashboard port before claiming it for the new sandbox. - // A previous onboard run may have left the port forwarded to a different sandbox, - // which would silently prevent the new sandbox's dashboard from being reachable. 
- ensureDashboardForward(sandboxName, chatUiUrl); - - // Register only after confirmed ready — prevents phantom entries - const effectiveAgent = agent || agentDefs.loadAgent("openclaw"); - const providerCredentialHashes = {}; - for (const { envKey, token } of messagingTokenDefs) { - if (token) { - providerCredentialHashes[envKey] = hashCredential(token); - } - } - registry.registerSandbox({ - name: sandboxName, - model: model || null, - provider: provider || null, - gpuEnabled: !!gpu, - agent: agent ? agent.name : null, - agentVersion: fromDockerfile ? null : effectiveAgent.expectedVersion || null, - dangerouslySkipPermissions: dangerouslySkipPermissions || undefined, - providerCredentialHashes: - Object.keys(providerCredentialHashes).length > 0 ? providerCredentialHashes : undefined, - messagingChannels: activeMessagingChannels, - }); - - // Restore workspace state if we backed it up during credential rotation. - if (pendingStateRestore?.success) { - note(" Restoring workspace state after credential rotation..."); - const restore = sandboxState.restoreSandboxState( - sandboxName, - pendingStateRestore.manifest.backupPath, - ); - if (restore.success) { - note(` ✓ State restored (${restore.restoredDirs.length} directories)`); - } else { - console.error( - ` Warning: partial restore. Manual recovery: ${pendingStateRestore.manifest.backupPath}`, - ); - } - } - - // DNS proxy — run a forwarder in the sandbox pod so the isolated - // sandbox namespace can resolve hostnames (fixes #626). - console.log(" Setting up sandbox DNS proxy..."); - runFile("bash", [path.join(SCRIPTS, "setup-dns-proxy.sh"), GATEWAY_NAME, sandboxName], { - ignoreError: true, - }); - - // Check that messaging providers exist in the gateway (sandbox attachment - // cannot be verified via CLI yet — only gateway-level existence is checked). 
- for (const p of messagingProviders) { - if (!providerExistsInGateway(p)) { - console.error(` ⚠ Messaging provider '${p}' was not found in the gateway.`); - console.error(` The credential may not be available inside the sandbox.`); - console.error( - ` To fix: openshell provider create --name ${p} --type generic --credential `, - ); - } - } - - console.log(` ✓ Sandbox '${sandboxName}' created`); - - try { - if (process.platform === "darwin") { - const vmKernel = runCapture("docker info --format '{{.KernelVersion}}'", { - ignoreError: true, - }).trim(); - if (vmKernel) { - const parts = vmKernel.split("."); - const major = parseInt(parts[0], 10); - const minor = parseInt(parts[1], 10); - if (!isNaN(major) && !isNaN(minor) && (major < 5 || (major === 5 && minor < 13))) { - console.warn( - ` ⚠ Landlock: Docker VM kernel ${vmKernel} does not support Landlock (requires ≥5.13).`, - ); - console.warn( - " Sandbox filesystem restrictions will silently degrade (best_effort mode).", - ); - } - } - } else if (process.platform === "linux") { - const uname = runCapture("uname -r", { ignoreError: true }).trim(); - if (uname) { - const parts = uname.split("."); - const major = parseInt(parts[0], 10); - const minor = parseInt(parts[1], 10); - if (!isNaN(major) && !isNaN(minor) && (major < 5 || (major === 5 && minor < 13))) { - console.warn(` ⚠ Landlock: Kernel ${uname} does not support Landlock (requires ≥5.13).`); - console.warn( - " Sandbox filesystem restrictions will silently degrade (best_effort mode).", - ); - } - } - } - } catch {} - - return sandboxName; -} - -// ── Step 3: Inference selection ────────────────────────────────── - -// eslint-disable-next-line complexity -async function setupNim(gpu) { - step(3, 8, "Configuring inference (NIM)"); - - let model = null; - let provider = REMOTE_PROVIDER_CONFIG.build.providerName; - let nimContainer = null; - let endpointUrl = REMOTE_PROVIDER_CONFIG.build.endpointUrl; - let credentialEnv = 
REMOTE_PROVIDER_CONFIG.build.credentialEnv; - let preferredInferenceApi = null; - - // Detect local inference options - const hasOllama = !!runCapture("command -v ollama", { ignoreError: true }); - const ollamaRunning = !!runCapture( - `curl -sf http://127.0.0.1:${OLLAMA_PORT}/api/tags 2>/dev/null`, - { - ignoreError: true, - }, - ); - const vllmRunning = !!runCapture(`curl -sf http://127.0.0.1:${VLLM_PORT}/v1/models 2>/dev/null`, { - ignoreError: true, - }); - const requestedProvider = isNonInteractive() ? getNonInteractiveProvider() : null; - const requestedModel = isNonInteractive() - ? getNonInteractiveModel(requestedProvider || "build") - : null; - const options = []; - options.push({ key: "build", label: "NVIDIA Endpoints" }); - options.push({ key: "openai", label: "OpenAI" }); - options.push({ key: "custom", label: "Other OpenAI-compatible endpoint" }); - options.push({ key: "anthropic", label: "Anthropic" }); - options.push({ key: "anthropicCompatible", label: "Other Anthropic-compatible endpoint" }); - options.push({ key: "gemini", label: "Google Gemini" }); - if (hasOllama || ollamaRunning) { - options.push({ - key: "ollama", - label: - `Local Ollama (localhost:${OLLAMA_PORT})${ollamaRunning ? " — running" : ""}` + - (ollamaRunning ? 
" (suggested)" : ""), - }); - } - if (EXPERIMENTAL && gpu && gpu.nimCapable) { - options.push({ key: "nim-local", label: "Local NVIDIA NIM [experimental]" }); - } - if (EXPERIMENTAL && vllmRunning) { - options.push({ - key: "vllm", - label: "Local vLLM [experimental] — running", - }); - } - // On macOS without Ollama, offer to install it - if (!hasOllama && process.platform === "darwin") { - options.push({ key: "install-ollama", label: "Install Ollama (macOS)" }); - } - - if (options.length > 1) { - selectionLoop: while (true) { - let selected; - - if (isNonInteractive()) { - const providerKey = requestedProvider || "build"; - selected = options.find((o) => o.key === providerKey); - if (!selected) { - console.error( - ` Requested provider '${providerKey}' is not available in this environment.`, - ); - process.exit(1); - } - note(` [non-interactive] Provider: ${selected.key}`); - } else { - const suggestions = []; - if (vllmRunning) suggestions.push("vLLM"); - if (ollamaRunning) suggestions.push("Ollama"); - if (suggestions.length > 0) { - console.log( - ` Detected local inference option${suggestions.length > 1 ? "s" : ""}: ${suggestions.join(", ")}`, - ); - console.log(""); - } - - console.log(""); - console.log(" Inference options:"); - options.forEach((o, i) => { - console.log(` ${i + 1}) ${o.label}`); - }); - console.log(""); - - const envProviderHint = (process.env.NEMOCLAW_PROVIDER || "").trim().toLowerCase(); - const envProviderIdx = envProviderHint - ? options.findIndex((o) => o.key.toLowerCase() === envProviderHint) - : -1; - const defaultIdx = - (envProviderIdx >= 0 ? 
envProviderIdx : options.findIndex((o) => o.key === "build")) + 1; - const choice = await prompt(` Choose [${defaultIdx}]: `); - const idx = parseInt(choice || String(defaultIdx), 10) - 1; - selected = options[idx] || options[defaultIdx - 1]; - } - - if (REMOTE_PROVIDER_CONFIG[selected.key]) { - const remoteConfig = REMOTE_PROVIDER_CONFIG[selected.key]; - provider = remoteConfig.providerName; - credentialEnv = remoteConfig.credentialEnv; - endpointUrl = remoteConfig.endpointUrl; - preferredInferenceApi = null; - - if (selected.key === "custom") { - const _envUrl = (process.env.NEMOCLAW_ENDPOINT_URL || "").trim(); - const endpointInput = isNonInteractive() - ? _envUrl - : (await prompt( - _envUrl - ? ` OpenAI-compatible base URL [${_envUrl}]: ` - : " OpenAI-compatible base URL (e.g., https://openrouter.ai): ", - )) || _envUrl; - const navigation = getNavigationChoice(endpointInput); - if (navigation === "back") { - console.log(" Returning to provider selection."); - console.log(""); - continue selectionLoop; - } - if (navigation === "exit") { - exitOnboardFromPrompt(); - } - endpointUrl = normalizeProviderBaseUrl(endpointInput, "openai"); - if (!endpointUrl) { - console.error(" Endpoint URL is required for Other OpenAI-compatible endpoint."); - if (isNonInteractive()) { - process.exit(1); - } - console.log(""); - continue selectionLoop; - } - } else if (selected.key === "anthropicCompatible") { - const _envUrl = (process.env.NEMOCLAW_ENDPOINT_URL || "").trim(); - const endpointInput = isNonInteractive() - ? _envUrl - : (await prompt( - _envUrl - ? 
` Anthropic-compatible base URL [${_envUrl}]: ` - : " Anthropic-compatible base URL (e.g., https://proxy.example.com): ", - )) || _envUrl; - const navigation = getNavigationChoice(endpointInput); - if (navigation === "back") { - console.log(" Returning to provider selection."); - console.log(""); - continue selectionLoop; - } - if (navigation === "exit") { - exitOnboardFromPrompt(); - } - endpointUrl = normalizeProviderBaseUrl(endpointInput, "anthropic"); - if (!endpointUrl) { - console.error(" Endpoint URL is required for Other Anthropic-compatible endpoint."); - if (isNonInteractive()) { - process.exit(1); - } - console.log(""); - continue selectionLoop; - } - } - - if (selected.key === "build") { - // Allow NEMOCLAW_PROVIDER_KEY as a fallback for NVIDIA_API_KEY - const _nvProviderKey = (process.env.NEMOCLAW_PROVIDER_KEY || "").trim(); - if (_nvProviderKey && !process.env.NVIDIA_API_KEY) { - process.env.NVIDIA_API_KEY = _nvProviderKey; - } - if (isNonInteractive()) { - if (!process.env.NVIDIA_API_KEY) { - console.error( - " NVIDIA_API_KEY (or NEMOCLAW_PROVIDER_KEY) is required for NVIDIA Endpoints in non-interactive mode.", - ); - process.exit(1); - } - const keyError = validateNvidiaApiKeyValue(process.env.NVIDIA_API_KEY); - if (keyError) { - console.error(keyError); - console.error(` Get a key from ${REMOTE_PROVIDER_CONFIG.build.helpUrl}`); - process.exit(1); - } - } else { - await ensureApiKey(); - } - const _envModel = (process.env.NEMOCLAW_MODEL || "").trim(); - model = - requestedModel || - (isNonInteractive() - ? DEFAULT_CLOUD_MODEL - : await promptCloudModel({ defaultModelId: _envModel || undefined })) || - DEFAULT_CLOUD_MODEL; - if (model === BACK_TO_SELECTION) { - console.log(" Returning to provider selection."); - console.log(""); - continue selectionLoop; - } - } else { - // NEMOCLAW_PROVIDER_KEY is a universal alias: if the specific credential env - // isn't already set, use NEMOCLAW_PROVIDER_KEY as the API key for this provider. 
- const _providerKeyHint = (process.env.NEMOCLAW_PROVIDER_KEY || "").trim(); - if (_providerKeyHint && !process.env[credentialEnv]) { - process.env[credentialEnv] = _providerKeyHint; - } - - if (isNonInteractive()) { - if (!process.env[credentialEnv]) { - console.error( - ` ${credentialEnv} (or NEMOCLAW_PROVIDER_KEY) is required for ${remoteConfig.label} in non-interactive mode.`, - ); - process.exit(1); - } - } else { - await ensureNamedCredential( - credentialEnv, - remoteConfig.label + " API key", - remoteConfig.helpUrl, - ); - } - const _envModelRemote = (process.env.NEMOCLAW_MODEL || "").trim(); - const defaultModel = requestedModel || _envModelRemote || remoteConfig.defaultModel; - let modelValidator = null; - if (selected.key === "openai" || selected.key === "gemini") { - const modelAuthMode = getProbeAuthMode(provider); - modelValidator = (candidate) => - validateOpenAiLikeModel( - remoteConfig.label, - endpointUrl, - candidate, - getCredential(credentialEnv), - ...(modelAuthMode ? 
[{ authMode: modelAuthMode }] : []), - ); - } else if (selected.key === "anthropic") { - modelValidator = (candidate) => - validateAnthropicModel( - endpointUrl || ANTHROPIC_ENDPOINT_URL, - candidate, - getCredential(credentialEnv), - ); - } - while (true) { - if (isNonInteractive()) { - model = defaultModel; - } else if (remoteConfig.modelMode === "curated") { - model = await promptRemoteModel( - remoteConfig.label, - selected.key, - defaultModel, - modelValidator, - ); - } else { - model = await promptInputModel(remoteConfig.label, defaultModel, modelValidator); - } - if (model === BACK_TO_SELECTION) { - console.log(" Returning to provider selection."); - console.log(""); - continue selectionLoop; - } - - if (selected.key === "custom") { - const validation = await validateCustomOpenAiLikeSelection( - remoteConfig.label, - endpointUrl, - model, - credentialEnv, - remoteConfig.helpUrl, - ); - if (validation.ok) { - // Force chat completions for all OpenAI-compatible endpoints - // unless the user explicitly opted in to responses via env var. - // Many backends (Ollama, vLLM, LiteLLM) expose /v1/responses - // but do not correctly handle the `developer` role used by the - // Responses API — messages with that role are silently dropped, - // causing the model to receive no system prompt or tool - // definitions. Chat completions uses the `system` role which - // is universally supported. 
- // See: https://github.com/NVIDIA/NemoClaw/issues/1932 - const explicitApi = (process.env.NEMOCLAW_PREFERRED_API || "").trim().toLowerCase(); - if (explicitApi && explicitApi !== "openai-completions" && explicitApi !== "chat-completions") { - preferredInferenceApi = validation.api; - } else { - if (validation.api !== "openai-completions") { - console.log( - " ℹ Using chat completions API (compatible endpoints may not support the Responses API developer role)", - ); - } - preferredInferenceApi = "openai-completions"; - } - break; - } - if ( - validation.retry === "credential" || - validation.retry === "retry" || - validation.retry === "model" - ) { - continue; - } - if (validation.retry === "selection") { - continue selectionLoop; - } - } else if (selected.key === "anthropicCompatible") { - const validation = await validateCustomAnthropicSelection( - remoteConfig.label, - endpointUrl || ANTHROPIC_ENDPOINT_URL, - model, - credentialEnv, - remoteConfig.helpUrl, - ); - if (validation.ok) { - preferredInferenceApi = validation.api; - break; - } - if ( - validation.retry === "credential" || - validation.retry === "retry" || - validation.retry === "model" - ) { - continue; - } - if (validation.retry === "selection") { - continue selectionLoop; - } - } else { - const retryMessage = "Please choose a provider/model again."; - if (selected.key === "anthropic") { - const validation = await validateAnthropicSelectionWithRetryMessage( - remoteConfig.label, - endpointUrl || ANTHROPIC_ENDPOINT_URL, - model, - credentialEnv, - retryMessage, - remoteConfig.helpUrl, - ); - if (validation.ok) { - preferredInferenceApi = validation.api; - break; - } - if ( - validation.retry === "credential" || - validation.retry === "retry" || - validation.retry === "model" - ) { - continue; - } - } else { - const validation = await validateOpenAiLikeSelection( - remoteConfig.label, - endpointUrl, - model, - credentialEnv, - retryMessage, - remoteConfig.helpUrl, - { - requireResponsesToolCalling: 
shouldRequireResponsesToolCalling(provider), - skipResponsesProbe: shouldSkipResponsesProbe(provider), - authMode: getProbeAuthMode(provider), - }, - ); - if (validation.ok) { - preferredInferenceApi = validation.api; - break; - } - if ( - validation.retry === "credential" || - validation.retry === "retry" || - validation.retry === "model" - ) { - continue; - } - } - continue selectionLoop; - } - } - } - - if (selected.key === "build") { - while (true) { - const validation = await validateOpenAiLikeSelection( - remoteConfig.label, - endpointUrl, - model, - credentialEnv, - "Please choose a provider/model again.", - remoteConfig.helpUrl, - { - requireResponsesToolCalling: shouldRequireResponsesToolCalling(provider), - skipResponsesProbe: shouldSkipResponsesProbe(provider), - authMode: getProbeAuthMode(provider), - }, - ); - if (validation.ok) { - preferredInferenceApi = validation.api; - break; - } - if (validation.retry === "credential" || validation.retry === "retry") { - continue; - } - continue selectionLoop; - } - } - - console.log(` Using ${remoteConfig.label} with model: ${model}`); - break; - } else if (selected.key === "nim-local") { - // List models that fit GPU VRAM - const models = nim.listModels().filter((m) => m.minGpuMemoryMB <= gpu.totalMemoryMB); - if (models.length === 0) { - console.log(" No NIM models fit your GPU VRAM. 
Falling back to cloud API."); - } else { - let sel; - if (isNonInteractive()) { - if (requestedModel) { - sel = models.find((m) => m.name === requestedModel); - if (!sel) { - console.error(` Unsupported NEMOCLAW_MODEL for NIM: ${requestedModel}`); - process.exit(1); - } - } else { - sel = models[0]; - } - note(` [non-interactive] NIM model: ${sel.name}`); - } else { - console.log(""); - console.log(" Models that fit your GPU:"); - models.forEach((m, i) => { - console.log(` ${i + 1}) ${m.name} (min ${m.minGpuMemoryMB} MB)`); - }); - console.log(""); - - const modelChoice = await prompt(` Choose model [1]: `); - const midx = parseInt(modelChoice || "1", 10) - 1; - sel = models[midx] || models[0]; - } - model = sel.name; - - console.log(` Pulling NIM image for ${model}...`); - nim.pullNimImage(model); - - console.log(" Starting NIM container..."); - nimContainer = nim.startNimContainerByName(nim.containerName(GATEWAY_NAME), model); - - console.log(" Waiting for NIM to become healthy..."); - if (!nim.waitForNimHealth()) { - console.error(" NIM failed to start. Falling back to cloud API."); - model = null; - nimContainer = null; - } else { - provider = "vllm-local"; - credentialEnv = "OPENAI_API_KEY"; - endpointUrl = getLocalProviderBaseUrl(provider); - const validation = await validateOpenAiLikeSelection( - "Local NVIDIA NIM", - endpointUrl, - model, - credentialEnv, - ); - if ( - validation.retry === "selection" || - validation.retry === "back" || - validation.retry === "model" - ) { - continue selectionLoop; - } - if (!validation.ok) { - continue selectionLoop; - } - preferredInferenceApi = validation.api; - // NIM uses vLLM internally — same tool-call-parser limitation - // applies to /v1/responses. Force chat completions. 
- if (preferredInferenceApi !== "openai-completions") { - console.log( - " ℹ Using chat completions API (tool-call-parser requires /v1/chat/completions)", - ); - } - preferredInferenceApi = "openai-completions"; - } - } - break; - } else if (selected.key === "ollama") { - if (!ollamaRunning) { - console.log(" Starting Ollama..."); - if (isWsl()) { - // On WSL2, binding to 0.0.0.0 creates a dual-stack socket that Docker - // cannot reach via host-gateway. The default 127.0.0.1 binding works - // because WSL2 relays IPv4-only sockets to the Windows host (#1104). - run(`ollama serve > /dev/null 2>&1 &`, { ignoreError: true }); - } else { - // Bind to localhost only — the auth proxy handles container access. - run(`OLLAMA_HOST=127.0.0.1:${OLLAMA_PORT} ollama serve > /dev/null 2>&1 &`, { ignoreError: true }); - } - sleep(2); - if (!isWsl()) printOllamaExposureWarning(); - } - if (isWsl()) { - // WSL2 doesn't need the proxy — Docker can reach the host directly. - console.log(` ✓ Using Ollama on localhost:${OLLAMA_PORT}`); - } else { - startOllamaAuthProxy(); - console.log(` ✓ Using Ollama on localhost:${OLLAMA_PORT} (proxy on :${OLLAMA_PROXY_PORT})`); - } - provider = "ollama-local"; - credentialEnv = "OPENAI_API_KEY"; - endpointUrl = getLocalProviderBaseUrl(provider); - while (true) { - const installedModels = getOllamaModelOptions(); - if (isNonInteractive()) { - model = requestedModel || getDefaultOllamaModel(gpu); - } else { - model = await promptOllamaModel(gpu); - } - if (model === BACK_TO_SELECTION) { - console.log(" Returning to provider selection."); - console.log(""); - continue selectionLoop; - } - const probe = prepareOllamaModel(model, installedModels); - if (!probe.ok) { - console.error(` ${probe.message}`); - if (isNonInteractive()) { - process.exit(1); - } - console.log(" Choose a different Ollama model or select Other."); - console.log(""); - continue; - } - const validation = await validateOpenAiLikeSelection( - "Local Ollama", - 
getLocalProviderValidationBaseUrl(provider), - model, - null, - "Choose a different Ollama model or select Other.", - ); - if (validation.retry === "selection" || validation.retry === "back") { - continue selectionLoop; - } - if (!validation.ok) { - continue; - } - // Ollama's /v1/responses endpoint does not produce correctly - // formatted tool calls — force chat completions like vLLM/NIM. - if (validation.api !== "openai-completions") { - console.log( - " ℹ Using chat completions API (Ollama tool calls require /v1/chat/completions)", - ); - } - preferredInferenceApi = "openai-completions"; - break; - } - break; - } else if (selected.key === "install-ollama") { - // macOS only — this option is gated by process.platform === "darwin" above - console.log(" Installing Ollama via Homebrew..."); - run("brew install ollama", { ignoreError: true }); - console.log(" Starting Ollama..."); - // Bind to localhost — the auth proxy handles container access. - run(`OLLAMA_HOST=127.0.0.1:${OLLAMA_PORT} ollama serve > /dev/null 2>&1 &`, { - ignoreError: true, - }); - sleep(2); - startOllamaAuthProxy(); - console.log(` ✓ Using Ollama on localhost:${OLLAMA_PORT} (proxy on :${OLLAMA_PROXY_PORT})`); - provider = "ollama-local"; - credentialEnv = "OPENAI_API_KEY"; - endpointUrl = getLocalProviderBaseUrl(provider); - while (true) { - const installedModels = getOllamaModelOptions(); - if (isNonInteractive()) { - model = requestedModel || getDefaultOllamaModel(gpu); - } else { - model = await promptOllamaModel(gpu); - } - if (model === BACK_TO_SELECTION) { - console.log(" Returning to provider selection."); - console.log(""); - continue selectionLoop; - } - const probe = prepareOllamaModel(model, installedModels); - if (!probe.ok) { - console.error(` ${probe.message}`); - if (isNonInteractive()) { - process.exit(1); - } - console.log(" Choose a different Ollama model or select Other."); - console.log(""); - continue; - } - const validation = await validateOpenAiLikeSelection( - "Local 
Ollama", - getLocalProviderValidationBaseUrl(provider), - model, - null, - "Choose a different Ollama model or select Other.", - ); - if (validation.retry === "selection" || validation.retry === "back") { - continue selectionLoop; - } - if (!validation.ok) { - continue; - } - // Ollama's /v1/responses endpoint does not produce correctly - // formatted tool calls — force chat completions like vLLM/NIM. - if (validation.api !== "openai-completions") { - console.log( - " ℹ Using chat completions API (Ollama tool calls require /v1/chat/completions)", - ); - } - preferredInferenceApi = "openai-completions"; - break; - } - break; - } else if (selected.key === "vllm") { - console.log(` ✓ Using existing vLLM on localhost:${VLLM_PORT}`); - provider = "vllm-local"; - credentialEnv = "OPENAI_API_KEY"; - endpointUrl = getLocalProviderBaseUrl(provider); - // Query vLLM for the actual model ID - const vllmModelsRaw = runCapture( - `curl -sf http://127.0.0.1:${VLLM_PORT}/v1/models 2>/dev/null`, - { - ignoreError: true, - }, - ); - try { - const vllmModels = JSON.parse(vllmModelsRaw); - if (vllmModels.data && vllmModels.data.length > 0) { - model = vllmModels.data[0].id; - if (!isSafeModelId(model)) { - console.error(` Detected model ID contains invalid characters: ${model}`); - process.exit(1); - } - console.log(` Detected model: ${model}`); - } else { - console.error(" Could not detect model from vLLM. Please specify manually."); - process.exit(1); - } - } catch { - console.error( - ` Could not query vLLM models endpoint. 
Is vLLM running on localhost:${VLLM_PORT}?`, - ); - process.exit(1); - } - const validation = await validateOpenAiLikeSelection( - "Local vLLM", - getLocalProviderValidationBaseUrl(provider), - model, - credentialEnv, - ); - if ( - validation.retry === "selection" || - validation.retry === "back" || - validation.retry === "model" - ) { - continue selectionLoop; - } - if (!validation.ok) { - continue selectionLoop; - } - preferredInferenceApi = validation.api; - // Force chat completions — vLLM's /v1/responses endpoint does not - // run the --tool-call-parser, so tool calls arrive as raw text. - // See: https://github.com/NVIDIA/NemoClaw/issues/976 - if (preferredInferenceApi !== "openai-completions") { - console.log( - " ℹ Using chat completions API (tool-call-parser requires /v1/chat/completions)", - ); - } - preferredInferenceApi = "openai-completions"; - break; - } - } - } - - return { model, provider, endpointUrl, credentialEnv, preferredInferenceApi, nimContainer }; -} - -// ── Step 4: Inference provider ─────────────────────────────────── - -// eslint-disable-next-line complexity -async function setupInference( - sandboxName, - model, - provider, - endpointUrl = null, - credentialEnv = null, -) { - step(4, 8, "Setting up inference provider"); - runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); - - if ( - provider === "nvidia-prod" || - provider === "nvidia-nim" || - provider === "openai-api" || - provider === "anthropic-prod" || - provider === "compatible-anthropic-endpoint" || - provider === "gemini-api" || - provider === "compatible-endpoint" - ) { - const config = - provider === "nvidia-nim" - ? 
REMOTE_PROVIDER_CONFIG.build - : Object.values(REMOTE_PROVIDER_CONFIG).find((entry) => entry.providerName === provider); - while (true) { - const resolvedCredentialEnv = credentialEnv || (config && config.credentialEnv); - const resolvedEndpointUrl = endpointUrl || (config && config.endpointUrl); - const credentialValue = hydrateCredentialEnv(resolvedCredentialEnv); - const env = - resolvedCredentialEnv && credentialValue - ? { [resolvedCredentialEnv]: credentialValue } - : {}; - const providerResult = upsertProvider( - provider, - config.providerType, - resolvedCredentialEnv, - resolvedEndpointUrl, - env, - ); - if (!providerResult.ok) { - console.error(` ${providerResult.message}`); - if (isNonInteractive()) { - process.exit(providerResult.status || 1); - } - const retry = await promptValidationRecovery( - config.label, - classifyApplyFailure(providerResult.message), - resolvedCredentialEnv, - config.helpUrl, - ); - if (retry === "credential" || retry === "retry") { - continue; - } - if (retry === "selection" || retry === "model") { - return { retry: "selection" }; - } - process.exit(providerResult.status || 1); - } - const args = ["inference", "set"]; - if (config.skipVerify) { - args.push("--no-verify"); - } - args.push("--provider", provider, "--model", model); - const applyResult = runOpenshell(args, { ignoreError: true }); - if (applyResult.status === 0) { - break; - } - const message = - compactText(redact(`${applyResult.stderr || ""} ${applyResult.stdout || ""}`)) || - `Failed to configure inference provider '${provider}'.`; - console.error(` ${message}`); - if (isNonInteractive()) { - process.exit(applyResult.status || 1); - } - const retry = await promptValidationRecovery( - config.label, - classifyApplyFailure(message), - resolvedCredentialEnv, - config.helpUrl, - ); - if (retry === "credential" || retry === "retry") { - continue; - } - if (retry === "selection" || retry === "model") { - return { retry: "selection" }; - } - 
process.exit(applyResult.status || 1); - } - } else if (provider === "vllm-local") { - const validation = validateLocalProvider(provider); - if (!validation.ok) { - console.error(` ${validation.message}`); - process.exit(1); - } - const baseUrl = getLocalProviderBaseUrl(provider); - const providerResult = upsertProvider("vllm-local", "openai", "OPENAI_API_KEY", baseUrl, { - OPENAI_API_KEY: "dummy", - }); - if (!providerResult.ok) { - console.error(` ${providerResult.message}`); - process.exit(providerResult.status || 1); - } - runOpenshell([ - "inference", - "set", - "--no-verify", - "--provider", - "vllm-local", - "--model", - model, - "--timeout", - String(LOCAL_INFERENCE_TIMEOUT_SECS), - ]); - } else if (provider === "ollama-local") { - const validation = validateLocalProvider(provider); - if (!validation.ok) { - console.error(` ${validation.message}`); - if (process.platform === "darwin") { - console.error(" On macOS, local inference also depends on OpenShell host routing support."); - } - process.exit(1); - } - const baseUrl = getLocalProviderBaseUrl(provider); - let ollamaCredential = "ollama"; - if (!isWsl()) { - ensureOllamaAuthProxy(); - const proxyToken = getOllamaProxyToken(); - if (!proxyToken) { - console.error(" Ollama auth proxy token is not set. Re-run onboard to initialize the proxy."); - process.exit(1); - } - ollamaCredential = proxyToken; - // Persist token now that ollama-local is confirmed as the provider. - // Not persisted earlier in case the user backs out to a different provider. 
- persistProxyToken(proxyToken); - } - const providerResult = upsertProvider("ollama-local", "openai", "OPENAI_API_KEY", baseUrl, { - OPENAI_API_KEY: ollamaCredential, - }); - if (!providerResult.ok) { - console.error(` ${providerResult.message}`); - process.exit(providerResult.status || 1); - } - runOpenshell([ - "inference", - "set", - "--no-verify", - "--provider", - "ollama-local", - "--model", - model, - "--timeout", - String(LOCAL_INFERENCE_TIMEOUT_SECS), - ]); - console.log(` Priming Ollama model: ${model}`); - run(getOllamaWarmupCommand(model), { ignoreError: true }); - const probe = validateOllamaModel(model); - if (!probe.ok) { - console.error(` ${probe.message}`); - process.exit(1); - } - } - - verifyInferenceRoute(provider, model); - registry.updateSandbox(sandboxName, { model, provider }); - console.log(` ✓ Inference route set: ${provider} / ${model}`); - return { ok: true }; -} - -// ── Step 6: Messaging channels ─────────────────────────────────── - -const MESSAGING_CHANNELS = [ - { - name: "telegram", - envKey: "TELEGRAM_BOT_TOKEN", - description: "Telegram bot messaging", - help: "Create a bot via @BotFather on Telegram, then copy the token.", - label: "Telegram Bot Token", - userIdEnvKey: "TELEGRAM_ALLOWED_IDS", - userIdHelp: "Send /start to @userinfobot on Telegram to get your numeric user ID.", - userIdLabel: "Telegram User ID (for DM access)", - allowIdsMode: "dm", - }, - { - name: "discord", - envKey: "DISCORD_BOT_TOKEN", - description: "Discord bot messaging", - help: "Discord Developer Portal → Applications → Bot → Reset/Copy Token.", - label: "Discord Bot Token", - serverIdEnvKey: "DISCORD_SERVER_ID", - serverIdHelp: - "Enable Developer Mode in Discord, then right-click your server and copy the Server ID.", - serverIdLabel: "Discord Server ID (for guild workspace access)", - requireMentionEnvKey: "DISCORD_REQUIRE_MENTION", - requireMentionHelp: - "Choose whether the bot should reply only when @mentioned or to all messages in this server.", 
- userIdEnvKey: "DISCORD_USER_ID", - userIdHelp: - "Optional: enable Developer Mode in Discord, then right-click your user/avatar and copy the User ID. Leave blank to allow any member of the configured server to message the bot.", - userIdLabel: "Discord User ID (optional guild allowlist)", - allowIdsMode: "guild", - }, - { - name: "slack", - envKey: "SLACK_BOT_TOKEN", - description: "Slack bot messaging", - help: "Slack API → Your Apps → OAuth & Permissions → Bot User OAuth Token (xoxb-...).", - label: "Slack Bot Token", - appTokenEnvKey: "SLACK_APP_TOKEN", - appTokenHelp: "Slack API → Your Apps → Basic Information → App-Level Tokens (xapp-...).", - appTokenLabel: "Slack App Token (Socket Mode)", - }, -]; - -// Curl exit codes that indicate a network-level failure (not a token problem). -// 35 (TLS handshake failure) covers corporate proxies that MITM HTTPS. -const TELEGRAM_NETWORK_CURL_CODES = new Set([6, 7, 28, 35, 52, 56]); - -async function checkTelegramReachability(token: string) { - const result = runCurlProbe([ - "-sS", - "--connect-timeout", "5", - "--max-time", "10", - `https://api.telegram.org/bot${token}/getMe`, - ]); - - // HTTP 200 with "ok":true — Telegram is reachable and token is valid. - if (result.ok) return; - - // HTTP 401 or 404 — token was rejected by Telegram (not a network issue). - if (result.httpStatus === 401 || result.httpStatus === 404) { - console.log( - " ⚠ Bot token was rejected by Telegram — verify the token is correct.", - ); - return; - } - - // Network-level failure — Telegram is unreachable from this host. 
- if (result.curlStatus && TELEGRAM_NETWORK_CURL_CODES.has(result.curlStatus)) { - console.log(""); - console.log(" ⚠ api.telegram.org is not reachable from this host."); - console.log(" Telegram integration requires outbound HTTPS access to api.telegram.org."); - console.log(" This is commonly blocked by corporate network proxies."); - - if (isNonInteractive()) { - console.error(" Aborting onboarding in non-interactive mode due to Telegram network reachability failure."); - process.exit(1); - } else { - const answer = (await promptOrDefault(" Continue anyway? [y/N]: ", null, "n")) - .trim() - .toLowerCase(); - if (answer !== "y" && answer !== "yes") { - console.log(" Aborting onboarding."); - process.exit(1); - } - } - return; - } - - // Unexpected probe failure — warn but don't block. - if (!result.ok && result.httpStatus > 0) { - console.log( - ` ⚠ Telegram API returned HTTP ${result.httpStatus} — the bot may not work correctly.`, - ); - } else if (!result.ok) { - console.log(` ⚠ Telegram reachability probe failed: ${result.message}`); - } -} - -async function setupMessagingChannels() { - step(5, 8, "Messaging channels"); - - const getMessagingToken = (envKey) => - getCredential(envKey) || normalizeCredentialValue(process.env[envKey]) || null; - - // Non-interactive: skip prompt, tokens come from env/credentials - if (isNonInteractive() || process.env.NEMOCLAW_NON_INTERACTIVE === "1") { - const found = MESSAGING_CHANNELS.filter((c) => getMessagingToken(c.envKey)).map((c) => c.name); - if (found.length > 0) { - note(` [non-interactive] Messaging tokens detected: ${found.join(", ")}`); - if (found.includes("telegram")) { - await checkTelegramReachability(getMessagingToken("TELEGRAM_BOT_TOKEN")); - } - } else { - note(" [non-interactive] No messaging tokens configured. Skipping."); - } - return found; - } - - // Single-keypress toggle selector — pre-select channels that already have tokens. - // Press 1/2/3 to instantly toggle a channel; press Enter to continue. 
- const enabled = new Set( - MESSAGING_CHANNELS.filter((c) => getMessagingToken(c.envKey)).map((c) => c.name), - ); - - const output = process.stderr; - // Lines above the prompt: 1 blank + 1 header + N channels + 1 blank = N + 3 - const linesAbovePrompt = MESSAGING_CHANNELS.length + 3; - let firstDraw = true; - const showList = () => { - if (!firstDraw) { - // Cursor is at end of prompt line. Move to column 0, go up, clear to end of screen. - output.write(`\r\x1b[${linesAbovePrompt}A\x1b[J`); - } - firstDraw = false; - output.write("\n"); - output.write(" Available messaging channels:\n"); - MESSAGING_CHANNELS.forEach((ch, i) => { - const marker = enabled.has(ch.name) ? "●" : "○"; - const status = getMessagingToken(ch.envKey) ? " (configured)" : ""; - output.write(` [${i + 1}] ${marker} ${ch.name} — ${ch.description}${status}\n`); - }); - output.write("\n"); - output.write(" Press 1-3 to toggle, Enter when done: "); - }; - - showList(); - - await new Promise((resolve, reject) => { - const input = process.stdin; - let rawModeEnabled = false; - let finished = false; - - function cleanup() { - input.removeListener("data", onData); - if (rawModeEnabled && typeof input.setRawMode === "function") { - input.setRawMode(false); - } - } - - function finish() { - if (finished) return; - finished = true; - cleanup(); - output.write("\n"); - resolve(); - } - - function onData(chunk) { - const text = chunk.toString("utf8"); - for (let i = 0; i < text.length; i += 1) { - const ch = text[i]; - if (ch === "\u0003") { - cleanup(); - reject(Object.assign(new Error("Prompt interrupted"), { code: "SIGINT" })); - process.kill(process.pid, "SIGINT"); - return; - } - if (ch === "\r" || ch === "\n") { - finish(); - return; - } - const num = parseInt(ch, 10); - if (num >= 1 && num <= MESSAGING_CHANNELS.length) { - const channel = MESSAGING_CHANNELS[num - 1]; - if (enabled.has(channel.name)) { - enabled.delete(channel.name); - } else { - enabled.add(channel.name); - } - showList(); - } - } 
- } - - input.setEncoding("utf8"); - if (typeof input.resume === "function") { - input.resume(); - } - if (typeof input.setRawMode === "function") { - input.setRawMode(true); - rawModeEnabled = true; - } - input.on("data", onData); - }); - - const selected = Array.from(enabled); - if (selected.length === 0) { - console.log(" Skipping messaging channels."); - return []; - } - - // For each selected channel, prompt for token if not already set - for (const name of selected) { - const ch = MESSAGING_CHANNELS.find((c) => c.name === name); - if (!ch) { - console.log(` Unknown channel: ${name}`); - continue; - } - if (getMessagingToken(ch.envKey)) { - console.log(` ✓ ${ch.name} — already configured`); - } else { - console.log(""); - console.log(` ${ch.help}`); - const token = normalizeCredentialValue(await prompt(` ${ch.label}: `, { secret: true })); - if (token) { - saveCredential(ch.envKey, token); - process.env[ch.envKey] = token; - console.log(` ✓ ${ch.name} token saved`); - } else { - console.log(` Skipped ${ch.name} (no token entered)`); - enabled.delete(ch.name); - continue; - } - } - if (ch.serverIdEnvKey) { - const existingServerIds = process.env[ch.serverIdEnvKey] || ""; - if (existingServerIds) { - console.log(` ✓ ${ch.name} — server ID already set: ${existingServerIds}`); - } else { - console.log(` ${ch.serverIdHelp}`); - const serverId = (await prompt(` ${ch.serverIdLabel}: `)).trim(); - if (serverId) { - process.env[ch.serverIdEnvKey] = serverId; - console.log(` ✓ ${ch.name} server ID saved`); - } else { - console.log(` Skipped ${ch.name} server ID (guild channels stay disabled)`); - } - } - } - if (ch.requireMentionEnvKey && ch.serverIdEnvKey && process.env[ch.serverIdEnvKey]) { - const existingRequireMention = process.env[ch.requireMentionEnvKey]; - if (existingRequireMention === "0" || existingRequireMention === "1") { - const mode = existingRequireMention === "0" ? 
"all messages" : "@mentions only"; - console.log(` ✓ ${ch.name} — reply mode already set: ${mode}`); - } else { - console.log(` ${ch.requireMentionHelp}`); - const answer = (await prompt(" Reply only when @mentioned? [Y/n]: ")).trim().toLowerCase(); - process.env[ch.requireMentionEnvKey] = answer === "n" || answer === "no" ? "0" : "1"; - const mode = - process.env[ch.requireMentionEnvKey] === "0" ? "all messages" : "@mentions only"; - console.log(` ✓ ${ch.name} reply mode saved: ${mode}`); - } - } - // Prompt for user/sender ID when the channel supports allowlisting - if (ch.userIdEnvKey && (!ch.serverIdEnvKey || process.env[ch.serverIdEnvKey])) { - const existingIds = process.env[ch.userIdEnvKey] || ""; - if (existingIds) { - console.log(` ✓ ${ch.name} — allowed IDs already set: ${existingIds}`); - } else { - console.log(` ${ch.userIdHelp}`); - const userId = (await prompt(` ${ch.userIdLabel}: `)).trim(); - if (userId) { - process.env[ch.userIdEnvKey] = userId; - console.log(` ✓ ${ch.name} user ID saved`); - } else { - const skippedReason = - ch.allowIdsMode === "guild" - ? "any member in the configured server can message the bot" - : "bot will require manual pairing"; - console.log(` Skipped ${ch.name} user ID (${skippedReason})`); - } - } - } - } - console.log(""); - - // Channels where the user declined to enter a token were dropped from - // `enabled` inside the per-channel loop, so only channels with credentials - // configured remain in the Set. - - // Preflight: verify Telegram API is reachable from the host before sandbox creation. - // The non-interactive branch above already ran this probe and returned early, - // so this second call only fires on the interactive path — guard explicitly - // to make the no-double-probe invariant visible at the call site. 
- if ( - !isNonInteractive() && - enabled.has("telegram") && - getMessagingToken("TELEGRAM_BOT_TOKEN") - ) { - await checkTelegramReachability(getMessagingToken("TELEGRAM_BOT_TOKEN")); - } - - return Array.from(enabled); -} - -function getSuggestedPolicyPresets({ enabledChannels = null, webSearchConfig = null, provider = null } = {}) { - const suggestions = ["pypi", "npm"]; - - // Auto-suggest local-inference preset when a local provider is selected - if (provider && LOCAL_INFERENCE_PROVIDERS.includes(provider)) { - suggestions.push("local-inference"); - } - const usesExplicitMessagingSelection = Array.isArray(enabledChannels); - - const maybeSuggestMessagingPreset = (channel, envKey) => { - if (usesExplicitMessagingSelection) { - if (enabledChannels.includes(channel)) suggestions.push(channel); - return; - } - if (getCredential(envKey) || process.env[envKey]) { - suggestions.push(channel); - if (process.stdout.isTTY && !isNonInteractive() && process.env.CI !== "true") { - console.log(` Auto-detected: ${envKey} -> suggesting ${channel} preset`); - } - } - }; - - maybeSuggestMessagingPreset("telegram", "TELEGRAM_BOT_TOKEN"); - maybeSuggestMessagingPreset("slack", "SLACK_BOT_TOKEN"); - maybeSuggestMessagingPreset("discord", "DISCORD_BOT_TOKEN"); - - if (webSearchConfig) suggestions.push("brave"); - - return suggestions; -} - -// ── Step 7: OpenClaw ───────────────────────────────────────────── - -async function setupOpenclaw(sandboxName, model, provider) { - step(7, 8, "Setting up OpenClaw inside sandbox"); - - const selectionConfig = getProviderSelectionConfig(provider, model); - if (selectionConfig) { - const sandboxConfig = { - ...selectionConfig, - onboardedAt: new Date().toISOString(), - }; - const script = buildSandboxConfigSyncScript(sandboxConfig); - const scriptFile = writeSandboxConfigSyncFile(script); - try { - run( - `${openshellShellCommand(["sandbox", "connect", sandboxName])} < ${shellQuote(scriptFile)}`, - { stdio: ["ignore", "ignore", "inherit"] }, - 
); - } finally { - cleanupTempDir(scriptFile, "nemoclaw-sync"); - } - } - - console.log(" ✓ OpenClaw gateway launched inside sandbox"); -} - -// ── Step 7: Policy presets ─────────────────────────────────────── - -// eslint-disable-next-line complexity -async function _setupPolicies(sandboxName, options = {}) { - step(8, 8, "Policy presets"); - const suggestions = getSuggestedPolicyPresets(options); - - const allPresets = policies.listPresets(); - const applied = policies.getAppliedPresets(sandboxName); - - if (isNonInteractive()) { - const policyMode = (process.env.NEMOCLAW_POLICY_MODE || "suggested").trim().toLowerCase(); - let selectedPresets = suggestions; - - if (policyMode === "skip" || policyMode === "none" || policyMode === "no") { - note(" [non-interactive] Skipping policy presets."); - return; - } - - if (policyMode === "custom" || policyMode === "list") { - selectedPresets = parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS); - if (selectedPresets.length === 0) { - console.error(" NEMOCLAW_POLICY_PRESETS is required when NEMOCLAW_POLICY_MODE=custom."); - process.exit(1); - } - } else if (policyMode === "suggested" || policyMode === "default" || policyMode === "auto") { - const envPresets = parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS); - if (envPresets.length > 0) { - selectedPresets = envPresets; - } - } else { - console.error(` Unsupported NEMOCLAW_POLICY_MODE: ${policyMode}`); - console.error(" Valid values: suggested, custom, skip"); - process.exit(1); - } - - const knownPresets = new Set(allPresets.map((p) => p.name)); - const invalidPresets = selectedPresets.filter((name) => !knownPresets.has(name)); - if (invalidPresets.length > 0) { - console.error(` Unknown policy preset(s): ${invalidPresets.join(", ")}`); - process.exit(1); - } - - if (!waitForSandboxReady(sandboxName)) { - console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); - process.exit(1); - } - note(` [non-interactive] Applying policy 
presets: ${selectedPresets.join(", ")}`); - for (const name of selectedPresets) { - for (let attempt = 0; attempt < 3; attempt += 1) { - try { - policies.applyPreset(sandboxName, name); - break; - } catch (err) { - const message = err && err.message ? err.message : String(err); - if (!message.includes("sandbox not found") || attempt === 2) { - throw err; - } - sleep(2); - } - } - } - } else { - console.log(""); - console.log(" Available policy presets:"); - allPresets.forEach((p) => { - const marker = applied.includes(p.name) || suggestions.includes(p.name) ? "●" : "○"; - const suggested = suggestions.includes(p.name) ? " (suggested)" : ""; - console.log(` ${marker} ${p.name} — ${p.description}${suggested}`); - }); - console.log(""); - - const answer = await prompt( - ` Apply suggested presets (${suggestions.join(", ")})? [Y/n/list]: `, - ); - - if (answer.toLowerCase() === "n") { - console.log(" Skipping policy presets."); - return; - } - - if (!waitForSandboxReady(sandboxName)) { - console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); - process.exit(1); - } - - if (answer.toLowerCase() === "list") { - // Let user pick - const picks = await prompt(" Enter preset names (comma-separated): "); - const selected = picks - .split(",") - .map((s) => s.trim()) - .filter(Boolean); - for (const name of selected) { - policies.applyPreset(sandboxName, name); - } - } else { - // Apply suggested - for (const name of suggestions) { - policies.applyPreset(sandboxName, name); - } - } - } - - console.log(" ✓ Policies applied"); -} - -function arePolicyPresetsApplied(sandboxName, selectedPresets = []) { - if (!Array.isArray(selectedPresets) || selectedPresets.length === 0) return false; - const applied = new Set(policies.getAppliedPresets(sandboxName)); - return selectedPresets.every((preset) => applied.has(preset)); -} - -/** - * Prompt the user to select a policy tier (restricted / balanced / open). 
- * Uses the same radio-style TUI as presetsCheckboxSelector (single-select). - * In non-interactive mode reads NEMOCLAW_POLICY_TIER (default: balanced). - * Returns the tier name string. - * - * @returns {Promise} - */ -async function selectPolicyTier() { - const allTiers = tiers.listTiers(); - const defaultTier = allTiers.find((t) => t.name === "balanced") || allTiers[1]; - - if (isNonInteractive()) { - const name = (process.env.NEMOCLAW_POLICY_TIER || "balanced").trim().toLowerCase(); - if (!tiers.getTier(name)) { - console.error( - ` Unknown policy tier: ${name}. Valid: ${allTiers.map((t) => t.name).join(", ")}`, - ); - process.exit(1); - } - note(` [non-interactive] Policy tier: ${name}`); - return name; - } - - const RADIO_ON = USE_COLOR ? "[\x1b[32m✓\x1b[0m]" : "[✓]"; - const RADIO_OFF = USE_COLOR ? "\x1b[2m[ ]\x1b[0m" : "[ ]"; - - // ── Fallback: non-TTY ───────────────────────────────────────────── - if (!process.stdin.isTTY || !process.stdout.isTTY) { - console.log(""); - console.log(" Policy tier — controls which network presets are enabled:"); - allTiers.forEach((t, i) => { - const marker = t.name === defaultTier.name ? RADIO_ON : RADIO_OFF; - console.log(` ${marker} ${t.label}`); - }); - console.log(""); - const answer = await prompt( - ` Select tier [1-${allTiers.length}] (default: ${allTiers.indexOf(defaultTier) + 1} ${defaultTier.name}): `, - ); - const idx = - answer.trim() === "" ? allTiers.indexOf(defaultTier) : parseInt(answer.trim(), 10) - 1; - const chosen = allTiers[idx] || defaultTier; - console.log(` Tier: ${chosen.label}`); - return chosen.name; - } - - // ── Raw-mode TUI (radio — single selection) ─────────────────────── - let cursor = allTiers.indexOf(defaultTier); - let selectedIdx = cursor; - const n = allTiers.length; - - const G = USE_COLOR ? "\x1b[32m" : ""; - const D = USE_COLOR ? "\x1b[2m" : ""; - const R = USE_COLOR ? "\x1b[0m" : ""; - const HINT = USE_COLOR - ? 
` ${G}↑/↓ j/k${R} ${D}move${R} ${G}Space${R} ${D}select${R} ${G}Enter${R} ${D}confirm${R}` - : " ↑/↓ j/k move Space select Enter confirm"; - - const renderLines = () => { - const lines = [" Policy tier — controls which network presets are enabled:"]; - allTiers.forEach((t, i) => { - const radio = i === selectedIdx ? RADIO_ON : RADIO_OFF; - const arrow = i === cursor ? ">" : " "; - lines.push(` ${arrow} ${radio} ${t.label}`); - }); - lines.push(""); - lines.push(HINT); - return lines; - }; - - process.stdout.write("\n"); - const initial = renderLines(); - for (const line of initial) process.stdout.write(`${line}\n`); - let lineCount = initial.length; - - const redraw = () => { - process.stdout.write(`\x1b[${lineCount}A`); - const lines = renderLines(); - for (const line of lines) process.stdout.write(`\r\x1b[2K${line}\n`); - lineCount = lines.length; - }; - - process.stdin.setRawMode(true); - process.stdin.resume(); - process.stdin.setEncoding("utf8"); - - return new Promise((resolve) => { - const cleanup = () => { - process.stdin.setRawMode(false); - process.stdin.pause(); - process.stdin.removeListener("data", onData); - process.removeListener("SIGTERM", onSigterm); - }; - - const onSigterm = () => { - cleanup(); - process.exit(1); - }; - process.once("SIGTERM", onSigterm); - - const onData = (key) => { - if (key === "\r" || key === "\n") { - cleanup(); - process.stdout.write("\n"); - resolve(allTiers[selectedIdx].name); - } else if (key === " ") { - selectedIdx = cursor; - redraw(); - } else if (key === "\x03") { - cleanup(); - process.exit(1); - } else if (key === "\x1b[A" || key === "k") { - cursor = (cursor - 1 + n) % n; - redraw(); - } else if (key === "\x1b[B" || key === "j") { - cursor = (cursor + 1) % n; - redraw(); - } - }; - - process.stdin.on("data", onData); - }); -} - -/** - * Combined preset selector: shows ALL available presets, pre-checks those in - * the chosen tier, and lets the user include/exclude any preset and toggle - * per-preset access 
(read vs read-write). - * - * Tier presets are listed first (in tier order), then remaining presets - * alphabetically. Tier presets are pre-checked; others start unchecked. - * - * Keys: - * ↑/↓ j/k — move cursor - * Space — include / exclude current preset - * r — toggle read / read-write for current preset - * Enter — confirm - * - * @param {string} tierName - * @param {Array<{name: string}>} allPresets - * @param {string[]} [extraSelected] — names pre-checked even if not in tier (e.g. already-applied) - * @returns {Promise>} - */ -async function selectTierPresetsAndAccess(tierName, allPresets, extraSelected = []) { - const tierDef = tiers.getTier(tierName); - const tierPresetMap = {}; - if (tierDef) { - for (const p of tierDef.presets) { - tierPresetMap[p.name] = p.access; - } - } - - // Tier presets first (in tier order), then the rest in their original order. - const tierNames = tierDef ? tierDef.presets.map((p) => p.name) : []; - const tierSet = new Set(tierNames); - const ordered = [ - ...tierNames.map((name) => allPresets.find((p) => p.name === name)).filter(Boolean), - ...allPresets.filter((p) => !tierSet.has(p.name)), - ]; - - // Initial inclusion: tier presets + any already-applied extras. - const included = new Set([ - ...tierNames, - ...extraSelected.filter((n) => ordered.find((p) => p.name === n)), - ]); - - // Access levels: tier defaults for tier presets, read-write default for others. - const accessModes = {}; - for (const p of ordered) { - accessModes[p.name] = tierPresetMap[p.name] ?? "read-write"; - } +function getOllamaProxyToken(): string | null { + return getOllamaProxyTokenWithDeps(); +} - const G = USE_COLOR ? "\x1b[32m" : ""; - const O = USE_COLOR ? "\x1b[38;5;208m" : ""; - const D = USE_COLOR ? "\x1b[2m" : ""; - const R = USE_COLOR ? "\x1b[0m" : ""; - const GREEN_CHECK = USE_COLOR ? `[${G}✓${R}]` : "[✓]"; - const EMPTY_CHECK = USE_COLOR ? `${D}[ ]${R}` : "[ ]"; - const TOGGLE_RW = USE_COLOR ? 
`[${O}rw${R}]` : "[rw]"; - const TOGGLE_R = USE_COLOR ? `${D}[ r]${R}` : "[ r]"; - const label = tierDef ? ` Presets (${tierDef.label} defaults):` : " Presets:"; - const n = ordered.length; +function getOllamaModelDeps() { + return { + getOllamaModelOptions, + getBootstrapOllamaModelOptions, + getDefaultOllamaModel, + prompt, + promptManualModelId, + shellQuote, + root: ROOT, + getOllamaWarmupCommand, + run, + validateOllamaModel, + }; +} - // ── Non-interactive: return tier defaults silently ───────────────── - if (isNonInteractive()) { - return ordered - .filter((p) => included.has(p.name)) - .map((p) => ({ name: p.name, access: accessModes[p.name] })); - } +async function promptOllamaModel(gpu = null) { + return promptOllamaModelWithDeps(gpu, getOllamaModelDeps()); +} - // ── Fallback: non-TTY ───────────────────────────────────────────── - if (!process.stdin.isTTY || !process.stdout.isTTY) { - console.log(""); - console.log(label); - ordered.forEach((p) => { - const isIncluded = included.has(p.name); - const isRw = accessModes[p.name] === "read-write"; - const check = isIncluded ? GREEN_CHECK : EMPTY_CHECK; - const badge = isIncluded ? (isRw ? 
"[rw]" : "[ r]") : " "; - console.log(` ${check} ${badge} ${p.name}`); - }); - console.log(""); - const rawInclude = await prompt( - " Include presets (comma-separated names, Enter to keep defaults): ", - ); - if (rawInclude.trim()) { - const knownNames = new Set(ordered.map((p) => p.name)); - included.clear(); - for (const name of rawInclude - .split(",") - .map((s) => s.trim()) - .filter(Boolean)) { - if (knownNames.has(name)) { - included.add(name); - } else { - console.error(` Unknown preset name ignored: ${name}`); - } - } - } - return ordered - .filter((p) => included.has(p.name)) - .map((p) => ({ name: p.name, access: accessModes[p.name] })); - } +function printOllamaExposureWarning() { + return printOllamaExposureWarningWithDeps(); +} - // ── Raw-mode TUI ───────────────────────────────────────────────── - let cursor = 0; - - const HINT = USE_COLOR - ? ` ${G}↑/↓ j/k${R} ${D}move${R} ${G}Space${R} ${D}include${R} ${G}r${R} ${D}toggle rw${R} ${G}Enter${R} ${D}confirm${R}` - : " ↑/↓ j/k move Space include r toggle rw Enter confirm"; - - const renderLines = () => { - const lines = [label]; - ordered.forEach((p, i) => { - const isIncluded = included.has(p.name); - const isRw = accessModes[p.name] === "read-write"; - const check = isIncluded ? GREEN_CHECK : EMPTY_CHECK; - // badge is 4 visible chars + 1 space; blank when unchecked to keep name aligned - const badge = isIncluded ? (isRw ? TOGGLE_RW + " " : TOGGLE_R + " ") : " "; - const arrow = i === cursor ? 
">" : " "; - lines.push(` ${arrow} ${check} ${badge}${p.name}`); - }); - lines.push(""); - lines.push(HINT); - return lines; - }; +function prepareOllamaModel(model, installedModels = []) { + return prepareOllamaModelWithDeps(model, installedModels, getOllamaModelDeps()); +} - process.stdout.write("\n"); - const initial = renderLines(); - for (const line of initial) process.stdout.write(`${line}\n`); - let lineCount = initial.length; - const redraw = () => { - process.stdout.write(`\x1b[${lineCount}A`); - const lines = renderLines(); - for (const line of lines) process.stdout.write(`\r\x1b[2K${line}\n`); - lineCount = lines.length; - }; +// ── Step 1: Preflight ──────────────────────────────────────────── - process.stdin.setRawMode(true); - process.stdin.resume(); - process.stdin.setEncoding("utf8"); - - return new Promise((resolve) => { - const cleanup = () => { - process.stdin.setRawMode(false); - process.stdin.pause(); - process.stdin.removeListener("data", onData); - process.removeListener("SIGTERM", onSigterm); - }; - - const onSigterm = () => { - cleanup(); - process.exit(1); - }; - process.once("SIGTERM", onSigterm); - - const onData = (key) => { - if (key === "\r" || key === "\n") { - cleanup(); - process.stdout.write("\n"); - resolve( - ordered - .filter((p) => included.has(p.name)) - .map((p) => ({ name: p.name, access: accessModes[p.name] })), - ); - } else if (key === "\x03") { - cleanup(); - process.exit(1); - } else if (key === "\x1b[A" || key === "k") { - cursor = (cursor - 1 + n) % n; - redraw(); - } else if (key === "\x1b[B" || key === "j") { - cursor = (cursor + 1) % n; - redraw(); - } else if (key === " ") { - const name = ordered[cursor].name; - if (included.has(name)) { - included.delete(name); - } else { - included.add(name); - } - redraw(); - } else if (key === "r" || key === "R") { - const name = ordered[cursor].name; - accessModes[name] = accessModes[name] === "read-write" ? 
"read" : "read-write"; - redraw(); - } - }; - - process.stdin.on("data", onData); - }); -} +const hostGatewayApi = createHostGatewayApi({ + step, + assessHost, + planHostRemediation, + printRemediationActions, + isOpenshellInstalled, + installOpenshell, + getInstalledOpenshellVersion, + runCaptureOpenshell, + getBlueprintMinOpenshellVersion, + getBlueprintMaxOpenshellVersion, + versionGte, + getGatewayReuseState, + verifyGatewayContainerRunning, + runOpenshell, + destroyGateway, + clearRegistryAll: () => { + registry.clearAll(); + }, + run, + runCapture, + checkPortAvailable, + sleep, + getPortConflictServiceHints, + getMemoryInfo, + ensureSwap, + isNonInteractive, + prompt, + nimDetectGpu: () => nim.detectGpu(), + processPlatform: process.platform, + gatewayName: GATEWAY_NAME, + dashboardPort: DASHBOARD_PORT, + gatewayPort: GATEWAY_PORT, + scriptsDir: SCRIPTS, + processEnv: process.env, + processArch: process.arch, + log: (...args) => console.log(...args), + error: (...args) => console.error(...args), + exit: (code) => process.exit(code), + openshellShellCommand, + streamGatewayStart, + isGatewayHealthy, + hasStaleGateway, + redact, + compactText, + envInt, + getContainerRuntime, + shouldPatchCoredns, + pruneKnownHostsEntries, + isSelectedGateway, +}); -/** - * Raw-mode TUI preset selector. - * Keys: ↑/↓ or k/j to move, Space to toggle, a to select/unselect all, Enter to confirm. - * Falls back to a simple line-based prompt when stdin is not a TTY. - */ -async function presetsCheckboxSelector(allPresets, initialSelected) { - const selected = new Set(initialSelected); - const n = allPresets.length; - - // ── Zero-presets guard ──────────────────────────────────────────── - if (n === 0) { - console.log(" No policy presets are available."); - return []; - } +const { + preflight, + startGatewayWithOptions, + startGateway, + startGatewayForRecovery, + getGatewayStartEnv, + recoverGatewayRuntime, +} = hostGatewayApi; - const GREEN_CHECK = USE_COLOR ? 
"[\x1b[32m✓\x1b[0m]" : "[✓]"; - - // ── Fallback: non-TTY or redirected stdout (piped input) ────────── - if (!process.stdin.isTTY || !process.stdout.isTTY) { - console.log(""); - console.log(" Available policy presets:"); - allPresets.forEach((p) => { - const marker = selected.has(p.name) ? GREEN_CHECK : "[ ]"; - console.log(` ${marker} ${p.name.padEnd(14)} — ${p.description}`); - }); - console.log(""); - const raw = await prompt(" Select presets (comma-separated names, Enter to skip): "); - if (!raw.trim()) { - console.log(" Skipping policy presets."); - return []; - } - const knownNames = new Set(allPresets.map((p) => p.name)); - const chosen = []; - for (const name of raw - .split(",") - .map((s) => s.trim()) - .filter(Boolean)) { - if (knownNames.has(name)) { - chosen.push(name); - } else { - console.error(` Unknown preset name ignored: ${name}`); - } - } - return chosen; - } +function getFutureShellPathHint(binDir, pathValue = process.env.PATH || "") { + return resolveFutureShellPathHint(binDir, pathValue); +} - // ── Raw-mode TUI ───────────────────────────────────────────────── - let cursor = 0; - - const G = USE_COLOR ? "\x1b[32m" : ""; - const D = USE_COLOR ? "\x1b[2m" : ""; - const R = USE_COLOR ? "\x1b[0m" : ""; - const HINT = USE_COLOR - ? ` ${G}↑/↓ j/k${R} ${D}move${R} ${G}Space${R} ${D}toggle${R} ${G}a${R} ${D}all/none${R} ${G}Enter${R} ${D}confirm${R}` - : " ↑/↓ j/k move Space toggle a all/none Enter confirm"; - - const renderLines = () => { - const lines = [" Available policy presets:"]; - allPresets.forEach((p, i) => { - const check = selected.has(p.name) ? GREEN_CHECK : "[ ]"; - const arrow = i === cursor ? 
">" : " "; - lines.push(` ${arrow} ${check} ${p.name.padEnd(14)} — ${p.description}`); - }); - lines.push(""); - lines.push(HINT); - return lines; - }; +function getPortConflictServiceHints(platform = process.platform, launchAgentPlist = OPENCLAW_LAUNCH_AGENT_PLIST) { + return resolvePortConflictServiceHints(platform, launchAgentPlist); +} - // Initial paint - process.stdout.write("\n"); - const initial = renderLines(); - for (const line of initial) process.stdout.write(`${line}\n`); - let lineCount = initial.length; - - const redraw = () => { - process.stdout.write(`\x1b[${lineCount}A`); - const lines = renderLines(); - for (const line of lines) process.stdout.write(`\r\x1b[2K${line}\n`); - lineCount = lines.length; - }; +// ── Step 3: Sandbox ────────────────────────────────────────────── - process.stdin.setRawMode(true); - process.stdin.resume(); - process.stdin.setEncoding("utf8"); - - return new Promise((resolve) => { - const cleanup = () => { - process.stdin.setRawMode(false); - process.stdin.pause(); - process.stdin.removeListener("data", onData); - process.removeListener("SIGTERM", onSigterm); - }; - - const onSigterm = () => { - cleanup(); - process.exit(1); - }; - process.once("SIGTERM", onSigterm); - - const onData = (key) => { - if (key === "\r" || key === "\n") { - cleanup(); - process.stdout.write("\n"); - resolve([...selected]); - } else if (key === "\x03") { - // Ctrl+C - cleanup(); - process.exit(1); - } else if (key === "\x1b[A" || key === "k") { - cursor = (cursor - 1 + n) % n; - redraw(); - } else if (key === "\x1b[B" || key === "j") { - cursor = (cursor + 1) % n; - redraw(); - } else if (key === " ") { - const name = allPresets[cursor].name; - if (selected.has(name)) selected.delete(name); - else selected.add(name); - redraw(); - } else if (key === "a") { - if (selected.size === n) selected.clear(); - else for (const p of allPresets) selected.add(p.name); - redraw(); - } - }; - - process.stdin.on("data", onData); +async function 
promptValidatedSandboxName() { + return promptValidatedSandboxNameWithDeps({ + promptOrDefault, + validateName, + isNonInteractive, + errorWriter: console.error, + exit: (code) => process.exit(code), }); } -function computeSetupPresetSuggestions(tierName, options = {}) { - const { enabledChannels = null, webSearchConfig = null, provider = null } = options; - const known = Array.isArray(options.knownPresetNames) ? new Set(options.knownPresetNames) : null; - const suggestions = tiers.resolveTierPresets(tierName).map((p) => p.name); - const add = (name) => { - if (suggestions.includes(name)) return; - if (known && !known.has(name)) return; - suggestions.push(name); +// ── Step 5: Sandbox ────────────────────────────────────────────── + +function getCreateSandboxDeps() { + return { + step, + validateName, + promptValidatedSandboxName, + controlUiPort: CONTROL_UI_PORT, + dashboardPort: DASHBOARD_PORT, + getCredential, + normalizeCredentialValue, + messagingChannels: MESSAGING_CHANNELS, + registry, + makeConflictProbe, + isNonInteractive, + promptOrDefault, + getSandboxReuseState, + providerExistsInGateway, + detectMessagingCredentialRotation, + isRecreateSandbox, + upsertMessagingProviders, + note, + ensureDashboardForward, + sandboxState, + hashCredential, + onboardSession, + runOpenshell, + agentOnboard, + stageOptimizedSandboxBuildContext, + root: ROOT, + webSearchBraveApiKeyEnv: webSearch.BRAVE_API_KEY_ENV, + buildSubprocessEnv, + formatEnvAssignment, + runCapture, + sandboxBaseImage: SANDBOX_BASE_IMAGE, + sandboxBaseTag: SANDBOX_BASE_TAG, + pullAndResolveBaseImageDigest, + patchStagedDockerfile, + openshellShellCommand, + streamSandboxCreate, + run, + runCaptureOpenshell, + isSandboxReady, + sleep, + classifySandboxCreateFailure, + printSandboxCreateRecoveryHints, + agentDefs, + runFile, + scriptsDir: SCRIPTS, + gatewayName: GATEWAY_NAME, + discordSnowflakeRe: DISCORD_SNOWFLAKE_RE, }; - if (webSearchConfig) add("brave"); - if (provider && 
LOCAL_INFERENCE_PROVIDERS.includes(provider)) add("local-inference"); - if (Array.isArray(enabledChannels)) { - for (const channel of enabledChannels) add(channel); - } - return suggestions; } // eslint-disable-next-line complexity -async function setupPoliciesWithSelection(sandboxName, options = {}) { - const selectedPresets = Array.isArray(options.selectedPresets) ? options.selectedPresets : null; - const onSelection = typeof options.onSelection === "function" ? options.onSelection : null; - const webSearchConfig = options.webSearchConfig || null; - const enabledChannels = Array.isArray(options.enabledChannels) ? options.enabledChannels : null; - const provider = options.provider || null; - - step(8, 8, "Policy presets"); - - const allPresets = policies.listPresets(); - const applied = policies.getAppliedPresets(sandboxName); - let chosen = selectedPresets; - - // Resume path: caller supplies the preset list from a previous run. - if (chosen && chosen.length > 0) { - if (onSelection) onSelection(chosen); - if (!waitForSandboxReady(sandboxName)) { - console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); - process.exit(1); - } - note(` [resume] Reapplying policy presets: ${chosen.join(", ")}`); - for (const name of chosen) { - if (applied.includes(name)) continue; - policies.applyPreset(sandboxName, name); - } - return chosen; - } - - // Tier selection — determines the default preset list for this install. 
- const tierName = await selectPolicyTier(); - registry.updateSandbox(sandboxName, { policyTier: tierName }); - const suggestions = computeSetupPresetSuggestions(tierName, { - enabledChannels, - webSearchConfig, +async function createSandbox( + gpu, + model, + provider, + preferredInferenceApi = null, + sandboxNameOverride = null, + webSearchConfig = null, + enabledChannels = null, + fromDockerfile = null, + agent = null, + dangerouslySkipPermissions = false, +) { + return runCreateSandbox( + gpu, + model, provider, - knownPresetNames: allPresets.map((p) => p.name), - }); - - if (isNonInteractive()) { - const policyMode = (process.env.NEMOCLAW_POLICY_MODE || "suggested").trim().toLowerCase(); - chosen = suggestions; - - if (policyMode === "skip" || policyMode === "none" || policyMode === "no") { - note(" [non-interactive] Skipping policy presets."); - return []; - } - - if (policyMode === "custom" || policyMode === "list") { - chosen = parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS); - if (chosen.length === 0) { - console.error(" NEMOCLAW_POLICY_PRESETS is required when NEMOCLAW_POLICY_MODE=custom."); - process.exit(1); - } - } else if (policyMode === "suggested" || policyMode === "default" || policyMode === "auto") { - const envPresets = parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS); - if (envPresets.length > 0) chosen = envPresets; - } else { - console.error(` Unsupported NEMOCLAW_POLICY_MODE: ${policyMode}`); - console.error(" Valid values: suggested, custom, skip"); - process.exit(1); - } - - const knownPresets = new Set(allPresets.map((p) => p.name)); - const invalidPresets = chosen.filter((name) => !knownPresets.has(name)); - if (invalidPresets.length > 0) { - console.error(` Unknown policy preset(s): ${invalidPresets.join(", ")}`); - process.exit(1); - } - - if (onSelection) onSelection(chosen); - if (!waitForSandboxReady(sandboxName)) { - console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); - 
process.exit(1); - } - note(` [non-interactive] Applying policy presets: ${chosen.join(", ")}`); - for (const name of chosen) { - for (let attempt = 0; attempt < 3; attempt += 1) { - try { - policies.applyPreset(sandboxName, name); - break; - } catch (err) { - const message = err && err.message ? err.message : String(err); - if (!message.includes("sandbox not found") || attempt === 2) { - throw err; - } - sleep(2); - } - } - } - return chosen; - } - - // Interactive: combined tier preset selector + access-mode toggle. - // extraSelected seeds the initial checked state beyond the tier defaults: - // - presets already applied from a previous run - // - credential-based additions from suggestions (e.g. brave when webSearchConfig is set) - const knownNames = new Set(allPresets.map((p) => p.name)); - const extraSelected = [ - ...applied.filter((name) => knownNames.has(name)), - ...suggestions.filter((name) => knownNames.has(name) && !applied.includes(name)), - ]; - const resolvedPresets = await selectTierPresetsAndAccess(tierName, allPresets, extraSelected); - const interactiveChoice = resolvedPresets.map((p) => p.name); - - if (onSelection) onSelection(interactiveChoice); - if (!waitForSandboxReady(sandboxName)) { - console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); - process.exit(1); - } + preferredInferenceApi, + sandboxNameOverride, + webSearchConfig, + enabledChannels, + fromDockerfile, + agent, + dangerouslySkipPermissions, + getCreateSandboxDeps(), + ); +} - const accessByName = {}; - for (const p of resolvedPresets) accessByName[p.name] = p.access; - const newlySelected = interactiveChoice.filter((name) => !applied.includes(name)); - const deselected = applied.filter((name) => !interactiveChoice.includes(name)); - - for (const name of deselected) { - for (let attempt = 0; attempt < 3; attempt += 1) { - try { - if (!policies.removePreset(sandboxName, name)) { - throw new Error(`Failed to remove preset '${name}'.`); - } - break; - } 
catch (err) { - const message = err && err.message ? err.message : String(err); - if (!message.includes("sandbox not found") || attempt === 2) { - throw err; - } - sleep(2); - } - } - } +function getRequestedSandboxNameHint(env = process.env) { + return resolveRequestedSandboxNameHint(env); +} - for (const name of newlySelected) { - for (let attempt = 0; attempt < 3; attempt += 1) { - try { - // Pass access mode so applyPreset can distinguish read vs read-write - // when preset infrastructure supports it. - policies.applyPreset(sandboxName, name, { access: accessByName[name] }); - break; - } catch (err) { - const message = err && err.message ? err.message : String(err); - if (!message.includes("sandbox not found") || attempt === 2) { - throw err; - } - sleep(2); - } - } - } - return interactiveChoice; +function getResumeSandboxConflict(session, env = process.env) { + return detectRequestedResumeSandboxConflict(session, env); } -// ── Dashboard ──────────────────────────────────────────────────── +function getRequestedProviderHint(nonInteractive = isNonInteractive()) { + return resolveRequestedProviderHint(nonInteractive, { + env: process.env, + error: console.error, + exit: (code) => process.exit(code), + }); +} -const CONTROL_UI_PORT = DASHBOARD_PORT; +function getRequestedModelHint(nonInteractive = isNonInteractive()) { + return resolveRequestedModelHint(nonInteractive, { + env: process.env, + error: console.error, + exit: (code) => process.exit(code), + isSafeModelId, + }); +} -// Dashboard helpers — delegated to src/lib/dashboard.ts -// isLoopbackHostname — see urlUtils import above -const { resolveDashboardForwardTarget, buildControlUiUrls } = dashboard; +function getEffectiveProviderName(providerKey) { + return resolveEffectiveProviderName(providerKey, REMOTE_PROVIDER_CONFIG); +} -function ensureDashboardForward(sandboxName, chatUiUrl = `http://127.0.0.1:${CONTROL_UI_PORT}`) { - const portToStop = getDashboardForwardPort(chatUiUrl); - const forwardTarget = 
getDashboardForwardTarget(chatUiUrl); - runOpenshell(["forward", "stop", portToStop], { ignoreError: true }); - // Use stdio "ignore" to prevent spawnSync from waiting on inherited pipe fds. - // The --background flag forks a child that inherits stdout/stderr; if those are - // pipes, spawnSync blocks until the background process exits (never). - const fwdResult = runOpenshell(["forward", "start", "--background", forwardTarget, sandboxName], { - ignoreError: true, - stdio: ["ignore", "ignore", "ignore"], +function getResumeConfigConflicts(session, opts = {}) { + return collectRequestedResumeConfigConflicts(session, { + nonInteractive: opts.nonInteractive ?? isNonInteractive(), + fromDockerfile: opts.fromDockerfile || null, + agent: opts.agent || null, + env: process.env, + error: console.error, + exit: (code) => process.exit(code), + isSafeModelId, + remoteProviderConfig: REMOTE_PROVIDER_CONFIG, }); - // A non-zero exit from the parent means forward start rejected before forking — - // typically because the port is already bound by another process (e.g. a local - // Docker test container with -p PORT:PORT). The error is otherwise swallowed by - // ignoreError + stdio:ignore, leaving the dashboard URL silently unreachable (#1925). - if (fwdResult && fwdResult.status !== 0) { - console.warn(`! 
Port ${portToStop} forward did not start — port may be in use by another process.`); - console.warn(` Check: docker ps --format 'table {{.Names}}\\t{{.Ports}}' | grep ${portToStop}`); - console.warn(` Free the port, then reconnect: nemoclaw ${sandboxName} connect`); - } } -function findOpenclawJsonPath(dir) { - if (!fs.existsSync(dir)) return null; - const entries = fs.readdirSync(dir, { withFileTypes: true }); - for (const e of entries) { - const p = path.join(dir, e.name); - if (e.isDirectory()) { - const found = findOpenclawJsonPath(p); - if (found) return found; - } else if (e.name === "openclaw.json") { - return p; - } - } - return null; +function getNonInteractiveProvider() { + return resolveNonInteractiveProvider({ + env: process.env, + error: console.error, + exit: (code) => process.exit(code), + }); } -/** - * Pull gateway.auth.token from the sandbox image via openshell sandbox download - * so onboard can print copy-paste Control UI URLs with #token= (same idea as nemoclaw-start.sh). - */ -function fetchGatewayAuthTokenFromSandbox(sandboxName) { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-token-")); - try { - const destDir = `${tmpDir}${path.sep}`; - const result = runOpenshell( - ["sandbox", "download", sandboxName, "/sandbox/.openclaw/openclaw.json", destDir], - { ignoreError: true, stdio: ["ignore", "ignore", "ignore"] }, - ); - if (result.status !== 0) return null; - const jsonPath = findOpenclawJsonPath(tmpDir); - if (!jsonPath) return null; - const cfg = JSON.parse(fs.readFileSync(jsonPath, "utf-8")); - const token = cfg && cfg.gateway && cfg.gateway.auth && cfg.gateway.auth.token; - return typeof token === "string" && token.length > 0 ? 
token : null; - } catch { - return null; - } finally { - try { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } catch { - // ignore cleanup errors - } - } +function getNonInteractiveModel(providerKey) { + return resolveNonInteractiveModel(providerKey, { + env: process.env, + error: console.error, + exit: (code) => process.exit(code), + isSafeModelId, + }); } -// buildControlUiUrls — see dashboard import above +function sleep(seconds) { + return sleepWithDeps(seconds); +} -function getDashboardForwardPort( - chatUiUrl = process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`, -) { - const forwardTarget = resolveDashboardForwardTarget(chatUiUrl); - return forwardTarget.includes(":") - ? (forwardTarget.split(":").pop() ?? String(CONTROL_UI_PORT)) - : forwardTarget; +function destroyGateway() { + return destroyGatewayWithDeps(GATEWAY_NAME, { + runOpenshell, + clearRegistryAll: () => { + registry.clearAll(); + }, + run, + }); } -function getDashboardForwardTarget( - chatUiUrl = process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`, - options = {}, -) { - const port = getDashboardForwardPort(chatUiUrl); - return isWsl(options) ? 
`0.0.0.0:${port}` : resolveDashboardForwardTarget(chatUiUrl); +function installOpenshell() { + const result = installOpenshellWithDepsRuntime({ + scriptPath: path.join(SCRIPTS, "install-openshell.sh"), + rootDir: ROOT, + env: process.env, + getFutureShellPathHint, + errorWriter: console.error, + }); + if (result.updatedPathValue) { + process.env.PATH = result.updatedPathValue; + } + OPENSHELL_BIN = result.openshellBinary; + return { + installed: result.installed, + localBin: result.localBin, + futureShellPathHint: result.futureShellPathHint, + }; } -function getDashboardForwardStartCommand(sandboxName, options = {}) { - const chatUiUrl = - options.chatUiUrl || process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`; - const forwardTarget = getDashboardForwardTarget(chatUiUrl, options); - return `${openshellShellCommand( - ["forward", "start", "--background", forwardTarget, sandboxName], - options, - )}`; +function isOpenshellInstalled() { + return isOpenshellInstalledWithDepsRuntime(); } -function buildAuthenticatedDashboardUrl(baseUrl, token = null) { - if (!token) return baseUrl; - return `${baseUrl}#token=${encodeURIComponent(token)}`; +function getContainerRuntime() { + return getContainerRuntimeWithDeps(runCapture); } -function getWslHostAddress(options = {}) { - if (options.wslHostAddress) { - return options.wslHostAddress; - } - if (!isWsl(options)) { - return null; - } - const runCaptureFn = options.runCapture || runCapture; - const output = runCaptureFn("hostname -I 2>/dev/null", { ignoreError: true }); - const candidates = String(output || "") - .trim() - .split(/\s+/) - .filter(Boolean); - return candidates[0] || null; +function printRemediationActions(actions) { + return printRemediationActionsWithDeps(actions, console.error); } -function getDashboardAccessInfo(sandboxName, options = {}) { - const token = Object.prototype.hasOwnProperty.call(options, "token") - ? 
options.token - : fetchGatewayAuthTokenFromSandbox(sandboxName); - const chatUiUrl = - options.chatUiUrl || process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`; - const dashboardPort = Number(getDashboardForwardPort(chatUiUrl)); - const dashboardAccess = buildControlUiUrls(token, dashboardPort).map((url, index) => ({ - label: index === 0 ? "Dashboard" : `Alt ${index}`, - url: buildAuthenticatedDashboardUrl(url, null), - })); - - const wslHostAddress = getWslHostAddress(options); - if (wslHostAddress) { - const wslUrl = buildAuthenticatedDashboardUrl( - `http://${wslHostAddress}:${dashboardPort}/`, - token, - ); - if (!dashboardAccess.some((access) => access.url === wslUrl)) { - dashboardAccess.push({ label: "VS Code/WSL", url: wslUrl }); - } +async function ensureNamedCredential(envName, label, helpUrl = null) { + let key = getCredential(envName); + if (key) { + process.env[envName] = key; + return key; } - - return dashboardAccess; + return replaceNamedCredential(envName, label, helpUrl); } -function getDashboardGuidanceLines(dashboardAccess = [], options = {}) { - const dashboardPort = getDashboardForwardPort( - options.chatUiUrl || process.env.CHAT_UI_URL || `http://127.0.0.1:${CONTROL_UI_PORT}`, +function waitForSandboxReady(sandboxName, attempts = 10, delaySeconds = 2) { + return waitForSandboxReadyWithDepsRuntime( + sandboxName, + { + runCaptureOpenshell, + }, + attempts, + delaySeconds, ); - const guidance = [`Port ${dashboardPort} must be forwarded before opening these URLs.`]; - if (isWsl(options)) { - guidance.push( - "WSL detected: if localhost fails in Windows, use the WSL host IP shown by `hostname -I`.", - ); - } - if (dashboardAccess.length === 0) { - guidance.push("No dashboard URLs were generated."); - } - return guidance; } -function printDashboard(sandboxName, model, provider, nimContainer = null, agent = null) { - const nimStat = nimContainer ? 
nim.nimStatusByName(nimContainer) : nim.nimStatus(sandboxName); - const nimLabel = nimStat.running ? "running" : "not running"; - - let providerLabel = provider; - if (provider === "nvidia-prod" || provider === "nvidia-nim") providerLabel = "NVIDIA Endpoints"; - else if (provider === "openai-api") providerLabel = "OpenAI"; - else if (provider === "anthropic-prod") providerLabel = "Anthropic"; - else if (provider === "compatible-anthropic-endpoint") - providerLabel = "Other Anthropic-compatible endpoint"; - else if (provider === "gemini-api") providerLabel = "Google Gemini"; - else if (provider === "compatible-endpoint") providerLabel = "Other OpenAI-compatible endpoint"; - else if (provider === "vllm-local") providerLabel = "Local vLLM"; - else if (provider === "ollama-local") providerLabel = "Local Ollama"; - - const token = fetchGatewayAuthTokenFromSandbox(sandboxName); - const dashboardAccess = getDashboardAccessInfo(sandboxName, { token }); - const guidanceLines = getDashboardGuidanceLines(dashboardAccess); +// ── Step 3: Inference selection ────────────────────────────────── - console.log(""); - console.log(` ${"─".repeat(50)}`); - // console.log(` Dashboard http://localhost:${DASHBOARD_PORT}/`); - console.log(` Sandbox ${sandboxName} (Landlock + seccomp + netns)`); - console.log(` Model ${model} (${providerLabel})`); - console.log(` NIM ${nimLabel}`); - console.log(` ${"─".repeat(50)}`); - console.log(` Run: nemoclaw ${sandboxName} connect`); - console.log(` Status: nemoclaw ${sandboxName} status`); - console.log(` Logs: nemoclaw ${sandboxName} logs --follow`); - console.log(""); - if (agent) { - agentOnboard.printDashboardUi(sandboxName, token, agent, { - note, - buildControlUiUrls: (tokenValue, port) => { - const urls = buildControlUiUrls(tokenValue, port); - const wslHostAddress = getWslHostAddress(); - if (wslHostAddress) { - const wslUrl = buildAuthenticatedDashboardUrl( - `http://${wslHostAddress}:${port}/`, - tokenValue, - ); - if 
(!urls.includes(wslUrl)) { - urls.push(wslUrl); - } - } - return urls; - }, - }); - } else if (token) { - console.log(" OpenClaw UI (tokenized URL; treat it like a password)"); - for (const line of guidanceLines) { - console.log(` ${line}`); - } - for (const entry of dashboardAccess) { - console.log(` ${entry.label}: ${entry.url}`); - } - } else { - note(" Could not read gateway token from the sandbox (download failed)."); - console.log(" OpenClaw UI"); - for (const line of guidanceLines) { - console.log(` ${line}`); - } - for (const entry of dashboardAccess) { - console.log(` ${entry.label}: ${entry.url}`); - } - console.log( - ` Token: nemoclaw ${sandboxName} connect → jq -r '.gateway.auth.token' /sandbox/.openclaw/openclaw.json`, - ); - console.log( - ` append #token= to the URL, or see /tmp/gateway.log inside the sandbox.`, - ); - } - console.log(` ${"─".repeat(50)}`); - console.log(""); -} +const { TELEGRAM_NETWORK_CURL_CODES } = require("./onboard-telegram"); + +const inferenceRuntimeApi = createInferenceRuntimeApi({ + step, + remoteProviderConfig: REMOTE_PROVIDER_CONFIG, + runCapture, + ollamaPort: OLLAMA_PORT, + vllmPort: VLLM_PORT, + ollamaProxyPort: OLLAMA_PROXY_PORT, + experimental: EXPERIMENTAL, + isNonInteractive, + getNonInteractiveProvider, + getNonInteractiveModel, + note, + prompt, + getNavigationChoice, + exitOnboardFromPrompt, + normalizeProviderBaseUrl, + validateNvidiaApiKeyValue, + ensureApiKey, + defaultCloudModel: DEFAULT_CLOUD_MODEL, + promptCloudModel, + ensureNamedCredential, + getProbeAuthMode, + validateOpenAiLikeModel, + getCredential, + validateAnthropicModel, + anthropicEndpointUrl: ANTHROPIC_ENDPOINT_URL, + promptRemoteModel, + promptInputModel, + backToSelection: BACK_TO_SELECTION, + validateCustomOpenAiLikeSelection, + validateCustomAnthropicSelection, + validateAnthropicSelectionWithRetryMessage, + validateOpenAiLikeSelection, + shouldRequireResponsesToolCalling, + shouldSkipResponsesProbe, + nim, + gatewayName: GATEWAY_NAME, + 
getLocalProviderBaseUrl, + getLocalProviderValidationBaseUrl, + processPlatform: process.platform, + validateLocalProvider, + isWsl, + run, + sleep, + printOllamaExposureWarning, + startOllamaAuthProxy, + getOllamaModelOptions, + getDefaultOllamaModel, + promptOllamaModel, + prepareOllamaModel, + isSafeModelId, + runOpenshell, + hydrateCredentialEnv, + upsertProvider, + promptValidationRecovery, + classifyApplyFailure, + compactText, + redact, + localInferenceTimeoutSecs: LOCAL_INFERENCE_TIMEOUT_SECS, + ensureOllamaAuthProxy, + getOllamaProxyToken, + persistProxyToken, + getOllamaWarmupCommand, + validateOllamaModel, + verifyInferenceRoute, + updateSandbox: (name, patch) => { + registry.updateSandbox(name, patch); + }, + runCurlProbe, + promptOrDefault, + log: (...args) => console.log(...args), + error: (...args) => console.error(...args), + exit: (code) => process.exit(code), + normalizeCredentialValue, + saveCredential, + env: process.env, + stdin: process.stdin, + stderr: process.stderr, + isInteractiveTty: process.stdout.isTTY, + noteLog: (...args) => console.log(...args), + getProviderSelectionConfig, + writeSandboxConfigSyncFile, + openshellShellCommand, + shellQuote, + cleanupTempDir, + fetchGatewayAuthTokenFromSandbox: (sandboxName) => + fetchGatewayAuthTokenFromSandboxWithDeps(sandboxName, { runOpenshell }), + secureTempFile, +}); -function startRecordedStep(stepName, updates = {}) { - onboardSession.markStepStarted(stepName); - if (Object.keys(updates).length > 0) { - onboardSession.updateSession((session) => { - if (typeof updates.sandboxName === "string") session.sandboxName = updates.sandboxName; - if (typeof updates.provider === "string") session.provider = updates.provider; - if (typeof updates.model === "string") session.model = updates.model; - return session; - }); - } +const { + setupNim, + setupInference, + checkTelegramReachability, + setupMessagingChannels, + getSuggestedPolicyPresets, + setupOpenclaw, +} = inferenceRuntimeApi; + +function 
computeSetupPresetSuggestions(tierName, options = {}) { + return computeSetupPresetSuggestionsWithDeps(tierName, { + ...options, + resolveTierPresets: (name) => tiers.resolveTierPresets(name), + }); } -const ONBOARD_STEP_INDEX = { - preflight: { number: 1, title: "Preflight checks" }, - gateway: { number: 2, title: "Starting OpenShell gateway" }, - provider_selection: { number: 3, title: "Configuring inference (NIM)" }, - inference: { number: 4, title: "Setting up inference provider" }, - messaging: { number: 5, title: "Messaging channels" }, - sandbox: { number: 6, title: "Creating sandbox" }, - openclaw: { number: 7, title: "Setting up OpenClaw inside sandbox" }, - policies: { number: 8, title: "Policy presets" }, -}; +// ── Step 7: Policy presets ─────────────────────────────────────── + +const policyUiApi = createPolicyUiApi({ + step, + prompt, + note, + sleep, + isNonInteractive, + parsePolicyPresetEnv, + waitForSandboxReady, + localInferenceProviders: LOCAL_INFERENCE_PROVIDERS, + useColor: USE_COLOR, + policies, + tiers, + updateSandbox: (name, patch) => { + registry.updateSandbox(name, patch); + }, + getSuggestedPolicyPresets, +}); + +const { + setupPoliciesLegacy: _setupPolicies, + arePolicyPresetsApplied, + selectPolicyTier, + selectTierPresetsAndAccess, + presetsCheckboxSelector, + setupPoliciesWithSelection, +} = policyUiApi; +// ── Dashboard ──────────────────────────────────────────────────── + +const CONTROL_UI_PORT = DASHBOARD_PORT; + +// Dashboard helpers — delegated to src/lib/dashboard.ts +// isLoopbackHostname — see urlUtils import above +const { resolveDashboardForwardTarget, buildControlUiUrls } = dashboard; + +const dashboardApi = createDashboardApi({ + controlUiPort: CONTROL_UI_PORT, + runOpenshell, + warningWriter: console.warn, + openshellShellCommand, + runCapture, + nimStatusByName: (containerName) => nim.nimStatusByName(containerName), + nimStatus: (sandboxName) => nim.nimStatus(sandboxName), + note, + log: console.log, + 
printAgentDashboardUi: agentOnboard.printDashboardUi, + buildControlUiUrls, +}); + +const { + ensureDashboardForward, + fetchGatewayAuthTokenFromSandbox, + getDashboardForwardStartCommand, + getDashboardAccessInfo, + printDashboard, +} = dashboardApi; + +const TOTAL_ONBOARD_STEPS = Math.max( + ...Object.values(ONBOARD_STEP_META).map((meta) => meta.number), +); function skippedStepMessage(stepName, detail, reason = "resume") { - const stepInfo = ONBOARD_STEP_INDEX[stepName]; + const visibleStepName = isOnboardStepName(stepName) ? toVisibleStepName(stepName) : null; + const stepInfo = visibleStepName ? ONBOARD_STEP_META[visibleStepName] : null; if (stepInfo) { - step(stepInfo.number, 8, stepInfo.title); + step(stepInfo.number, TOTAL_ONBOARD_STEPS, stepInfo.title); } const prefix = reason === "reuse" ? "[reuse]" : "[resume]"; console.log(` ${prefix} Skipping ${stepName}${detail ? ` (${detail})` : ""}`); @@ -5825,476 +1247,100 @@ function skippedStepMessage(stepName, detail, reason = "resume") { // eslint-disable-next-line complexity async function onboard(opts = {}) { - NON_INTERACTIVE = opts.nonInteractive || process.env.NEMOCLAW_NON_INTERACTIVE === "1"; - RECREATE_SANDBOX = opts.recreateSandbox || process.env.NEMOCLAW_RECREATE_SANDBOX === "1"; - const dangerouslySkipPermissions = - opts.dangerouslySkipPermissions || process.env.NEMOCLAW_DANGEROUSLY_SKIP_PERMISSIONS === "1"; - if (dangerouslySkipPermissions) { - console.error(""); - console.error( - " \u26a0 --dangerously-skip-permissions: sandbox security restrictions disabled.", - ); - console.error(" Network: all known endpoints open (no method/path filtering)"); - console.error(" Filesystem: sandbox home directory is writable"); - console.error(" Use for development/testing only."); - console.error(""); - } - delete process.env.OPENSHELL_GATEWAY; - const resume = opts.resume === true; - // In non-interactive mode also accept the env var so CI pipelines can set it. 
- // This is the explicitly requested value; on resume it may be absent and the - // session-recorded path is used instead (see below). - const requestedFromDockerfile = - opts.fromDockerfile || - (isNonInteractive() ? process.env.NEMOCLAW_FROM_DOCKERFILE || null : null); - const noticeAccepted = await ensureUsageNoticeConsent({ - nonInteractive: isNonInteractive(), - acceptedByFlag: opts.acceptThirdPartySoftware === true, - writeLine: console.error, - }); - if (!noticeAccepted) { - process.exit(1); - } - // Validate NEMOCLAW_PROVIDER early so invalid values fail before - // preflight (Docker/OpenShell checks). Without this, users see a - // misleading 'Docker is not reachable' error instead of the real - // problem: an unsupported provider value. - getRequestedProviderHint(); - const lockResult = onboardSession.acquireOnboardLock( - `nemoclaw onboard${resume ? " --resume" : ""}${isNonInteractive() ? " --non-interactive" : ""}${requestedFromDockerfile ? ` --from ${requestedFromDockerfile}` : ""}`, - ); - if (!lockResult.acquired) { - console.error(" Another NemoClaw onboarding run is already in progress."); - if (lockResult.holderPid) { - console.error(` Lock holder PID: ${lockResult.holderPid}`); - } - if (lockResult.holderStartedAt) { - console.error(` Started: ${lockResult.holderStartedAt}`); - } - console.error(" Wait for it to finish, or remove the stale lock if the previous run crashed:"); - console.error(` rm -f "${lockResult.lockFile}"`); - process.exit(1); - } - - let lockReleased = false; - const releaseOnboardLock = () => { - if (lockReleased) return; - lockReleased = true; - onboardSession.releaseOnboardLock(); - }; - process.once("exit", releaseOnboardLock); - - try { - let session; - let selectedMessagingChannels = []; - // Merged, absolute fromDockerfile: explicit flag/env takes precedence; on - // resume falls back to what the original session recorded so the same image - // is used even when --from is omitted from the resume invocation. 
- let fromDockerfile; - if (resume) { - session = onboardSession.loadSession(); - if (!session || session.resumable === false) { - console.error(" No resumable onboarding session was found."); - console.error(" Run: nemoclaw onboard"); - process.exit(1); - } - const sessionFrom = session?.metadata?.fromDockerfile || null; - fromDockerfile = requestedFromDockerfile - ? path.resolve(requestedFromDockerfile) - : sessionFrom - ? path.resolve(sessionFrom) - : null; - const resumeConflicts = getResumeConfigConflicts(session, { - nonInteractive: isNonInteractive(), - fromDockerfile: requestedFromDockerfile, - agent: opts.agent || null, - }); - if (resumeConflicts.length > 0) { - for (const conflict of resumeConflicts) { - if (conflict.field === "sandbox") { - console.error( - ` Resumable state belongs to sandbox '${conflict.recorded}', not '${conflict.requested}'.`, - ); - } else if (conflict.field === "agent") { - console.error( - ` Session was started with agent '${conflict.recorded}', not '${conflict.requested}'.`, - ); - } else if (conflict.field === "fromDockerfile") { - if (!conflict.recorded) { - console.error( - ` Session was started without --from; add --from '${conflict.requested}' to resume it.`, - ); - } else if (!conflict.requested) { - console.error( - ` Session was started with --from '${conflict.recorded}'; rerun with that path to resume it.`, - ); - } else { - console.error( - ` Session was started with --from '${conflict.recorded}', not '${conflict.requested}'.`, - ); - } - } else { - console.error( - ` Resumable state recorded ${conflict.field} '${conflict.recorded}', not '${conflict.requested}'.`, - ); - } - } - console.error(" Run: nemoclaw onboard # start a fresh onboarding session"); - console.error(" Or rerun with the original settings to continue that session."); - process.exit(1); - } - onboardSession.updateSession((current) => { - current.mode = isNonInteractive() ? 
"non-interactive" : "interactive"; - current.failure = null; - current.status = "in_progress"; - return current; - }); - session = onboardSession.loadSession(); - } else { - fromDockerfile = requestedFromDockerfile ? path.resolve(requestedFromDockerfile) : null; - session = onboardSession.saveSession( - onboardSession.createSession({ - mode: isNonInteractive() ? "non-interactive" : "interactive", - metadata: { gatewayName: "nemoclaw", fromDockerfile: fromDockerfile || null }, - }), - ); - } - - let completed = false; - process.once("exit", (code) => { - if (!completed && code !== 0) { - const current = onboardSession.loadSession(); - const failedStep = current?.lastStepStarted; - if (failedStep) { - onboardSession.markStepFailed(failedStep, "Onboarding exited before the step completed."); - } - } - }); - - console.log(""); - console.log(" NemoClaw Onboarding"); - if (isNonInteractive()) note(" (non-interactive mode)"); - if (resume) note(" (resume mode)"); - console.log(" ==================="); - - const agent = agentOnboard.resolveAgent({ agentFlag: opts.agent, session }); - if (agent) { - onboardSession.updateSession((s) => { - s.agent = agent.name; - return s; - }); - } - - let gpu; - const resumePreflight = resume && session?.steps?.preflight?.status === "complete"; - if (resumePreflight) { - skippedStepMessage("preflight", "cached"); - gpu = nim.detectGpu(); - } else { - startRecordedStep("preflight"); - gpu = await preflight(); - onboardSession.markStepComplete("preflight"); - } - - const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); - const gatewayInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { - ignoreError: true, - }); - const activeGatewayInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); - let gatewayReuseState = getGatewayReuseState(gatewayStatus, gatewayInfo, activeGatewayInfo); - - // Verify the gateway container is actually running — openshell CLI metadata - // can be stale after a 
manual `docker rm`. See #2020. - if (gatewayReuseState === "healthy") { - const containerState = verifyGatewayContainerRunning(); - if (containerState === "missing") { - console.log(" Gateway metadata is stale (container not running). Cleaning up..."); - runOpenshell(["forward", "stop", String(DASHBOARD_PORT)], { ignoreError: true }); - destroyGateway(); - registry.clearAll(); - gatewayReuseState = "missing"; - console.log(" ✓ Stale gateway metadata cleaned up"); - } else if (containerState === "unknown") { - console.log(" Warning: could not verify gateway container state (Docker may be unavailable). Proceeding with cached health status."); - } - } - - const canReuseHealthyGateway = gatewayReuseState === "healthy"; - const resumeGateway = - resume && session?.steps?.gateway?.status === "complete" && canReuseHealthyGateway; - if (resumeGateway) { - skippedStepMessage("gateway", "running"); - } else if (!resume && canReuseHealthyGateway) { - skippedStepMessage("gateway", "running", "reuse"); - note(" Reusing healthy NemoClaw gateway."); - } else { - if (resume && session?.steps?.gateway?.status === "complete") { - if (gatewayReuseState === "active-unnamed") { - note(" [resume] Gateway is active but named metadata is missing; recreating it safely."); - } else if (gatewayReuseState === "foreign-active") { - note(" [resume] A different OpenShell gateway is active; NemoClaw will not reuse it."); - } else if (gatewayReuseState === "stale") { - note(" [resume] Recorded gateway is unhealthy; recreating it."); - } else { - note(" [resume] Recorded gateway state is unavailable; recreating it."); - } - } - startRecordedStep("gateway"); - await startGateway(gpu); - onboardSession.markStepComplete("gateway"); - } - - let sandboxName = session?.sandboxName || null; - let model = session?.model || null; - let provider = session?.provider || null; - let endpointUrl = session?.endpointUrl || null; - let credentialEnv = session?.credentialEnv || null; - let preferredInferenceApi = 
session?.preferredInferenceApi || null; - let nimContainer = session?.nimContainer || null; - let webSearchConfig = session?.webSearchConfig || null; - let forceProviderSelection = false; - while (true) { - const resumeProviderSelection = - !forceProviderSelection && - resume && - session?.steps?.provider_selection?.status === "complete" && - typeof provider === "string" && - typeof model === "string"; - if (resumeProviderSelection) { - skippedStepMessage("provider_selection", `${provider} / ${model}`); - hydrateCredentialEnv(credentialEnv); - } else { - startRecordedStep("provider_selection", { sandboxName }); - const selection = await setupNim(gpu); - model = selection.model; - provider = selection.provider; - endpointUrl = selection.endpointUrl; - credentialEnv = selection.credentialEnv; - preferredInferenceApi = selection.preferredInferenceApi; - nimContainer = selection.nimContainer; - onboardSession.markStepComplete("provider_selection", { - sandboxName, - provider, - model, - endpointUrl, - credentialEnv, - preferredInferenceApi, - nimContainer, - }); - } - - process.env.NEMOCLAW_OPENSHELL_BIN = getOpenshellBinary(); - const resumeInference = - !forceProviderSelection && - resume && - typeof provider === "string" && - typeof model === "string" && - isInferenceRouteReady(provider, model); - if (resumeInference) { - skippedStepMessage("inference", `${provider} / ${model}`); - if (nimContainer) { - registry.updateSandbox(sandboxName, { nimContainer }); - } - onboardSession.markStepComplete("inference", { - sandboxName, - provider, - model, - nimContainer, - }); - break; - } - - startRecordedStep("inference", { sandboxName, provider, model }); - const inferenceResult = await setupInference( - sandboxName, - model, - provider, - endpointUrl, - credentialEnv, - ); - delete process.env.NVIDIA_API_KEY; - if (inferenceResult?.retry === "selection") { - forceProviderSelection = true; - continue; - } - if (nimContainer) { - registry.updateSandbox(sandboxName, { 
nimContainer }); - } - onboardSession.markStepComplete("inference", { sandboxName, provider, model, nimContainer }); - break; - } - - const sandboxReuseState = getSandboxReuseState(sandboxName); - const webSearchConfigChanged = Boolean(session?.webSearchConfig) !== Boolean(webSearchConfig); - const resumeSandbox = - resume && - !webSearchConfigChanged && - session?.steps?.sandbox?.status === "complete" && - sandboxReuseState === "ready"; - if (resumeSandbox) { - if (webSearchConfig) { - note(" [resume] Reusing Brave Search configuration already baked into the sandbox."); - } - skippedStepMessage("sandbox", sandboxName); - } else { - if (resume && session?.steps?.sandbox?.status === "complete") { - if (webSearchConfigChanged) { - note(" [resume] Web Search configuration changed; recreating sandbox."); - if (sandboxName) { - registry.removeSandbox(sandboxName); - } - } else if (sandboxReuseState === "not_ready") { - note( - ` [resume] Recorded sandbox '${sandboxName}' exists but is not ready; recreating it.`, - ); - repairRecordedSandbox(sandboxName); - } else { - note(" [resume] Recorded sandbox state is unavailable; recreating it."); - if (sandboxName) { - registry.removeSandbox(sandboxName); - } - } - } - let nextWebSearchConfig = webSearchConfig; - if (nextWebSearchConfig) { - note(" [resume] Revalidating Brave Search configuration for sandbox recreation."); - const braveApiKey = await ensureValidatedBraveSearchCredential(); - nextWebSearchConfig = braveApiKey ? 
{ fetchEnabled: true } : null; - if (nextWebSearchConfig) { - note(" [resume] Reusing Brave Search configuration."); - } - } else { - nextWebSearchConfig = await configureWebSearch(null); - } - startRecordedStep("sandbox", { sandboxName, provider, model }); - selectedMessagingChannels = await setupMessagingChannels(); - onboardSession.updateSession((current) => { - current.messagingChannels = selectedMessagingChannels; - return current; - }); - sandboxName = await createSandbox( - gpu, - model, - provider, - preferredInferenceApi, - sandboxName, - nextWebSearchConfig, - selectedMessagingChannels, - fromDockerfile, - agent, - dangerouslySkipPermissions, - ); - webSearchConfig = nextWebSearchConfig; - // Persist model and provider after the sandbox entry exists in the registry. - // updateSandbox() silently no-ops when the entry is missing, so this must - // run after createSandbox() / registerSandbox() — not before. Fixes #1881. - registry.updateSandbox(sandboxName, { model, provider }); - onboardSession.markStepComplete("sandbox", { - sandboxName, - provider, - model, - nimContainer, - webSearchConfig, - }); - } - - if (agent) { - await agentOnboard.handleAgentSetup(sandboxName, model, provider, agent, resume, session, { + return runOnboardingEntry(opts, { + env: process.env, + resolveShellState: resolveOnboardShellState, + applyShellState: (shellState) => { + NON_INTERACTIVE = shellState.nonInteractive; + RECREATE_SANDBOX = shellState.recreateSandbox; + }, + getDangerouslySkipPermissionsWarningLines, + ensureUsageNoticeConsent, + validateRequestedProviderHint: () => { + getRequestedProviderHint(); + }, + acquireOnboardLock: (command) => onboardSession.acquireOnboardLock(command), + buildOnboardLockCommand, + getOnboardLockConflictLines, + releaseOnboardLock: () => { + onboardSession.releaseOnboardLock(); + }, + clearGatewayEnv: () => { + delete process.env.OPENSHELL_GATEWAY; + }, + initializeOnboardRun, + getResumeConflicts: (session, shellState, requestedAgent) 
=> + getResumeConfigConflicts(session, { + nonInteractive: shellState.nonInteractive, + fromDockerfile: shellState.requestedFromDockerfile, + agent: requestedAgent, + }), + createOnboardRunContext, + getOnboardBannerLines, + buildOrchestratorDeps: (runContext, shellState, requestedAgent) => + createOnboardingOrchestratorDeps(runContext, { + resume: shellState.resume, + dangerouslySkipPermissions: shellState.dangerouslySkipPermissions, + requestedAgent, + gatewayName: GATEWAY_NAME, + dashboardPort: DASHBOARD_PORT, + resolveAgent: agentOnboard.resolveAgent, + note, + log: console.log, + skippedStepMessage, step, + preflight, + detectGpu: () => nim.detectGpu(), runCaptureOpenshell, + getGatewayReuseState, + verifyGatewayContainerRunning, + runOpenshell, + destroyGateway, + clearRegistryAll: () => { + registry.clearAll(); + }, + startGateway, + setupNim, + setupInference, + isInferenceRouteReady, + hydrateCredentialEnv, + getOpenshellBinary, + updateSandbox: (name, patch) => { + registry.updateSandbox(name, patch); + }, + setupMessagingChannels, + configureWebSearch, + ensureValidatedBraveSearchCredential, + getSandboxReuseState, + removeSandbox: (name) => { + registry.removeSandbox(name); + }, + repairRecordedSandbox, + createSandbox, + handleAgentSetup: agentOnboard.handleAgentSetup, openshellShellCommand, buildSandboxConfigSyncScript, writeSandboxConfigSyncFile, cleanupTempDir, - startRecordedStep, - skippedStepMessage, - }); - onboardSession.markStepSkipped("openclaw"); - } else { - const resumeOpenclaw = resume && sandboxName && isOpenclawReady(sandboxName); - if (resumeOpenclaw) { - skippedStepMessage("openclaw", sandboxName); - onboardSession.markStepComplete("openclaw", { sandboxName, provider, model }); - } else { - startRecordedStep("openclaw", { sandboxName, provider, model }); - await setupOpenclaw(sandboxName, model, provider); - onboardSession.markStepComplete("openclaw", { sandboxName, provider, model }); - } - 
onboardSession.markStepSkipped("agent_setup"); - } - - const latestSession = onboardSession.loadSession(); - const recordedPolicyPresets = Array.isArray(latestSession?.policyPresets) - ? latestSession.policyPresets - : null; - const recordedMessagingChannels = Array.isArray(latestSession?.messagingChannels) - ? latestSession.messagingChannels - : []; - if (dangerouslySkipPermissions) { - step(8, 8, "Policy presets"); - if (!waitForSandboxReady(sandboxName)) { - console.error(`\n ✗ Sandbox '${sandboxName}' not ready after creation. Giving up.`); - process.exit(1); - } - shields.shieldsDownPermanent(sandboxName); - onboardSession.markStepComplete("policies", { - sandboxName, - provider, - model, - policyPresets: [], - }); - } else { - const resumePolicies = - resume && sandboxName && arePolicyPresetsApplied(sandboxName, recordedPolicyPresets || []); - if (resumePolicies) { - skippedStepMessage("policies", (recordedPolicyPresets || []).join(", ")); - onboardSession.markStepComplete("policies", { - sandboxName, - provider, - model, - policyPresets: recordedPolicyPresets || [], - }); - } else { - startRecordedStep("policies", { - sandboxName, - provider, - model, - policyPresets: recordedPolicyPresets || [], - }); - const appliedPolicyPresets = await setupPoliciesWithSelection(sandboxName, { - selectedPresets: - Array.isArray(recordedPolicyPresets) && recordedPolicyPresets.length > 0 - ? recordedPolicyPresets - : null, - enabledChannels: - selectedMessagingChannels.length > 0 - ? 
selectedMessagingChannels - : recordedMessagingChannels, - webSearchConfig, - provider, - onSelection: (policyPresets) => { - onboardSession.updateSession((current) => { - current.policyPresets = policyPresets; - return current; - }); - }, - }); - onboardSession.markStepComplete("policies", { - sandboxName, - provider, - model, - policyPresets: appliedPolicyPresets, - }); - } - } - - onboardSession.completeSession({ sandboxName, provider, model }); - completed = true; - printDashboard(sandboxName, model, provider, nimContainer, agent); - } finally { - releaseOnboardLock(); - } -} + isOpenclawReady, + setupOpenclaw, + waitForSandboxReady, + applyPermissivePolicy: (name) => { + policies.applyPermissivePolicy(name); + }, + arePolicyPresetsApplied, + setupPoliciesWithSelection, + }), + runOnboardingOrchestrator, + printDashboard, + note, + log: console.log, + error: console.error, + exit: (code) => process.exit(code), + onceProcessExit: (handler) => { + process.once("exit", handler); + }, + });} module.exports = { buildProviderArgs, @@ -6347,7 +1393,6 @@ module.exports = { getDashboardAccessInfo, getDashboardForwardPort, getDashboardForwardStartCommand, - getDashboardGuidanceLines, startGatewayForRecovery, runCaptureOpenshell, setupInference, diff --git a/test/credential-exposure.test.ts b/test/credential-exposure.test.ts index 9c4c593825..32dc92f7de 100644 --- a/test/credential-exposure.test.ts +++ b/test/credential-exposure.test.ts @@ -13,6 +13,27 @@ import path from "node:path"; import { describe, it, expect } from "vitest"; const ONBOARD_JS = path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"); +const ONBOARD_INFERENCE_VALIDATION_TS = path.join( + import.meta.dirname, + "..", + "src", + "lib", + "onboard-inference-validation.ts", +); +const ONBOARD_PROVIDER_MANAGEMENT_TS = path.join( + import.meta.dirname, + "..", + "src", + "lib", + "onboard-provider-management.ts", +); +const ONBOARD_SANDBOX_CREATE_TS = path.join( + import.meta.dirname, + "..", + 
"src", + "lib", + "onboard-sandbox-create.ts", +); const RUNNER_TS = path.join(import.meta.dirname, "..", "nemoclaw", "src", "blueprint", "runner.ts"); const SERVICES_TS = path.join(import.meta.dirname, "..", "src", "lib", "services.ts"); @@ -65,7 +86,7 @@ describe("credential exposure in process arguments", () => { }); it("onboard.js --credential flags pass env var names only", () => { - const src = fs.readFileSync(ONBOARD_JS, "utf-8"); + const src = fs.readFileSync(ONBOARD_PROVIDER_MANAGEMENT_TS, "utf-8"); expect(src).toMatch(/"--credential", credentialEnv/); expect(src).not.toMatch(/"--credential",\s*["'][A-Z_]+=/); @@ -73,7 +94,7 @@ describe("credential exposure in process arguments", () => { }); it("onboard.ts uses subprocess allowlist (not blocklist) for sandbox env", () => { - const src = fs.readFileSync(ONBOARD_JS, "utf-8"); + const src = fs.readFileSync(ONBOARD_SANDBOX_CREATE_TS, "utf-8"); // The sandbox create path must use the shared subprocess-env.ts // allowlist, NOT the old blocklist. 
The allowlist inverts the @@ -112,7 +133,7 @@ describe("credential exposure in process arguments", () => { }); it("api-key paste-guard uses extensible prefix list and regex fallback", () => { - const src = fs.readFileSync(ONBOARD_JS, "utf-8"); + const src = fs.readFileSync(ONBOARD_INFERENCE_VALIDATION_TS, "utf-8"); // Known prefix list must include at least NVIDIA and GitHub prefixes expect(src).toMatch(/API_KEY_PREFIXES/); diff --git a/test/e2e/test-onboard-resume.sh b/test/e2e/test-onboard-resume.sh index d14d9c382c..6802814985 100755 --- a/test/e2e/test-onboard-resume.sh +++ b/test/e2e/test-onboard-resume.sh @@ -244,25 +244,25 @@ else fail "Resume did not skip sandbox" fi -if echo "$resume_output" | grep -q "\[1/7\] Preflight checks"; then +if echo "$resume_output" | grep -q "\[1/8\] Preflight checks"; then fail "Resume reran preflight unexpectedly" else pass "Resume did not rerun preflight" fi -if echo "$resume_output" | grep -q "\[2/7\] Starting OpenShell gateway"; then +if echo "$resume_output" | grep -q "\[2/8\] Starting OpenShell gateway"; then fail "Resume reran gateway startup unexpectedly" else pass "Resume did not rerun gateway startup" fi -if echo "$resume_output" | grep -q "\[5/7\] Creating sandbox"; then +if echo "$resume_output" | grep -q "\[6/8\] Creating sandbox"; then fail "Resume reran sandbox creation unexpectedly" else pass "Resume did not rerun sandbox creation" fi -if echo "$resume_output" | grep -q "\[4/7\] Setting up inference provider"; then +if echo "$resume_output" | grep -q "\[4/8\] Setting up inference provider"; then pass "Resume continued with inference setup" else fail "Resume did not continue with inference setup" @@ -282,11 +282,13 @@ if (data.status !== "complete") process.exit(1); if (data.provider !== "nvidia-prod") process.exit(2); if (data.steps.preflight.status !== "complete") process.exit(3); if (data.steps.gateway.status !== "complete") process.exit(4); -if (data.steps.sandbox.status !== "complete") process.exit(5); -if 
(data.steps.provider_selection.status !== "complete") process.exit(6); -if (data.steps.inference.status !== "complete") process.exit(7); -if (data.steps.openclaw.status !== "complete") process.exit(8); -if (data.steps.policies.status !== "complete") process.exit(9); +if (data.steps.provider_selection.status !== "complete") process.exit(5); +if (data.steps.inference.status !== "complete") process.exit(6); +if (data.steps.messaging.status !== "complete") process.exit(7); +if (data.steps.sandbox.status !== "complete") process.exit(8); +if (data.steps.runtime_setup.status !== "complete") process.exit(9); +if (data.steps.openclaw.status !== "complete") process.exit(10); +if (data.steps.policies.status !== "complete") process.exit(11); ' "$SESSION_FILE" case $? in 0) pass "Session file recorded full completion after resume" ;; diff --git a/test/gateway-cleanup.test.ts b/test/gateway-cleanup.test.ts index f5d81d7b13..3a55e2f41a 100644 --- a/test/gateway-cleanup.test.ts +++ b/test/gateway-cleanup.test.ts @@ -16,21 +16,27 @@ const ROOT = path.resolve(import.meta.dirname, ".."); describe("gateway cleanup: Docker volumes removed on failure (#17)", () => { it("onboard.js: destroyGateway() removes Docker volumes", () => { - const content = fs.readFileSync(path.join(ROOT, "src/lib/onboard.ts"), "utf-8"); + const content = fs.readFileSync( + path.join(ROOT, "src/lib/onboard-runtime-helpers.ts"), + "utf-8", + ); expect(content.includes("docker volume") && content.includes("openshell-cluster")).toBe(true); }); - it("onboard.js: volume cleanup runs on gateway start failure", () => { - const content = fs.readFileSync(path.join(ROOT, "src/lib/onboard.ts"), "utf-8"); - const startGwBlock = content.match(/async function startGatewayWithOptions[\s\S]*?^}/m); + it("gateway runtime helper: volume cleanup runs on gateway start failure", () => { + const content = fs.readFileSync( + path.join(ROOT, "src/lib/onboard-gateway-runtime.ts"), + "utf-8", + ); + const startGwBlock = 
content.match(/export async function startGatewayWithOptions[\s\S]*?^}/m); expect(startGwBlock).toBeTruthy(); // Current behavior: // 1. stale gateway is detected but NOT destroyed upfront — gateway start // can recover the container without wiping metadata/certs // 2. destroyGateway() runs inside the retry loop only on genuine failure - expect(startGwBlock[0].includes("if (hasStaleGateway(gwInfo))")).toBe(true); - expect(startGwBlock[0]).toContain("destroyGateway()"); + expect(startGwBlock[0].includes("if (deps.hasStaleGateway(gwInfo))")).toBe(true); + expect(startGwBlock[0]).toContain("deps.destroyGateway()"); }); it("uninstall.sh: includes Docker volume cleanup", () => { diff --git a/test/gateway-liveness-probe.test.ts b/test/gateway-liveness-probe.test.ts index 5ae95ed5c2..41b01ffb6c 100644 --- a/test/gateway-liveness-probe.test.ts +++ b/test/gateway-liveness-probe.test.ts @@ -16,10 +16,25 @@ import path from "node:path"; const ROOT = path.resolve(import.meta.dirname, ".."); describe("gateway liveness probe (#2020)", () => { - const content = fs.readFileSync(path.join(ROOT, "src/lib/onboard.ts"), "utf-8"); + const content = fs.readFileSync( + path.join(ROOT, "src/lib/onboard-gateway-liveness.ts"), + "utf-8", + ); + const preflightContent = fs.readFileSync( + path.join(ROOT, "src/lib/onboard-preflight-run.ts"), + "utf-8", + ); + const depsContent = fs.readFileSync( + path.join(ROOT, "src/lib/onboard-orchestrator-deps.ts"), + "utf-8", + ); + const helperContent = fs.readFileSync( + path.join(ROOT, "src/lib/onboard-host-flow.ts"), + "utf-8", + ); it("verifyGatewayContainerRunning() helper exists and checks Docker state", () => { - expect(content).toContain("function verifyGatewayContainerRunning()"); + expect(content).toContain("verifyGatewayContainerRunning("); // Must use docker inspect to probe container state expect(content).toContain("docker inspect --type container"); // Must check .State.Running, not just container existence @@ -27,20 +42,16 @@ 
describe("gateway liveness probe (#2020)", () => { }); it("preflight probes the container when gatewayReuseState is 'healthy'", () => { - // The preflight section must call the probe before entering the port loop. - // Find the first gatewayReuseState assignment and the port loop. - const preflightProbe = content.match( - /let gatewayReuseState = getGatewayReuseState[\s\S]*?verifyGatewayContainerRunning\(\)[\s\S]*?gatewayReuseState = "missing"/, + const preflightProbe = preflightContent.match( + /let gatewayReuseState = deps\.getGatewayReuseState[\s\S]*?deps\.verifyGatewayContainerRunning\(\)[\s\S]*?gatewayReuseState = "missing"/, ); expect(preflightProbe).toBeTruthy(); }); - it("main onboard flow probes the container before canReuseHealthyGateway", () => { - // The main onboard flow must also probe before setting canReuseHealthyGateway. - // Scope to the onboard() function so the regex can't accidentally match the preflight block. - const onboardSection = content.slice(content.indexOf("async function onboard(")); - const mainFlowProbe = onboardSection.match( - /let gatewayReuseState = getGatewayReuseState[\s\S]*?verifyGatewayContainerRunning\(\)[\s\S]*?const canReuseHealthyGateway/, + it("main onboard flow delegates the probe to the extracted host preparation helper", () => { + expect(depsContent).toContain("run: runHostPreparationFlow"); + const mainFlowProbe = helperContent.match( + /let gatewayReuseState = deps\.getGatewayReuseState[\s\S]*?deps\.verifyGatewayContainerRunning\(\)[\s\S]*?const canReuseHealthyGateway/, ); expect(mainFlowProbe).toBeTruthy(); }); @@ -53,17 +64,18 @@ describe("gateway liveness probe (#2020)", () => { }); it("only downgrades to 'missing' when container is confirmed missing", () => { - // Both probe sites must check containerState === "missing" before cleanup - const downgrades = content.match(/containerState === "missing"/g); - expect(downgrades).toBeTruthy(); - expect(downgrades.length).toBeGreaterThanOrEqual(2); + // Both probe 
sites must check containerState === "missing" before cleanup. + const preflightDowngrades = preflightContent.match(/containerState === "missing"/g) ?? []; + const helperDowngrades = helperContent.match(/containerState === "missing"/g) ?? []; + expect(preflightDowngrades.length).toBeGreaterThanOrEqual(1); + expect(helperDowngrades.length).toBeGreaterThanOrEqual(1); }); it("cleans up stale metadata when container is confirmed missing", () => { // After detecting a removed container, the code must clean up forwarding // and destroy the gateway via the shared destroyGateway() helper. - const cleanupAfterProbe = content.match( - /containerState === "missing"[\s\S]*?forward.*stop[\s\S]*?destroyGateway\(\)/, + const cleanupAfterProbe = helperContent.match( + /containerState === "missing"[\s\S]*?stopDashboardForward\(\)[\s\S]*?destroyGateway\(\)/, ); expect(cleanupAfterProbe).toBeTruthy(); }); diff --git a/test/gemini-probe-auth.test.ts b/test/gemini-probe-auth.test.ts index 43e1259e79..5dc32b8053 100644 --- a/test/gemini-probe-auth.test.ts +++ b/test/gemini-probe-auth.test.ts @@ -25,7 +25,7 @@ describe("Gemini dual-auth credential fix (issue #1960)", () => { describe("compiled probe uses ?key= for Gemini instead of Bearer header", () => { const onboardSrc = fs.readFileSync( - path.join(import.meta.dirname, "..", "dist", "lib", "onboard.js"), + path.join(import.meta.dirname, "..", "dist", "lib", "onboard-inference-validation.js"), "utf-8", ); diff --git a/test/legacy-path-guard.test.ts b/test/legacy-path-guard.test.ts index 30a977ca18..f73fc25dcf 100644 --- a/test/legacy-path-guard.test.ts +++ b/test/legacy-path-guard.test.ts @@ -27,6 +27,7 @@ function initTempRepo(prefix: string): string { run("git", ["init", "-b", "main"], repoDir); run("git", ["config", "user.name", "Test User"], repoDir); run("git", ["config", "user.email", "test@example.com"], repoDir); + run("git", ["config", "commit.gpgsign", "false"], repoDir); return repoDir; } diff --git a/test/onboard.test.ts 
b/test/onboard.test.ts index 185559e7be..ce51ef5ea5 100644 --- a/test/onboard.test.ts +++ b/test/onboard.test.ts @@ -54,6 +54,27 @@ import { import { stageOptimizedSandboxBuildContext } from "../dist/lib/sandbox-build-context"; import { buildWebSearchDockerConfig } from "../dist/lib/web-search"; +function extractFunctionBodyByMarker(source, marker) { + const markerIndex = source.indexOf(marker); + assert.notEqual(markerIndex, -1, `${marker} not found`); + + const openBraceIndex = source.indexOf("{", markerIndex); + assert.notEqual(openBraceIndex, -1, `opening brace not found for ${marker}`); + + let depth = 0; + for (let index = openBraceIndex; index < source.length; index += 1) { + const character = source[index]; + if (character === "{") depth += 1; + else if (character === "}") depth -= 1; + + if (depth === 0) { + return source.slice(openBraceIndex + 1, index); + } + } + + assert.fail(`closing brace not found for ${marker}`); +} + describe("onboard helpers", () => { it("classifies sandbox create timeout failures and tracks upload progress", () => { expect( @@ -1053,20 +1074,20 @@ describe("onboard helpers", () => { it("passes --port GATEWAY_PORT through every gateway start path", () => { const source = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + path.join(import.meta.dirname, "..", "src", "lib", "onboard-gateway-runtime.ts"), "utf-8", ); // Primary start path (startGatewayWithOptions) builds gwArgs with --port. assert.match( source, - /const gwArgs = \["--name", GATEWAY_NAME, "--port", String\(GATEWAY_PORT\)\]/, + /const gwArgs = \["--name", deps\.gatewayName, "--port", String\(deps\.gatewayPort\)\]/, ); // Recovery start path (recoverGatewayRuntime) also passes --port. 
assert.match( source, - /runOpenshell\(\s*\["gateway", "start", "--name", GATEWAY_NAME, "--port", String\(GATEWAY_PORT\)\]/, + /\["gateway", "start", "--name", deps\.gatewayName, "--port", String\(deps\.gatewayPort\)\]/, ); }); @@ -2170,74 +2191,110 @@ const { setupInference } = require(${onboardPath}); it("checks provider existence before create/update to avoid AlreadyExists noise (#1155)", () => { const source = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + path.join(import.meta.dirname, "..", "src", "lib", "onboard-provider-management.ts"), "utf-8", ); // upsertProvider must check existence first so it never triggers AlreadyExists. - assert.match(source, /providerExistsInGateway\(name\)/); + assert.match(source, /providerExistsInGateway\(name, deps\)/); assert.match(source, /exists \? "update" : "create"/); // Only one openshell call should be made (no create-then-update fallback). - assert.match(source, /const result = runOpenshell\(args, runOpts\)/); + assert.match(source, /const result = deps\.runOpenshell\(args, runOpts\)/); }); it("marks the unused agent_setup/openclaw sibling step as skipped (#1834)", () => { - const source = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + const depsSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-orchestrator-deps.ts"), + "utf-8", + ); + const helperSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-runtime-flow.ts"), "utf-8", ); + assert.match(depsSource, /run: runRuntimeSetupFlow/); // When agent path is taken, openclaw must be marked skipped. - assert.match(source, /handleAgentSetup[\s\S]*?markStepSkipped\("openclaw"\)/); + assert.match(helperSource, /handleAgentSetup\(/); + assert.match(helperSource, /onSkipSiblingStep\("openclaw"\)/); // When default openclaw path is taken, agent_setup must be marked skipped. 
- assert.match(source, /setupOpenclaw[\s\S]*?markStepSkipped\("agent_setup"\)/); + assert.match(helperSource, /setupOpenclaw\(/); + assert.match(helperSource, /onSkipSiblingStep\("agent_setup"\)/); }); - it("starts the sandbox step before prompting for the sandbox name", () => { - const source = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + it("delegates messaging+sandbox provisioning to the extracted sandbox flow helper", () => { + const depsSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-orchestrator-deps.ts"), "utf-8", ); - - assert.match( - source, - /startRecordedStep\("sandbox", \{ sandboxName, provider, model \}\);\s*selectedMessagingChannels = await setupMessagingChannels\(\);\s*onboardSession\.updateSession\(\(current\) => \{\s*current\.messagingChannels = selectedMessagingChannels;\s*return current;\s*\}\);\s*sandboxName = await createSandbox\(\s*gpu,\s*model,\s*provider,\s*preferredInferenceApi,\s*sandboxName,\s*nextWebSearchConfig,\s*selectedMessagingChannels,\s*fromDockerfile,\s*agent,\s*dangerouslySkipPermissions,\s*\);/, + const helperSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-sandbox-flow.ts"), + "utf-8", ); + + assert.match(depsSource, /run: runSandboxProvisioningFlow/); + assert.match(helperSource, /onStartStep\("messaging"/); + assert.match(helperSource, /onCompleteStep\("messaging"/); + assert.match(helperSource, /onStartStep\("sandbox"/); + assert.match(helperSource, /persistRegistryModelProvider\(/); + assert.match(helperSource, /resume && Array\.isArray\(deps\.sessionMessagingChannels\)/); }); it("prints numbered step headers even when onboarding skips resumed steps", () => { - const source = fs.readFileSync( + const onboardSource = fs.readFileSync( path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), "utf-8", ); + const hostHelperSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", 
"lib", "onboard-host-flow.ts"), + "utf-8", + ); + const runtimeHelperSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-runtime-flow.ts"), + "utf-8", + ); + const policyHelperSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-policy-flow.ts"), + "utf-8", + ); - assert.match(source, /const ONBOARD_STEP_INDEX = \{/); - assert.match(source, /function skippedStepMessage\(stepName, detail, reason = "resume"\)/); - assert.match(source, /step\(stepInfo\.number, 8, stepInfo\.title\);/); - assert.match(source, /skippedStepMessage\("openclaw", sandboxName\)/); + assert.match(onboardSource, /const TOTAL_ONBOARD_STEPS = Math\.max\(/); + assert.match(onboardSource, /function skippedStepMessage\(stepName, detail, reason = "resume"\)/); assert.match( - source, - /skippedStepMessage\("policies", \(recordedPolicyPresets \|\| \[\]\)\.join\(", "\)\)/, + onboardSource, + /const visibleStepName = isOnboardStepName\(stepName\) \? toVisibleStepName\(stepName\) : null;/, + ); + assert.match(onboardSource, /step\(stepInfo\.number, TOTAL_ONBOARD_STEPS, stepInfo\.title\);/); + assert.match(hostHelperSource, /deps\.onSkip\("preflight", "cached"\)/); + assert.match(hostHelperSource, /deps\.onSkip\("gateway", "running"/); + assert.match(runtimeHelperSource, /deps\.onSkip\("openclaw", state\.sandboxName\)/); + assert.match( + policyHelperSource, + /deps\.onSkip\("policies", \(state\.recordedPolicyPresets \|\| \[\]\)\.join\(", "\)\)/, ); }); - it("enters permanent shields-down state when dangerouslySkipPermissions is true", () => { - const source = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + it("activates permissive policy via policy set when dangerouslySkipPermissions is true", () => { + const depsSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-orchestrator-deps.ts"), "utf-8", ); - - // The dangerouslySkipPermissions branch must call 
shields.shieldsDownPermanent - // to activate the permissive policy, unlock the config file with doctor-aligned - // permissions, and record permanent shields-down state. This replaced the - // previous policies.applyPermissivePolicy call to unify the shields state machine. - assert.match( - source, - /if \(dangerouslySkipPermissions\) \{\s*step\(8, 8, "Policy presets"\);\s*if \(!waitForSandboxReady\(sandboxName\)\) \{[\s\S]*?\}\s*shields\.shieldsDownPermanent\(sandboxName\);/, + const helperSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-policy-flow.ts"), + "utf-8", ); + + // The dangerouslySkipPermissions branch must call applyPermissivePolicy to + // activate the policy via `openshell policy set --wait`. Without this, + // the base policy from sandbox create stays in Pending status (#897). + assert.match(depsSource, /run: runPolicySetupFlow/); + assert.match(helperSource, /if \(deps\.dangerouslySkipPermissions\)/); + assert.match(helperSource, /deps\.onShowHeader\(\)/); + assert.match(helperSource, /!deps\.waitForSandboxReady\(state\.sandboxName\)/); + assert.match(helperSource, /kind: "sandbox_not_ready"/); + assert.match(helperSource, /deps\.applyPermissivePolicy\(state\.sandboxName\)/); // Must NOT just print a skip message without activating the policy. 
assert.doesNotMatch( - source, + helperSource, /dangerouslySkipPermissions\)[\s\S]*?Skipped —.*permissive base policy/, ); }); @@ -5262,16 +5319,14 @@ const { createSandbox } = require(${onboardPath}); }); it("re-prompts on invalid sandbox names instead of exiting in interactive mode", () => { - const source = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + const shellSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-sandbox-name.ts"), "utf-8", ); - // Extract the promptValidatedSandboxName function body - const fnMatch = source.match( - /async function promptValidatedSandboxName\(\)\s*\{([\s\S]*?)\n\}/, + const fnBody = extractFunctionBodyByMarker( + shellSource, + "export async function promptValidatedSandboxName(", ); - assert.ok(fnMatch, "promptValidatedSandboxName function not found"); - const fnBody = fnMatch[1]; // Verify the bounded retry loop exists within this function assert.match(fnBody, /MAX_ATTEMPTS/); assert.match(fnBody, /for\s*\(let attempt/); @@ -5280,26 +5335,34 @@ const { createSandbox } = require(${onboardPath}); assert.match(fnBody, /Too many invalid attempts/); // Non-interactive still exits within this function assert.match(fnBody, /isNonInteractive\(\)/); - assert.match(fnBody, /process\.exit\(1\)/); + assert.match(fnBody, /exit\(1\)/); }); it("regression #1881: registry.updateSandbox(model/provider) is called AFTER createSandbox", () => { // updateSandbox() silently no-ops when the entry does not exist yet. - // This asserts that the model/provider update comes AFTER createSandbox() - // returns, not before registerSandbox() is called (the original bug). - const source = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + // This asserts that the extracted sandbox helper still performs the + // model/provider registry update AFTER createSandbox() returns. 
+ const helperSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-sandbox-flow.ts"), + "utf-8", + ); + const depsSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-orchestrator-deps.ts"), "utf-8", ); - const createSandboxPos = source.indexOf("sandboxName = await createSandbox("); - assert.ok(createSandboxPos !== -1, "createSandbox call not found in onboard.ts"); - const updateAfterCreate = source.indexOf( - "registry.updateSandbox(sandboxName, { model, provider })", + const createSandboxPos = helperSource.indexOf("const nextSandboxName = await deps.createSandbox("); + assert.ok(createSandboxPos !== -1, "createSandbox call not found in onboard-sandbox-flow.ts"); + const updateAfterCreate = helperSource.indexOf( + "deps.persistRegistryModelProvider(nextSandboxName, {", createSandboxPos, ); assert.ok( updateAfterCreate !== -1, - "registry.updateSandbox(model, provider) must appear AFTER createSandbox() — regression #1881", + "persistRegistryModelProvider must appear AFTER createSandbox() — regression #1881", + ); + assert.match( + depsSource, + /persistRegistryModelProvider: \(name, patch\) => \{[\s\S]*?input\.updateSandbox\(name, patch\);[\s\S]*?\}/, ); }); @@ -5549,11 +5612,14 @@ const { createSandbox } = require(${onboardPath}); it("regression #1904: createSandbox calls pullAndResolveBaseImageDigest before patchStagedDockerfile", () => { const source = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + path.join(import.meta.dirname, "..", "src", "lib", "onboard-sandbox-create.ts"), "utf-8", ); const pullPos = source.indexOf("pullAndResolveBaseImageDigest()"); - assert.ok(pullPos !== -1, "pullAndResolveBaseImageDigest() call not found in onboard.ts"); + assert.ok( + pullPos !== -1, + "pullAndResolveBaseImageDigest() call not found in onboard-sandbox-create.ts", + ); const patchPos = source.indexOf("patchStagedDockerfile(", pullPos); assert.ok( patchPos > 
pullPos, diff --git a/test/runner.test.ts b/test/runner.test.ts index 5d0c5c8612..99e6b8d10f 100644 --- a/test/runner.test.ts +++ b/test/runner.test.ts @@ -553,17 +553,23 @@ describe("regression guards", () => { describe("credential exposure guards (#429)", () => { it("onboard createSandbox does not pass NVIDIA_API_KEY to sandbox env", () => { const fs = require("fs"); - const src = fs.readFileSync(path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), "utf-8"); + const src = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-sandbox-create.ts"), + "utf-8", + ); // Find the envArgs block in createSandbox — it should not contain NVIDIA_API_KEY const envArgsMatch = src.match(/const envArgs = \[[\s\S]*?\];/); expect(envArgsMatch).toBeTruthy(); expect(envArgsMatch[0].includes("NVIDIA_API_KEY")).toBe(false); }); - it("onboard clears NVIDIA_API_KEY from process.env after setupInference", () => { + it("onboard clears the active provider credential from process.env after setupInference", () => { const fs = require("fs"); - const src = fs.readFileSync(path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), "utf-8"); - expect(src.includes("delete process.env.NVIDIA_API_KEY")).toBeTruthy(); + const src = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard-orchestrator-deps.ts"), + "utf-8", + ); + expect(src.includes("delete process.env[credentialEnv]")).toBeTruthy(); }); it("setupSpark is a compatibility alias that does not shell out to sudo", () => { diff --git a/test/sandbox-build-context.test.ts b/test/sandbox-build-context.test.ts index da8d6e520e..0662f12391 100644 --- a/test/sandbox-build-context.test.ts +++ b/test/sandbox-build-context.test.ts @@ -34,7 +34,7 @@ describe("sandbox build context staging", () => { } }); - it("optimized staging is smaller than the legacy build context", { timeout: 30_000 }, () => { + it("optimized staging is smaller than the legacy build context", { timeout: 90_000 
}, () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-context-compare-")); diff --git a/test/shellquote-sandbox.test.ts b/test/shellquote-sandbox.test.ts index 1dba05d68a..8834fb1afa 100644 --- a/test/shellquote-sandbox.test.ts +++ b/test/shellquote-sandbox.test.ts @@ -9,16 +9,18 @@ import { describe, it, expect } from "vitest"; describe("sandboxName command hardening in onboard.js", () => { const src = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + path.join(import.meta.dirname, "..", "src", "lib", "onboard-sandbox-create.ts"), "utf-8", ); it("re-validates sandboxName at the createSandbox boundary", () => { - expect(src).toMatch(/const sandboxName = validateName\(/); + expect(src).toMatch(/const sandboxName = deps\.validateName\(/); }); it("runs setup-dns-proxy.sh through the argv helper instead of bash -c interpolation", () => { - expect(src).toMatch(/runFile\("bash",\s*\[path\.join\(SCRIPTS, "setup-dns-proxy\.sh"\),/); + expect(src).toMatch( + /runFile\("bash",\s*\[path\.join\(deps\.scriptsDir, "setup-dns-proxy\.sh"\),/, + ); }); it("does not have raw sandboxName interpolation in run or runCapture template literals", () => { diff --git a/test/wsl2-probe-timeout.test.ts b/test/wsl2-probe-timeout.test.ts index 75f3811a62..4e27b9cd8d 100644 --- a/test/wsl2-probe-timeout.test.ts +++ b/test/wsl2-probe-timeout.test.ts @@ -41,9 +41,9 @@ describe("WSL2 inference verification timeouts (issue #987)", () => { describe("retry logic in probeOpenAiLikeEndpoint", () => { // The retry logic is embedded in probeOpenAiLikeEndpoint which is not // exported. Verify the retry triggers on the correct curl exit codes by - // scanning the compiled source for the guard condition. + // scanning the compiled helper source for the guard condition. 
const onboardSrc = fs.readFileSync( - path.join(import.meta.dirname, "..", "dist", "lib", "onboard.js"), + path.join(import.meta.dirname, "..", "dist", "lib", "onboard-inference-validation.js"), "utf-8", );