diff --git a/src/lib/agent-runtime.test.ts b/src/lib/agent-runtime.test.ts index ade44ac92c..51635c0245 100644 --- a/src/lib/agent-runtime.test.ts +++ b/src/lib/agent-runtime.test.ts @@ -3,7 +3,11 @@ import { describe, it, expect } from "vitest"; // Import from compiled dist/ so coverage is attributed correctly. -import { buildOpenClawRecoveryScript, buildRecoveryScript } from "../../dist/lib/agent-runtime"; +import { + buildManualRecoveryCommand, + buildOpenClawRecoveryScript, + buildRecoveryScript, +} from "../../dist/lib/agent-runtime"; import type { AgentDefinition } from "./agent-defs"; function makeAgent(overrides: Partial = {}): AgentDefinition { @@ -42,6 +46,20 @@ function makeAgent(overrides: Partial = {}): AgentDefinition { } const minimalAgent = makeAgent(); +const hermesAgent = makeAgent({ + name: "hermes", + displayName: "Hermes Agent", + binary_path: "/usr/local/bin/hermes", + gateway_command: "hermes gateway run", + healthProbe: { url: "http://localhost:8642/health", port: 8642, timeout_seconds: 90 }, + forwardPort: 8642, + configPaths: { + dir: "/sandbox/.hermes", + configFile: "/sandbox/.hermes/config.yaml", + envFile: "/sandbox/.hermes/.env", + format: "yaml", + }, +}); function extractGatewayProcessPattern(script: string | null): string { const match = script?.match(/_GATEWAY_PROC_PATTERN='([^']+)'/); @@ -74,6 +92,17 @@ describe("buildRecoveryScript", () => { expect(script).toContain('"$AGENT_BIN" gateway run --port 19000'); }); + it("omits --port for Hermes so config.yaml controls the internal listen port (#2426)", () => { + const script = buildRecoveryScript(hermesAgent, 8642); + expect(script).toContain("export HERMES_HOME=/sandbox/.hermes"); + expect(script).toContain("HERMES_HOME=/sandbox/.hermes"); + expect(script).toContain("HTTPS_PROXY=http://127.0.0.1:3129"); + expect(script).toContain("nemoclaw-decode-proxy"); + expect(script).toContain('"$AGENT_BIN" gateway run'); + expect(script).not.toContain('"$AGENT_BIN" gateway run --port 8642'); + expect(script).not.toContain("hermes gateway run --port 8642"); + }); + it("falls back to openclaw gateway run when gateway_command is absent", () => { const agent = makeAgent({ gateway_command: undefined }); const script = buildRecoveryScript(agent, 19000); @@ -94,6 +123,17 @@ describe("buildRecoveryScript", () => { expect(script).toContain("nohup custom-launch --mode recovery --port 19000"); }); + it("does not append the external forward port to custom Hermes launch commands (#2426)", () => { + const agent = makeAgent({ + ...hermesAgent, + gateway_command: "hermes gateway run --profile recovery", + }); + const script = buildRecoveryScript(agent, 8642); + expect(script).toContain("nohup env HERMES_HOME=/sandbox/.hermes"); + expect(script).toContain("hermes gateway run --profile recovery"); + expect(script).not.toContain("hermes gateway run --profile recovery --port 8642"); + }); + // Regression coverage for #2478. The recovery script must explicitly source // /tmp/nemoclaw-proxy-env.sh (single source of truth for NODE_OPTIONS // library guards) and warn — not silently continue — when the file is @@ -258,3 +298,39 @@ describe("buildRecoveryScript", () => { }); }); }); + +describe("buildManualRecoveryCommand (#2426)", () => { + it("backgrounds non-Hermes gateways with nohup and the requested port", () => { + const cmd = buildManualRecoveryCommand(minimalAgent, 19000); + expect(cmd).toContain("nohup test-agent gateway run --port 19000"); + expect(cmd).toContain('>> "$_GATEWAY_LOG" 2>&1 &'); + }); + + it("selects a writable gateway log before launching", () => { + const cmd = buildManualRecoveryCommand(minimalAgent, 19000); + expect(cmd).toContain("_GATEWAY_LOG=/tmp/gateway.log"); + expect(cmd).toContain("_GATEWAY_LOG=/tmp/gateway-recovery.log"); + expect(cmd).not.toContain(">/tmp/gateway.log 2>&1"); + }); + + it("omits --port for Hermes and uses the current Hermes home", () => { + const cmd = buildManualRecoveryCommand(hermesAgent, 8642); + expect(cmd).toContain("HERMES_HOME=/sandbox/.hermes"); + expect(cmd).toContain("HTTPS_PROXY=http://127.0.0.1:3129"); + expect(cmd).toContain("nemoclaw-decode-proxy"); + expect(cmd).toContain("nohup hermes gateway run"); + expect(cmd).not.toContain("--port 8642"); + expect(cmd).not.toContain("/sandbox/.hermes-data"); + }); + + it("derives the default gateway command from binary_path when gateway_command is blank", () => { + const agent = makeAgent({ gateway_command: " " }); + const cmd = buildManualRecoveryCommand(agent, 19000); + expect(cmd).toContain("nohup '/usr/local/bin/test-agent' gateway run --port 19000"); + }); + + it("falls back to openclaw gateway run for a null agent", () => { + const cmd = buildManualRecoveryCommand(null, 18789); + expect(cmd).toContain("nohup '/usr/local/bin/openclaw' gateway run --port 18789"); + }); +}); diff --git a/src/lib/agent-runtime.ts b/src/lib/agent-runtime.ts index 7f2f50d4c0..270002a653 100644 --- a/src/lib/agent-runtime.ts +++ b/src/lib/agent-runtime.ts @@ -138,6 +138,21 @@ function gatewayLaunchCommand(command: string, runAsUser?: string): string { return `${logSelection} if [ "$(id -u)" = "0" ] && command -v gosu >/dev/null 2>&1 && id ${shellQuote(runAsUser)} >/dev/null 2>&1; then nohup gosu ${shellQuote(runAsUser)} ${command} >> "$_GATEWAY_LOG" 2>&1 & else ${userLaunch} fi;`; } +function hermesGatewayEnvPrefix(): string { + const decodeProxy = "http://127.0.0.1:3129"; + return [ + "HERMES_HOME=/sandbox/.hermes", + `HTTPS_PROXY=${decodeProxy}`, + `HTTP_PROXY=${decodeProxy}`, + `https_proxy=${decodeProxy}`, + `http_proxy=${decodeProxy}`, + ].join(" "); +} + +function hermesDecodeProxyRecoveryCommand(): string { + return 'if ! command -v ss >/dev/null 2>&1 || ! ss -tln 2>/dev/null | grep -q "127.0.0.1:3129"; then nohup python3 /usr/local/bin/nemoclaw-decode-proxy >/dev/null 2>&1 & for _i in 1 2 3 4 5 6 7 8 9 10; do ! command -v ss >/dev/null 2>&1 || ss -tln 2>/dev/null | grep -q "127.0.0.1:3129" && break; sleep 0.5; done; fi;'; +} + /** * Build the OpenClaw recovery shell script used by the default sandbox. */ @@ -193,11 +208,14 @@ export function buildRecoveryScript(agent: AgentDefinition | null, port: number) // survive past the gateway launch — otherwise the warning explaining // *why* the gateway is about to crash gets wiped by the same launch // that's about to crash on a missing guard. (#2478) - const launchCommand = usesValidatedBinary - ? gatewayLaunchCommand(`"$AGENT_BIN" gateway run --port ${port}`) - : gatewayLaunchCommand(`${configuredGatewayCommand} --port ${port}`); const isHermes = agent.name === "hermes"; const hermesHome = isHermes ? "export HERMES_HOME=/sandbox/.hermes; " : ""; + const hermesLaunchEnv = isHermes ? `env ${hermesGatewayEnvPrefix()} ` : ""; + const launchCommand = usesValidatedBinary + ? gatewayLaunchCommand(`${hermesLaunchEnv}"$AGENT_BIN" gateway run${isHermes ? "" : ` --port ${port}`}`) + : gatewayLaunchCommand( + `${hermesLaunchEnv}${configuredGatewayCommand}${isHermes ? "" : ` --port ${port}`}`, + ); // Source /tmp/nemoclaw-proxy-env.sh immediately before launching. That file // is the single source of truth for NODE_OPTIONS preload guards (safety-net, @@ -219,6 +237,7 @@ export function buildRecoveryScript(agent: AgentDefinition | null, port: number) 'if [ "$_PE_MISSING" = "0" ]; then case "${NODE_OPTIONS:-}" in *nemoclaw-sandbox-safety-net*) _SN_MISSING=0 ;; *) _SN_MISSING=1 ;; esac; case "${NODE_OPTIONS:-}" in *nemoclaw-ciao-network-guard*) _CIAO_MISSING=0 ;; *) _CIAO_MISSING=1 ;; esac; if [ "$_SN_MISSING" = "0" ] && [ "$_CIAO_MISSING" = "0" ]; then _GUARDS_MISSING=0; else _GUARDS_MISSING=1; fi; else _GUARDS_MISSING=0; fi;', '[ "$_PE_MISSING" = "1" ] && { _W="[gateway-recovery] WARNING: /tmp/nemoclaw-proxy-env.sh missing - gateway launching without library guards (#2478)"; echo "$_W" >&2; echo "$_W" >> "$_GATEWAY_LOG"; };', '[ "$_PE_MISSING" = "0" ] && [ "$_GUARDS_MISSING" = "1" ] && { _E="[gateway-recovery] ERROR: /tmp/nemoclaw-proxy-env.sh present but NODE_OPTIONS missing safety-net preload or ciao preload - refusing unguarded gateway relaunch (#2478)"; echo "$_E" >&2; echo "$_E" >> "$_GATEWAY_LOG"; exit 1; };', + isHermes ? hermesDecodeProxyRecoveryCommand() : "", launchCommand, "GPID=$!; sleep 2;", 'if kill -0 "$GPID" 2>/dev/null; then echo "GATEWAY_PID=$GPID"; else echo GATEWAY_FAILED; tail -5 "$_GATEWAY_LOG" 2>/dev/null; fi', @@ -238,3 +257,19 @@ export function getAgentDisplayName(agent: AgentDefinition | null): string { export function getGatewayCommand(agent: AgentDefinition | null): string { return agent?.gateway_command || "openclaw gateway run"; } + +/** + * Build a single copy-pasteable command for the user to run when automatic + * gateway recovery fails. Unlike the raw gateway command, this keeps the + * process alive after disconnect and preserves the agent-specific launch shape. + */ +export function buildManualRecoveryCommand(agent: AgentDefinition | null, port: number): string { + const binaryPath = agent?.binary_path || "/usr/local/bin/openclaw"; + const defaultGatewayCommand = `${shellQuote(binaryPath)} gateway run`; + const gatewayCmd = agent?.gateway_command?.trim() || defaultGatewayCommand; + const isHermes = agent?.name === "hermes"; + const envPrefix = isHermes ? `${hermesGatewayEnvPrefix()} ` : ""; + const portFlag = isHermes ? "" : ` --port ${port}`; + const decodeProxySetup = isHermes ? `${hermesDecodeProxyRecoveryCommand()} ` : ""; + return `${buildGatewayLogSelection()} ${decodeProxySetup}${envPrefix}nohup ${gatewayCmd}${portFlag} >> "$_GATEWAY_LOG" 2>&1 &`; +} diff --git a/src/lib/sandbox-process-recovery-action.ts b/src/lib/sandbox-process-recovery-action.ts index 4bb01a9091..4a5eb4e836 100644 --- a/src/lib/sandbox-process-recovery-action.ts +++ b/src/lib/sandbox-process-recovery-action.ts @@ -296,6 +296,7 @@ export function checkAndRecoverSandboxProcesses( return { checked: false, wasRunning: null, recovered: false, forwardRecovered: false }; } const recoveryAgent = agentRuntime.getSessionAgent(sandboxName); + const recoveryPort = recoveryAgent?.forwardPort ?? DASHBOARD_PORT; if (running) { // Gateway is alive but the host-side forward can still be dead or // owned by another sandbox. Probe and re-establish only when @@ -340,6 +341,10 @@ export function checkAndRecoverSandboxProcesses( if (!quiet) { console.error(" Gateway process started but is not responding."); console.error(" Check /tmp/gateway.log inside the sandbox for details."); + console.error(" Connect to the sandbox and run manually:"); + console.error( + ` ${agentRuntime.buildManualRecoveryCommand(recoveryAgent, recoveryPort)}`, + ); } return { checked: true, wasRunning: false, recovered: false, forwardRecovered: false }; } @@ -364,7 +369,7 @@ export function checkAndRecoverSandboxProcesses( ` Could not restart ${agentRuntime.getAgentDisplayName(recoveryAgent)} gateway automatically.`, ); console.error(" Connect to the sandbox and run manually:"); - console.error(` ${agentRuntime.getGatewayCommand(recoveryAgent)}`); + console.error(` ${agentRuntime.buildManualRecoveryCommand(recoveryAgent, recoveryPort)}`); } return { checked: true, wasRunning: false, recovered, forwardRecovered: false };