Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 77 additions & 1 deletion src/lib/agent-runtime.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@

import { describe, it, expect } from "vitest";
// Import from compiled dist/ so coverage is attributed correctly.
import { buildOpenClawRecoveryScript, buildRecoveryScript } from "../../dist/lib/agent-runtime";
import {
buildManualRecoveryCommand,
buildOpenClawRecoveryScript,
buildRecoveryScript,
} from "../../dist/lib/agent-runtime";
import type { AgentDefinition } from "./agent-defs";

function makeAgent(overrides: Partial<AgentDefinition> = {}): AgentDefinition {
Expand Down Expand Up @@ -42,6 +46,20 @@ function makeAgent(overrides: Partial<AgentDefinition> = {}): AgentDefinition {
}

const minimalAgent = makeAgent();
// Fixture for the Hermes agent: named "hermes", launched via
// `hermes gateway run`, health-probed and forwarded on port 8642, with its
// YAML config and env file under /sandbox/.hermes. Built on top of the
// makeAgent() defaults, so only the Hermes-specific fields are listed here.
const hermesAgent = makeAgent({
name: "hermes",
displayName: "Hermes Agent",
binary_path: "/usr/local/bin/hermes",
gateway_command: "hermes gateway run",
healthProbe: { url: "http://localhost:8642/health", port: 8642, timeout_seconds: 90 },
forwardPort: 8642,
configPaths: {
dir: "/sandbox/.hermes",
configFile: "/sandbox/.hermes/config.yaml",
envFile: "/sandbox/.hermes/.env",
format: "yaml",
},
});

function extractGatewayProcessPattern(script: string | null): string {
const match = script?.match(/_GATEWAY_PROC_PATTERN='([^']+)'/);
Expand Down Expand Up @@ -74,6 +92,17 @@ describe("buildRecoveryScript", () => {
expect(script).toContain('"$AGENT_BIN" gateway run --port 19000');
});

// Hermes recovery script: must export HERMES_HOME, route traffic through the
// local decode proxy (127.0.0.1:3129), reference nemoclaw-decode-proxy, and
// launch the gateway WITHOUT the external forward port appended.
it("omits --port for Hermes so config.yaml controls the internal listen port (#2426)", () => {
const script = buildRecoveryScript(hermesAgent, 8642);
expect(script).toContain("export HERMES_HOME=/sandbox/.hermes");
// NOTE(review): this substring is already implied by the `export ...`
// assertion above, so it cannot fail independently — confirm whether an
// `env HERMES_HOME=...` check was intended here.
expect(script).toContain("HERMES_HOME=/sandbox/.hermes");
expect(script).toContain("HTTPS_PROXY=http://127.0.0.1:3129");
expect(script).toContain("nemoclaw-decode-proxy");
expect(script).toContain('"$AGENT_BIN" gateway run');
// Both the validated-binary form and the raw configured command are checked
// for the absence of the forwarded port.
expect(script).not.toContain('"$AGENT_BIN" gateway run --port 8642');
expect(script).not.toContain("hermes gateway run --port 8642");
});

it("falls back to openclaw gateway run when gateway_command is absent", () => {
const agent = makeAgent({ gateway_command: undefined });
const script = buildRecoveryScript(agent, 19000);
Expand All @@ -94,6 +123,17 @@ describe("buildRecoveryScript", () => {
expect(script).toContain("nohup custom-launch --mode recovery --port 19000");
});

// A Hermes agent with a customised gateway_command (extra `--profile` flag)
// must be launched verbatim under `nohup env HERMES_HOME=...`, still without
// the forward port being appended to the command.
it("does not append the external forward port to custom Hermes launch commands (#2426)", () => {
// Reuse every Hermes fixture field and override only the launch command.
const agent = makeAgent({
...hermesAgent,
gateway_command: "hermes gateway run --profile recovery",
});
const script = buildRecoveryScript(agent, 8642);
expect(script).toContain("nohup env HERMES_HOME=/sandbox/.hermes");
expect(script).toContain("hermes gateway run --profile recovery");
expect(script).not.toContain("hermes gateway run --profile recovery --port 8642");
});

// Regression coverage for #2478. The recovery script must explicitly source
// /tmp/nemoclaw-proxy-env.sh (single source of truth for NODE_OPTIONS
// library guards) and warn — not silently continue — when the file is
Expand Down Expand Up @@ -258,3 +298,39 @@ describe("buildRecoveryScript", () => {
});
});
});

// Coverage for the copy-pasteable manual recovery command printed to the user
// when automatic gateway recovery fails.
describe("buildManualRecoveryCommand (#2426)", () => {
// Non-Hermes agents: the configured command is backgrounded with nohup, gets
// the requested --port, and appends its output to the selected gateway log.
it("backgrounds non-Hermes gateways with nohup and the requested port", () => {
const cmd = buildManualRecoveryCommand(minimalAgent, 19000);
expect(cmd).toContain("nohup test-agent gateway run --port 19000");
expect(cmd).toContain('>> "$_GATEWAY_LOG" 2>&1 &');
});

// The command must carry the log-selection prelude (both candidate paths are
// present) rather than redirecting straight to a hard-coded /tmp/gateway.log.
it("selects a writable gateway log before launching", () => {
const cmd = buildManualRecoveryCommand(minimalAgent, 19000);
expect(cmd).toContain("_GATEWAY_LOG=/tmp/gateway.log");
expect(cmd).toContain("_GATEWAY_LOG=/tmp/gateway-recovery.log");
expect(cmd).not.toContain(">/tmp/gateway.log 2>&1");
});

// Hermes: environment prefix and decode-proxy setup are included, no --port
// is appended, and the /sandbox/.hermes-data path must not appear.
it("omits --port for Hermes and uses the current Hermes home", () => {
const cmd = buildManualRecoveryCommand(hermesAgent, 8642);
expect(cmd).toContain("HERMES_HOME=/sandbox/.hermes");
expect(cmd).toContain("HTTPS_PROXY=http://127.0.0.1:3129");
expect(cmd).toContain("nemoclaw-decode-proxy");
expect(cmd).toContain("nohup hermes gateway run");
expect(cmd).not.toContain("--port 8642");
expect(cmd).not.toContain("/sandbox/.hermes-data");
});

// A whitespace-only gateway_command counts as absent: the command is derived
// from the shell-quoted binary_path instead.
it("derives the default gateway command from binary_path when gateway_command is blank", () => {
const agent = makeAgent({ gateway_command: " " });
const cmd = buildManualRecoveryCommand(agent, 19000);
expect(cmd).toContain("nohup '/usr/local/bin/test-agent' gateway run --port 19000");
});

// With no agent at all, the OpenClaw binary path is the default.
it("falls back to openclaw gateway run for a null agent", () => {
const cmd = buildManualRecoveryCommand(null, 18789);
expect(cmd).toContain("nohup '/usr/local/bin/openclaw' gateway run --port 18789");
});
});
41 changes: 38 additions & 3 deletions src/lib/agent-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,21 @@ function gatewayLaunchCommand(command: string, runAsUser?: string): string {
return `${logSelection} if [ "$(id -u)" = "0" ] && command -v gosu >/dev/null 2>&1 && id ${shellQuote(runAsUser)} >/dev/null 2>&1; then nohup gosu ${shellQuote(runAsUser)} ${command} >> "$_GATEWAY_LOG" 2>&1 & else ${userLaunch} fi;`;
}

/**
 * Environment assignments placed in front of a Hermes gateway launch.
 *
 * @returns Space-separated `NAME=value` pairs: `HERMES_HOME` pointing at
 *   `/sandbox/.hermes`, followed by the upper- and lower-case HTTP(S) proxy
 *   variables, all set to the same local proxy URL on port 3129.
 */
function hermesGatewayEnvPrefix(): string {
  const proxyUrl = "http://127.0.0.1:3129";
  // Both spellings are set; order is part of the emitted string.
  const proxyVarNames = ["HTTPS_PROXY", "HTTP_PROXY", "https_proxy", "http_proxy"];
  const proxyAssignments = proxyVarNames.map((varName) => `${varName}=${proxyUrl}`);
  return ["HERMES_HOME=/sandbox/.hermes", ...proxyAssignments].join(" ");
}

/**
 * Shell snippet that makes sure the decode proxy is up before a Hermes
 * gateway launch.
 *
 * The snippet starts `nemoclaw-decode-proxy` in the background when either
 * `ss` is unavailable or `ss -tln` shows nothing matching `127.0.0.1:3129`,
 * then polls up to ten times (0.5 s apart) for the listener to appear.
 *
 * @returns A single-line shell fragment ending in `fi;`.
 */
function hermesDecodeProxyRecoveryCommand(): string {
  const ssUnavailable = "! command -v ss >/dev/null 2>&1";
  const proxyListening = 'ss -tln 2>/dev/null | grep -q "127.0.0.1:3129"';
  const startProxy = "nohup python3 /usr/local/bin/nemoclaw-decode-proxy >/dev/null 2>&1 &";
  // Shell AND-OR lists are left-associative with equal precedence, so the loop
  // body reads as `(ss missing || listening) && break`: without `ss` the wait
  // ends on the first pass instead of sleeping through all ten iterations.
  const waitForListener = `for _i in 1 2 3 4 5 6 7 8 9 10; do ${ssUnavailable} || ${proxyListening} && break; sleep 0.5; done;`;
  return `if ${ssUnavailable} || ! ${proxyListening}; then ${startProxy} ${waitForListener} fi;`;
}

/**
* Build the OpenClaw recovery shell script used by the default sandbox.
*/
Expand Down Expand Up @@ -193,11 +208,14 @@ export function buildRecoveryScript(agent: AgentDefinition | null, port: number)
// survive past the gateway launch — otherwise the warning explaining
// *why* the gateway is about to crash gets wiped by the same launch
// that's about to crash on a missing guard. (#2478)
const launchCommand = usesValidatedBinary
? gatewayLaunchCommand(`"$AGENT_BIN" gateway run --port ${port}`)
: gatewayLaunchCommand(`${configuredGatewayCommand} --port ${port}`);
const isHermes = agent.name === "hermes";
const hermesHome = isHermes ? "export HERMES_HOME=/sandbox/.hermes; " : "";
const hermesLaunchEnv = isHermes ? `env ${hermesGatewayEnvPrefix()} ` : "";
const launchCommand = usesValidatedBinary
? gatewayLaunchCommand(`${hermesLaunchEnv}"$AGENT_BIN" gateway run${isHermes ? "" : ` --port ${port}`}`)
: gatewayLaunchCommand(
`${hermesLaunchEnv}${configuredGatewayCommand}${isHermes ? "" : ` --port ${port}`}`,
);

// Source /tmp/nemoclaw-proxy-env.sh immediately before launching. That file
// is the single source of truth for NODE_OPTIONS preload guards (safety-net,
Expand All @@ -219,6 +237,7 @@ export function buildRecoveryScript(agent: AgentDefinition | null, port: number)
'if [ "$_PE_MISSING" = "0" ]; then case "${NODE_OPTIONS:-}" in *nemoclaw-sandbox-safety-net*) _SN_MISSING=0 ;; *) _SN_MISSING=1 ;; esac; case "${NODE_OPTIONS:-}" in *nemoclaw-ciao-network-guard*) _CIAO_MISSING=0 ;; *) _CIAO_MISSING=1 ;; esac; if [ "$_SN_MISSING" = "0" ] && [ "$_CIAO_MISSING" = "0" ]; then _GUARDS_MISSING=0; else _GUARDS_MISSING=1; fi; else _GUARDS_MISSING=0; fi;',
'[ "$_PE_MISSING" = "1" ] && { _W="[gateway-recovery] WARNING: /tmp/nemoclaw-proxy-env.sh missing - gateway launching without library guards (#2478)"; echo "$_W" >&2; echo "$_W" >> "$_GATEWAY_LOG"; };',
'[ "$_PE_MISSING" = "0" ] && [ "$_GUARDS_MISSING" = "1" ] && { _E="[gateway-recovery] ERROR: /tmp/nemoclaw-proxy-env.sh present but NODE_OPTIONS missing safety-net preload or ciao preload - refusing unguarded gateway relaunch (#2478)"; echo "$_E" >&2; echo "$_E" >> "$_GATEWAY_LOG"; exit 1; };',
isHermes ? hermesDecodeProxyRecoveryCommand() : "",
launchCommand,
"GPID=$!; sleep 2;",
'if kill -0 "$GPID" 2>/dev/null; then echo "GATEWAY_PID=$GPID"; else echo GATEWAY_FAILED; tail -5 "$_GATEWAY_LOG" 2>/dev/null; fi',
Expand All @@ -238,3 +257,19 @@ export function getAgentDisplayName(agent: AgentDefinition | null): string {
/**
 * Resolve the raw gateway launch command for an agent.
 *
 * @param agent - Agent definition, or `null` when no agent is associated.
 * @returns The agent's `gateway_command` with surrounding whitespace removed,
 *   or `"openclaw gateway run"` when the agent is `null` or its command is
 *   missing, empty, or whitespace-only.
 */
export function getGatewayCommand(agent: AgentDefinition | null): string {
  // Trim first so a blank (whitespace-only) command falls through to the
  // default instead of being returned as an unusable empty-looking command;
  // this matches how buildManualRecoveryCommand treats gateway_command.
  // `||` (not `??`) is deliberate: the empty string must also fall back.
  return agent?.gateway_command?.trim() || "openclaw gateway run";
}

/**
 * Produce the one-line command a user can paste inside the sandbox when the
 * automatic gateway restart did not work.
 *
 * The command selects a gateway log, then starts the gateway under `nohup` in
 * the background with output appended to `$_GATEWAY_LOG`. For the agent named
 * "hermes" it additionally runs the decode-proxy setup snippet, prefixes the
 * Hermes environment assignments, and leaves `--port` off; every other agent
 * gets `--port <port>` appended.
 *
 * @param agent - Agent definition, or `null` to use the OpenClaw defaults.
 * @param port - Port passed via `--port` for non-Hermes agents.
 * @returns The assembled shell command line.
 */
export function buildManualRecoveryCommand(agent: AgentDefinition | null, port: number): string {
  const hermes = agent?.name === "hermes";

  // Fallback launch command is derived from the (shell-quoted) binary path and
  // is used only when gateway_command is missing or blank.
  const quotedBinary = shellQuote(agent?.binary_path || "/usr/local/bin/openclaw");
  const launch = agent?.gateway_command?.trim() || `${quotedBinary} gateway run`;

  const pieces: string[] = [buildGatewayLogSelection(), " "];
  if (hermes) {
    pieces.push(hermesDecodeProxyRecoveryCommand(), " ", hermesGatewayEnvPrefix(), " ");
  }
  pieces.push("nohup ", launch);
  if (!hermes) {
    pieces.push(` --port ${port}`);
  }
  pieces.push(' >> "$_GATEWAY_LOG" 2>&1 &');
  return pieces.join("");
}
7 changes: 6 additions & 1 deletion src/lib/sandbox-process-recovery-action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ export function checkAndRecoverSandboxProcesses(
return { checked: false, wasRunning: null, recovered: false, forwardRecovered: false };
}
const recoveryAgent = agentRuntime.getSessionAgent(sandboxName);
const recoveryPort = recoveryAgent?.forwardPort ?? DASHBOARD_PORT;
if (running) {
// Gateway is alive but the host-side forward can still be dead or
// owned by another sandbox. Probe and re-establish only when
Expand Down Expand Up @@ -340,6 +341,10 @@ export function checkAndRecoverSandboxProcesses(
if (!quiet) {
console.error(" Gateway process started but is not responding.");
console.error(" Check /tmp/gateway.log inside the sandbox for details.");
console.error(" Connect to the sandbox and run manually:");
console.error(
` ${agentRuntime.buildManualRecoveryCommand(recoveryAgent, recoveryPort)}`,
);
}
return { checked: true, wasRunning: false, recovered: false, forwardRecovered: false };
}
Expand All @@ -364,7 +369,7 @@ export function checkAndRecoverSandboxProcesses(
` Could not restart ${agentRuntime.getAgentDisplayName(recoveryAgent)} gateway automatically.`,
);
console.error(" Connect to the sandbox and run manually:");
console.error(` ${agentRuntime.getGatewayCommand(recoveryAgent)}`);
console.error(` ${agentRuntime.buildManualRecoveryCommand(recoveryAgent, recoveryPort)}`);
}

return { checked: true, wasRunning: false, recovered, forwardRecovered: false };
Expand Down
Loading