browserbase · ziruihao · May 22, 2026
diff --git a/skills/autobrowse/SKILL.md b/skills/autobrowse/SKILL.md
@@ -296,23 +296,35 @@ How refs are resolved: every `[X-Y]` ref in the trace is looked up against the m
 
 The final extract step is generated with one Claude Haiku call at export time (requires `ANTHROPIC_API_KEY`). The LLM is given the final snapshot, the Zod schema parsed from `task.md`'s `## Output` block, and the agent's final reasoning. If the API key is missing the export still produces a script — the extract block is a TODO placeholder.
 
-For a Stagehand-targeted export (LLM-driven replay via `stagehand.act`/`observe`), use the standalone `/stagehand-export` skill.
+For a Stagehand-targeted export (self-healing replay via `stagehand.page.act` / `stagehand.page.extract`), pass `--target stagehand`:
 
-## Iterative Playwright loop (recommended for tasks that need a deterministic artifact)
+```bash
+node ${CLAUDE_SKILL_DIR}/scripts/export.mjs --task <task-name> --target stagehand
+```
+
+With the Stagehand target, every interaction op (clicks, fills, selects) collapses into a `page.act("…")` call. Deterministic ops (goto, waits, keyboard, scroll, eval, page nav) stay as raw `page.*` calls — there's no element to find, so no LLM call is needed. The final extract step uses `page.extract({ instruction, schema })` with a one-sentence instruction generated at export time (Haiku, ~$0.001), or a generic fallback if `ANTHROPIC_API_KEY` is missing.
 
-When the end goal is a runnable Playwright script (cron, Browserbase Functions, etc.), prefer `loop.mjs` over manually orchestrating evaluate + export. The loop converges on a workflow that **both** the LLM explorer **and** the deterministic Playwright replay can complete — which is a strictly stronger guarantee than "the LLM agent's trace ends with success: true."
+The Stagehand script reads `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` to run against Browserbase (and `BROWSERBASE_CONTEXT_ID` for pre-authed sessions); when those are absent it falls back to `env: "LOCAL"`. Model selection is controlled by the `STAGEHAND_MODEL` env var (defaults to a current Claude Sonnet).
+
+## Iterative loop (recommended for tasks that need a deterministic artifact)
+
+When the end goal is a runnable script (cron, Browserbase Functions, etc.), prefer `loop.mjs` over manually orchestrating evaluate + export. The loop converges on a workflow that **both** the LLM explorer **and** the exported script's replay can complete — which is a strictly stronger guarantee than "the LLM agent's trace ends with success: true."
 
 ```bash
+# Playwright (default)
 node ${CLAUDE_SKILL_DIR}/scripts/loop.mjs --task <task-name> --env remote \
   --max-iterations 8 --max-turns-per-iter 60
+
+# Stagehand
+node ${CLAUDE_SKILL_DIR}/scripts/loop.mjs --task <task-name> --target stagehand --env remote
 ```
 
 What it does per iteration:
 
 1. Runs `evaluate.mjs` (one LLM-driven exploration round).
-2. If the trace passed (`success: true` in the final JSON), runs `export.mjs --target playwright --no-verify` to emit a fresh script.
+2. If the trace passed (`success: true` in the final JSON), runs `export.mjs --target <playwright|stagehand> --no-verify` to emit a fresh script.
 3. Runs the emitted script (`npx tsx <task>.ts`) against a new BB session — the actual deterministic replay.
-4. If the Playwright replay passed → records a pass. If it failed → distills the failure (Claude Haiku, ~$0.01) into a new entry under `strategy.md`'s "Recent Playwright Failures" section.
+4. If the replay passed → records a pass. If it failed → distills the failure (Claude Haiku, ~$0.01) into a new entry under `strategy.md`'s "Recent Playwright Failures" or "Recent Stagehand Failures" section, whichever matches the `--target` in use.
 5. Next iteration's evaluate reads the updated strategy.md and adapts.
 
 **Convergence**: graduates when the emitted script passes in 2 of the last 3 iterations.

diff --git a/skills/autobrowse/scripts/export.mjs b/skills/autobrowse/scripts/export.mjs
@@ -4,9 +4,10 @@
  * export.mjs — Translate a graduated autobrowse task into a deterministic
  * runnable script.
  *
- * Currently supports --target playwright. The Stagehand variant lives in
- * the standalone /stagehand-export skill; once Playwright is shipped and
- * proven we can fold both targets behind this CLI.
+ * Supports --target playwright (default) and --target stagehand. The
+ * Playwright target resolves every ARIA ref to a locator at export time;
+ * the Stagehand target collapses every interaction op into `page.act(...)`
+ * and lets Stagehand self-heal at replay time.
  *
  * Usage:
  *   node scripts/export.mjs --task <name> --target playwright \\
@@ -26,8 +27,15 @@ import {
   playwrightPackageJson,
   playwrightTsconfig,
 } from "./lib/codegen-playwright.mjs";
+import {
+  generateStagehandScript,
+  stagehandPackageJson,
+  stagehandTsconfig,
+} from "./lib/codegen-stagehand.mjs";
 import { verifyGenerated } from "./lib/verify.mjs";
 
+const SUPPORTED_TARGETS = new Set(["playwright", "stagehand"]);
+
 // ── CLI args ───────────────────────────────────────────────────────
 
 function getArg(name, fallback) {
@@ -43,7 +51,7 @@ Usage: node scripts/export.mjs --task <name> [options]
 
 Options:
   --task <name>          Task name — matches tasks/<name>/ (required)
-  --target <kind>        playwright (default; stagehand lives in /stagehand-export)
+  --target <kind>        playwright (default) | stagehand
   --workspace <dir>      Workspace root holding tasks/ and traces/ (default: ./autobrowse)
   --run <id>             Force a specific run (default: newest passing)
   --output <dir>         Output directory for generated files (default: <workspace>/tasks/<name>/<target>)
@@ -69,8 +77,8 @@ if (!TASK) {
   console.error("Run with --help for usage.");
   process.exit(1);
 }
-if (TARGET !== "playwright") {
-  console.error(`ERROR: --target=${TARGET} not yet supported here. Use the /stagehand-export skill for Stagehand output.`);
+if (!SUPPORTED_TARGETS.has(TARGET)) {
+  console.error(`ERROR: --target=${TARGET} not supported. Use one of: ${[...SUPPORTED_TARGETS].join(", ")}.`);
   process.exit(1);
 }
 
@@ -130,9 +138,10 @@ for (let i = trace.length - 1; i >= 0; i--) {
   }
 }
 
-// ── Generate Playwright script ─────────────────────────────────────
+// ── Generate script ────────────────────────────────────────────────
 
-const { scriptCode, cachedActions, stats, extract } = await generatePlaywrightScript({
+const generate = TARGET === "stagehand" ? generateStagehandScript : generatePlaywrightScript;
+const { scriptCode, cachedActions, stats, extract } = await generate({
   task: TASK,
   runId,
   workspace: WORKSPACE,
@@ -168,15 +177,21 @@ fs.writeFileSync(
     2,
   ),
 );
+const pkgGen = TARGET === "stagehand" ? stagehandPackageJson : playwrightPackageJson;
+const tsconfigGen = TARGET === "stagehand" ? stagehandTsconfig : playwrightTsconfig;
 if (!fs.existsSync(pkgPath)) {
-  fs.writeFileSync(pkgPath, JSON.stringify(playwrightPackageJson(TASK), null, 2));
+  fs.writeFileSync(pkgPath, JSON.stringify(pkgGen(TASK), null, 2));
 }
 if (!fs.existsSync(tsconfigPath)) {
-  fs.writeFileSync(tsconfigPath, JSON.stringify(playwrightTsconfig(), null, 2));
+  fs.writeFileSync(tsconfigPath, JSON.stringify(tsconfigGen(), null, 2));
 }
 
 console.error(`[export] wrote ${path.relative(process.cwd(), scriptPath)}`);
-console.error(`[export] ops: ${ops.length} | cached: ${stats.cached} | ref_resolved: ${stats.ref_resolved} | ref_failed: ${stats.ref_failed} | dropped: ${stats.dropped}`);
+if (TARGET === "stagehand") {
+  console.error(`[export] ops: ${ops.length} | deterministic: ${stats.deterministic} | act: ${stats.act} | ref_resolved: ${stats.ref_resolved} | ref_failed: ${stats.ref_failed} | dropped: ${stats.dropped}`);
+} else {
+  console.error(`[export] ops: ${ops.length} | cached: ${stats.cached} | ref_resolved: ${stats.ref_resolved} | ref_failed: ${stats.ref_failed} | dropped: ${stats.dropped}`);
+}
 console.error(`[export] schema fields: ${schemaFieldCount} | extract: ${extract.generated ? "LLM-generated" : `fallback (${extract.reason})`}`);
 
 // ── Verify ─────────────────────────────────────────────────────────