From 2906b16c8a28d87d3b2207c779af26b5d343fa9c Mon Sep 17 00:00:00 2001 From: skyzer Date: Sat, 16 May 2026 00:55:35 +0200 Subject: [PATCH] browser-trace: fix bodies-snapshot dir + add helper, browser-to-api: docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following the documented two-step workflow for browser-to-api ends with: cp -r "$(browse network path | jq -r .path)" .o11y/<run-id>/cdp/network/bodies/ That command fails on macOS (BSD cp) before bisect-cdp.mjs has been run, because the parent dir `cdp/network/` does not exist yet and BSD cp won't create a parent for a trailing-slash destination. Repro: follow the SKILL.md quickstart on macOS — the cp step exits with "No such file or directory" and discover.mjs then runs without bodies. Fix is two-part: 1. start-capture.mjs now creates `cdp/network/` up front (one extra ensureDir). bisect-cdp.mjs still creates it later via writeJsonl — this is just earlier so the snapshot step doesn't depend on bisect having run first. 2. New helper `browser-trace/scripts/snapshot-bodies.mjs` encodes the safe snapshot path: resolves `browse network path`, copies via fs.cpSync (no BSD/GNU cp variance), and optionally runs `browse network off` (`--no-off` to skip). Replaces the brittle cp one-liner in both browser-to-api/SKILL.md and REFERENCE.md. The manual cp pattern is kept in REFERENCE.md as commented context for people who want to do it by hand, with the mkdir -p step it actually needs. Verified end-to-end against `browse-cli@0.6.0`: start-capture → browse network on → drive → snapshot-bodies → stop-capture → bisect-cdp → discover.mjs produces an OpenAPI doc with response-body schemas. 
--- skills/browser-to-api/REFERENCE.md | 14 ++- skills/browser-to-api/SKILL.md | 9 +- .../browser-trace/scripts/snapshot-bodies.mjs | 90 +++++++++++++++++++ .../browser-trace/scripts/start-capture.mjs | 5 ++ 4 files changed, 110 insertions(+), 8 deletions(-) create mode 100755 skills/browser-trace/scripts/snapshot-bodies.mjs diff --git a/skills/browser-to-api/REFERENCE.md b/skills/browser-to-api/REFERENCE.md index 77928a86..ab352c68 100644 --- a/skills/browser-to-api/REFERENCE.md +++ b/skills/browser-to-api/REFERENCE.md @@ -98,9 +98,17 @@ Workflow: # during capture, alongside browser-trace browse network on # ...drive... -# IMPORTANT: snapshot the dir before it gets reused -cp -r "$(browse network path | jq -r .path)" .o11y/<run-id>/cdp/network/bodies/ -browse network off +# IMPORTANT: snapshot the dir before another `browse network on` overwrites it. +# The helper handles the mkdir + cp + `browse network off` sequence. +node ../browser-trace/scripts/snapshot-bodies.mjs <run-id> + +# Equivalent manual form if you want to do it by hand. The mkdir is required +# because `cp -r src dest/` fails on macOS BSD cp when dest's parent doesn't +# exist yet. start-capture.mjs already creates `cdp/network/`, but older runs +# may not have it — the mkdir is cheap insurance. +# mkdir -p .o11y/<run-id>/cdp/network +# cp -R "$(browse network path | jq -r .path)" .o11y/<run-id>/cdp/network/bodies +# browse network off ``` Internals (matched in `lib/io.mjs` + `load.mjs`): diff --git a/skills/browser-to-api/SKILL.md b/skills/browser-to-api/SKILL.md index f1b10f15..569e00e6 100644 --- a/skills/browser-to-api/SKILL.md +++ b/skills/browser-to-api/SKILL.md @@ -41,11 +41,10 @@ browse network on # capture request/response browse open https://example.com # ...drive whatever flows you want covered... -# Snapshot the bodies dir BEFORE turning capture off (the temp dir is shared -# per-session, so subsequent `browse network on` runs would mix your bodies -# with whatever a future capture writes if you skip this step). 
-cp -r "$(browse network path | jq -r .path)" .o11y/my-site/cdp/network/bodies/ -browse network off +# Snapshot the bodies dir BEFORE another `browse network on` overwrites it +# (the temp dir is shared per-session). The helper creates +# .o11y/my-site/cdp/network/bodies/ and runs `browse network off` for you. +node ../browser-trace/scripts/snapshot-bodies.mjs my-site node ../browser-trace/scripts/stop-capture.mjs my-site node ../browser-trace/scripts/bisect-cdp.mjs my-site diff --git a/skills/browser-trace/scripts/snapshot-bodies.mjs b/skills/browser-trace/scripts/snapshot-bodies.mjs new file mode 100755 index 00000000..1fb26dbf --- /dev/null +++ b/skills/browser-trace/scripts/snapshot-bodies.mjs @@ -0,0 +1,90 @@ +#!/usr/bin/env node +// Snapshot the `browse network` bodies dir into a run before the next capture +// overwrites it. +// +// Usage: +// node scripts/snapshot-bodies.mjs <run-id> [--no-off] [--bodies <dir>] +// +// What it does: +// 1. Resolves the `browse network` capture dir (live, via `browse network path`, +// or an explicit `--bodies <dir>` override). +// 2. Copies its contents into `<run-dir>/cdp/network/bodies/`. +// 3. Calls `browse network off` so a future `browse network on` starts clean +// (skip with `--no-off`). +// +// The manual `cp -r "$(browse network path | jq -r .path)" <run-dir>/cdp/network/bodies` +// pattern is fragile across BSD vs GNU `cp` (trailing-slash semantics differ and +// the parent dir must already exist). This script encodes the safe path so the +// docs can refer to one command. 
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+
+import { runDir, ensureDir } from './lib.mjs';
+
+const USAGE = 'usage: snapshot-bodies.mjs <run-id> [--no-off] [--bodies <dir>]';
+// Print each message to stderr, then exit (2 = bad usage, 1 = runtime failure).
+const die = (code, ...msgs) => { msgs.forEach((m) => console.error(m)); process.exit(code); };
+
+// args[0] is the run id; every later argument must be a known flag.
+const args = process.argv.slice(2);
+if (args.length === 0 || args[0].startsWith('--')) die(2, USAGE);
+const runId = args[0];
+let bodiesOverride = null;
+let runOff = true;
+for (let i = 1; i < args.length; i++) {
+  if (args[i] === '--no-off') { runOff = false; continue; }
+  if (args[i] === '--bodies') {
+    // Require a value: a bare `--bodies` must not silently fall back to the live dir.
+    bodiesOverride = args[++i];
+    if (!bodiesOverride) die(2, '--bodies requires a directory', USAGE);
+    continue;
+  }
+  die(2, `unknown arg: ${args[i]}`);
+}
+
+const RD = runDir(runId);
+if (!fs.existsSync(RD)) die(1, `run dir not found: ${RD}`);
+
+let src = bodiesOverride;
+if (!src) {
+  // Ask for JSON first; if that invocation fails (e.g. a `browse` that rejects
+  // `--json`), retry without the flag before giving up.
+  let out = spawnSync('browse', ['network', 'path', '--json'], { encoding: 'utf8' });
+  if (out.status !== 0) out = spawnSync('browse', ['network', 'path'], { encoding: 'utf8' });
+  if (out.status !== 0) {
+    // `out.error` is populated when `browse` could not be spawned at all.
+    die(1, 'failed to resolve `browse network path`:', out.error?.message ?? (out.stderr || out.stdout));
+  }
+  // Accept `{"path": ...}` JSON, or a bare path when stdout is not JSON.
+  try { src = JSON.parse(out.stdout).path; } catch { src = (out.stdout || '').trim(); }
+}
+if (!src || !fs.existsSync(src)) {
+  const hint = 'Did you run `browse network on` before capturing?';
+  die(1, `bodies source dir not found: ${src ?? '(unresolved)'}`, hint);
+}
+
+const dest = path.join(RD, 'cdp', 'network', 'bodies');
+ensureDir(path.dirname(dest));
+// fs.cpSync avoids the BSD-vs-GNU cp portability footgun: cp's trailing-slash
+// and missing-parent semantics differ across macOS and Linux. Node's recursive
+// copy is the same everywhere.
+fs.cpSync(src, dest, { recursive: true });
+const fileCount = fs.readdirSync(dest).length; // top-level entries in dest, not recursive
+
+if (runOff) {
+  const off = spawnSync('browse', ['network', 'off'], { encoding: 'utf8' });
+  if (off.status !== 0) {
+    // Best-effort: warn and keep going. `off.error` is set when `browse` could not be spawned.
+    console.error('warning: `browse network off` failed (continuing):');
+    console.error(off.error?.message ?? (off.stderr || off.stdout));
+  }
+}
+console.log(JSON.stringify({
+  run_id: runId,
+  bodies_src: src,
+  bodies_dest: dest,
+  files: fileCount,
+  ran_off: runOff,
+}));
diff --git a/skills/browser-trace/scripts/start-capture.mjs b/skills/browser-trace/scripts/start-capture.mjs index ab101854..48901c6e 100755 --- a/skills/browser-trace/scripts/start-capture.mjs +++ b/skills/browser-trace/scripts/start-capture.mjs @@ -32,6 +32,11 @@ const domainArgs = domainsList.flatMap(d => ['--domain', d]); const RD = runDir(runId); ensureDir(path.join(RD, 'cdp')); +// `cdp/network/` is also created by bisect-cdp.mjs, but users typically snapshot +// `browse network` bodies into `cdp/network/bodies/` BEFORE bisect runs. Create +// the parent dir up front so `cp -r ... cdp/network/bodies` works without an +// extra `mkdir -p` step in the docs. +ensureDir(path.join(RD, 'cdp', 'network')); ensureDir(path.join(RD, 'screenshots')); ensureDir(path.join(RD, 'dom'));