From 6ebaae95c3e14d6e1998cb8b9af4892bff4f5121 Mon Sep 17 00:00:00 2001 From: Nader Helmy Date: Fri, 29 May 2026 17:35:51 -0500 Subject: [PATCH 1/2] docs: add stock phrase guardrail --- CHANGELOG.md | 30 +++++++++---------- CLAUDE.md | 2 +- SKILL.md | 2 +- autoplan/SKILL.md | 2 +- benchmark-models/SKILL.md | 2 +- benchmark/SKILL.md | 2 +- bin/gstack-gbrain-lib.sh | 4 +-- browse/SKILL.md | 2 +- browse/src/browser-skill-commands.ts | 4 +-- browse/src/browser-skill-write.ts | 2 +- browse/src/domain-skills.ts | 2 +- browse/test/cli-supervisor.test.ts | 2 +- browse/test/domain-skills-e2e.test.ts | 2 +- browse/test/server-pty-lease-routes.test.ts | 2 +- browse/test/sidebar-tabs.test.ts | 2 +- browse/test/sidepanel-reattach.test.ts | 2 +- .../terminal-agent-detach-reattach.test.ts | 2 +- .../test/terminal-agent-integration.test.ts | 2 +- browse/test/terminal-agent-watchdog.test.ts | 2 +- canary/SKILL.md | 2 +- codex/SKILL.md | 2 +- context-restore/SKILL.md | 2 +- context-save/SKILL.md | 2 +- cso/SKILL.md | 2 +- design-consultation/SKILL.md | 2 +- design-html/SKILL.md | 2 +- design-review/SKILL.md | 2 +- design-shotgun/SKILL.md | 2 +- design/src/daemon.ts | 2 +- devex-review/SKILL.md | 2 +- docs/designs/PACING_UPDATES_V0.md | 4 +-- docs/designs/SIDEBAR_MESSAGE_FLOW.md | 2 +- docs/designs/SLATE_HOST.md | 2 +- docs/designs/v2_PLAN.md | 4 +-- docs/explanation-diataxis-in-gstack.md | 2 +- docs/skills.md | 4 +-- document-generate/SKILL.md | 2 +- document-release/SKILL.md | 2 +- extension/sidepanel.js | 2 +- health/SKILL.md | 2 +- investigate/SKILL.md | 2 +- ios-clean/SKILL.md | 2 +- ios-design-review/SKILL.md | 2 +- ios-fix/SKILL.md | 2 +- ios-qa/SKILL.md | 2 +- ios-sync/SKILL.md | 2 +- land-and-deploy/SKILL.md | 2 +- landing-report/SKILL.md | 2 +- learn/SKILL.md | 2 +- make-pdf/SKILL.md | 2 +- office-hours/SKILL.md | 2 +- open-gstack-browser/SKILL.md | 2 +- pair-agent/SKILL.md | 2 +- plan-ceo-review/SKILL.md | 4 +-- plan-ceo-review/SKILL.md.tmpl | 2 +- 
plan-design-review/SKILL.md | 2 +- plan-devex-review/SKILL.md | 2 +- plan-eng-review/SKILL.md | 2 +- plan-tune/SKILL.md | 2 +- qa-only/SKILL.md | 2 +- qa/SKILL.md | 2 +- retro/SKILL.md | 2 +- review/SKILL.md | 2 +- scrape/SKILL.md | 2 +- .../preamble/generate-voice-directive.ts | 4 +-- setup-browser-cookies/SKILL.md | 2 +- setup-deploy/SKILL.md | 2 +- setup-gbrain/SKILL.md | 2 +- ship/SKILL.md | 2 +- skillify/SKILL.md | 2 +- spec/SKILL.md | 4 +-- sync-gbrain/SKILL.md | 2 +- test/cso-preserved.test.ts | 6 ++-- .../StateServerSmokeTests.swift | 2 +- test/gen-skill-docs.test.ts | 4 +-- test/helpers/parity-harness.ts | 2 +- test/helpers/providers/gpt.ts | 2 +- test/preamble-compose.test.ts | 2 +- test/skill-e2e-auto-decide-preserved.test.ts | 2 +- test/skill-e2e-plan-ceo-finding-count.test.ts | 2 +- test/skill-e2e-ship-idempotency.test.ts | 2 +- test/v0-dormancy.test.ts | 2 +- 82 files changed, 107 insertions(+), 107 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dbc82f998..2845098f41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -147,7 +147,7 @@ If you run gstack in CI, the new `EVALS_BUDGET_HARD_CAP=$30` cap (per-suite: gat **Added** - `scripts/capture-baseline.ts` + `test/helpers/capture-parity-baseline.ts` — captures per-skill SKILL.md sizes, token estimates, frontmatter description lengths, and eval coverage flags. Writes JSON snapshots used by the parity and size-budget gates. Locks `test/fixtures/parity-baseline-v1.44.1.json` as the v1→v2 reference. -- `test/helpers/parity-harness.ts` + `test/parity-suite.test.ts` — cathedral parity-eval suite floor. `PARITY_INVARIANTS` registry pins must-preserve phrases per skill family (cso: OWASP/STRIDE; plan-ceo: SCOPE EXPANSION / HOLD SCOPE; ship: VERSION/CHANGELOG/PR) so future compression can't silently strip load-bearing prose. +- `test/helpers/parity-harness.ts` + `test/parity-suite.test.ts` — cathedral parity-eval suite floor. 
`PARITY_INVARIANTS` registry pins must-preserve phrases per skill family (cso: OWASP/STRIDE; plan-ceo: SCOPE EXPANSION / HOLD SCOPE; ship: VERSION/CHANGELOG/PR) so future compression can't silently strip decision-critical prose. - `test/skill-coverage-matrix.ts` + `test/skill-coverage-matrix.test.ts` — single source of truth mapping each skill to gate + periodic tests; CI gate asserts every skill has at least one gate-tier entry. 51 skills, 51 entries. - `test/skill-coverage-floor.test.ts` — per-skill structural-compliance smoke test (file-IO, free). Verifies frontmatter shape, generated header, body non-trivial, no leaked `{{TEMPLATE}}` placeholders, catalog-trim contract on description. 309 assertions across 51 skills. - `test/skill-size-budget.test.ts` — per-skill SKILL.md byte budget (×1.05 default ratio), total corpus budget, catalog token budget (≤7000 for v1.46). Caught regressions get a per-skill breakdown + override path. @@ -218,7 +218,7 @@ Open `/design-shotgun` Monday morning, work through three rounds of variants, wa #### Changed - **Board JS uses relative URLs** instead of an injected `__GSTACK_SERVER_URL` global. The same generated HTML works at `/` (legacy `--no-daemon`) and `/boards//` (daemon). `location.protocol` feature-detect keeps the `file://` DOM-only fallback path working. -- **Bare `GET /boards/` returns 301** to `/boards//`. The trailing slash is load-bearing for relative-URL resolution in the board JS; without it, `fetch('./api/feedback')` would resolve to the wrong scope. +- **Bare `GET /boards/` returns 301** to `/boards//`. The trailing slash is decision-critical for relative-URL resolution in the board JS; without it, `fetch('./api/feedback')` would resolve to the wrong scope. - **Reload guard rejects directory paths**. `design/src/serve.ts:200-212` previously let `resolvedReload === allowedDir` through, which then crashed `readFileSync` with `EISDIR`. Now requires `statSync(resolvedReload).isFile()` with a clear 400 instead. 
- **Feedback files carry `boardId` and `publishedAt`** so agents polling `feedback.json` / `feedback-pending.json` in a multi-board world can verify which board produced what. - **`sourceDir` is derived from `realpath(html)` server-side**, never trusted from the publish POST body. @@ -331,7 +331,7 @@ Open the sidebar once. Use it. Close your laptop. Wake up tomorrow. Type a key. #### For contributors - **Test framework `bunfig.toml` + `test-setup.ts`** — Global afterEach restores `process.env.PATH` only. Narrow on purpose — broader snapshot/restore breaks tests that legitimately set `process.env.GSTACK_HOME` at module load (`domain-skills-storage.test.ts`). -- **12 new test files, 83 new unit-tier tests.** Static-grep tripwires defend the load-bearing protocol contracts (close codes, lease lifecycle, watchdog identity check, supervisor crash-loop guard, ring buffer ESC boundaries) without paying for live WebSocket cycles in CI. +- **12 new test files, 83 new unit-tier tests.** Static-grep tripwires defend the decision-critical protocol contracts (close codes, lease lifecycle, watchdog identity check, supervisor crash-loop guard, ring buffer ESC boundaries) without paying for live WebSocket cycles in CI. - **Eng review + outside voice (codex) ran on this branch.** 17 decisions baked: 10 from the in-review architecture pass (D1-D10), 6 from codex cross-model tension resolution (T1-T6, all adopted in codex's favor — most consequential was T1, separating sessionId from auth token), and 1 from in-PR scope-up of the outer supervisor. 
## [1.43.3.0] - 2026-05-21 @@ -535,7 +535,7 @@ If you have `VOYAGE_API_KEY` set and run `/setup-gbrain` on a fresh machine, `gb ## **iOS QA on a real iPhone — no XCTest, no WebDriverAgent, no simulators.** ## **Verified end-to-end on a real iPhone 17 Pro Max running iOS 26.5; any agent that speaks HTTP can run full QA against a real iOS app, locally over USB or remotely over Tailscale.** -Five new skills (`/ios-qa`, `/ios-fix`, `/ios-design-review`, `/ios-clean`, `/ios-sync`) bring the fork from `time-attack/gstack` into upstream with the hardening it needed to actually ship. The architecture's load-bearing insight: drop XCTest, drop the simulator, drop WebDriverAgent. Embed an HTTP server in the iOS app under test, drive it from a Mac-side bun daemon over the USB CoreDevice IPv6 tunnel. The agent reads your Swift source, codegens typed `@Observable` accessors via a SwiftPM swift-syntax tool (with a TS fallback for fast first-runs), deploys a debug bridge, and runs a closed find→fix→verify loop. With the optional `--tailnet` flag, the Mac daemon also binds Tailscale and accepts authenticated remote calls — your Mac plus an iPhone you already own becomes the iOS QA surface for any agent on your tailnet. +Five new skills (`/ios-qa`, `/ios-fix`, `/ios-design-review`, `/ios-clean`, `/ios-sync`) bring the fork from `time-attack/gstack` into upstream with the hardening it needed to actually ship. The architecture's decision-critical insight: drop XCTest, drop the simulator, drop WebDriverAgent. Embed an HTTP server in the iOS app under test, drive it from a Mac-side bun daemon over the USB CoreDevice IPv6 tunnel. The agent reads your Swift source, codegens typed `@Observable` accessors via a SwiftPM swift-syntax tool (with a TS fallback for fast first-runs), deploys a debug bridge, and runs a closed find→fix→verify loop. 
With the optional `--tailnet` flag, the Mac daemon also binds Tailscale and accepts authenticated remote calls — your Mac plus an iPhone you already own becomes the iOS QA surface for any agent on your tailnet. Two Mac-side CLIs ship alongside the skills: `gstack-ios-qa-daemon` brokers traffic between the agent and the connected iPhone, and `gstack-ios-qa-mint` is the owner-grant tool for the tailnet allowlist (grant / revoke / list). The full end-to-end walkthrough lives at [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). @@ -971,7 +971,7 @@ When the model finishes a plan-* review and is about to exit plan mode, it reads #### For contributors -- The implementation sequence is load-bearing: resolver → index → templates → preamble → `bun run gen:skill-docs` → tests. Adding the test before regeneration fails on missing gate; regenerating before the resolver edits produces no-op output. Bisectable commits should respect this order. +- The implementation sequence is decision-critical: resolver → index → templates → preamble → `bun run gen:skill-docs` → tests. Adding the test before regeneration fails on missing gate; regenerating before the resolver edits produces no-op output. Bisectable commits should respect this order. - The codex gate is intentionally NOT terminal in `codex/SKILL.md`. Codex has three modes (review/challenge/consult) and only review mode writes to plan files. The gate's check-2 ("last heading is GSTACK REVIEW REPORT") short-circuits cleanly when no plan file is in context, so non-plan codex invocations are unaffected. ## [1.39.0.0] - 2026-05-14 @@ -1545,7 +1545,7 @@ If you've been hitting the 35-minute hang on `/sync-gbrain`, it's gone. The arch Seven community PRs land together, hand-picked through `/plan-eng-review` plus a Codex outside-voice review that reshaped the wave mid-flight. 
The headline fixes are real: the root-token authentication path no longer throws on a multibyte input that matches JS character length but not UTF-8 byte length, direct `http://[fe80::N]/` URLs are now rejected the same way ULA addresses already were, `gbrain put` strips NUL bytes from pasted transcript content so Postgres doesn't reject the write, and the build script doesn't tear down when run on a fresh worktree with no git HEAD yet. -Two PRs in the original 9-PR plan got moved to follow-up reviews after Codex caught load-bearing problems: the SVG-XSS fix (#1153) needs a sanitizer integration rebuild, and the hook-command variable swap (#1141) needs runtime verification in plugin + dev-symlink modes. Both will land as their own PRs. +Two PRs in the original 9-PR plan got moved to follow-up reviews after Codex caught decision-critical problems: the SVG-XSS fix (#1153) needs a sanitizer integration rebuild, and the hook-command variable swap (#1141) needs runtime verification in plugin + dev-symlink modes. Both will land as their own PRs. ### The numbers that matter @@ -1586,7 +1586,7 @@ If you run `pair-agent` and someone hits your tunnel with a multibyte token gues #### For contributors - The AskUserQuestion preamble byte budget ratchets from 36,500 → 39,000 to absorb the new CJK rule (rule 12 + self-check item). Generated SKILL.md files for all 35 tier-≥2 skills regenerate as a single mechanical commit. -- Two PRs from the original 9-PR plan moved to follow-up reviews after Codex outside-voice caught load-bearing problems: #1153 (SVG sanitizer) needs the sanitizer integration rebuilt against the current `setTabContent` boundary in `browse/src/write-commands.ts:319` (the original PR removed `.svg` from the allowlist; the right fix is to keep it allowed and sanitize via DOMPurify before `setTabContent`). 
#1141 (CLAUDE_PLUGIN_ROOT) needs runtime verification in both plugin-installed and dev-symlink modes plus scope expansion to the non-frontmatter shell snippet at `investigate/SKILL.md.tmpl:107`. +- Two PRs from the original 9-PR plan moved to follow-up reviews after Codex outside-voice caught decision-critical problems: #1153 (SVG sanitizer) needs the sanitizer integration rebuilt against the current `setTabContent` boundary in `browse/src/write-commands.ts:319` (the original PR removed `.svg` from the allowlist; the right fix is to keep it allowed and sanitize via DOMPurify before `setTabContent`). #1141 (CLAUDE_PLUGIN_ROOT) needs runtime verification in both plugin-installed and dev-symlink modes plus scope expansion to the non-frontmatter shell snippet at `investigate/SKILL.md.tmpl:107`. - Five gate-tier evals hardened against non-determinism / TTY rendering quirks after the wave's first `test:gate` run surfaced them as flakes (verified pre-existing on `main`, then fixed): `office-hours-builder-wildness` retiers `gate` → `periodic` because LLM-judge creativity scoring belongs in periodic per the tier-classification rules. `plan-design-with-ui` AUQ-detection tail expands 2.5KB → 5KB so the full Step 0 box-rendered AUQ fits inside the regex window. `ask-user-question-format-compliance` budget stretches 300s → 540s (poll), 360s → 600s (PTY session), 420s → 660s (bun wrapper) to accommodate `/plan-ceo-review`'s multi-bash-block preamble on substantive branches. `benchmark-providers` gemini smoke drops the brittle `toContain('ok')` assertion in favor of a shape check on the adapter result. `skillify` scrape-prototype-path accepts JSON shape variants (`results`, `data`, `hits`, bare arrays of `{title, score}` objects) instead of grepping for the literal `"items":[` key. - Housekeeping: the three source PRs absorbed into v1.31.1.0 (#1242, #1394, #1393) get closed with credit comments pointing at the merge SHA. @@ -1646,7 +1646,7 @@ the truncated 2KB evidence window. 
| LLM judge classifications | 0 | 4 (waiting/working/hung/unknown) | +4 | | Diff size on this branch (after merge with main) | — | -721 / +928 | net +207 | -The deleted "fallback" clause was the load-bearing instruction the +The deleted "fallback" clause was the decision-critical instruction the model was rationalizing as a general escape hatch from "fanning out round-trip AUQs." Once it's gone, the anti-shortcut clause and STOP gates in `plan-eng-review` Sections 1-4 stand without a contradicting @@ -1661,7 +1661,7 @@ quietly batched into a "## Decisions to confirm" plan-file write that gets buried under ExitPlanMode. The harness improvements (prose-AUQ detector, LLM judge, snapshot logs at `~/.gstack/analytics/pty-judge.jsonl` and `~/.gstack/analytics/pty-snapshots/` when `GSTACK_PTY_LOG=1`) are -load-bearing for any future plan-mode regression test that needs to +decision-critical for any future plan-mode regression test that needs to distinguish "model is thinking" from "model is waiting for me." ### Itemized changes @@ -1722,7 +1722,7 @@ distinguish "model is thinking" from "model is waiting for me." #### For contributors - Three subagent investigations across the debugging cycle were the - load-bearing diagnostic step: the architectural fix, the prose-AUQ + decision-critical diagnostic step: the architectural fix, the prose-AUQ detector design, and the test-fictional-state retraction. The pattern that worked: have a fresh-context subagent verify the parent's mental model against actual file contents before committing @@ -2310,7 +2310,7 @@ The `## GSTACK REVIEW REPORT` section had a write rule that contradicted itself: - `test/gen-skill-docs.test.ts` — new `GSTACK REVIEW REPORT delete-then-append flow` describe block: 4 SKILL.md target tests + 1 source resolver test. Static, deterministic, free. 
#### For contributors -- The `/autoplan` E2E approach attempted in the plan was dropped after a paid run revealed that `--disallowedTools AskUserQuestion` makes autoplan bail at the Phase 1 premise gate via the plan-file fallback. The PTY harness can't drive autoplan through its review phases without auto-progression of AskUserQuestions. The static prompt-text test catches the load-bearing change without needing that infrastructure. +- The `/autoplan` E2E approach attempted in the plan was dropped after a paid run revealed that `--disallowedTools AskUserQuestion` makes autoplan bail at the Phase 1 premise gate via the plan-file fallback. The PTY harness can't drive autoplan through its review phases without auto-progression of AskUserQuestions. The static prompt-text test catches the decision-critical change without needing that infrastructure. ## [1.26.3.0] - 2026-05-03 @@ -3088,7 +3088,7 @@ The harness itself is a reusable primitive. `runPlanSkillObservation()` watches - 18 preamble resolvers compressed: `generate-ask-user-format.ts`, `generate-brain-sync-block.ts`, `generate-completeness-section.ts`, `generate-completion-status.ts`, `generate-confusion-protocol.ts`, `generate-context-health.ts`, `generate-context-recovery.ts`, `generate-continuous-checkpoint.ts`, `generate-lake-intro.ts`, `generate-preamble-bash.ts`, `generate-proactive-prompt.ts`, `generate-routing-injection.ts`, `generate-telemetry-prompt.ts`, `generate-upgrade-check.ts`, `generate-vendoring-deprecation.ts`, `generate-voice-directive.ts`, `generate-writing-style-migration.ts`, `generate-writing-style.ts`. - All 47 generated `SKILL.md` files regenerated; 3 ship golden fixtures regenerated. -- Plan-* skills retain full preamble surface (Brain Sync, Context Recovery, Routing Injection) — the early slim attempt that cut these was reverted after diagnosing them as load-bearing. 
+- Plan-* skills retain full preamble surface (Brain Sync, Context Recovery, Routing Injection) — the early slim attempt that cut these was reverted after diagnosing them as decision-critical. - 5 existing plan-mode tests (`plan-ceo`, `plan-eng`, `plan-design`, `plan-devex`, `plan-mode-no-op`) rewritten onto the new harness with a 300s observation budget. All 5 verify-pass under `EVALS=1 EVALS_TIER=gate` against the real `claude` binary in 790s sequential. - `isNumberedOptionListVisible` regex tolerates whitespace collapse from TTY cursor-positioning escapes (`\x1b[40C`) which `stripAnsi` removes — `\b2\.` was failing on word-to-word transitions where stripped output read `text2.`. @@ -3105,7 +3105,7 @@ The harness itself is a reusable primitive. `runPlanSkillObservation()` watches - `test/helpers/touchfiles.ts`: 5 plan-mode test selections + e2e-harness-audit selection now point at `claude-pty-runner.ts` instead of the deleted helper. 6 new entries (`ask-user-question-format-pty`, `plan-ceo-mode-routing`, `plan-design-with-ui-scope`, `budget-regression-pty`, `ship-idempotency-pty`, `autoplan-chain-pty`) with tier classifications: 3 gate, 3 periodic. - `test/e2e-harness-audit.test.ts`: recognizes `runPlanSkillObservation` as a valid coverage path alongside the legacy `canUseTool` / `runPlanModeSkillTest` patterns. -- New unit test: `test/gen-skill-docs.test.ts` asserts plan-review preambles stay under 33 KB and the slim Voice section preserves its load-bearing semantic contract (lead-with-the-point, name-the-file, user-outcome framing, no-corporate, no-AI-vocab, user-sovereignty). +- New unit test: `test/gen-skill-docs.test.ts` asserts plan-review preambles stay under 33 KB and the slim Voice section preserves its decision-critical semantic contract (lead-with-the-point, name-the-file, user-outcome framing, no-corporate, no-AI-vocab, user-sovereignty). 
- `test/touchfiles.test.ts`: skill-specific change selection count updated 15 → 18 to match the 6 new touchfile entries that depend on `plan-ceo-review/**`. ## [1.14.0.0] - 2026-04-25 @@ -3581,7 +3581,7 @@ Measured across the v1.10.0.0 fix. Verify any claim with `git log 1.9.0.0..1.10. | Periodic evals defending against escape-hatch abuse | 0 | **4** | +4 (2 positive, 2 negative-case) | | Cross-model review findings incorporated before landing | N/A | **5 of 8** | Codex caught real bugs CEO+Eng missed | -Two of the five Codex findings were load-bearing. (1) The overlay reorder theory wasn't enough on its own. The `(recommended)` label on a neutral-posture question had to stay, because `question-tuning.ts:29` reads it to power AUTO_DECIDE. Omitting it would have silently broken auto-decide on every cherry-pick prompt. (2) The "31 sites global replace" in the original plan was factually wrong. Actual count, verified with `rg`, is 16 sites across 4 templates, and eng/design/devex templates used different phrasing than CEO. Without the audit, the fix would have shipped half-applied. +Two of the five Codex findings were decision-critical. (1) The overlay reorder theory wasn't enough on its own. The `(recommended)` label on a neutral-posture question had to stay, because `question-tuning.ts:29` reads it to power AUTO_DECIDE. Omitting it would have silently broken auto-decide on every cherry-pick prompt. (2) The "31 sites global replace" in the original plan was factually wrong. Actual count, verified with `rg`, is 16 sites across 4 templates, and eng/design/devex templates used different phrasing than CEO. Without the audit, the fix would have shipped half-applied. ### What this means for anyone running plan reviews on Opus 4.7 @@ -4087,7 +4087,7 @@ Same 200 cases, before and after the fixes above: **4.4x lift in detection.** FP rate also climbed 3.7x — Haiku is more aggressive and fires on edge cases that TestSavantAI smiles through. 
The review banner makes those FPs recoverable: user sees the suspected excerpt + layer scores, clicks Allow once, session continues. A P1 follow-up is tuning the Haiku WARN threshold (currently 0.6, probably should be 0.7-0.85) against real-world attempts.jsonl data once gstack users start reporting. -Honest shipping posture: this is meaningfully safer than v1.3.x, not bulletproof. Canary (deterministic), content-security L1-L3 (structural), and the review banner remain the load-bearing defenses when the ML layers miss or over-fire. +Honest shipping posture: this is meaningfully safer than v1.3.x, not bulletproof. Canary (deterministic), content-security L1-L3 (structural), and the review banner remain the decision-critical defenses when the ML layers miss or over-fire. ### Env knobs diff --git a/CLAUDE.md b/CLAUDE.md index a002c124be..a873c17263 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -277,7 +277,7 @@ to `~/.gstack/security/attempts.jsonl` via `tunnel-denial-log.ts`. Before editin `server.ts`, `sse-session-cookie.ts`, or `tunnel-denial-log.ts`, read [ARCHITECTURE.md](ARCHITECTURE.md#dual-listener-tunnel-architecture-v1600) — the module boundary (no imports from `token-registry.ts` into `sse-session-cookie.ts`) -is load-bearing for scope isolation. +is decision-critical for scope isolation. **Unicode sanitization at server egress** (v1.38.0.0+). Every server egress that ships page-content-derived strings MUST go through `JSON.stringify(payload, diff --git a/SKILL.md b/SKILL.md index 569350e37f..c1de8eb662 100644 --- a/SKILL.md +++ b/SKILL.md @@ -436,7 +436,7 @@ equivalents (cat, sed, find, grep). The dedicated tools are cheaper and clearer. Direct, concrete, builder-to-builder. Name the file, function, command, and user-visible impact. No filler. -No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted. Never corporate or academic. Short paragraphs. End with what to do. +No em dashes. 
No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing. Never corporate or academic. Short paragraphs. End with what to do. The user has context you do not. Cross-model agreement is a recommendation, not a decision. The user decides. diff --git a/autoplan/SKILL.md b/autoplan/SKILL.md index 0e77d81968..99b9fc4258 100644 --- a/autoplan/SKILL.md +++ b/autoplan/SKILL.md @@ -563,7 +563,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/benchmark-models/SKILL.md b/benchmark-models/SKILL.md index 38baa88519..08ca5d36e7 100644 --- a/benchmark-models/SKILL.md +++ b/benchmark-models/SKILL.md @@ -439,7 +439,7 @@ equivalents (cat, sed, find, grep). The dedicated tools are cheaper and clearer. Direct, concrete, builder-to-builder. 
Name the file, function, command, and user-visible impact. No filler. -No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted. Never corporate or academic. Short paragraphs. End with what to do. +No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing. Never corporate or academic. Short paragraphs. End with what to do. The user has context you do not. Cross-model agreement is a recommendation, not a decision. The user decides. diff --git a/benchmark/SKILL.md b/benchmark/SKILL.md index d2b54a2c4e..a1d89424e5 100644 --- a/benchmark/SKILL.md +++ b/benchmark/SKILL.md @@ -439,7 +439,7 @@ equivalents (cat, sed, find, grep). The dedicated tools are cheaper and clearer. Direct, concrete, builder-to-builder. Name the file, function, command, and user-visible impact. No filler. -No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted. Never corporate or academic. Short paragraphs. End with what to do. +No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing. Never corporate or academic. Short paragraphs. End with what to do. The user has context you do not. Cross-model agreement is a recommendation, not a decision. The user decides. diff --git a/bin/gstack-gbrain-lib.sh b/bin/gstack-gbrain-lib.sh index b89cce2e03..06b6cf41fc 100644 --- a/bin/gstack-gbrain-lib.sh +++ b/bin/gstack-gbrain-lib.sh @@ -22,12 +22,12 @@ # Exported after read so sub-processes inherit the secret. Caller # is responsible for `unset ` when done. # -# Load-bearing for D3-eng (shared secret helper across PAT + URL paste), +# Decision-critical for D3-eng (shared secret helper across PAT + URL paste), # D10 (env-var handoff, never argv), D11 (PAT scope disclosure + SIGINT # restore), D16 (pooler URL paste hygiene with redacted preview). 
# _gstack_gbrain_validate_varname — returns 0 if usable, 2 otherwise. -# `local LC_ALL=C` is load-bearing twice over: +# `local LC_ALL=C` is required for two reasons: # 1. In many macOS shells the default locale (e.g. en_US.UTF-8) makes `case` # glob brackets like `[A-Z]` match lowercase letters too. Without the # LC_ALL=C pin, names like `lower-case` pass validation and then trip diff --git a/browse/SKILL.md b/browse/SKILL.md index 99e5add79d..825c4b0f1e 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -437,7 +437,7 @@ equivalents (cat, sed, find, grep). The dedicated tools are cheaper and clearer. Direct, concrete, builder-to-builder. Name the file, function, command, and user-visible impact. No filler. -No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted. Never corporate or academic. Short paragraphs. End with what to do. +No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing. Never corporate or academic. Short paragraphs. End with what to do. The user has context you do not. Cross-model agreement is a recommendation, not a decision. The user decides. diff --git a/browse/src/browser-skill-commands.ts b/browse/src/browser-skill-commands.ts index 3c0805f5d6..2bbd7222d2 100644 --- a/browse/src/browser-skill-commands.ts +++ b/browse/src/browser-skill-commands.ts @@ -8,7 +8,7 @@ * test — run script.test.ts via bun test * rm [--global] — tombstone a user-tier skill * - * Load-bearing: spawnSkill mints a per-spawn scoped token (read+write scope) + * Security invariant: spawnSkill mints a per-spawn scoped token (read+write scope) * and passes it via GSTACK_SKILL_TOKEN. The skill never sees the daemon root * token. Untrusted skills get a scrubbed env (no $HOME, $PATH minimal, no * secrets like $GITHUB_TOKEN/$OPENAI_API_KEY/etc.) and a locked cwd. 
Trusted @@ -216,7 +216,7 @@ function handleRm(args: string[], ctx: SkillCommandContext): string { return `Tombstoned "${name}" (${effectiveTier} tier) → ${dst}\n`; } -// ─── spawnSkill (load-bearing) ────────────────────────────────── +// ─── spawnSkill (decision-critical) ────────────────────────────────── export interface SpawnSkillOptions { skill: BrowserSkill; diff --git a/browse/src/browser-skill-write.ts b/browse/src/browser-skill-write.ts index 81599b419b..4b35590aa0 100644 --- a/browse/src/browser-skill-write.ts +++ b/browse/src/browser-skill-write.ts @@ -178,7 +178,7 @@ export function commitSkill(opts: CommitSkillOptions): string { * test failure (step 8 of /skillify) or approval rejection (step 9). * * Idempotent: missing dirs are not an error. Best-effort: failures are - * swallowed (cleanup is fire-and-forget, not load-bearing). + * swallowed (cleanup is fire-and-forget; nothing depends on it succeeding). */ export function discardStaged(stagedDir: string): void { // Remove the leaf skill dir first, then the wrapper skillify-/. diff --git a/browse/src/domain-skills.ts b/browse/src/domain-skills.ts index 011059b273..9503c3caae 100644 --- a/browse/src/domain-skills.ts +++ b/browse/src/domain-skills.ts @@ -296,7 +296,7 @@ export async function writeSkill(input: WriteSkillInput): Promise 0 gate is load-bearing: handleSave currently writes + * The classifier_score > 0 gate is decision-critical: handleSave currently writes * classifier_score=0 with the comment "L4 deferred to load-time / sidebar-agent * fills this in on first prompt-injection load," but sidebar-agent was ripped * (CLAUDE.md "Sidebar architecture") and nothing else updates the score, so diff --git a/browse/test/cli-supervisor.test.ts b/browse/test/cli-supervisor.test.ts index d9cec7b89d..2a7838a436 100644 --- a/browse/test/cli-supervisor.test.ts +++ b/browse/test/cli-supervisor.test.ts @@ -12,7 +12,7 @@ import * as path from 'path'; // terminal-agent watchdog. 
// // Live respawn tests belong in the e2e tier (real Bun.spawn cycles take -// 3-8s each). These tripwires defend the load-bearing invariants: +// 3-8s each). These tripwires defend the decision-critical invariants: // opt-in by default, signal handlers wired, crash-loop guard, env knobs. const CLI_TS = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src', 'cli.ts'); diff --git a/browse/test/domain-skills-e2e.test.ts b/browse/test/domain-skills-e2e.test.ts index 29d33c4bc8..8ecb4b36df 100644 --- a/browse/test/domain-skills-e2e.test.ts +++ b/browse/test/domain-skills-e2e.test.ts @@ -92,7 +92,7 @@ describe('$B domain-skill (E2E gate tier)', () => { expect(await readSkill('127.0.0.1', 'e2e-test-slug')).toBeNull(); // Three uses without flag with classifier_score=0 (the default until L4 is - // rewired) MUST stay quarantined per #1369. The gate is load-bearing: a + // rewired) MUST stay quarantined per #1369. The gate is decision-critical: a // quarantined skill written under the influence of a poisoned page would // otherwise auto-promote after three benign uses without the L4 body scan // ever running. diff --git a/browse/test/server-pty-lease-routes.test.ts b/browse/test/server-pty-lease-routes.test.ts index 2c12618830..2827a8df5b 100644 --- a/browse/test/server-pty-lease-routes.test.ts +++ b/browse/test/server-pty-lease-routes.test.ts @@ -5,7 +5,7 @@ import * as path from 'path'; // Server-side route shape for the v1.44 lease + restart + dispose + // lease-refresh wiring. Live route exercises require the terminal-agent // loopback to be live (e2e-tier); these static-grep tripwires pin the -// load-bearing protocol invariants. +// decision-critical protocol invariants. 
const SERVER_TS = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src', 'server.ts'); diff --git a/browse/test/sidebar-tabs.test.ts b/browse/test/sidebar-tabs.test.ts index 91d50dcef2..14285776a7 100644 --- a/browse/test/sidebar-tabs.test.ts +++ b/browse/test/sidebar-tabs.test.ts @@ -7,7 +7,7 @@ * endpoints are gone, and the primary-tab nav (Terminal | Chat) is * gone. Terminal is now the sole primary surface. * - * This file locks the load-bearing invariants of that layout so a + * This file locks the decision-critical invariants of that layout so a * future refactor can't silently re-introduce the old surface or break * the new one. */ diff --git a/browse/test/sidepanel-reattach.test.ts b/browse/test/sidepanel-reattach.test.ts index 9179e57c40..130edd60ff 100644 --- a/browse/test/sidepanel-reattach.test.ts +++ b/browse/test/sidepanel-reattach.test.ts @@ -9,7 +9,7 @@ import * as path from 'path'; // opens a new WS with the fresh attachToken, writes RIS to xterm when // the agent sends {type:"reattach-begin"}, then treats the next binary // frame as the scrollback replay payload. Static-grep tripwires defend -// the load-bearing protocol invariants; live re-attach exercises belong +// the decision-critical protocol invariants; live re-attach exercises belong // in the e2e tier. const TERMINAL_JS = path.resolve( diff --git a/browse/test/terminal-agent-detach-reattach.test.ts b/browse/test/terminal-agent-detach-reattach.test.ts index 89fbe5a1ca..6faa5407c0 100644 --- a/browse/test/terminal-agent-detach-reattach.test.ts +++ b/browse/test/terminal-agent-detach-reattach.test.ts @@ -7,7 +7,7 @@ import * as path from 'path'; // The state machine is what turns a single network blip from "fall through // to ENDED state, click Restart" into "silent re-attach with scrollback // intact, keep typing." 
Live WS cycles + buffer-overflow exercises belong -// in the e2e tier; these static-grep tripwires defend the load-bearing +// in the e2e tier; these static-grep tripwires defend the decision-critical // protocol + correctness properties. const AGENT_TS = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src', 'terminal-agent.ts'); diff --git a/browse/test/terminal-agent-integration.test.ts b/browse/test/terminal-agent-integration.test.ts index cdcbe8de56..279fe17dca 100644 --- a/browse/test/terminal-agent-integration.test.ts +++ b/browse/test/terminal-agent-integration.test.ts @@ -204,7 +204,7 @@ describe('terminal-agent: PTY round-trip via real WebSocket (Cookie auth)', () = // `protocols` cleanly when also passed `headers` (the constructor // detects the third-arg form unreliably). Real browsers (Chromium) // use the standard protocols arg fine — the server-side handler is - // identical either way, so this test still locks the load-bearing + // identical either way, so this test still locks the decision-critical // invariant: the agent accepts a token via Sec-WebSocket-Protocol // and echoes the protocol back so a browser would accept the upgrade. const handshakeKey = 'dGhlIHNhbXBsZSBub25jZQ=='; diff --git a/browse/test/terminal-agent-watchdog.test.ts b/browse/test/terminal-agent-watchdog.test.ts index f012dc406e..8414eb142c 100644 --- a/browse/test/terminal-agent-watchdog.test.ts +++ b/browse/test/terminal-agent-watchdog.test.ts @@ -7,7 +7,7 @@ import * as path from 'path'; // The watchdog respawns terminal-agent when its PID dies. Live process-tree // tests would require spawning, killing, and observing across two real Bun // processes — slow and flaky in the free tier. These tripwires defend the -// load-bearing properties: identity-based liveness check (not name match), +// decision-critical properties: identity-based liveness check (not name match), // crash-loop guard, gated on ownsTerminalAgent, and cleared on shutdown. 
const SERVER_TS = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src', 'server.ts'); diff --git a/canary/SKILL.md b/canary/SKILL.md index 2693319be6..94f97e7558 100644 --- a/canary/SKILL.md +++ b/canary/SKILL.md @@ -555,7 +555,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/codex/SKILL.md b/codex/SKILL.md index 24331dde34..a7e6708ab8 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -558,7 +558,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. 
No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/context-restore/SKILL.md b/context-restore/SKILL.md index 22e499dd25..490a5c4eda 100644 --- a/context-restore/SKILL.md +++ b/context-restore/SKILL.md @@ -559,7 +559,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. 
Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/context-save/SKILL.md b/context-save/SKILL.md index f41551d78c..a5a79b9cee 100644 --- a/context-save/SKILL.md +++ b/context-save/SKILL.md @@ -558,7 +558,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/cso/SKILL.md b/cso/SKILL.md index 3e39ce4c57..a3a86edbb4 100644 --- a/cso/SKILL.md +++ b/cso/SKILL.md @@ -561,7 +561,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. 
- Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md index 235026d2f7..1f356cd7c9 100644 --- a/design-consultation/SKILL.md +++ b/design-consultation/SKILL.md @@ -581,7 +581,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. 
No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/design-html/SKILL.md b/design-html/SKILL.md index 70b87ff7e0..a05731b3b2 100644 --- a/design-html/SKILL.md +++ b/design-html/SKILL.md @@ -562,7 +562,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." 
diff --git a/design-review/SKILL.md b/design-review/SKILL.md index 33c43ceb56..830bfd21d5 100644 --- a/design-review/SKILL.md +++ b/design-review/SKILL.md @@ -559,7 +559,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/design-shotgun/SKILL.md b/design-shotgun/SKILL.md index 71f1a02564..2ab648530b 100644 --- a/design-shotgun/SKILL.md +++ b/design-shotgun/SKILL.md @@ -576,7 +576,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. 
No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/design/src/daemon.ts b/design/src/daemon.ts index 8b6e4a1edc..be3e84e4fd 100644 --- a/design/src/daemon.ts +++ b/design/src/daemon.ts @@ -358,7 +358,7 @@ async function handlePublish(req: Request, origin: string): Promise { function handleBoardGet(board: Board): Response { board.lastTouched = Date.now(); // No __GSTACK_SERVER_URL injection — board JS uses relative URLs that - // resolve against /boards// (the trailing slash is load-bearing here; + // resolve against /boards// (the trailing slash is required here; // the 301 from the bare /boards/ form ensures it). return new Response(board.htmlContent, { headers: { "Content-Type": "text/html; charset=utf-8" }, diff --git a/devex-review/SKILL.md b/devex-review/SKILL.md index a15ed78796..566bb21d42 100644 --- a/devex-review/SKILL.md +++ b/devex-review/SKILL.md @@ -561,7 +561,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype.
Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/docs/designs/PACING_UPDATES_V0.md b/docs/designs/PACING_UPDATES_V0.md index f8a49480aa..1f340cc390 100644 --- a/docs/designs/PACING_UPDATES_V0.md +++ b/docs/designs/PACING_UPDATES_V0.md @@ -28,7 +28,7 @@ During V1 planning, a pacing workstream was drafted: rank findings, auto-accept 4. **Pacing as prose can't invert existing control flow.** V1 planned to add a "rank findings, then ask" rule to preamble prose. But existing skill templates like `plan-eng-review/SKILL.md.tmpl` have per-section STOP/AskUserQuestion sequences. A prose rule in preamble can't reliably override a hardcoded per-section STOP. The behavioral change is sequencing, not prompt wording. 5. **Flip mechanism has no implementation.** "Reply `flip ` to change" was prose. No command parser, no state store, no replay behavior. If the conversation compacts and the Silent Decisions block leaves context, the original decision is lost. 6. **Migration prompt is itself an interrupt.** V1's post-upgrade migration prompt (offering to restore V0 prose) counts against the interruption budget V1.1 is trying to reduce. 
V1.1 must decide: exempt from budget, or include as interrupt-1-of-N? -7. **First-run preamble prompts count too.** Lake intro, telemetry, proactive, routing injection — Louise saw all of them on first run. They're interruptions before the first real skill runs. V1.1 must audit which of these are load-bearing for new users vs. deferrable until session N. +7. **First-run preamble prompts count too.** Lake intro, telemetry, proactive, routing injection — Louise saw all of them on first run. They're interruptions before the first real skill runs. V1.1 must audit which of these are decision-critical for new users vs. deferrable until session N. 8. **Ranking formula not calibrated against real data.** V1 considered `product 0-8` (broken: `{0,1,2,4,8}` distribution), then `sum 0-6` with threshold ≥ 4. But neither was validated against actual finding distribution. V1.1 should instrument V0 question-log to measure what real findings look like, then calibrate. 9. **"Every one-way door surfaces" vs "max 3 per phase" contradicts.** One-way cap = uncapped (safety); two-way cap = 3. But the plan had both rules without explicit precedence. V1.1 must state: one-way doors surface uncapped regardless of phase budget. 10. **Undefined verification values.** V1 plan had "Silent Decisions block ≥ N entries" with N never defined, and `active: true` field in throughput JSON never defined. V1.1 gets concrete values. @@ -49,7 +49,7 @@ During V1 planning, a pacing workstream was drafted: rank findings, auto-accept 6. **Migration-prompt budget decision.** Explicit rule: one-shot migration prompts are exempt from the per-phase interruption budget. Rationale: they fire before review phases start, not during. -7. **First-run preamble audit.** Audit lake intro, telemetry, proactive, routing injection. For each: is this load-bearing for a first-time user, or deferrable? Likely outcome: suppress all but lake intro until session 2+. 
Offer remaining ones via a `/plan-tune first-run` command that users can invoke voluntarily. +7. **First-run preamble audit.** Audit lake intro, telemetry, proactive, routing injection. For each: is this decision-critical for a first-time user, or deferrable? Likely outcome: suppress all but lake intro until session 2+. Offer remaining ones via a `/plan-tune first-run` command that users can invoke voluntarily. 8. **Ranking threshold calibration.** Instrument V0's question-log (already running, has history). Measure the actual distribution of `severity × irreversibility × user-decision-matters` across recent CEO + Eng + DX + Design reviews. Pick threshold based on real data. Target: ~20% of findings surface, ~80% auto-accept. diff --git a/docs/designs/SIDEBAR_MESSAGE_FLOW.md b/docs/designs/SIDEBAR_MESSAGE_FLOW.md index 4c8fc8c7f6..c7ce453150 100644 --- a/docs/designs/SIDEBAR_MESSAGE_FLOW.md +++ b/docs/designs/SIDEBAR_MESSAGE_FLOW.md @@ -121,7 +121,7 @@ purpose — the user is typing directly to claude, there's no untrusted page content in the loop. Trust source is the keyboard, same as any local terminal. -That trust assumption is load-bearing on three transport guarantees: +That trust assumption depends on three transport guarantees: 1. **Local-only listener.** terminal-agent.ts binds `127.0.0.1` only. The dual-listener tunnel surface (server.ts `TUNNEL_PATHS`) does diff --git a/docs/designs/SLATE_HOST.md b/docs/designs/SLATE_HOST.md index 8e5bb154d8..5ab39912fd 100644 --- a/docs/designs/SLATE_HOST.md +++ b/docs/designs/SLATE_HOST.md @@ -158,7 +158,7 @@ OPENCODE_TERMINAL ### Critical env vars for gstack integration **`SLATE_DISABLE_CLAUDE_CODE_SKILLS`** — When set, `.claude/skills/` loading is disabled. -This makes publishing to `.slate/skills/` load-bearing, not just an optimization. +This makes publishing to `.slate/skills/` required, not just an optimization. Without native `.slate/` publishing, gstack skills vanish when this flag is set.
**`SLATE_TEST_HOME`** — Useful for E2E tests. Can redirect Slate's home directory diff --git a/docs/designs/v2_PLAN.md b/docs/designs/v2_PLAN.md index bd684df0dd..3f45b84305 100644 --- a/docs/designs/v2_PLAN.md +++ b/docs/designs/v2_PLAN.md @@ -82,9 +82,9 @@ reviews internally measured pack" exte ## Phase 0 — Eval coverage matrix (v1.45.0.0) -**Goal:** every skill in gstack ships with at least one gate-tier eval AND one periodic-tier eval that asserts a must-have behavior. The eval suite becomes the design spec. This is the load-bearing claim of the plan — must come first. +**Goal:** every skill in gstack ships with at least one gate-tier eval AND one periodic-tier eval that asserts a must-have behavior. The eval suite becomes the design spec. This is the decision-critical claim of the plan — must come first. -**Cross-model tension noted:** Codex argued this is a procrastination trap and shape-asserts are shallow. User explicitly chose full tiered coverage anyway (D9 = A), with rationale: "the eval suite IS the design spec; that commitment is the load-bearing claim of the whole plan." We accept the larger upfront investment. +**Cross-model tension noted:** Codex argued this is a procrastination trap and shape-asserts are shallow. User explicitly chose full tiered coverage anyway (D9 = A), with rationale: "the eval suite IS the design spec; that commitment is the decision-critical claim of the whole plan." We accept the larger upfront investment. **Mitigation of Codex's "shape vs quality" critique:** for orchestration/judgment skills (plan-ceo, office-hours, autoplan), the must-have isn't deterministic output — it's structural compliance (does it call AskUserQuestion in the right shape? does it follow the section order? does it persist artifacts?). Eval design must capture structural contracts, not output content. 
Where structural eval is impossible, that section is explicitly noted as "judgment-dependent, not eval-protected" — Codex's #2 critique is honored by NOT then stripping unprotected judgment prose. diff --git a/docs/explanation-diataxis-in-gstack.md b/docs/explanation-diataxis-in-gstack.md index 0e201d5ea2..f1ef43f1ba 100644 --- a/docs/explanation-diataxis-in-gstack.md +++ b/docs/explanation-diataxis-in-gstack.md @@ -1,6 +1,6 @@ # Why gstack uses Diataxis for documentation -The two doc skills in gstack — `/document-release` and `/document-generate` — both speak Diataxis. New entities get scored across four quadrants. Coverage gaps surface in PR bodies tagged by quadrant. This doc explains why that vocabulary is load-bearing, and why a simpler "just write markdown" approach falls down at the scale gstack operates at. +The two doc skills in gstack — `/document-release` and `/document-generate` — both speak Diataxis. New entities get scored across four quadrants. Coverage gaps surface in PR bodies tagged by quadrant. This doc explains why that vocabulary is decision-critical, and why a simpler "just write markdown" approach falls down at the scale gstack operates at. ## The problem diff --git a/docs/skills.md b/docs/skills.md index 1ef0f6ae9c..65c461af36 100644 --- a/docs/skills.md +++ b/docs/skills.md @@ -94,7 +94,7 @@ After the reframe, it presents premises for you to validate. Not "does this soun 3. The narrowest wedge is a daily briefing that actually works 4. CRM integration is a must-have, not a nice-to-have -You agree, disagree, or adjust. Every premise you accept becomes load-bearing in the design doc. +You agree, disagree, or adjust. Every premise you accept becomes decision-critical in the design doc. ### Implementation alternatives @@ -1189,7 +1189,7 @@ Three Greptile comments. One real fix. One auto-acknowledged. One false positive ## `/ios-qa` -Live-device iOS QA. 
The fork's load-bearing insight was: don't simulate, don't run XCTest, don't bring up WebDriverAgent. Embed an HTTP server in the app under test, drive it from a Mac-side daemon over the USB CoreDevice IPv6 tunnel. +Live-device iOS QA. The fork's decision-critical insight was: don't simulate, don't run XCTest, don't bring up WebDriverAgent. Embed an HTTP server in the app under test, drive it from a Mac-side daemon over the USB CoreDevice IPv6 tunnel. The agent reads your Swift source, finds `@Observable` classes with `@Snapshotable`-marked fields, codegens typed accessors, deploys a debug bridge, then runs a closed find→fix→verify loop. diff --git a/document-generate/SKILL.md b/document-generate/SKILL.md index cb89b4ee5d..dab35a6ccb 100644 --- a/document-generate/SKILL.md +++ b/document-generate/SKILL.md @@ -561,7 +561,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. 
Fix: add a null check and redirect to /login. Two lines." diff --git a/document-release/SKILL.md b/document-release/SKILL.md index 3fc606e8ac..bbf543fd76 100644 --- a/document-release/SKILL.md +++ b/document-release/SKILL.md @@ -559,7 +559,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." 
diff --git a/extension/sidepanel.js b/extension/sidepanel.js index 14834519b7..91e9bf94f1 100644 --- a/extension/sidepanel.js +++ b/extension/sidepanel.js @@ -1090,7 +1090,7 @@ chrome.runtime.onMessage.addListener((msg) => { // "intentional close" (sidebar closed, browser quit, extension reload) // from "transient blip" (wifi hiccup) reliably — Chrome routes the // former through code 1001 (going-away) and the latter through 1006 -// (abnormal), but neither is a load-bearing contract across browsers +// (abnormal), but neither is a guaranteed contract across browsers // and extension lifecycles. // // pagehide fires reliably for tab close, panel close, extension reload, diff --git a/health/SKILL.md b/health/SKILL.md index ef63acaf65..0feaa0bd81 100644 --- a/health/SKILL.md +++ b/health/SKILL.md @@ -557,7 +557,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen.
Fix: add a null check and redirect to /login. Two lines." diff --git a/investigate/SKILL.md b/investigate/SKILL.md index f1d12dd1e6..3ebed2a106 100644 --- a/investigate/SKILL.md +++ b/investigate/SKILL.md @@ -596,7 +596,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/ios-clean/SKILL.md b/ios-clean/SKILL.md index f925bc9486..dc75051597 100644 --- a/ios-clean/SKILL.md +++ b/ios-clean/SKILL.md @@ -559,7 +559,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. 
No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/ios-design-review/SKILL.md b/ios-design-review/SKILL.md index 76f9629f98..8b3dfa5dd4 100644 --- a/ios-design-review/SKILL.md +++ b/ios-design-review/SKILL.md @@ -561,7 +561,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. 
Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/ios-fix/SKILL.md b/ios-fix/SKILL.md index 11d7a3f1b1..787986c729 100644 --- a/ios-fix/SKILL.md +++ b/ios-fix/SKILL.md @@ -562,7 +562,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/ios-qa/SKILL.md b/ios-qa/SKILL.md index 1080896c57..b2ea8f0d91 100644 --- a/ios-qa/SKILL.md +++ b/ios-qa/SKILL.md @@ -565,7 +565,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. 
- Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/ios-sync/SKILL.md b/ios-sync/SKILL.md index 2e0f703afa..1c8218511c 100644 --- a/ios-sync/SKILL.md +++ b/ios-sync/SKILL.md @@ -559,7 +559,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. 
- The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/land-and-deploy/SKILL.md b/land-and-deploy/SKILL.md index 8bfec441c5..a3ff53eadd 100644 --- a/land-and-deploy/SKILL.md +++ b/land-and-deploy/SKILL.md @@ -554,7 +554,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/landing-report/SKILL.md b/landing-report/SKILL.md index 442c28d7f9..2b138f2165 100644 --- a/landing-report/SKILL.md +++ b/landing-report/SKILL.md @@ -555,7 +555,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. 
Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/learn/SKILL.md b/learn/SKILL.md index 3eb54e696d..6669b2c880 100644 --- a/learn/SKILL.md +++ b/learn/SKILL.md @@ -557,7 +557,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. 
No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/make-pdf/SKILL.md b/make-pdf/SKILL.md index 229f082cf2..0a3cd17c7f 100644 --- a/make-pdf/SKILL.md +++ b/make-pdf/SKILL.md @@ -474,7 +474,7 @@ equivalents (cat, sed, find, grep). The dedicated tools are cheaper and clearer. Direct, concrete, builder-to-builder. Name the file, function, command, and user-visible impact. No filler. -No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted. Never corporate or academic. Short paragraphs. End with what to do. +No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing. Never corporate or academic. Short paragraphs. End with what to do. The user has context you do not. Cross-model agreement is a recommendation, not a decision. The user decides. diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md index bfa14d6bd3..80a5b2d28f 100644 --- a/office-hours/SKILL.md +++ b/office-hours/SKILL.md @@ -592,7 +592,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. 
No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/open-gstack-browser/SKILL.md b/open-gstack-browser/SKILL.md index ef01414de8..7c689937cd 100644 --- a/open-gstack-browser/SKILL.md +++ b/open-gstack-browser/SKILL.md @@ -554,7 +554,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. 
Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/pair-agent/SKILL.md b/pair-agent/SKILL.md index baa1553b76..75ea418bc4 100644 --- a/pair-agent/SKILL.md +++ b/pair-agent/SKILL.md @@ -556,7 +556,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index 526bb0e2e3..d57a3d93df 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -586,7 +586,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. 
- Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." @@ -1550,7 +1550,7 @@ Evaluate: **EXPANSION and SELECTIVE EXPANSION additions:** * What comes after this ships? Phase 2? Phase 3? Does the architecture support that trajectory? * Platform potential. Does this create capabilities other features can leverage? -* (SELECTIVE EXPANSION only) Retrospective: Were the right cherry-picks accepted? Did any rejected expansions turn out to be load-bearing for the accepted ones? +* (SELECTIVE EXPANSION only) Retrospective: Were the right cherry-picks accepted? Did any rejected expansions turn out to be required by the accepted ones? **STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If this section turned up zero findings, state "No issues, moving on" and proceed. If the section has findings, you MUST call AskUserQuestion as a tool_use — a finding with an "obvious fix" is still a finding and still needs user approval before any change lands in the plan.
Do NOT proceed until the user responds. **Reminder: Do NOT make any code changes. Review only.** diff --git a/plan-ceo-review/SKILL.md.tmpl b/plan-ceo-review/SKILL.md.tmpl index 4e4861d62b..93fcb37626 100644 --- a/plan-ceo-review/SKILL.md.tmpl +++ b/plan-ceo-review/SKILL.md.tmpl @@ -633,7 +633,7 @@ Evaluate: **EXPANSION and SELECTIVE EXPANSION additions:** * What comes after this ships? Phase 2? Phase 3? Does the architecture support that trajectory? * Platform potential. Does this create capabilities other features can leverage? -* (SELECTIVE EXPANSION only) Retrospective: Were the right cherry-picks accepted? Did any rejected expansions turn out to be load-bearing for the accepted ones? +* (SELECTIVE EXPANSION only) Retrospective: Were the right cherry-picks accepted? Did any rejected expansions turn out to be required by the accepted ones? **STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If this section turned up zero findings, state "No issues, moving on" and proceed. If the section has findings, you MUST call AskUserQuestion as a tool_use — a finding with an "obvious fix" is still a finding and still needs user approval before any change lands in the plan. Do NOT proceed until the user responds. **Reminder: Do NOT make any code changes. Review only.** diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index ce70998cde..0555169f3a 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -558,7 +558,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes.
No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/plan-devex-review/SKILL.md b/plan-devex-review/SKILL.md index 2bb031cbf2..eb3b814935 100644 --- a/plan-devex-review/SKILL.md +++ b/plan-devex-review/SKILL.md @@ -564,7 +564,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. 
Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index b6cd234410..54078177aa 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -562,7 +562,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/plan-tune/SKILL.md b/plan-tune/SKILL.md index 6f5875d0d8..5360371f10 100644 --- a/plan-tune/SKILL.md +++ b/plan-tune/SKILL.md @@ -567,7 +567,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. 
- Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/qa-only/SKILL.md b/qa-only/SKILL.md index 7a58b76ed9..5904fb8e0a 100644 --- a/qa-only/SKILL.md +++ b/qa-only/SKILL.md @@ -557,7 +557,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. 
No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/qa/SKILL.md b/qa/SKILL.md index 6779c47cfc..4ce06ed990 100644 --- a/qa/SKILL.md +++ b/qa/SKILL.md @@ -563,7 +563,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." 
diff --git a/retro/SKILL.md b/retro/SKILL.md index ddbee15515..40f8e70734 100644 --- a/retro/SKILL.md +++ b/retro/SKILL.md @@ -574,7 +574,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/review/SKILL.md b/review/SKILL.md index dd6914a88c..f458a1031c 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -559,7 +559,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. 
No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/scrape/SKILL.md b/scrape/SKILL.md index dccdd0db73..0a89d78e78 100644 --- a/scrape/SKILL.md +++ b/scrape/SKILL.md @@ -555,7 +555,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. 
The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/scripts/resolvers/preamble/generate-voice-directive.ts b/scripts/resolvers/preamble/generate-voice-directive.ts index dab989bc71..27ef592531 100644 --- a/scripts/resolvers/preamble/generate-voice-directive.ts +++ b/scripts/resolvers/preamble/generate-voice-directive.ts @@ -6,7 +6,7 @@ export function generateVoiceDirective(tier: number): string { Direct, concrete, builder-to-builder. Name the file, function, command, and user-visible impact. No filler. -No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted. Never corporate or academic. Short paragraphs. End with what to do. +No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing. Never corporate or academic. Short paragraphs. End with what to do. The user has context you do not. Cross-model agreement is a recommendation, not a decision. The user decides.`; } @@ -21,7 +21,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. 
No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/setup-browser-cookies/SKILL.md b/setup-browser-cookies/SKILL.md index 2f91f4d687..e73483374d 100644 --- a/setup-browser-cookies/SKILL.md +++ b/setup-browser-cookies/SKILL.md @@ -433,7 +433,7 @@ equivalents (cat, sed, find, grep). The dedicated tools are cheaper and clearer. Direct, concrete, builder-to-builder. Name the file, function, command, and user-visible impact. No filler. -No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted. Never corporate or academic. Short paragraphs. End with what to do. +No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing. Never corporate or academic. Short paragraphs. End with what to do. The user has context you do not. Cross-model agreement is a recommendation, not a decision. The user decides. diff --git a/setup-deploy/SKILL.md b/setup-deploy/SKILL.md index 3e69b015d0..4bae747945 100644 --- a/setup-deploy/SKILL.md +++ b/setup-deploy/SKILL.md @@ -558,7 +558,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. 
-- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/setup-gbrain/SKILL.md b/setup-gbrain/SKILL.md index 12d8e2ce13..ae0eed0794 100644 --- a/setup-gbrain/SKILL.md +++ b/setup-gbrain/SKILL.md @@ -557,7 +557,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. 
Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/ship/SKILL.md b/ship/SKILL.md index 9611072f74..cbd24827b4 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -559,7 +559,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/skillify/SKILL.md b/skillify/SKILL.md index 8b81f1ce8d..bdcd0ec690 100644 --- a/skillify/SKILL.md +++ b/skillify/SKILL.md @@ -555,7 +555,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. 
- Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/spec/SKILL.md b/spec/SKILL.md index 3e7187d180..46de2bbfee 100644 --- a/spec/SKILL.md +++ b/spec/SKILL.md @@ -556,7 +556,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. 
- The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." @@ -1495,7 +1495,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. - Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/sync-gbrain/SKILL.md b/sync-gbrain/SKILL.md index 96ac9057aa..e87140a738 100644 --- a/sync-gbrain/SKILL.md +++ b/sync-gbrain/SKILL.md @@ -557,7 +557,7 @@ GStack voice: Garry-shaped product and engineering judgment, compressed for runt - Be direct about quality. Bugs matter. Edge cases matter. Fix the whole thing, not the demo path. - Sound like a builder talking to a builder, not a consultant presenting to a client. 
- Never corporate, academic, PR, or hype. Avoid filler, throat-clearing, generic optimism, and founder cosplay. -- No em dashes. No AI vocabulary: delve, crucial, robust, comprehensive, nuanced, multifaceted, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. +- No em dashes. No AI vocabulary or stock phrases: delve, crucial, robust, comprehensive, nuanced, multifaceted, load[- ]bearing, furthermore, moreover, additionally, pivotal, landscape, tapestry, underscore, foster, showcase, intricate, vibrant, fundamental, significant. - The user has context you do not: domain knowledge, timing, relationships, taste. Cross-model agreement is a recommendation, not a decision. The user decides. Good: "auth.ts:47 returns undefined when the session cookie expires. Users hit a white screen. Fix: add a null check and redirect to /login. Two lines." diff --git a/test/cso-preserved.test.ts b/test/cso-preserved.test.ts index 83fe6bbc81..46b7e07ac2 100644 --- a/test/cso-preserved.test.ts +++ b/test/cso-preserved.test.ts @@ -1,7 +1,7 @@ /** * cso security-guidance preservation test (v1.45.0.0 T6). * - * The cso skill carries load-bearing security prose: OWASP Top 10 mappings, + * The cso skill carries decision-critical security prose: OWASP Top 10 mappings, * STRIDE threat-model phrasing, "do not auto-fix without user approval" * gates. Codex 2nd-pass critique #9: "cso exemption too broad ... 
should * still get resolver dedup, catalog trim, sectioning if safe, and targeted @@ -37,7 +37,7 @@ const MUST_PRESERVE_HEADINGS = [ '## Preamble', // from PREAMBLE resolver ]; -describe('cso skill preserves load-bearing security guidance', () => { +describe('cso skill preserves decision-critical security guidance', () => { test('cso/SKILL.md exists and is non-trivial', () => { expect(fs.existsSync(CSO_SKILL)).toBe(true); const content = fs.readFileSync(CSO_SKILL, 'utf-8'); @@ -54,7 +54,7 @@ describe('cso skill preserves load-bearing security guidance', () => { if (missing.length > 0) { throw new Error( `cso/SKILL.md is missing required security phrases: ${missing.join(', ')}. ` + - `These are load-bearing for the skill's audit posture. If you intentionally ` + + `These are decision-critical for the skill's audit posture. If you intentionally ` + `removed them, update this test with the new phrasing.`, ); } diff --git a/test/fixtures/ios-qa/FixtureApp/Tests/DebugBridgeCoreTests/StateServerSmokeTests.swift b/test/fixtures/ios-qa/FixtureApp/Tests/DebugBridgeCoreTests/StateServerSmokeTests.swift index abb4325161..63920d4784 100644 --- a/test/fixtures/ios-qa/FixtureApp/Tests/DebugBridgeCoreTests/StateServerSmokeTests.swift +++ b/test/fixtures/ios-qa/FixtureApp/Tests/DebugBridgeCoreTests/StateServerSmokeTests.swift @@ -58,7 +58,7 @@ final class StateServerSmokeTests: XCTestCase { XCTAssertEqual(status, 401, "mutating endpoint without bearer must return 401") } - /// Boot token rotation is the load-bearing security property. Confirm: + /// Boot token rotation is the decision-critical security property. Confirm: /// 1. Boot token is required for /auth/rotate /// 2. After rotation, boot token is dead /// 3. 
Rotated token works for subsequent calls diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 0a0c9741ba..753331cb44 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -313,7 +313,7 @@ describe('gen-skill-docs', () => { ]; // Plan skills carry the same preamble surface as other tier-≥2 skills - // (Artifacts Sync, Context Recovery, Routing Injection are load-bearing + // (Artifacts Sync, Context Recovery, Routing Injection are decision-critical // functionality, not optional). Budget is set to current size + small // headroom; ratchet down if a future slim trims real bytes. // Ratcheted from 33000 → 35000 when the gbrain context-load block was @@ -3037,7 +3037,7 @@ describe('plan-mode-info resolver (handshake-replacement)', () => { }); // GSTACK REVIEW REPORT report-at-bottom contract — verifies the prompt-text -// fix in scripts/resolvers/review.ts (the load-bearing change for the +// fix in scripts/resolvers/review.ts (the decision-critical change for the // "report not at bottom of plan in plan mode" bug). The bug is in the // prompt's contradictory write-flow instructions, not in observable // runtime behavior we can cheaply gate in CI. Verifying the prompt text diff --git a/test/helpers/parity-harness.ts b/test/helpers/parity-harness.ts index 4071a6caea..dfe4258e18 100644 --- a/test/helpers/parity-harness.ts +++ b/test/helpers/parity-harness.ts @@ -15,7 +15,7 @@ * size discipline only. This module supports content invariants per skill * family (e.g., cso must preserve OWASP/STRIDE; plan-ceo must preserve * mode-selection phrasing) so future compression can't silently strip - * load-bearing prose even when size stays within ratio. + * decision-critical prose even when size stays within ratio. 
*/ import * as fs from 'fs'; diff --git a/test/helpers/providers/gpt.ts b/test/helpers/providers/gpt.ts index 07757dc2f4..2d7e438ac9 100644 --- a/test/helpers/providers/gpt.ts +++ b/test/helpers/providers/gpt.ts @@ -31,7 +31,7 @@ export class GptAdapter implements ProviderAdapter { async run(opts: RunOpts): Promise { const start = Date.now(); - // `-s read-only` is load-bearing safety. With `--skip-git-repo-check` we + // `-s read-only` is decision-critical safety. With `--skip-git-repo-check` we // bypass codex's interactive trust prompt for unknown directories (benchmarks // often run in temp dirs / non-git paths), so the read-only sandbox is now // the only boundary preventing codex from mutating the workdir. If you ever diff --git a/test/preamble-compose.test.ts b/test/preamble-compose.test.ts index 22fdfd7c88..59d0e6d222 100644 --- a/test/preamble-compose.test.ts +++ b/test/preamble-compose.test.ts @@ -3,7 +3,7 @@ * * Asserts that the AskUserQuestion Format section renders BEFORE the * Model-Specific Behavioral Patch section in tier-≥2 preamble output. - * This order is load-bearing: Opus 4.7 reads top-to-bottom and absorbs + * This order is decision-critical: Opus 4.7 reads top-to-bottom and absorbs * the first pacing directive it hits. v1.6.4.0 regressed plan-review * cadence because the overlay rendered first with "Batch your questions" * as the ambient default. diff --git a/test/skill-e2e-auto-decide-preserved.test.ts b/test/skill-e2e-auto-decide-preserved.test.ts index 8b773d5fc7..afdc5ff1da 100644 --- a/test/skill-e2e-auto-decide-preserved.test.ts +++ b/test/skill-e2e-auto-decide-preserved.test.ts @@ -34,7 +34,7 @@ * preference — that's a regression against the opt-in feature. If outcome * is 'plan_ready' with no AUTO_DECIDE text, the model auto-decided BUT * skipped the annotation (acceptable; AUTO_DECIDE annotation is good - * practice but not the load-bearing behavior). + * practice but not the decision-critical behavior). 
*/ import { describe, test, expect } from 'bun:test'; diff --git a/test/skill-e2e-plan-ceo-finding-count.test.ts b/test/skill-e2e-plan-ceo-finding-count.test.ts index 850c1a0334..31fdc8c697 100644 --- a/test/skill-e2e-plan-ceo-finding-count.test.ts +++ b/test/skill-e2e-plan-ceo-finding-count.test.ts @@ -1,7 +1,7 @@ /** * /plan-ceo-review per-finding AskUserQuestion count (periodic, paid, real-PTY). * - * Asserts the load-bearing rule "One issue = one AskUserQuestion call" by + * Asserts the decision-critical rule "One issue = one AskUserQuestion call" by * driving /plan-ceo-review against a 5-finding seeded plan and counting * distinct review-phase AUQs. Passes when count is in [N-1, N+2]. * diff --git a/test/skill-e2e-ship-idempotency.test.ts b/test/skill-e2e-ship-idempotency.test.ts index e4e3b049c9..dabbee693d 100644 --- a/test/skill-e2e-ship-idempotency.test.ts +++ b/test/skill-e2e-ship-idempotency.test.ts @@ -121,7 +121,7 @@ function buildShippedFixture(): ShipFixture { return { workTree, bareRemote, setupLog }; } -/** Snapshot the load-bearing fixture state so we can compare post-run. */ +/** Snapshot the decision-critical fixture state so we can compare post-run. */ interface FixtureSnapshot { versionFile: string; packageVersion: string; diff --git a/test/v0-dormancy.test.ts b/test/v0-dormancy.test.ts index 61800013b3..c2f5620aad 100644 --- a/test/v0-dormancy.test.ts +++ b/test/v0-dormancy.test.ts @@ -10,7 +10,7 @@ * V0 machinery: * - plan-tune/ — the conversational inspection skill for /plan-tune * - office-hours/ — sets the declared profile - * For these, V0 vocabulary is load-bearing and must appear. + * For these, V0 vocabulary is decision-critical and must appear. * * All other tier-≥2 skills: 5D dim names + archetype names must NOT appear. 
*/ From 72ffdbb949ed95f11908e744052e52ffb1d0aebf Mon Sep 17 00:00:00 2001 From: Nader Helmy Date: Fri, 29 May 2026 18:58:26 -0500 Subject: [PATCH 2/2] docs: refine stock phrase rewrites --- CHANGELOG.md | 30 +++++++++---------- CLAUDE.md | 2 +- bin/gstack-gbrain-lib.sh | 4 +-- browse/src/browser-skill-commands.ts | 4 +-- browse/src/browser-skill-write.ts | 2 +- browse/src/domain-skills.ts | 2 +- browse/test/cli-supervisor.test.ts | 2 +- browse/test/domain-skills-e2e.test.ts | 2 +- browse/test/server-pty-lease-routes.test.ts | 2 +- browse/test/sidebar-tabs.test.ts | 2 +- browse/test/sidepanel-reattach.test.ts | 2 +- .../terminal-agent-detach-reattach.test.ts | 2 +- .../test/terminal-agent-integration.test.ts | 2 +- browse/test/terminal-agent-watchdog.test.ts | 2 +- design/src/daemon.ts | 2 +- docs/designs/PACING_UPDATES_V0.md | 4 +-- docs/designs/SIDEBAR_MESSAGE_FLOW.md | 2 +- docs/designs/SLATE_HOST.md | 2 +- docs/designs/v2_PLAN.md | 4 +-- docs/explanation-diataxis-in-gstack.md | 2 +- docs/skills.md | 4 +-- extension/sidepanel.js | 2 +- plan-ceo-review/SKILL.md | 2 +- plan-ceo-review/SKILL.md.tmpl | 2 +- test/cso-preserved.test.ts | 6 ++-- .../StateServerSmokeTests.swift | 2 +- test/gen-skill-docs.test.ts | 4 +-- test/helpers/parity-harness.ts | 2 +- test/helpers/providers/gpt.ts | 2 +- test/preamble-compose.test.ts | 2 +- test/skill-e2e-auto-decide-preserved.test.ts | 2 +- test/skill-e2e-plan-ceo-finding-count.test.ts | 2 +- test/skill-e2e-ship-idempotency.test.ts | 2 +- test/v0-dormancy.test.ts | 2 +- 34 files changed, 56 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2845098f41..4cf92bb60a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -147,7 +147,7 @@ If you run gstack in CI, the new `EVALS_BUDGET_HARD_CAP=$30` cap (per-suite: gat **Added** - `scripts/capture-baseline.ts` + `test/helpers/capture-parity-baseline.ts` — captures per-skill SKILL.md sizes, token estimates, frontmatter description lengths, and eval coverage flags. 
Writes JSON snapshots used by the parity and size-budget gates. Locks `test/fixtures/parity-baseline-v1.44.1.json` as the v1→v2 reference. -- `test/helpers/parity-harness.ts` + `test/parity-suite.test.ts` — cathedral parity-eval suite floor. `PARITY_INVARIANTS` registry pins must-preserve phrases per skill family (cso: OWASP/STRIDE; plan-ceo: SCOPE EXPANSION / HOLD SCOPE; ship: VERSION/CHANGELOG/PR) so future compression can't silently strip decision-critical prose. +- `test/helpers/parity-harness.ts` + `test/parity-suite.test.ts` — cathedral parity-eval suite floor. `PARITY_INVARIANTS` registry pins must-preserve phrases per skill family (cso: OWASP/STRIDE; plan-ceo: SCOPE EXPANSION / HOLD SCOPE; ship: VERSION/CHANGELOG/PR) so future compression can't silently strip prose the skills depend on. - `test/skill-coverage-matrix.ts` + `test/skill-coverage-matrix.test.ts` — single source of truth mapping each skill to gate + periodic tests; CI gate asserts every skill has at least one gate-tier entry. 51 skills, 51 entries. - `test/skill-coverage-floor.test.ts` — per-skill structural-compliance smoke test (file-IO, free). Verifies frontmatter shape, generated header, body non-trivial, no leaked `{{TEMPLATE}}` placeholders, catalog-trim contract on description. 309 assertions across 51 skills. - `test/skill-size-budget.test.ts` — per-skill SKILL.md byte budget (×1.05 default ratio), total corpus budget, catalog token budget (≤7000 for v1.46). Caught regressions get a per-skill breakdown + override path. @@ -218,7 +218,7 @@ Open `/design-shotgun` Monday morning, work through three rounds of variants, wa #### Changed - **Board JS uses relative URLs** instead of an injected `__GSTACK_SERVER_URL` global. The same generated HTML works at `/` (legacy `--no-daemon`) and `/boards//` (daemon). `location.protocol` feature-detect keeps the `file://` DOM-only fallback path working. -- **Bare `GET /boards/` returns 301** to `/boards//`. 
The trailing slash is decision-critical for relative-URL resolution in the board JS; without it, `fetch('./api/feedback')` would resolve to the wrong scope. +- **Bare `GET /boards/` returns 301** to `/boards//`. The trailing slash controls relative-URL resolution in the board JS; without it, `fetch('./api/feedback')` would resolve to the wrong scope. - **Reload guard rejects directory paths**. `design/src/serve.ts:200-212` previously let `resolvedReload === allowedDir` through, which then crashed `readFileSync` with `EISDIR`. Now requires `statSync(resolvedReload).isFile()` with a clear 400 instead. - **Feedback files carry `boardId` and `publishedAt`** so agents polling `feedback.json` / `feedback-pending.json` in a multi-board world can verify which board produced what. - **`sourceDir` is derived from `realpath(html)` server-side**, never trusted from the publish POST body. @@ -331,7 +331,7 @@ Open the sidebar once. Use it. Close your laptop. Wake up tomorrow. Type a key. #### For contributors - **Test framework `bunfig.toml` + `test-setup.ts`** — Global afterEach restores `process.env.PATH` only. Narrow on purpose — broader snapshot/restore breaks tests that legitimately set `process.env.GSTACK_HOME` at module load (`domain-skills-storage.test.ts`). -- **12 new test files, 83 new unit-tier tests.** Static-grep tripwires defend the decision-critical protocol contracts (close codes, lease lifecycle, watchdog identity check, supervisor crash-loop guard, ring buffer ESC boundaries) without paying for live WebSocket cycles in CI. +- **12 new test files, 83 new unit-tier tests.** Static-grep tripwires defend the protocol contracts that matter here (close codes, lease lifecycle, watchdog identity check, supervisor crash-loop guard, ring buffer ESC boundaries) without paying for live WebSocket cycles in CI. 
- **Eng review + outside voice (codex) ran on this branch.** 17 decisions baked: 10 from the in-review architecture pass (D1-D10), 6 from codex cross-model tension resolution (T1-T6, all adopted in codex's favor — most consequential was T1, separating sessionId from auth token), and 1 from in-PR scope-up of the outer supervisor. ## [1.43.3.0] - 2026-05-21 @@ -535,7 +535,7 @@ If you have `VOYAGE_API_KEY` set and run `/setup-gbrain` on a fresh machine, `gb ## **iOS QA on a real iPhone — no XCTest, no WebDriverAgent, no simulators.** ## **Verified end-to-end on a real iPhone 17 Pro Max running iOS 26.5; any agent that speaks HTTP can run full QA against a real iOS app, locally over USB or remotely over Tailscale.** -Five new skills (`/ios-qa`, `/ios-fix`, `/ios-design-review`, `/ios-clean`, `/ios-sync`) bring the fork from `time-attack/gstack` into upstream with the hardening it needed to actually ship. The architecture's decision-critical insight: drop XCTest, drop the simulator, drop WebDriverAgent. Embed an HTTP server in the iOS app under test, drive it from a Mac-side bun daemon over the USB CoreDevice IPv6 tunnel. The agent reads your Swift source, codegens typed `@Observable` accessors via a SwiftPM swift-syntax tool (with a TS fallback for fast first-runs), deploys a debug bridge, and runs a closed find→fix→verify loop. With the optional `--tailnet` flag, the Mac daemon also binds Tailscale and accepts authenticated remote calls — your Mac plus an iPhone you already own becomes the iOS QA surface for any agent on your tailnet. +Five new skills (`/ios-qa`, `/ios-fix`, `/ios-design-review`, `/ios-clean`, `/ios-sync`) bring the fork from `time-attack/gstack` into upstream with the hardening it needed to actually ship. The architecture's main insight: drop XCTest, drop the simulator, drop WebDriverAgent. Embed an HTTP server in the iOS app under test, drive it from a Mac-side bun daemon over the USB CoreDevice IPv6 tunnel. 
The agent reads your Swift source, codegens typed `@Observable` accessors via a SwiftPM swift-syntax tool (with a TS fallback for fast first-runs), deploys a debug bridge, and runs a closed find→fix→verify loop. With the optional `--tailnet` flag, the Mac daemon also binds Tailscale and accepts authenticated remote calls — your Mac plus an iPhone you already own becomes the iOS QA surface for any agent on your tailnet. Two Mac-side CLIs ship alongside the skills: `gstack-ios-qa-daemon` brokers traffic between the agent and the connected iPhone, and `gstack-ios-qa-mint` is the owner-grant tool for the tailnet allowlist (grant / revoke / list). The full end-to-end walkthrough lives at [docs/howto-ios-testing-with-gstack.md](docs/howto-ios-testing-with-gstack.md). @@ -971,7 +971,7 @@ When the model finishes a plan-* review and is about to exit plan mode, it reads #### For contributors -- The implementation sequence is decision-critical: resolver → index → templates → preamble → `bun run gen:skill-docs` → tests. Adding the test before regeneration fails on missing gate; regenerating before the resolver edits produces no-op output. Bisectable commits should respect this order. +- The implementation sequence matters: resolver → index → templates → preamble → `bun run gen:skill-docs` → tests. Adding the test before regeneration fails on missing gate; regenerating before the resolver edits produces no-op output. Bisectable commits should respect this order. - The codex gate is intentionally NOT terminal in `codex/SKILL.md`. Codex has three modes (review/challenge/consult) and only review mode writes to plan files. The gate's check-2 ("last heading is GSTACK REVIEW REPORT") short-circuits cleanly when no plan file is in context, so non-plan codex invocations are unaffected. ## [1.39.0.0] - 2026-05-14 @@ -1545,7 +1545,7 @@ If you've been hitting the 35-minute hang on `/sync-gbrain`, it's gone. 
The arch Seven community PRs land together, hand-picked through `/plan-eng-review` plus a Codex outside-voice review that reshaped the wave mid-flight. The headline fixes are real: the root-token authentication path no longer throws on a multibyte input that matches JS character length but not UTF-8 byte length, direct `http://[fe80::N]/` URLs are now rejected the same way ULA addresses already were, `gbrain put` strips NUL bytes from pasted transcript content so Postgres doesn't reject the write, and the build script doesn't tear down when run on a fresh worktree with no git HEAD yet. -Two PRs in the original 9-PR plan got moved to follow-up reviews after Codex caught decision-critical problems: the SVG-XSS fix (#1153) needs a sanitizer integration rebuild, and the hook-command variable swap (#1141) needs runtime verification in plugin + dev-symlink modes. Both will land as their own PRs. +Two PRs in the original 9-PR plan got moved to follow-up reviews after Codex caught problems that blocked safe merge: the SVG-XSS fix (#1153) needs a sanitizer integration rebuild, and the hook-command variable swap (#1141) needs runtime verification in plugin + dev-symlink modes. Both will land as their own PRs. ### The numbers that matter @@ -1586,7 +1586,7 @@ If you run `pair-agent` and someone hits your tunnel with a multibyte token gues #### For contributors - The AskUserQuestion preamble byte budget ratchets from 36,500 → 39,000 to absorb the new CJK rule (rule 12 + self-check item). Generated SKILL.md files for all 35 tier-≥2 skills regenerate as a single mechanical commit. 
-- Two PRs from the original 9-PR plan moved to follow-up reviews after Codex outside-voice caught decision-critical problems: #1153 (SVG sanitizer) needs the sanitizer integration rebuilt against the current `setTabContent` boundary in `browse/src/write-commands.ts:319` (the original PR removed `.svg` from the allowlist; the right fix is to keep it allowed and sanitize via DOMPurify before `setTabContent`). #1141 (CLAUDE_PLUGIN_ROOT) needs runtime verification in both plugin-installed and dev-symlink modes plus scope expansion to the non-frontmatter shell snippet at `investigate/SKILL.md.tmpl:107`. +- Two PRs from the original 9-PR plan moved to follow-up reviews after Codex outside-voice caught problems that blocked safe merge: #1153 (SVG sanitizer) needs the sanitizer integration rebuilt against the current `setTabContent` boundary in `browse/src/write-commands.ts:319` (the original PR removed `.svg` from the allowlist; the right fix is to keep it allowed and sanitize via DOMPurify before `setTabContent`). #1141 (CLAUDE_PLUGIN_ROOT) needs runtime verification in both plugin-installed and dev-symlink modes plus scope expansion to the non-frontmatter shell snippet at `investigate/SKILL.md.tmpl:107`. - Five gate-tier evals hardened against non-determinism / TTY rendering quirks after the wave's first `test:gate` run surfaced them as flakes (verified pre-existing on `main`, then fixed): `office-hours-builder-wildness` retiers `gate` → `periodic` because LLM-judge creativity scoring belongs in periodic per the tier-classification rules. `plan-design-with-ui` AUQ-detection tail expands 2.5KB → 5KB so the full Step 0 box-rendered AUQ fits inside the regex window. `ask-user-question-format-compliance` budget stretches 300s → 540s (poll), 360s → 600s (PTY session), 420s → 660s (bun wrapper) to accommodate `/plan-ceo-review`'s multi-bash-block preamble on substantive branches. 
`benchmark-providers` gemini smoke drops the brittle `toContain('ok')` assertion in favor of a shape check on the adapter result. `skillify` scrape-prototype-path accepts JSON shape variants (`results`, `data`, `hits`, bare arrays of `{title, score}` objects) instead of grepping for the literal `"items":[` key. - Housekeeping: the three source PRs absorbed into v1.31.1.0 (#1242, #1394, #1393) get closed with credit comments pointing at the merge SHA. @@ -1646,7 +1646,7 @@ the truncated 2KB evidence window. | LLM judge classifications | 0 | 4 (waiting/working/hung/unknown) | +4 | | Diff size on this branch (after merge with main) | — | -721 / +928 | net +207 | -The deleted "fallback" clause was the decision-critical instruction the +The deleted "fallback" clause was the instruction the model was rationalizing as a general escape hatch from "fanning out round-trip AUQs." Once it's gone, the anti-shortcut clause and STOP gates in `plan-eng-review` Sections 1-4 stand without a contradicting @@ -1661,7 +1661,7 @@ quietly batched into a "## Decisions to confirm" plan-file write that gets buried under ExitPlanMode. The harness improvements (prose-AUQ detector, LLM judge, snapshot logs at `~/.gstack/analytics/pty-judge.jsonl` and `~/.gstack/analytics/pty-snapshots/` when `GSTACK_PTY_LOG=1`) are -decision-critical for any future plan-mode regression test that needs to +needed for any future plan-mode regression test that needs to distinguish "model is thinking" from "model is waiting for me." ### Itemized changes @@ -1722,7 +1722,7 @@ distinguish "model is thinking" from "model is waiting for me." #### For contributors - Three subagent investigations across the debugging cycle were the - decision-critical diagnostic step: the architectural fix, the prose-AUQ + diagnostic step that found the fix: the architectural fix, the prose-AUQ detector design, and the test-fictional-state retraction. 
The pattern that worked: have a fresh-context subagent verify the parent's mental model against actual file contents before committing @@ -2310,7 +2310,7 @@ The `## GSTACK REVIEW REPORT` section had a write rule that contradicted itself: - `test/gen-skill-docs.test.ts` — new `GSTACK REVIEW REPORT delete-then-append flow` describe block: 4 SKILL.md target tests + 1 source resolver test. Static, deterministic, free. #### For contributors -- The `/autoplan` E2E approach attempted in the plan was dropped after a paid run revealed that `--disallowedTools AskUserQuestion` makes autoplan bail at the Phase 1 premise gate via the plan-file fallback. The PTY harness can't drive autoplan through its review phases without auto-progression of AskUserQuestions. The static prompt-text test catches the decision-critical change without needing that infrastructure. +- The `/autoplan` E2E approach attempted in the plan was dropped after a paid run revealed that `--disallowedTools AskUserQuestion` makes autoplan bail at the Phase 1 premise gate via the plan-file fallback. The PTY harness can't drive autoplan through its review phases without auto-progression of AskUserQuestions. The static prompt-text test catches the change that matters without needing that infrastructure. ## [1.26.3.0] - 2026-05-03 @@ -3088,7 +3088,7 @@ The harness itself is a reusable primitive. 
`runPlanSkillObservation()` watches - 18 preamble resolvers compressed: `generate-ask-user-format.ts`, `generate-brain-sync-block.ts`, `generate-completeness-section.ts`, `generate-completion-status.ts`, `generate-confusion-protocol.ts`, `generate-context-health.ts`, `generate-context-recovery.ts`, `generate-continuous-checkpoint.ts`, `generate-lake-intro.ts`, `generate-preamble-bash.ts`, `generate-proactive-prompt.ts`, `generate-routing-injection.ts`, `generate-telemetry-prompt.ts`, `generate-upgrade-check.ts`, `generate-vendoring-deprecation.ts`, `generate-voice-directive.ts`, `generate-writing-style-migration.ts`, `generate-writing-style.ts`. - All 47 generated `SKILL.md` files regenerated; 3 ship golden fixtures regenerated. -- Plan-* skills retain full preamble surface (Brain Sync, Context Recovery, Routing Injection) — the early slim attempt that cut these was reverted after diagnosing them as decision-critical. +- Plan-* skills retain full preamble surface (Brain Sync, Context Recovery, Routing Injection) — the early slim attempt that cut these was reverted once they were found to be required. - 5 existing plan-mode tests (`plan-ceo`, `plan-eng`, `plan-design`, `plan-devex`, `plan-mode-no-op`) rewritten onto the new harness with a 300s observation budget. All 5 verify-pass under `EVALS=1 EVALS_TIER=gate` against the real `claude` binary in 790s sequential. - `isNumberedOptionListVisible` regex tolerates whitespace collapse from TTY cursor-positioning escapes (`\x1b[40C`) which `stripAnsi` removes — `\b2\.` was failing on word-to-word transitions where stripped output read `text2.`. @@ -3105,7 +3105,7 @@ The harness itself is a reusable primitive. `runPlanSkillObservation()` watches - `test/helpers/touchfiles.ts`: 5 plan-mode test selections + e2e-harness-audit selection now point at `claude-pty-runner.ts` instead of the deleted helper.
6 new entries (`ask-user-question-format-pty`, `plan-ceo-mode-routing`, `plan-design-with-ui-scope`, `budget-regression-pty`, `ship-idempotency-pty`, `autoplan-chain-pty`) with tier classifications: 3 gate, 3 periodic. - `test/e2e-harness-audit.test.ts`: recognizes `runPlanSkillObservation` as a valid coverage path alongside the legacy `canUseTool` / `runPlanModeSkillTest` patterns. -- New unit test: `test/gen-skill-docs.test.ts` asserts plan-review preambles stay under 33 KB and the slim Voice section preserves its decision-critical semantic contract (lead-with-the-point, name-the-file, user-outcome framing, no-corporate, no-AI-vocab, user-sovereignty). +- New unit test: `test/gen-skill-docs.test.ts` asserts plan-review preambles stay under 33 KB and the slim Voice section preserves the semantic contract the skills rely on (lead-with-the-point, name-the-file, user-outcome framing, no-corporate, no-AI-vocab, user-sovereignty). - `test/touchfiles.test.ts`: skill-specific change selection count updated 15 → 18 to match the 6 new touchfile entries that depend on `plan-ceo-review/**`. ## [1.14.0.0] - 2026-04-25 @@ -3581,7 +3581,7 @@ Measured across the v1.10.0.0 fix. Verify any claim with `git log 1.9.0.0..1.10. | Periodic evals defending against escape-hatch abuse | 0 | **4** | +4 (2 positive, 2 negative-case) | | Cross-model review findings incorporated before landing | N/A | **5 of 8** | Codex caught real bugs CEO+Eng missed | -Two of the five Codex findings were decision-critical. (1) The overlay reorder theory wasn't enough on its own. The `(recommended)` label on a neutral-posture question had to stay, because `question-tuning.ts:29` reads it to power AUTO_DECIDE. Omitting it would have silently broken auto-decide on every cherry-pick prompt. (2) The "31 sites global replace" in the original plan was factually wrong. Actual count, verified with `rg`, is 16 sites across 4 templates, and eng/design/devex templates used different phrasing than CEO. 
Without the audit, the fix would have shipped half-applied. +Two of the five Codex findings changed the implementation. (1) The overlay reorder theory wasn't enough on its own. The `(recommended)` label on a neutral-posture question had to stay, because `question-tuning.ts:29` reads it to power AUTO_DECIDE. Omitting it would have silently broken auto-decide on every cherry-pick prompt. (2) The "31 sites global replace" in the original plan was factually wrong. Actual count, verified with `rg`, is 16 sites across 4 templates, and eng/design/devex templates used different phrasing than CEO. Without the audit, the fix would have shipped half-applied. ### What this means for anyone running plan reviews on Opus 4.7 @@ -4087,7 +4087,7 @@ Same 200 cases, before and after the fixes above: **4.4x lift in detection.** FP rate also climbed 3.7x — Haiku is more aggressive and fires on edge cases that TestSavantAI smiles through. The review banner makes those FPs recoverable: user sees the suspected excerpt + layer scores, clicks Allow once, session continues. A P1 follow-up is tuning the Haiku WARN threshold (currently 0.6, probably should be 0.7-0.85) against real-world attempts.jsonl data once gstack users start reporting. -Honest shipping posture: this is meaningfully safer than v1.3.x, not bulletproof. Canary (deterministic), content-security L1-L3 (structural), and the review banner remain the decision-critical defenses when the ML layers miss or over-fire. +Honest shipping posture: this is meaningfully safer than v1.3.x, not bulletproof. Canary (deterministic), content-security L1-L3 (structural), and the review banner remain the defenses that matter when the ML layers miss or over-fire. ### Env knobs diff --git a/CLAUDE.md b/CLAUDE.md index a873c17263..8725c43fd6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -277,7 +277,7 @@ to `~/.gstack/security/attempts.jsonl` via `tunnel-denial-log.ts`. 
Before editin `server.ts`, `sse-session-cookie.ts`, or `tunnel-denial-log.ts`, read [ARCHITECTURE.md](ARCHITECTURE.md#dual-listener-tunnel-architecture-v1600) — the module boundary (no imports from `token-registry.ts` into `sse-session-cookie.ts`) -is decision-critical for scope isolation. +is what keeps scope isolated. **Unicode sanitization at server egress** (v1.38.0.0+). Every server egress that ships page-content-derived strings MUST go through `JSON.stringify(payload, diff --git a/bin/gstack-gbrain-lib.sh b/bin/gstack-gbrain-lib.sh index 06b6cf41fc..306ddb5b27 100644 --- a/bin/gstack-gbrain-lib.sh +++ b/bin/gstack-gbrain-lib.sh @@ -22,12 +22,12 @@ # Exported after read so sub-processes inherit the secret. Caller # is responsible for `unset ` when done. # -# Decision-critical for D3-eng (shared secret helper across PAT + URL paste), +# Required for D3-eng (shared secret helper across PAT + URL paste), # D10 (env-var handoff, never argv), D11 (PAT scope disclosure + SIGINT # restore), D16 (pooler URL paste hygiene with redacted preview). # _gstack_gbrain_validate_varname — returns 0 if usable, 2 otherwise. -# `local LC_ALL=C` is decision-critical twice over: +# `local LC_ALL=C` is needed twice over: # 1. In many macOS shells the default locale (e.g. en_US.UTF-8) makes `case` # glob brackets like `[A-Z]` match lowercase letters too. Without the # LC_ALL=C pin, names like `lower-case` pass validation and then trip diff --git a/browse/src/browser-skill-commands.ts b/browse/src/browser-skill-commands.ts index 2bbd7222d2..cea287e17d 100644 --- a/browse/src/browser-skill-commands.ts +++ b/browse/src/browser-skill-commands.ts @@ -8,7 +8,7 @@ * test — run script.test.ts via bun test * rm [--global] — tombstone a user-tier skill * - * Decision-critical: spawnSkill mints a per-spawn scoped token (read+write scope) + * Security boundary: spawnSkill mints a per-spawn scoped token (read+write scope) * and passes it via GSTACK_SKILL_TOKEN. 
The skill never sees the daemon root * token. Untrusted skills get a scrubbed env (no $HOME, $PATH minimal, no * secrets like $GITHUB_TOKEN/$OPENAI_API_KEY/etc.) and a locked cwd. Trusted @@ -216,7 +216,7 @@ function handleRm(args: string[], ctx: SkillCommandContext): string { return `Tombstoned "${name}" (${effectiveTier} tier) → ${dst}\n`; } -// ─── spawnSkill (decision-critical) ────────────────────────────────── +// ─── spawnSkill scoped token boundary ──────────────────────────────── export interface SpawnSkillOptions { skill: BrowserSkill; diff --git a/browse/src/browser-skill-write.ts b/browse/src/browser-skill-write.ts index 4b35590aa0..334beacf7c 100644 --- a/browse/src/browser-skill-write.ts +++ b/browse/src/browser-skill-write.ts @@ -178,7 +178,7 @@ export function commitSkill(opts: CommitSkillOptions): string { * test failure (step 8 of /skillify) or approval rejection (step 9). * * Idempotent: missing dirs are not an error. Best-effort: failures are - * swallowed (cleanup is fire-and-forget, not decision-critical). + * swallowed (cleanup is fire-and-forget, not required for success). */ export function discardStaged(stagedDir: string): void { // Remove the leaf skill dir first, then the wrapper skillify-/. 
diff --git a/browse/src/domain-skills.ts b/browse/src/domain-skills.ts index 9503c3caae..ee9223c77d 100644 --- a/browse/src/domain-skills.ts +++ b/browse/src/domain-skills.ts @@ -296,7 +296,7 @@ export async function writeSkill(input: WriteSkillInput): Promise 0 gate is decision-critical: handleSave currently writes + * The classifier_score > 0 gate matters because handleSave currently writes * classifier_score=0 with the comment "L4 deferred to load-time / sidebar-agent * fills this in on first prompt-injection load," but sidebar-agent was ripped * (CLAUDE.md "Sidebar architecture") and nothing else updates the score, so diff --git a/browse/test/cli-supervisor.test.ts b/browse/test/cli-supervisor.test.ts index 2a7838a436..a55cf0981b 100644 --- a/browse/test/cli-supervisor.test.ts +++ b/browse/test/cli-supervisor.test.ts @@ -12,7 +12,7 @@ import * as path from 'path'; // terminal-agent watchdog. // // Live respawn tests belong in the e2e tier (real Bun.spawn cycles take -// 3-8s each). These tripwires defend the decision-critical invariants: +// 3-8s each). These tripwires defend the invariants that matter: // opt-in by default, signal handlers wired, crash-loop guard, env knobs. const CLI_TS = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src', 'cli.ts'); diff --git a/browse/test/domain-skills-e2e.test.ts b/browse/test/domain-skills-e2e.test.ts index 8ecb4b36df..32fedd9c96 100644 --- a/browse/test/domain-skills-e2e.test.ts +++ b/browse/test/domain-skills-e2e.test.ts @@ -92,7 +92,7 @@ describe('$B domain-skill (E2E gate tier)', () => { expect(await readSkill('127.0.0.1', 'e2e-test-slug')).toBeNull(); // Three uses without flag with classifier_score=0 (the default until L4 is - // rewired) MUST stay quarantined per #1369. The gate is decision-critical: a + // rewired) MUST stay quarantined per #1369. 
The gate matters because a // quarantined skill written under the influence of a poisoned page would // otherwise auto-promote after three benign uses without the L4 body scan // ever running. diff --git a/browse/test/server-pty-lease-routes.test.ts b/browse/test/server-pty-lease-routes.test.ts index 2827a8df5b..ed3c12eb24 100644 --- a/browse/test/server-pty-lease-routes.test.ts +++ b/browse/test/server-pty-lease-routes.test.ts @@ -5,7 +5,7 @@ import * as path from 'path'; // Server-side route shape for the v1.44 lease + restart + dispose + // lease-refresh wiring. Live route exercises require the terminal-agent // loopback to be live (e2e-tier); these static-grep tripwires pin the -// decision-critical protocol invariants. +// protocol invariants. const SERVER_TS = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src', 'server.ts'); diff --git a/browse/test/sidebar-tabs.test.ts b/browse/test/sidebar-tabs.test.ts index 14285776a7..4708095a7a 100644 --- a/browse/test/sidebar-tabs.test.ts +++ b/browse/test/sidebar-tabs.test.ts @@ -7,7 +7,7 @@ * endpoints are gone, and the primary-tab nav (Terminal | Chat) is * gone. Terminal is now the sole primary surface. * - * This file locks the decision-critical invariants of that layout so a + * This file locks the layout invariants so a * future refactor can't silently re-introduce the old surface or break * the new one. */ diff --git a/browse/test/sidepanel-reattach.test.ts b/browse/test/sidepanel-reattach.test.ts index 130edd60ff..705e372cdf 100644 --- a/browse/test/sidepanel-reattach.test.ts +++ b/browse/test/sidepanel-reattach.test.ts @@ -9,7 +9,7 @@ import * as path from 'path'; // opens a new WS with the fresh attachToken, writes RIS to xterm when // the agent sends {type:"reattach-begin"}, then treats the next binary // frame as the scrollback replay payload. 
Static-grep tripwires defend -// the decision-critical protocol invariants; live re-attach exercises belong +// the protocol invariants; live re-attach exercises belong // in the e2e tier. const TERMINAL_JS = path.resolve( diff --git a/browse/test/terminal-agent-detach-reattach.test.ts b/browse/test/terminal-agent-detach-reattach.test.ts index 6faa5407c0..c0e8c74074 100644 --- a/browse/test/terminal-agent-detach-reattach.test.ts +++ b/browse/test/terminal-agent-detach-reattach.test.ts @@ -7,7 +7,7 @@ import * as path from 'path'; // The state machine is what turns a single network blip from "fall through // to ENDED state, click Restart" into "silent re-attach with scrollback // intact, keep typing." Live WS cycles + buffer-overflow exercises belong -// in the e2e tier; these static-grep tripwires defend the decision-critical +// in the e2e tier; these static-grep tripwires defend the required // protocol + correctness properties. const AGENT_TS = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src', 'terminal-agent.ts'); diff --git a/browse/test/terminal-agent-integration.test.ts b/browse/test/terminal-agent-integration.test.ts index 279fe17dca..9311aba868 100644 --- a/browse/test/terminal-agent-integration.test.ts +++ b/browse/test/terminal-agent-integration.test.ts @@ -204,7 +204,7 @@ describe('terminal-agent: PTY round-trip via real WebSocket (Cookie auth)', () = // `protocols` cleanly when also passed `headers` (the constructor // detects the third-arg form unreliably). Real browsers (Chromium) // use the standard protocols arg fine — the server-side handler is - // identical either way, so this test still locks the decision-critical + // identical either way, so this test still locks the required // invariant: the agent accepts a token via Sec-WebSocket-Protocol // and echoes the protocol back so a browser would accept the upgrade. 
const handshakeKey = 'dGhlIHNhbXBsZSBub25jZQ=='; diff --git a/browse/test/terminal-agent-watchdog.test.ts b/browse/test/terminal-agent-watchdog.test.ts index 8414eb142c..c6f343ba23 100644 --- a/browse/test/terminal-agent-watchdog.test.ts +++ b/browse/test/terminal-agent-watchdog.test.ts @@ -7,7 +7,7 @@ import * as path from 'path'; // The watchdog respawns terminal-agent when its PID dies. Live process-tree // tests would require spawning, killing, and observing across two real Bun // processes — slow and flaky in the free tier. These tripwires defend the -// decision-critical properties: identity-based liveness check (not name match), +// properties that matter: identity-based liveness check (not name match), // crash-loop guard, gated on ownsTerminalAgent, and cleared on shutdown. const SERVER_TS = path.resolve(new URL(import.meta.url).pathname, '..', '..', 'src', 'server.ts'); diff --git a/design/src/daemon.ts b/design/src/daemon.ts index be3e84e4fd..2edccb047a 100644 --- a/design/src/daemon.ts +++ b/design/src/daemon.ts @@ -358,7 +358,7 @@ async function handlePublish(req: Request, origin: string): Promise { function handleBoardGet(board: Board): Response { board.lastTouched = Date.now(); // No __GSTACK_SERVER_URL injection — board JS uses relative URLs that - // resolve against /boards// (the trailing slash is decision-critical here; + // resolve against /boards// (the trailing slash matters here; // the 301 from the bare /boards/ form ensures it). return new Response(board.htmlContent, { headers: { "Content-Type": "text/html; charset=utf-8" }, diff --git a/docs/designs/PACING_UPDATES_V0.md b/docs/designs/PACING_UPDATES_V0.md index 1f340cc390..290b045fd3 100644 --- a/docs/designs/PACING_UPDATES_V0.md +++ b/docs/designs/PACING_UPDATES_V0.md @@ -28,7 +28,7 @@ During V1 planning, a pacing workstream was drafted: rank findings, auto-accept 4. 
**Pacing as prose can't invert existing control flow.** V1 planned to add a "rank findings, then ask" rule to preamble prose. But existing skill templates like `plan-eng-review/SKILL.md.tmpl` have per-section STOP/AskUserQuestion sequences. A prose rule in preamble can't reliably override a hardcoded per-section STOP. The behavioral change is sequencing, not prompt wording. 5. **Flip mechanism has no implementation.** "Reply `flip ` to change" was prose. No command parser, no state store, no replay behavior. If the conversation compacts and the Silent Decisions block leaves context, the original decision is lost. 6. **Migration prompt is itself an interrupt.** V1's post-upgrade migration prompt (offering to restore V0 prose) counts against the interruption budget V1.1 is trying to reduce. V1.1 must decide: exempt from budget, or include as interrupt-1-of-N? -7. **First-run preamble prompts count too.** Lake intro, telemetry, proactive, routing injection — Louise saw all of them on first run. They're interruptions before the first real skill runs. V1.1 must audit which of these are decision-critical for new users vs. deferrable until session N. +7. **First-run preamble prompts count too.** Lake intro, telemetry, proactive, routing injection — Louise saw all of them on first run. They're interruptions before the first real skill runs. V1.1 must audit which of these a new user truly needs vs. what can wait until session N. 8. **Ranking formula not calibrated against real data.** V1 considered `product 0-8` (broken: `{0,1,2,4,8}` distribution), then `sum 0-6` with threshold ≥ 4. But neither was validated against actual finding distribution. V1.1 should instrument V0 question-log to measure what real findings look like, then calibrate. 9. **"Every one-way door surfaces" vs "max 3 per phase" contradicts.** One-way cap = uncapped (safety); two-way cap = 3. But the plan had both rules without explicit precedence. 
V1.1 must state: one-way doors surface uncapped regardless of phase budget. 10. **Undefined verification values.** V1 plan had "Silent Decisions block ≥ N entries" with N never defined, and `active: true` field in throughput JSON never defined. V1.1 gets concrete values. @@ -49,7 +49,7 @@ During V1 planning, a pacing workstream was drafted: rank findings, auto-accept 6. **Migration-prompt budget decision.** Explicit rule: one-shot migration prompts are exempt from the per-phase interruption budget. Rationale: they fire before review phases start, not during. -7. **First-run preamble audit.** Audit lake intro, telemetry, proactive, routing injection. For each: is this decision-critical for a first-time user, or deferrable? Likely outcome: suppress all but lake intro until session 2+. Offer remaining ones via a `/plan-tune first-run` command that users can invoke voluntarily. +7. **First-run preamble audit.** Audit lake intro, telemetry, proactive, routing injection. For each: does a first-time user need this now, or can it wait? Likely outcome: suppress all but lake intro until session 2+. Offer remaining ones via a `/plan-tune first-run` command that users can invoke voluntarily. 8. **Ranking threshold calibration.** Instrument V0's question-log (already running, has history). Measure the actual distribution of `severity × irreversibility × user-decision-matters` across recent CEO + Eng + DX + Design reviews. Pick threshold based on real data. Target: ~20% of findings surface, ~80% auto-accept. diff --git a/docs/designs/SIDEBAR_MESSAGE_FLOW.md b/docs/designs/SIDEBAR_MESSAGE_FLOW.md index c7ce453150..5553aca0cb 100644 --- a/docs/designs/SIDEBAR_MESSAGE_FLOW.md +++ b/docs/designs/SIDEBAR_MESSAGE_FLOW.md @@ -121,7 +121,7 @@ purpose — the user is typing directly to claude, there's no untrusted page content in the loop. Trust source is the keyboard, same as any local terminal. 
-That trust assumption is decision-critical on three transport guarantees: +That trust assumption depends on three transport guarantees: 1. **Local-only listener.** terminal-agent.ts binds `127.0.0.1` only. The dual-listener tunnel surface (server.ts `TUNNEL_PATHS`) does diff --git a/docs/designs/SLATE_HOST.md b/docs/designs/SLATE_HOST.md index 5ab39912fd..5bfec80acf 100644 --- a/docs/designs/SLATE_HOST.md +++ b/docs/designs/SLATE_HOST.md @@ -158,7 +158,7 @@ OPENCODE_TERMINAL ### Critical env vars for gstack integration **`SLATE_DISABLE_CLAUDE_CODE_SKILLS`** — When set, `.claude/skills/` loading is disabled. -This makes publishing to `.slate/skills/` decision-critical, not just an optimization. +This makes publishing to `.slate/skills/` required, not just an optimization. Without native `.slate/` publishing, gstack skills vanish when this flag is set. **`SLATE_TEST_HOME`** — Useful for E2E tests. Can redirect Slate's home directory diff --git a/docs/designs/v2_PLAN.md b/docs/designs/v2_PLAN.md index 3f45b84305..a78b322721 100644 --- a/docs/designs/v2_PLAN.md +++ b/docs/designs/v2_PLAN.md @@ -82,9 +82,9 @@ reviews internally measured pack" exte ## Phase 0 — Eval coverage matrix (v1.45.0.0) -**Goal:** every skill in gstack ships with at least one gate-tier eval AND one periodic-tier eval that asserts a must-have behavior. The eval suite becomes the design spec. This is the decision-critical claim of the plan — must come first. +**Goal:** every skill in gstack ships with at least one gate-tier eval AND one periodic-tier eval that asserts a must-have behavior. The eval suite becomes the design spec. This is the claim the plan depends on, so it must come first. -**Cross-model tension noted:** Codex argued this is a procrastination trap and shape-asserts are shallow. User explicitly chose full tiered coverage anyway (D9 = A), with rationale: "the eval suite IS the design spec; that commitment is the decision-critical claim of the whole plan."
We accept the larger upfront investment. +**Cross-model tension noted:** Codex argued this is a procrastination trap and shape-asserts are shallow. User explicitly chose full tiered coverage anyway (D9 = A), with rationale: "the eval suite IS the design spec; that commitment is the claim the whole plan depends on." We accept the larger upfront investment. **Mitigation of Codex's "shape vs quality" critique:** for orchestration/judgment skills (plan-ceo, office-hours, autoplan), the must-have isn't deterministic output — it's structural compliance (does it call AskUserQuestion in the right shape? does it follow the section order? does it persist artifacts?). Eval design must capture structural contracts, not output content. Where structural eval is impossible, that section is explicitly noted as "judgment-dependent, not eval-protected" — Codex's #2 critique is honored by NOT then stripping unprotected judgment prose. diff --git a/docs/explanation-diataxis-in-gstack.md b/docs/explanation-diataxis-in-gstack.md index f1ef43f1ba..2b8a6d5fb0 100644 --- a/docs/explanation-diataxis-in-gstack.md +++ b/docs/explanation-diataxis-in-gstack.md @@ -1,6 +1,6 @@ # Why gstack uses Diataxis for documentation -The two doc skills in gstack — `/document-release` and `/document-generate` — both speak Diataxis. New entities get scored across four quadrants. Coverage gaps surface in PR bodies tagged by quadrant. This doc explains why that vocabulary is decision-critical, and why a simpler "just write markdown" approach falls down at the scale gstack operates at. +The two doc skills in gstack — `/document-release` and `/document-generate` — both speak Diataxis. New entities get scored across four quadrants. Coverage gaps surface in PR bodies tagged by quadrant. This doc explains why that vocabulary is structural to the workflow, and why a simpler "just write markdown" approach falls down at the scale gstack operates at. 
## The problem diff --git a/docs/skills.md b/docs/skills.md index 65c461af36..e38262535a 100644 --- a/docs/skills.md +++ b/docs/skills.md @@ -94,7 +94,7 @@ After the reframe, it presents premises for you to validate. Not "does this soun 3. The narrowest wedge is a daily briefing that actually works 4. CRM integration is a must-have, not a nice-to-have -You agree, disagree, or adjust. Every premise you accept becomes decision-critical in the design doc. +You agree, disagree, or adjust. Every premise you accept becomes binding in the design doc. ### Implementation alternatives @@ -1189,7 +1189,7 @@ Three Greptile comments. One real fix. One auto-acknowledged. One false positive ## `/ios-qa` -Live-device iOS QA. The fork's decision-critical insight was: don't simulate, don't run XCTest, don't bring up WebDriverAgent. Embed an HTTP server in the app under test, drive it from a Mac-side daemon over the USB CoreDevice IPv6 tunnel. +Live-device iOS QA. The fork's main insight was: don't simulate, don't run XCTest, don't bring up WebDriverAgent. Embed an HTTP server in the app under test, drive it from a Mac-side daemon over the USB CoreDevice IPv6 tunnel. The agent reads your Swift source, finds `@Observable` classes with `@Snapshotable`-marked fields, codegens typed accessors, deploys a debug bridge, then runs a closed find→fix→verify loop. diff --git a/extension/sidepanel.js b/extension/sidepanel.js index 91e9bf94f1..5a51a50e47 100644 --- a/extension/sidepanel.js +++ b/extension/sidepanel.js @@ -1090,7 +1090,7 @@ chrome.runtime.onMessage.addListener((msg) => { // "intentional close" (sidebar closed, browser quit, extension reload) // from "transient blip" (wifi hiccup) reliably — Chrome routes the // former through code 1001 (going-away) and the latter through 1006 -// (abnormal), but neither is a decision-critical contract across browsers +// (abnormal), but neither is a stable contract across browsers // and extension lifecycles. 
// // pagehide fires reliably for tab close, panel close, extension reload, diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index d57a3d93df..a03b24b65d 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -1550,7 +1550,7 @@ Evaluate: **EXPANSION and SELECTIVE EXPANSION additions:** * What comes after this ships? Phase 2? Phase 3? Does the architecture support that trajectory? * Platform potential. Does this create capabilities other features can leverage? -* (SELECTIVE EXPANSION only) Retrospective: Were the right cherry-picks accepted? Did any rejected expansions turn out to be decision-critical for the accepted ones? +* (SELECTIVE EXPANSION only) Retrospective: Were the right cherry-picks accepted? Did any rejected expansions turn out to be needed for the accepted ones? **STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If this section turned up zero findings, state "No issues, moving on" and proceed. If the section has findings, you MUST call AskUserQuestion as a tool_use — a finding with an "obvious fix" is still a finding and still needs user approval before any change lands in the plan. Do NOT proceed until the user responds. **Reminder: Do NOT make any code changes. Review only.** diff --git a/plan-ceo-review/SKILL.md.tmpl b/plan-ceo-review/SKILL.md.tmpl index 93fcb37626..d69d4c60b5 100644 --- a/plan-ceo-review/SKILL.md.tmpl +++ b/plan-ceo-review/SKILL.md.tmpl @@ -633,7 +633,7 @@ Evaluate: **EXPANSION and SELECTIVE EXPANSION additions:** * What comes after this ships? Phase 2? Phase 3? Does the architecture support that trajectory? * Platform potential. Does this create capabilities other features can leverage? -* (SELECTIVE EXPANSION only) Retrospective: Were the right cherry-picks accepted? Did any rejected expansions turn out to be decision-critical for the accepted ones? +* (SELECTIVE EXPANSION only) Retrospective: Were the right cherry-picks accepted? 
Did any rejected expansions turn out to be needed for the accepted ones? **STOP.** AskUserQuestion once per issue. Do NOT batch. Recommend + WHY. If this section turned up zero findings, state "No issues, moving on" and proceed. If the section has findings, you MUST call AskUserQuestion as a tool_use — a finding with an "obvious fix" is still a finding and still needs user approval before any change lands in the plan. Do NOT proceed until the user responds. **Reminder: Do NOT make any code changes. Review only.** diff --git a/test/cso-preserved.test.ts b/test/cso-preserved.test.ts index 46b7e07ac2..c0cb14a7dc 100644 --- a/test/cso-preserved.test.ts +++ b/test/cso-preserved.test.ts @@ -1,7 +1,7 @@ /** * cso security-guidance preservation test (v1.45.0.0 T6). * - * The cso skill carries decision-critical security prose: OWASP Top 10 mappings, + * The cso skill carries security prose the audit depends on: OWASP Top 10 mappings, * STRIDE threat-model phrasing, "do not auto-fix without user approval" * gates. Codex 2nd-pass critique #9: "cso exemption too broad ... should * still get resolver dedup, catalog trim, sectioning if safe, and targeted @@ -37,7 +37,7 @@ const MUST_PRESERVE_HEADINGS = [ '## Preamble', // from PREAMBLE resolver ]; -describe('cso skill preserves decision-critical security guidance', () => { +describe('cso skill preserves required security guidance', () => { test('cso/SKILL.md exists and is non-trivial', () => { expect(fs.existsSync(CSO_SKILL)).toBe(true); const content = fs.readFileSync(CSO_SKILL, 'utf-8'); @@ -54,7 +54,7 @@ describe('cso skill preserves decision-critical security guidance', () => { if (missing.length > 0) { throw new Error( `cso/SKILL.md is missing required security phrases: ${missing.join(', ')}. ` + - `These are decision-critical for the skill's audit posture. If you intentionally ` + + `These define the skill's audit posture. 
If you intentionally ` + `removed them, update this test with the new phrasing.`, ); } diff --git a/test/fixtures/ios-qa/FixtureApp/Tests/DebugBridgeCoreTests/StateServerSmokeTests.swift b/test/fixtures/ios-qa/FixtureApp/Tests/DebugBridgeCoreTests/StateServerSmokeTests.swift index 63920d4784..5f6c1e1c55 100644 --- a/test/fixtures/ios-qa/FixtureApp/Tests/DebugBridgeCoreTests/StateServerSmokeTests.swift +++ b/test/fixtures/ios-qa/FixtureApp/Tests/DebugBridgeCoreTests/StateServerSmokeTests.swift @@ -58,7 +58,7 @@ final class StateServerSmokeTests: XCTestCase { XCTAssertEqual(status, 401, "mutating endpoint without bearer must return 401") } - /// Boot token rotation is the decision-critical security property. Confirm: + /// Boot token rotation is the security property under test. Confirm: /// 1. Boot token is required for /auth/rotate /// 2. After rotation, boot token is dead /// 3. Rotated token works for subsequent calls diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 753331cb44..37a4443b3b 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -313,7 +313,7 @@ describe('gen-skill-docs', () => { ]; // Plan skills carry the same preamble surface as other tier-≥2 skills - // (Artifacts Sync, Context Recovery, Routing Injection are decision-critical + // (Artifacts Sync, Context Recovery, Routing Injection are required // functionality, not optional). Budget is set to current size + small // headroom; ratchet down if a future slim trims real bytes. // Ratcheted from 33000 → 35000 when the gbrain context-load block was @@ -3037,7 +3037,7 @@ describe('plan-mode-info resolver (handshake-replacement)', () => { }); // GSTACK REVIEW REPORT report-at-bottom contract — verifies the prompt-text -// fix in scripts/resolvers/review.ts (the decision-critical change for the +// fix in scripts/resolvers/review.ts (the change that fixes the // "report not at bottom of plan in plan mode" bug).
The bug is in the // prompt's contradictory write-flow instructions, not in observable // runtime behavior we can cheaply gate in CI. Verifying the prompt text diff --git a/test/helpers/parity-harness.ts b/test/helpers/parity-harness.ts index dfe4258e18..4895f62658 100644 --- a/test/helpers/parity-harness.ts +++ b/test/helpers/parity-harness.ts @@ -15,7 +15,7 @@ * size discipline only. This module supports content invariants per skill * family (e.g., cso must preserve OWASP/STRIDE; plan-ceo must preserve * mode-selection phrasing) so future compression can't silently strip - * decision-critical prose even when size stays within ratio. + * prose the skills depend on even when size stays within ratio. */ import * as fs from 'fs'; diff --git a/test/helpers/providers/gpt.ts b/test/helpers/providers/gpt.ts index 2d7e438ac9..c55aaac205 100644 --- a/test/helpers/providers/gpt.ts +++ b/test/helpers/providers/gpt.ts @@ -31,7 +31,7 @@ export class GptAdapter implements ProviderAdapter { async run(opts: RunOpts): Promise { const start = Date.now(); - // `-s read-only` is decision-critical safety. With `--skip-git-repo-check` we + // `-s read-only` is the safety setting that matters. With `--skip-git-repo-check` we // bypass codex's interactive trust prompt for unknown directories (benchmarks // often run in temp dirs / non-git paths), so the read-only sandbox is now // the only boundary preventing codex from mutating the workdir. If you ever diff --git a/test/preamble-compose.test.ts b/test/preamble-compose.test.ts index 59d0e6d222..36e1fe227e 100644 --- a/test/preamble-compose.test.ts +++ b/test/preamble-compose.test.ts @@ -3,7 +3,7 @@ * * Asserts that the AskUserQuestion Format section renders BEFORE the * Model-Specific Behavioral Patch section in tier-≥2 preamble output. - * This order is decision-critical: Opus 4.7 reads top-to-bottom and absorbs + * This order matters: Opus 4.7 reads top-to-bottom and absorbs * the first pacing directive it hits. 
v1.6.4.0 regressed plan-review * cadence because the overlay rendered first with "Batch your questions" * as the ambient default. diff --git a/test/skill-e2e-auto-decide-preserved.test.ts b/test/skill-e2e-auto-decide-preserved.test.ts index afdc5ff1da..92b3f4096a 100644 --- a/test/skill-e2e-auto-decide-preserved.test.ts +++ b/test/skill-e2e-auto-decide-preserved.test.ts @@ -34,7 +34,7 @@ * preference — that's a regression against the opt-in feature. If outcome * is 'plan_ready' with no AUTO_DECIDE text, the model auto-decided BUT * skipped the annotation (acceptable; AUTO_DECIDE annotation is good - * practice but not the decision-critical behavior). + * practice but not the behavior under test). */ import { describe, test, expect } from 'bun:test'; diff --git a/test/skill-e2e-plan-ceo-finding-count.test.ts b/test/skill-e2e-plan-ceo-finding-count.test.ts index 31fdc8c697..6030ba4e92 100644 --- a/test/skill-e2e-plan-ceo-finding-count.test.ts +++ b/test/skill-e2e-plan-ceo-finding-count.test.ts @@ -1,7 +1,7 @@ /** * /plan-ceo-review per-finding AskUserQuestion count (periodic, paid, real-PTY). * - * Asserts the decision-critical rule "One issue = one AskUserQuestion call" by + * Asserts the rule "One issue = one AskUserQuestion call" by * driving /plan-ceo-review against a 5-finding seeded plan and counting * distinct review-phase AUQs. Passes when count is in [N-1, N+2]. * diff --git a/test/skill-e2e-ship-idempotency.test.ts b/test/skill-e2e-ship-idempotency.test.ts index dabbee693d..f0154c3cdb 100644 --- a/test/skill-e2e-ship-idempotency.test.ts +++ b/test/skill-e2e-ship-idempotency.test.ts @@ -121,7 +121,7 @@ function buildShippedFixture(): ShipFixture { return { workTree, bareRemote, setupLog }; } -/** Snapshot the decision-critical fixture state so we can compare post-run. */ +/** Snapshot the fixture state we compare post-run. 
*/ interface FixtureSnapshot { versionFile: string; packageVersion: string; diff --git a/test/v0-dormancy.test.ts b/test/v0-dormancy.test.ts index c2f5620aad..504e8758c1 100644 --- a/test/v0-dormancy.test.ts +++ b/test/v0-dormancy.test.ts @@ -10,7 +10,7 @@ * V0 machinery: * - plan-tune/ — the conversational inspection skill for /plan-tune * - office-hours/ — sets the declared profile - * For these, V0 vocabulary is decision-critical and must appear. + * For these, V0 vocabulary defines the contract and must appear. * * All other tier-≥2 skills: 5D dim names + archetype names must NOT appear. */