microsoft · aspire-repo-bot · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/.github/actions/create-pull-request/action.yml b/.github/actions/create-pull-request/action.yml
@@ -29,6 +29,10 @@ inputs:
     description: 'Set to true if the branch is already pushed remotely (skips commit/push)'
     required: false
     default: 'false'
+  draft:
+    description: 'Create the pull request as a draft'
+    required: false
+    default: 'false'
 outputs:
   pull-request-number:
     description: 'The pull request number'
@@ -91,6 +95,7 @@ runs:
         PR_TITLE: ${{ inputs.title }}
         PR_BODY: ${{ inputs.body }}
         LABELS: ${{ inputs.labels }}
+        DRAFT: ${{ inputs.draft }}
       run: |
         # Check if a PR already exists for this branch
         EXISTING_PR=$(gh pr list --head "$BRANCH" --base "$BASE" --json number,url --jq '.[0] // empty')
@@ -133,12 +138,18 @@ runs:
           trap 'rm -f "$BODY_FILE"' EXIT
           printf '%s\n' "$PR_BODY" > "$BODY_FILE"
 
+          DRAFT_ARGS=()
+          if [ "$DRAFT" = "true" ]; then
+            DRAFT_ARGS+=(--draft)
+          fi
+
           # Create the pull request without eval — all args are properly quoted
           PR_URL=$(gh pr create \
             --title "$PR_TITLE" \
             --body-file "$BODY_FILE" \
             --base "$BASE" \
             --head "$BRANCH" \
+            "${DRAFT_ARGS[@]}" \
             "${LABEL_ARGS[@]}")
 
           rm -f "$BODY_FILE"

diff --git a/.github/skills/cli-e2e-testing/SKILL.md b/.github/skills/cli-e2e-testing/SKILL.md
@@ -45,29 +45,53 @@ public sealed class SmokeTests(ITestOutputHelper output)
     [Fact]
     public async Task MyCliTest()
     {
+        var repoRoot = CliE2ETestHelpers.GetRepoRoot();
+        var strategy = CliInstallStrategy.Detect(output.WriteLine);
         var workspace = TemporaryWorkspace.Create(output);
-        var installMode = CliE2ETestHelpers.DetectDockerInstallMode();
 
-        using var terminal = CliE2ETestHelpers.CreateDockerTestTerminal();
-        var pendingRun = terminal.RunAsync(TestContext.Current.CancellationToken);
+        using var terminal = CliE2ETestHelpers.CreateDockerTestTerminal(repoRoot, strategy, output, workspace: workspace);
 
         var counter = new SequenceCounter();
         var auto = new Hex1bTerminalAutomator(terminal, defaultTimeout: TimeSpan.FromSeconds(500));
+        await using var terminalRun = CliE2ETestHelpers.StartRun(terminal, workspace, auto, counter, TestContext.Current.CancellationToken);
 
         await auto.PrepareDockerEnvironmentAsync(counter, workspace);
-        await auto.InstallAspireCliInDockerAsync(installMode, counter);
+        await auto.InstallAspireCliAsync(strategy, counter);
 
         await auto.TypeAsync("aspire --version");
         await auto.EnterAsync();
         await auto.WaitForSuccessPromptAsync(counter);
-
-        await auto.TypeAsync("exit");
-        await auto.EnterAsync();
-        await pendingRun;
     }
 }
 ```
 
+### TerminalRun Pattern
+
+**Always use `CliE2ETestHelpers.StartRun`** to wrap the terminal run. This returns a `TerminalRun` (implements `IAsyncDisposable`) that, when disposed at the end of the test via `await using`, automatically:
+1. Captures Aspire diagnostics via `CaptureAspireDiagnosticsAsync` (best effort)
+2. Types `exit` and presses Enter to close the terminal
+3. Awaits the pending run task
+
+This eliminates the need for manual `exit`/`await pendingRun` at the end of every test and ensures diagnostics are always captured, even when tests fail.
+
+```csharp
+// DO: Use StartRun for consistent diagnostics capture and cleanup
+using var terminal = CliE2ETestHelpers.CreateDockerTestTerminal(repoRoot, strategy, output, workspace: workspace);
+
+var counter = new SequenceCounter();
+var auto = new Hex1bTerminalAutomator(terminal, defaultTimeout: TimeSpan.FromSeconds(500));
+await using var terminalRun = CliE2ETestHelpers.StartRun(terminal, workspace, auto, counter, TestContext.Current.CancellationToken);
+
+// ... test body — no exit/pendingRun needed at the end
+
+// DON'T: Manually handle exit and pendingRun
+var pendingRun = terminal.RunAsync(TestContext.Current.CancellationToken);
+// ... test body ...
+await auto.TypeAsync("exit");
+await auto.EnterAsync();
+await pendingRun;
+```
+
 ## Running Tests Locally
 
 CLI E2E tests run inside Docker containers on Linux. The workflow is: build a portable archive with `localhive`, then point the tests at it. This is the primary way to iterate on E2E tests during development.
@@ -246,10 +270,10 @@ await auto.WaitUntilAsync(
 
 | Method | Description |
 |--------|-------------|
-| `WaitForSuccessPromptAsync(counter, timeout?)` | Waits for `[N OK] $ ` prompt and increments counter |
+| `WaitForSuccessPromptAsync(counter, timeout?)` | Waits for `[N OK] $ ` prompt, fails immediately if error prompt appears, and increments counter |
 | `WaitForAnyPromptAsync(counter, timeout?)` | Waits for any prompt (`OK` or `ERR`) and increments counter |
 | `WaitForErrorPromptAsync(counter, timeout?)` | Waits for `[N ERR:code] $ ` prompt and increments counter |
-| `WaitForSuccessPromptFailFastAsync(counter, timeout?)` | Waits for success prompt, fails immediately if error prompt appears |
+| `RunCommandAsync(command, counter, timeout?)` | Types a command, presses Enter, and waits for success prompt (fails fast on error) |
 | `DeclineAgentInitPromptAsync()` | Declines the `aspire agent init` prompt if it appears |
 | `AspireNewAsync(projectName, counter, template?, useRedisCache?)` | Runs `aspire new` interactively, handling template selection, project name, output path, URLs, Redis, and test project prompts |
 
@@ -277,8 +301,7 @@ The following extensions on `Hex1bTerminalInputSequenceBuilder` are still availa
 |--------|-------------|
 | `WaitForSuccessPrompt(counter, timeout?)` | *(legacy)* Waits for `[N OK] $ ` prompt and increments counter |
 | `PrepareEnvironment(workspace, counter)` | *(legacy)* Sets up custom prompt with command tracking |
-| `InstallAspireCliFromPullRequest(prNumber, counter)` | *(legacy)* Downloads and installs CLI from PR artifacts |
-| `SourceAspireCliEnvironment(counter)` | *(legacy)* Adds `~/.aspire/bin` to PATH |
+| `SourceAspireBundleEnvironment(counter)` | *(legacy)* Sources bundle PATH environment variables |
 
 ## DO: Use CellPatternSearcher for Output Detection
 
@@ -586,6 +609,8 @@ Each test class runs as a separate CI job via the unified `TestEnumerationRunshe
 
 When CLI E2E tests fail in CI, follow these steps to diagnose the issue:
 
+> **Flaky test investigation:** for recurring/intermittent failures, see [`troubleshooting.md`](./troubleshooting.md) for a catalog of known flake classes (Y/n input race, prompt-counter desync, etc.) and the recipes to identify them from `.cast` recordings.
+
 ### Quick Start: Download and Play Recordings
 
 The fastest way to debug a CLI E2E test failure is to download and play the asciinema recording.

diff --git a/.github/skills/cli-e2e-testing/troubleshooting.md b/.github/skills/cli-e2e-testing/troubleshooting.md
@@ -0,0 +1,132 @@
+# CLI E2E test troubleshooting
+
+This document is a catalog of recurring flake patterns observed in `tests/Aspire.Cli.EndToEnd.Tests/` and the recipes to diagnose them. The target audience is future agent sessions investigating a CLI E2E flake. It complements `SKILL.md` (which is a "how to write tests" guide) with diagnostic detail.
+
+## Step 1 — Get the *right* artifact for the failing attempt
+
+When a failed job is re-run (manually or automatically), the new attempt uploads its artifacts under the **same name** as earlier attempts of that job in the same workflow run. `gh run download` always returns the latest one, which is often a *passing* rerun. The investigation must look at the *failing* attempt's artifact.
+
+Recipe:
+
+```bash
+# 1. From the failing job URL, get the run id and the failing attempt's started_at.
+gh api repos/microsoft/aspire/actions/runs/<RUN_ID>/attempts/<ATTEMPT_NUMBER> \
+  --jq '{attempt:.run_attempt, started:.run_started_at}'
+
+# 2. List artifacts of that workflow run filtered by artifact name.
+gh api -X GET "repos/microsoft/aspire/actions/artifacts" \
+  -f name="logs-ChannelUpdateWorkflowTests-ubuntu-latest" \
+  --jq '.artifacts[] | select(.workflow_run.id == <RUN_ID>) | {id, created_at, name}'
+
+# 3. Pick the artifact whose created_at is closest to (and after) the failing
+#    attempt's started_at — that's the one uploaded by the failing attempt.
+#    Download it explicitly by id.
+gh api repos/microsoft/aspire/actions/artifacts/<ARTIFACT_ID>/zip > /tmp/failed.zip
+unzip -q /tmp/failed.zip -d /tmp/failed
+```
+
+`gh run download` cannot disambiguate by attempt — only `gh api .../artifacts/<id>/zip` will reliably get the failing recording.
+
+## Step 2 — Reconstruct the terminal stream from the `.cast` file
+
+Each test that uses Hex1b records an asciinema cast at `testresults/recordings/<TestName>.cast`. The file is JSONL: line 1 is the header (cols, rows, env), subsequent lines are `[time, "o", payload]` events. To see what the terminal looked like around a specific moment:
+
+```python
+import json, re
+# Matches CSI sequences, charset-selection sequences, and BEL-terminated OSC sequences.
+ANSI = re.compile(r'\x1b\[[0-9;?]*[A-Za-z]|\x1b[\(\)][AB012]|\x1b\][^\x07]*\x07')
+with open("/tmp/failed/<TestName>.cast", encoding="utf-8") as f:
+    header = json.loads(next(f))
+    events = [json.loads(line) for line in f]
+stream = "".join(e[2] for e in events if e[1] == "o")
+clean = ANSI.sub("", stream)
+# Show a window around the offending text; index() raises ValueError if it never appeared.
+i = clean.index("aspire.config.json")
+print(clean[max(0, i - 2000): i + 2000])
+```
+
+For chronological analysis (was X printed before Y?), keep the event timestamps:
+
+```python
+needle = "Perform updates?"
+acc = ""  # terminal output accumulated so far, in event order
+for t, payload in ((e[0], e[2]) for e in events if e[1] == "o"):
+    acc += payload
+    if needle in acc:
+        print(f"{needle!r} first appeared at t={t:.3f}s")
+        break
+```
+
+## Step 3 — Understand the prompt-counter convention
+
+Tests rely on a deterministic shell prompt for "command finished" detection:
+
+- `tests/Shared/CliInstallStrategy.cs` configures bash with `PROMPT_COMMAND='s=$?;((CMDCOUNT++));PS1="[$CMDCOUNT $([ $s -eq 0 ] && echo OK || echo ERR:$s)] \$ "'`. Bash bumps `CMDCOUNT` and renders `[N OK] $ ` (or `[N ERR:<code>] $ `) every time it returns to the prompt.
+- `SequenceCounter` (`tests/Shared/Hex1bTestHelpers.cs`) is the test's mirror of `CMDCOUNT`. The wait helpers (`WaitForSuccessPromptAsync`, `WaitForAnyPromptAsync`, `WaitForAspireAddSuccessAsync`) search the snapshot for `[<counter.Value> OK] $ ` as a substring, then `Increment()` the counter on a match.
+- The counters are supposed to be in lockstep. The test increments its counter once per `Wait...Prompt` call; bash increments `CMDCOUNT` once per prompt display. **Anything that causes bash to display an extra prompt the test does not account for desyncs the two counters** — and turns the next "wait for prompt N" into a potential false positive on an already-on-screen prompt line that contains the substring `N OK] $ `.
+
+## Step 4 — Diagnose the "Y/n input race"
+
+This is the most-observed flake class so far. It is what broke `ChannelUpdateWorkflowTests.UpdateProjectChannelToStable_TypeScript_PreviewsStablePackagesAndPreservesChannel` on PR #17522 (run `26489967289`, job `78006625708`).
+
+### Symptom
+
+An assertion that reads on-disk state (typically `aspire.config.json` after `aspire add`) fails saying the expected content is missing. Inspecting the cast shows the CLI eventually *did* succeed — the test read the file too early.
+
+### Recording signature
+
+In the cast around the failing read, look for an **extra empty `[N OK] $` prompt cycle** right after a `[Y/n]:` prompt was answered, followed by the next typed command at `[N+1]`:
+
+```
+Perform updates? [Y/n]: n
+[21 OK] $
+[22 OK] $ aspire add Aspire.Hosting.PostgreSQL
+```
+
+A passing recording for the same test goes straight from `[Y/n]: n` to `[21 OK] $ aspire add ...` with no empty intermediate prompt.
+
+### Mechanism
+
+`TypeAsync("n") + EnterAsync()` writes two bytes into the TTY input queue: `n` and `\n`. Spectre.Console's `[Y/n]` prompt is a single-character reader — it returns on the `n` keystroke and tears down its stdin handler before the `\n` is consumed. Whichever process owns the TTY when that `\n` is delivered receives it. If the CLI has already exited (or is in the middle of its teardown when bash reclaims the TTY), bash reads the stray `\n` as an empty command line, fires `PROMPT_COMMAND`, and increments `CMDCOUNT`.
+
+The test's `SequenceCounter` doesn't know about that extra cycle. It advances through the next `WaitForSuccessPromptAsync` (which still matches — the real post-command prompt is on screen), ending up coincidentally equal to bash's drifted `CMDCOUNT`. The next typed command — say `aspire add Postgres` — gets a fresh prompt at `[<N+1> OK] $ aspire add Postgres`. When the test then calls `WaitForAspireAddSuccessAsync`, the substring matcher finds `<N+1> OK] $ ` *in the typed-command header line that bash printed when accepting the command*, not in the post-completion prompt, and returns success **before** `aspire add` has done any work.
+
+The test then reads `aspire.config.json` while the CLI is still spinning. For a polyglot (TypeScript/Java) apphost, `aspire add` writes the file via `GuestAppHostProject.SaveConfiguration` *after* a full `BuildAndGenerateSdkAsync` round-trip (which starts an `AppHostServerSession` for code generation). That step is not instant, so the early read sees the pre-add config.
+
+### Fix
+
+The Spectre.Console `[Y/n]` confirmation prompt accepts a single character — it does not require Enter. Drop the Enter:
+
+```diff
+- await auto.TypeAsync("n");
+- await auto.EnterAsync();
++ await auto.TypeAsync("n");
+```
+
+This is the same pattern already used in `Hex1bAutomatorTestHelpers.DeclineAgentInitPromptAsync`. Its comment documents the same race.
+
+This fix is **only required when both a character and Enter are sent** for a single-character prompt. The following do *not* have the race:
+
+- `EnterAsync()` alone to accept a Y/n default — the `\n` *is* the single keystroke the prompt consumes, so nothing is left in the input queue for bash.
+- `TypeAsync("/some/path") + EnterAsync()` for a text-input prompt — the line reader reads through `\n` to terminate the line.
+- Arrow keys + `EnterAsync()` for a Spectre selection list — Enter is the commit byte for the selection.
+
+### Why a "wait for the prompt text to disappear" helper does *not* work
+
+A tempting "general" fix is to block until the prompt text is no longer visible in the snapshot. Spectre.Console typically leaves the answered prompt visible as scrollback (the question is rendered once and stays in the terminal buffer, sometimes rewritten to include the chosen answer). Such a helper would either never observe the prompt "disappear" (and time out) or would only work when subsequent output happens to scroll the prompt off-screen — neither is reliable.
+
+The single-character-prompt fix above is more surgical and matches the established convention in the codebase.
+
+## Other flake classes (placeholders — fill in as encountered)
+
+- **Spinner-scroll obscuring an awaited line.** When `aspire run` / `aspire add` print a spinner that updates frequently, an awaited status line (e.g., `Update successful!`) can be redrawn off the visible 160×48 grid before `WaitUntilTextAsync` runs its next snapshot poll. Document mitigations here when first observed.
+- **Race between `aspire start` and dashboard readiness.** The "dashboard at <URL>" line can appear before the dashboard's `/health` actually responds. Document the canonical post-start synchronization here.
+- **`aspire add` version-picker shown vs not-shown.** Some package configurations cause the picker to appear; others auto-select. Tests that always send Down/Enter break in the auto-select case. Document the "is the picker on screen?" check here.
+
+## Workflow-infrastructure gotchas worth knowing about
+
+These bit the PR #17522 investigation and are worth a callout (but are not the test's fault):
+
+- **Same-name artifact collision across rerun attempts.** See Step 1 above.
+- **`CaptureWorkspaceOnFailureAttribute` captures live workspace state to `testresults/workspaces/<TestName>/`** but the CI upload globs don't include that directory, so the captured `aspire.config.json` (which would directly answer "what was on disk at failure time?") is currently lost. Worth fixing separately.
+- **`testresults/recordings/<TestName>.cast` is overwritten on retry.** If the test's xUnit retry policy or a manual rerun re-executes the same method, the failing recording is clobbered by the passing one. The artifact saved with the *failing attempt* is the only durable copy — another reason Step 1 matters.
diff --git a/.github/workflows/deployment-tests.yml b/.github/workflows/deployment-tests.yml
@@ -251,17 +251,15 @@ jobs:
           docker info | head -20
           echo "✅ Docker is available"
 
-      # Pin helm to a version that supports `helm install --server-side`/--force-conflicts
-      # (added in helm v3.18.0; helm v4 keeps both but `--server-side` is now a string flag
-      # accepting true|false|auto, which is why our code passes `--server-side=true` rather
-      # than the bare flag — that form parses identically on v3.18+ and v4). The default
-      # helm preinstalled on the runner image lags behind and rejects these flags, which
-      # breaks AKS cert-manager scenarios that need server-side apply to coexist with the
-      # AKS Azure Policy add-on's webhook mutations.
+      # Pin Helm to satisfy Aspire.Hosting.Kubernetes' minimum supported version
+      # (HelmVersionValidator.MinimumHelmVersion, currently v4.2.0); keep the two
+      # in sync. The check-helm-prereqs-{env} pipeline step fails fast on older
+      # Helm CLIs, so the runner image's preinstalled Helm — which lags behind —
+      # would otherwise break every AKS deployment scenario.
       - name: Setup Helm
         uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
         with:
-          version: v4.1.4
+          version: v4.2.0
 
       - name: Run deployment test (${{ matrix.shortname }})
         id: run_tests