Hmbown · Hmbown · Jun 22, 2026 · Jun 20, 2026 · Jun 21, 2026 · Jun 21, 2026
@@ -67,19 +67,37 @@ jobs:
         run: ./scripts/release/check-versions.sh
 
       - name: Create and push tag
+        id: create
         if: steps.check.outputs.exists == 'false'
         env:
           TAG: ${{ steps.ver.outputs.tag }}
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          # Refresh tags and re-check both the local ref and the remote before
+          # tagging. If the tag is present now, record pushed=false and finish
+          # successfully instead of attempting to create it again.
+          git fetch --tags --quiet
+          if git rev-parse -q --verify "refs/tags/${TAG}" >/dev/null \
+             || git ls-remote --tags origin "refs/tags/${TAG}" | grep -q .; then
+            echo "pushed=false" >> "$GITHUB_OUTPUT"
+            echo "Tag ${TAG} already exists after refresh; nothing to do."
+            exit 0
+          fi
           git tag "${TAG}"
-          git push origin "${TAG}"
+          # A failed push is tolerated only when the tag is now visible on the
+          # remote; any other push failure fails the step.
+          if ! git push origin "${TAG}"; then
+            if git ls-remote --tags origin "refs/tags/${TAG}" | grep -q .; then
+              echo "pushed=false" >> "$GITHUB_OUTPUT"
+              echo "Tag ${TAG} appeared during push; treating as already handled."
+              exit 0
+            fi
+            exit 1
+          fi
+          # The `pushed` output gates the "Warn if PAT missing" step.
+          echo "pushed=true" >> "$GITHUB_OUTPUT"
           echo "Pushed ${TAG}. release.yml should now run (requires RELEASE_TAG_PAT for trigger)."
 
       - name: Warn if PAT missing
-        if: steps.check.outputs.exists == 'false' && env.HAS_PAT != 'true'
+        # Only warn when this run actually pushed the tag.
+        if: steps.create.outputs.pushed == 'true'
         env:
           HAS_PAT: ${{ secrets.RELEASE_TAG_PAT != '' }}
+          # Pass the tag through the environment, as the tagging step does,
+          # instead of expanding the expression inside the shell script.
+          TAG: ${{ steps.ver.outputs.tag }}
         run: |
-          echo "::warning::RELEASE_TAG_PAT secret is not set. The tag was pushed using GITHUB_TOKEN, which does NOT trigger release.yml. Manually re-push the tag from a developer machine, or run 'gh workflow run release.yml --ref ${{ steps.ver.outputs.tag }}'."
+          if [ "${HAS_PAT}" != "true" ]; then
+            echo "::warning::RELEASE_TAG_PAT secret is not set. The tag was pushed using GITHUB_TOKEN, which does NOT trigger release.yml. Manually re-push the tag from a developer machine, or run 'gh workflow run release.yml --ref ${TAG}'."
+          fi
@@ -77,9 +77,12 @@ jobs:
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v7
-      - uses: dtolnay/rust-toolchain@stable
+      - uses: dtolnay/rust-toolchain@master
         with:
+          toolchain: '1.88'
           targets: ${{ matrix.target }}
+      # Adds the matrix target to the pinned 1.88 toolchain explicitly.
+      # NOTE(review): the toolchain step above already passes `targets:` for the
+      # same matrix target, so this may be redundant — confirm it is still needed.
+      - name: Install Rust target
+        run: rustup target add --toolchain 1.88 ${{ matrix.target }}
       - uses: Swatinem/rust-cache@v2
         with:
           cache-bin: false

@@ -28,7 +28,7 @@ jobs:
           toolchain: '1.88'
           components: clippy, rustfmt
       - name: Install Linux system dependencies
-        if: runner.os == 'Linux' && matrix.target != 'x86_64-unknown-linux-musl'
+        if: runner.os == 'Linux'
         run: |
           for i in 1 2 3 4 5; do
             sudo apt-get update && break
@@ -173,6 +173,8 @@ jobs:
         with:
           toolchain: '1.88'
           targets: ${{ matrix.target }}
+      # Adds the matrix target to the pinned 1.88 toolchain explicitly.
+      # NOTE(review): the toolchain step above already passes `targets:` for the
+      # same matrix target, so this may be redundant — confirm it is still needed.
+      - name: Install Rust target
+        run: rustup target add --toolchain 1.88 ${{ matrix.target }}
       - uses: Swatinem/rust-cache@v2
         with:
           cache-bin: false
@@ -191,7 +193,7 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install -y musl-tools
-          rustup target add x86_64-unknown-linux-musl
+          rustup target add --toolchain 1.88 x86_64-unknown-linux-musl
           cargo build --release --locked --target x86_64-unknown-linux-musl
       - name: Install RISC-V cross-compilation toolchain
         if: matrix.target == 'riscv64gc-unknown-linux-gnu'

@@ -104,12 +104,6 @@ apps/
 # Maintainer-internal design notes (trade-secret material, never published)
 .private/
 
-# Maintainer-local SWE-bench scratch (instance workspaces, venvs, predictions,
-# Docker harness logs). Never published.
-.swebench/
-deep-swe/
-all_preds.jsonl
-
 # Agent handoffs and version-specific setup plans are working-state notes, not
 # public docs. Keep durable setup guidance in docs/runbooks instead.
 docs/*HANDOFF*.md
@@ -123,21 +117,14 @@ docs/*_PLAN.md
 scripts/run_deep_swe.py
 .claude/
 
-# Benchmark artifacts and caches re-included by !scripts/**
+# Local run artifacts and caches re-included by !scripts/**
 results/
-benchmark_results/*
-!benchmark_results/.gitkeep
 scripts/**/__pycache__/
 
-# Maintainer-local verification artifacts and benchmark corpora
-.harbor-datasets/
-.pinchbench-skill/
-.terminal-bench-datasets/
-.venv-bench/
+# Maintainer-local verification artifacts
 .uv-bin/
 .uv-cache/
 .uv-tools/
-codewhale__*.json
 issues/
 logs/
 notes/
@@ -55,7 +55,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   while Ctrl-X is scoped to Tasks-sidebar background shell cancellation. Shell
   jobs launched by sub-agents now render with their child-agent owner in the
   Tasks sidebar and transcript.
-- **Benchmark-turn recovery and context economy.** Repeated read-only search
+- **Long-turn recovery and context economy.** Repeated read-only search
   loop blocks now return guidance instead of fatal tool failures, Python build
   failures that are missing `setuptools` include an install/retry hint, long
   foreground shell timeouts steer models toward background execution, and noisy
@@ -123,7 +123,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   unchanged.
 - **Base prompt / delegate skill guidance** updated to encourage parallel
   read-only exploration (2-4 `type: "explore"` sub-agents) for broad repo,
-  version, branch, benchmark, and API-surface investigations, while keeping
+  version, branch, release, and API-surface investigations, while keeping
   architecture, integration, and final verification in the parent. The
   delegate skill examples now use provider-neutral `model_strength` instead of
   hardcoded DeepSeek model ids.
@@ -297,7 +297,7 @@ folds in several community contributions.
 - Work sidebar no longer shows stale `phase now:` / `phase next:` strategy rows once the checklist
   is 100% complete.
 - Plan mode no longer shortcuts investigation for requests that name a repository, URL, version,
-  release, build state, benchmark, bug, PR, issue, API surface, or local code path.
+  release, build state, bug, PR, issue, API surface, or local code path.
 - Oversized pasted text stays editable in the composer, with a file backup appended at submit
   time for model access; thanks @idling11 (#3267, closes #3263).
 - Bare digit keys `1`-`8` now insert text instead of firing hotbar slots; use `Alt+digit` for
@@ -796,8 +796,6 @@ folds in several community contributions.
 
 ### Added
 
-- **Benchmark harness runners.** Added CodeWhale-native benchmark entry points for SWE-bench, Terminal-Bench, and PinchBench, plus a local PinchBench runner that can grade tool-use traces with an LLM judge.
-- **Direct MiMo benchmark routing.** The benchmark runner now defaults to direct Xiaomi MiMo v2.5 Pro routing when configured, while keeping provider/model selection explicit.
 - Added `/restore list [N]` so users can inspect more side-git rollback
   snapshots with UTC timestamps before choosing a restore point. Plain
   `/restore` now shows the 20 most recent snapshots, numeric restore targets can
@@ -1138,7 +1136,6 @@ folds in several community contributions.
 
 ### Fixed
 
-- **Benchmark workspace copying.** Fixed benchmark workspace file copying so local benchmark tasks can preserve their intended file layout during agent runs.
 - **MiMo default tests.** Guarded Xiaomi MiMo default-model tests against ambient CI provider environment variables.
 - Stream/body decode failures such as `Stream read error: error decoding
   response body` are now classified as recoverable network interruptions

@@ -11,8 +11,8 @@ when something fails.
 It's open source (MIT, Rust), it runs on your machine, and it works with the
 models people actually use. DeepSeek and open-weight models are first-class,
 but Claude, GPT, Kimi, and a local vLLM/Ollama box on your LAN are all full
-peers. The goal is simple: stay current with the best research and features in
-commercial coding agents, and surpass them.
+peers. The goal is simple: keep the local terminal workflow current with the
+best research and practical features in coding agents.
 
 Developers from all over the world have shaped CodeWhale into what it is. If
 there's a model, endpoint, or feature you don't see that you want, open an issue
@@ -113,7 +113,7 @@ codewhale exec --allowed-tools read_file,exec_shell --max-turns 10 "fix the fail
 
 ## The models
 
-Twenty-five providers route through the same harness and the same tools. If the
+Supported providers route through the same runtime and the same tools. If the
 one you want isn't here, that's a good issue to open.
 
 - **Open models, hosted:** `deepseek` (first among equals), `openrouter`,
@@ -189,8 +189,8 @@ structure intact.
 - **Sub-agents.** Independent investigations and implementation slices run in
   parallel with provider-specific fanout caps, clean context, and
   provider-aware model tiers (big vs. cheap).
-- **25 providers.** DeepSeek, GLM, Claude, GPT, Kimi, MiniMax, OpenRouter, and
-  local vLLM/SGLang/Ollama, all behind the same harness and tools. Switch
+- **Broad provider support.** DeepSeek, GLM, Claude, GPT, Kimi, MiniMax,
+  OpenRouter, and local vLLM/SGLang/Ollama, all behind the same runtime and tools. Switch
   mid-session with `/provider` and `/model`.
 - **Rollback.** Side-git snapshots and `/restore`, kept outside your repo's
   `.git` — undoing a turn never touches your history.

@@ -104,7 +104,7 @@ check_for_updates = true
 #   5 mode.yolo         6 palette.open      7 sidebar.toggle   8 trust.toggle
 #
 # Invalid slots are skipped with a warning, duplicate slots use the last entry,
-# and unknown actions are preserved so the UI can show a disabled placeholder.
+# and unknown actions are preserved so the UI can show a disabled entry.
 # Slash commands can be bound as slash.<name>, for example slash.mode. Commands
 # that require arguments pre-fill the composer instead of running incomplete.
 #
@@ -607,7 +607,7 @@ osc8_links = true            # emit OSC 8 escapes around URLs (Cmd+click in iTer
 # Supported keys: mode, model, cost, balance (DeepSeek / DeepSeekCN only),
 # status, agents,
 # reasoning_replay, prefix_stability, cache, context_percent, git_branch,
-# last_tool_elapsed (placeholder), rate_limit (placeholder), tokens.
+# last_tool_elapsed (reserved), rate_limit (reserved), tokens.
 # status_items = ["mode", "model", "status", "git_branch", "tokens", "cache"]
 # notification_condition = "always" # always | never — overrides [notifications].threshold_secs.
 #                                    "always" = notify on every successful turn (no threshold);
@@ -965,7 +965,7 @@ default_text_model = "deepseek-ai/deepseek-v4-pro"
 # LOGFILE="$LOGDIR/exec_shell.log"
 # input=$(cat)
 # echo "[$(date -Iseconds)] $input" >> "$LOGFILE"
-# printf '%s\n' '{"content":"audit wrapper placeholder: configure an executor","success":false}'
+# printf '%s\n' '{"content":"audit wrapper dry run: configure an executor","success":false}'
 # ```
 
 # ─────────────────────────────────────────────────────────────────────────────────

@@ -4,7 +4,7 @@ version.workspace = true
 edition.workspace = true
 license.workspace = true
 repository.workspace = true
-description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"
+description = "Model/provider registry and fallback strategy for CodeWhale"
 
 [dependencies]
 codewhale-config = { path = "../config", version = "0.8.63" }

@@ -4,7 +4,7 @@ version.workspace = true
 edition.workspace = true
 license.workspace = true
 repository.workspace = true
-description = "Codex-style app-server transport for DeepSeek workspace architecture"
+description = "App-server transport for CodeWhale runtime integrations"
 # `codewhale app-server` is owned by codewhale-cli; this crate is library-only.
 autobins = false
 

@@ -224,9 +224,25 @@ pub(crate) async fn chat_completions_handler(
 
     let url = upstream_url(&endpoint);
 
+    if endpoint.insecure_skip_tls_verify {
+        return (
+            StatusCode::BAD_REQUEST,
+            Json(serde_json::json!({
+                "error": {
+                    "message": format!(
+                        "TLS certificate verification cannot be disabled for provider {:?}; use SSL_CERT_FILE with a trusted custom CA bundle",
+                        endpoint.provider
+                    ),
+                    "type": "invalid_request_error",
+                    "code": "tls_verification_required"
+                }
+            })),
+        )
+            .into_response();
+    }
+
     // Build upstream request.
     let upstream_req = reqwest::Client::builder()
-        .danger_accept_invalid_certs(endpoint.insecure_skip_tls_verify)
         .build()
         .map_err(|e| {
             (
@@ -385,6 +401,14 @@ mod tests {
+    /// Test helper: builds the app with no extra provider settings by delegating
+    /// to `app_with_mock_upstream_with_provider_extra` with an empty `provider_extra`.
     fn app_with_mock_upstream(
         auth_token: Option<&str>,
         mock_base_url: &str,
+    ) -> (axum::Router, tempfile::TempDir) {
+        app_with_mock_upstream_with_provider_extra(auth_token, mock_base_url, "")
+    }
+
+    fn app_with_mock_upstream_with_provider_extra(
+        auth_token: Option<&str>,
+        mock_base_url: &str,
+        provider_extra: &str,
     ) -> (axum::Router, tempfile::TempDir) {
         let tmp = tempfile::tempdir().expect("tempdir");
         let config_path = tmp.path().join("config.toml");
@@ -397,6 +421,7 @@ api_key = "sk-deepseek-secret"
 base_url = "{mock_base_url}"
 model = "trinity-large-thinking"
 api_key = "arcee-configured-key"
+{provider_extra}
 "#
         );
         fs::write(&config_path, config_content).expect("write config");
@@ -596,6 +621,46 @@ api_key = "arcee-configured-key"
         );
     }
 
+    /// A provider configured with `insecure_skip_tls_verify = true` must be
+    /// answered with 400, error code `tls_verification_required`, and a message
+    /// that mentions `SSL_CERT_FILE`.
+    #[tokio::test]
+    async fn insecure_tls_skip_verify_is_rejected() {
+        install_crypto_provider();
+        let (upstream_url, _upstream) = start_mock_upstream().await;
+        let (router, _config_dir) = app_with_mock_upstream_with_provider_extra(
+            None,
+            &upstream_url,
+            "insecure_skip_tls_verify = true",
+        );
+
+        let payload = serde_json::json!({
+            "model": "trinity-large-thinking",
+            "messages": [{"role": "user", "content": "hello"}]
+        });
+        let request = Request::builder()
+            .method(Method::POST)
+            .uri("/v1/chat/completions")
+            .header("content-type", "application/json")
+            .body(Body::from(serde_json::to_vec(&payload).unwrap()))
+            .unwrap();
+
+        let response = router.oneshot(request).await.unwrap();
+
+        assert_eq!(response.status(), StatusCode::BAD_REQUEST);
+        let error_json = response_body_json(response).await;
+        assert_eq!(error_json["error"]["code"], "tls_verification_required");
+        let message = error_json["error"]["message"].as_str().unwrap();
+        assert!(message.contains("SSL_CERT_FILE"));
+    }
+
     #[tokio::test]
     async fn streaming_request_rejected() {
         install_crypto_provider();

@@ -411,16 +411,22 @@ fn resolve_auth_token(options: &AppServerOptions) -> Result<Option<String>> {
     let token = configured
         .map(str::to_string)
         .unwrap_or_else(|| format!("cwapp_{}", Uuid::new_v4().simple()));
-    if has_explicit_token {
-        eprintln!("app-server auth: bearer token required for HTTP routes.");
-    } else {
-        eprintln!("app-server auth: generated bearer token for this process.");
-        eprintln!("  Authorization: Bearer {token}");
-        eprintln!("  Pass --auth-token or set CODEWHALE_APP_SERVER_TOKEN for a stable token.");
+    for line in app_server_auth_status_lines(has_explicit_token) {
+        eprintln!("{line}");
     }
     Ok(Some(token))
 }
 
+/// Builds the status lines that `resolve_auth_token` prints about bearer auth.
+///
+/// With an explicit token a single notice is returned. Otherwise the lines say
+/// that a token was generated, without including it, and point at
+/// `--auth-token` / `CODEWHALE_APP_SERVER_TOKEN`.
+fn app_server_auth_status_lines(has_explicit_token: bool) -> Vec<&'static str> {
+    if has_explicit_token {
+        vec!["app-server auth: bearer token required for HTTP routes."]
+    } else {
+        vec![
+            "app-server auth: generated bearer token for this process (not printed).",
+            "  Pass --auth-token or set CODEWHALE_APP_SERVER_TOKEN when another client needs to connect.",
+        ]
+    }
+}
+
 fn cors_layer(extra_origins: &[String]) -> CorsLayer {
     let mut origins: Vec<HeaderValue> = DEFAULT_CORS_ORIGINS
         .iter()
@@ -1405,6 +1411,15 @@ mod tests {
         assert!(token.unwrap().starts_with("cwapp_"));
     }
 
+    /// The generated-token status text must describe the token without rendering it.
+    #[test]
+    fn generated_auth_status_does_not_render_token() {
+        let status_text = app_server_auth_status_lines(false).join("\n");
+
+        // No bearer header may appear; the hint must name the env override.
+        assert!(!status_text.contains("Authorization: Bearer"));
+        for expected in ["not printed", "CODEWHALE_APP_SERVER_TOKEN"] {
+            assert!(status_text.contains(expected));
+        }
+    }
+
     #[test]
     fn auth_token_explicit_is_preserved() {
         let options = AppServerOptions {