doublewordai · hachall · Jun 11, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/.gitignore b/.gitignore
@@ -33,8 +33,10 @@ yarn-error.log*
 # env files (can opt-in for committing if needed)
 .env*
 
-# generated search index
-/public/search-index.json
+# Generated search index (data/search-index.json). The committed copy is an empty
+# `[]` placeholder so the static import in src/lib/search-index.ts resolves in
+# dev / CI; `npm run build` (and the Vercel build) fills it in before `next build`.
+# It is intentionally tracked — do not commit the regenerated (non-empty) copy.
 
 # vercel
 .vercel

diff --git a/AGENTS.md b/AGENTS.md
@@ -106,7 +106,7 @@ All redirects live in `next.config.ts` under `async redirects()`. Add to the app
 
 - **[`doublewordai/batch-skill`](https://github.com/doublewordai/skill)** (agents skill): `SKILL.md` contains explicit links to doc pages. Grep it for the affected slug and update.
 - **`llms.txt`**: Auto-generated from Sanity by `src/app/llms.txt/route.ts`. No manual action, but sanity-check the output after the change.
-- **Search index**: Rebuilt at build time by `scripts/build-search-index.mjs`. No manual action.
+- **Search index**: Rebuilt at build time by `scripts/build-search-index.mjs`, which the `build` script runs **explicitly** before `next build` (do not move it back to a `prebuild` hook — `.npmrc` sets `ignore-scripts=true`, so npm/pnpm pre/post lifecycle hooks no longer run). The generated `data/search-index.json` is **imported statically** by `src/lib/search-index.ts` so webpack inlines it into the `/api/search` function bundle. Do **not** switch this back to a runtime `fs` read: generated files are unreliable to read from a Vercel serverless function (files in `public/` are stripped; traced files don't always land at `process.cwd()`; an edge runtime has no `fs`) — every disk-based attempt 500'd. The committed `data/search-index.json` is an empty `[]` placeholder so dev/CI resolve the import; the build overwrites it with real content. No manual action.
 - **External SDK docs and blog posts**: Search the `doublewordai` org on GitHub for the old slug. Update or rely on the redirect.
 - **Sitemap**: Auto-generated by `src/app/sitemap.ts`.
 - **Marketing site / blog**: Check `doubleword.ai` and `blog.doubleword.ai` for inbound links.

diff --git a/data/search-index.json b/data/search-index.json
@@ -0,0 +1 @@
+[]
diff --git a/package.json b/package.json
@@ -4,8 +4,8 @@
   "private": true,
   "scripts": {
     "dev": "next dev",
-    "prebuild": "node scripts/build-search-index.mjs",
-    "build": "next build",
+    "build:search-index": "node scripts/build-search-index.mjs",
+    "build": "node scripts/build-search-index.mjs && next build",
     "start": "next start",
     "lint": "eslint",
     "test": "vitest",

diff --git a/scripts/build-search-index.mjs b/scripts/build-search-index.mjs
@@ -15,7 +15,10 @@ import { join, dirname } from "node:path";
 import { fileURLToPath } from "node:url";
 
 const __dirname = dirname(fileURLToPath(import.meta.url));
-const OUTPUT_DIR = join(__dirname, "..", "public");
+// Written to data/ and imported statically by src/lib/search-index.ts (webpack
+// inlines it into the /api/search function bundle). Must run before `next build`
+// so the import resolves to real content rather than the committed `[]` seed.
+const OUTPUT_DIR = join(__dirname, "..", "data");
 const OUTPUT_PATH = join(OUTPUT_DIR, "search-index.json");
 
 const client = createClient({
@@ -70,6 +73,26 @@ async function fetchExternalContent(url) {
   }
 }
 
+// Sanity bodies are Portable Text (an array of blocks), not strings. The search
+// index must store plain strings — the runtime ranking calls .replace() on body
+// (see src/lib/search.ts), and an array there throws. Flatten blocks to text;
+// pass strings through unchanged; anything else becomes "".
+function toPlainText(value) {
+  if (typeof value === "string") return value;
+  if (!Array.isArray(value)) return "";
+  return value
+    .map((block) => {
+      if (!block || block._type !== "block" || !Array.isArray(block.children)) {
+        return "";
+      }
+      return block.children
+        .map((child) => (typeof child?.text === "string" ? child.text : ""))
+        .join("");
+    })
+    .filter(Boolean)
+    .join("\n");
+}
+
 async function resolveBody(doc) {
   if (doc.externalSource) {
     const content = await fetchExternalContent(doc.externalSource);
@@ -165,15 +188,29 @@ async function getModelArtifactSearchItems() {
   const apiKey = process.env.DOUBLEWORD_SYSTEM_API_KEY;
   if (!apiKey) return [];
 
-  const response = await fetch("https://app.doubleword.ai/admin/api/v1/models?include=pricing", {
-    headers: {
-      Authorization: `Bearer ${apiKey}`,
-      Accept: "application/json",
-    },
-  });
-  if (!response.ok) return [];
+  // Model pricing is optional enrichment. The Vercel build box can't always
+  // reach app.doubleword.ai (network egress / WAF), and a fetch failure here
+  // must NOT fail the whole docs build — the core index (Sanity + external
+  // docs) is what matters. Warn on any failure and skip these entries.
+  let rawData;
+  try {
+    const response = await fetch("https://app.doubleword.ai/admin/api/v1/models?include=pricing", {
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        Accept: "application/json",
+      },
+    });
+    if (!response.ok) {
+      console.warn(`Skipping model artifacts: HTTP ${response.status}`);
+      return [];
+    }
+    rawData = await response.json();
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    console.warn(`Skipping model artifacts: ${message}`);
+    return [];
+  }
 
-  const rawData = await response.json();
   const models = rawData.data || [];
 
   const formatPricePer1M = (price) => `$${(Number(price) * 1_000_000).toFixed(2)}`;
@@ -232,7 +269,7 @@ async function main() {
       );
       if (needsFetch) externalFetches++;
 
-      const body = await resolveBody(doc);
+      const body = toPlainText(await resolveBody(doc));
       if (needsFetch && !body) failures++;
 
       return {

diff --git a/src/app/api/search/route.ts b/src/app/api/search/route.ts
@@ -14,23 +14,29 @@ export async function GET(request: NextRequest) {
     return NextResponse.json({matches: []});
   }
 
-  const allDocs = loadSearchIndex();
-  const docs = productSlug
-    ? allDocs.filter((d) => d.productSlug === productSlug)
-    : allDocs;
+  try {
+    const allDocs = loadSearchIndex();
+    const docs = productSlug
+      ? allDocs.filter((d) => d.productSlug === productSlug)
+      : allDocs;
 
-  const matches = searchDocs(docs, query)
-    .slice(0, limit)
-    .map((result) => ({
-      id: result._id,
-      title: result.sidebarLabel || result.title,
-      productName: result.productName,
-      categoryName: result.categoryName || "",
-      snippet: result.snippet,
-      score: result.score,
-      href: `/${result.productSlug}/${result.slug}`,
-      path: `/${result.productSlug}/${result.slug}`,
-    }));
+    const matches = searchDocs(docs, query)
+      .slice(0, limit)
+      .map((result) => ({
+        id: result._id,
+        title: result.sidebarLabel || result.title,
+        productName: result.productName,
+        categoryName: result.categoryName || "",
+        snippet: result.snippet,
+        score: result.score,
+        href: `/${result.productSlug}/${result.slug}`,
+        path: `/${result.productSlug}/${result.slug}`,
+      }));
 
-  return NextResponse.json({matches});
+    return NextResponse.json({matches});
+  } catch (err) {
+    // Log the cause and return a JSON 500 so a malformed doc never surfaces as an opaque crash.
+    console.error("search route error:", err);
+    return NextResponse.json({error: "search failed"}, {status: 500});
+  }
 }
diff --git a/src/app/control-layer/api-reference/route.ts b/src/app/control-layer/api-reference/route.ts
@@ -7,6 +7,12 @@ import { withCspNonce } from "@/lib/scalar-api-reference";
 export const GET = withCspNonce(
   ApiReference({
     url: "/api/control-layer-openapi",
+    // Scalar loads its default web fonts from Google Fonts for BOTH the
+    // Inference and Control Layer references — independent of the Test Request
+    // button (hidden here) — and our `font-src 'self' data:` CSP blocks them.
+    // Use the system font stack instead of allowlisting an external font host.
+    // Keep this even though hideTestRequestButton is true.
+    withDefaultFonts: false,
     metaData: {
       title: "API Reference | Control Layer | Doubleword Docs",
       description: "Complete API reference for the Doubleword Control Layer API",

diff --git a/src/app/inference-api/api-reference/route.ts b/src/app/inference-api/api-reference/route.ts
@@ -7,6 +7,10 @@ import { withCspNonce } from "@/lib/scalar-api-reference";
 export const GET = withCspNonce(
   ApiReference({
     url: "/api/openapi",
+    // Scalar otherwise loads its default web fonts from Google Fonts, which our
+    // `font-src 'self' data:` CSP blocks ("Refused to load the font"). Use the
+    // system font stack instead of allowlisting an external font host.
+    withDefaultFonts: false,
     metaData: {
       title: "API Reference | Doubleword Inference API | Doubleword Docs",
       description: "Complete API reference for the Doubleword API",

diff --git a/src/lib/scalar-api-reference.test.ts b/src/lib/scalar-api-reference.test.ts
@@ -10,6 +10,8 @@ vi.mock("next/headers", () => ({
 }));
 
 import { withCspNonce } from "./scalar-api-reference";
+import { GET as inferenceApiReference } from "@/app/inference-api/api-reference/route";
+import { GET as controlLayerApiReference } from "@/app/control-layer/api-reference/route";
 
 const NONCE = "test-nonce-Zm9vYmFy";
 
@@ -81,3 +83,19 @@ describe("withCspNonce", () => {
     expect(html).not.toContain("nonce=");
   });
 });
+
+// Scalar's client otherwise pulls its default web fonts from Google Fonts, which
+// our `font-src 'self' data:` CSP blocks ("Refused to load the font"). Both
+// API-reference routes must opt out (`withDefaultFonts: false`) so Scalar uses
+// the system font stack instead of an external font host. Scalar serializes the
+// config as JSON into its inline init script, so the flag appears verbatim in the
+// rendered HTML.
+describe("Scalar API-reference routes opt out of external fonts", () => {
+  it.each([
+    ["inference-api", inferenceApiReference],
+    ["control-layer", controlLayerApiReference],
+  ])("%s reference disables Scalar's default fonts", async (_name, handler) => {
+    const html = await (await handler()).text();
+    expect(html).toMatch(/"withDefaultFonts"\s*:\s*false/);
+  });
+});
diff --git a/src/lib/search-index.ts b/src/lib/search-index.ts
@@ -1,13 +1,15 @@
-import { readFileSync } from "node:fs";
-import { join } from "node:path";
 import type { DocSearchIndexItem } from "@/sanity/types";
-
-let cached: DocSearchIndexItem[] | null = null;
+// Generated at build time by scripts/build-search-index.mjs, which the `build`
+// script runs before `next build`. We import it STATICALLY (rather than reading
+// it from disk at runtime) so webpack inlines the data into the /api/search
+// function bundle. Runtime fs reads of a generated file are unreliable on
+// Vercel — files in public/ are stripped from the function, traced files don't
+// always land where process.cwd() expects, and an edge runtime has no fs at
+// all. A static import sidesteps all of that. The committed file is an empty
+// `[]` placeholder so dev / typecheck / lint resolve the module; the real build
+// overwrites it before webpack compiles.
+import searchIndex from "../../data/search-index.json";
 
 export function loadSearchIndex(): DocSearchIndexItem[] {
-  if (cached) return cached;
-  const filePath = join(process.cwd(), "public", "search-index.json");
-  const data: DocSearchIndexItem[] = JSON.parse(readFileSync(filePath, "utf-8"));
-  cached = data;
-  return data;
+  return searchIndex as DocSearchIndexItem[];
 }
diff --git a/src/lib/search.test.ts b/src/lib/search.test.ts
@@ -0,0 +1,39 @@
+import {describe, expect, it} from "vitest";
+import {stripMarkdown, searchDocs} from "./search";
+import type {DocSearchIndexItem} from "@/sanity/types";
+
+describe("stripMarkdown", () => {
+  it("strips markdown from strings", () => {
+    expect(stripMarkdown("# Title\n`code` **bold**")).toBe("Title code bold");
+  });
+
+  it("returns '' for non-string input instead of throwing", () => {
+    // Sanity bodies are Portable Text (arrays). Before the guard, this threw
+    // `e.replace is not a function` and 500'd the whole /api/search request.
+    expect(stripMarkdown([{_type: "block"}] as unknown as string)).toBe("");
+    expect(stripMarkdown(undefined as unknown as string)).toBe("");
+    expect(stripMarkdown({} as unknown as string)).toBe("");
+  });
+});
+
+describe("searchDocs", () => {
+  it("does not throw when a doc body is Portable Text (non-string)", () => {
+    const docs = [
+      {
+        _id: "1",
+        title: "Batch inference",
+        // Portable Text array, exactly what Sanity stores — not a string.
+        body: [
+          {_type: "block", children: [{_type: "span", text: "about batches"}]},
+        ],
+        slug: "batch-inference",
+        productSlug: "inference-api",
+        productName: "Inference API",
+      },
+    ] as unknown as DocSearchIndexItem[];
+
+    expect(() => searchDocs(docs, "batch")).not.toThrow();
+    const results = searchDocs(docs, "batch");
+    expect(results[0]?._id).toBe("1");
+  });
+});
diff --git a/src/lib/search.ts b/src/lib/search.ts
@@ -6,7 +6,10 @@ export function normalize(text: string): string {
   return text.toLowerCase().trim();
 }
 
-export function stripMarkdown(markdown: string): string {
+export function stripMarkdown(markdown: unknown): string {
+  // Defensive: the index can carry non-string bodies (e.g. Sanity Portable Text
+  // arrays). Never let that crash search — coerce anything non-string to "".
+  if (typeof markdown !== "string") return "";
   return markdown
     .replace(/```[\s\S]*?```/g, " ")
     .replace(/`[^`]*`/g, " ")

diff --git a/src/middleware.test.ts b/src/middleware.test.ts
@@ -0,0 +1,56 @@
+import { describe, expect, it } from "vitest";
+import { NextRequest } from "next/server";
+import { middleware } from "./middleware";
+
+// Pull the CSP off the response middleware emits for a normal document request.
+function cspFor(
+  url = "https://docs.doubleword.ai/inference-api/api-reference",
+): string {
+  const res = middleware(new NextRequest(url));
+  return res.headers.get("content-security-policy") ?? "";
+}
+
+// Return the directive (e.g. "connect-src ...") as a single trimmed string.
+function directive(csp: string, name: string): string {
+  return (
+    csp
+      .split(";")
+      .map((d) => d.trim())
+      .find((d) => d === name || d.startsWith(`${name} `)) ?? ""
+  );
+}
+
+describe("CSP middleware", () => {
+  it("allows the inference API host so Scalar's Test Request works", () => {
+    // The OpenAPI spec's server is https://api.doubleword.ai/v1, so Scalar fires
+    // its try-it fetch at that host from the browser. Without this entry the
+    // request is blocked and the user sees "Failed to fetch".
+    expect(directive(cspFor(), "connect-src")).toContain(
+      "https://api.doubleword.ai",
+    );
+  });
+
+  it("keeps the other connect-src allowances intact", () => {
+    const connectSrc = directive(cspFor(), "connect-src");
+    expect(connectSrc).toContain("'self'"); // PostHog via /ingest rewrite
+    expect(connectSrc).toContain("https://app.doubleword.ai"); // SSO session check
+    expect(connectSrc).toContain("https://status.doubleword.ai"); // StatusWidget
+  });
+
+  it("does not allowlist a font host — fonts stay self/data only", () => {
+    // We fixed the Scalar font violation by disabling its default fonts, not by
+    // opening font-src. Guard against a future regression that re-opens it.
+    expect(directive(cspFor(), "font-src")).toBe("font-src 'self' data:");
+  });
+
+  it("emits a per-request nonce in script-src", () => {
+    expect(cspFor()).toMatch(/script-src[^;]*'nonce-[^']+'/);
+  });
+
+  it("uses a unique nonce per response", () => {
+    const first = cspFor().match(/'nonce-([^']+)'/)?.[1];
+    const second = cspFor().match(/'nonce-([^']+)'/)?.[1];
+    expect(first).toBeTruthy();
+    expect(first).not.toBe(second);
+  });
+});
diff --git a/src/middleware.ts b/src/middleware.ts
@@ -24,6 +24,12 @@ import {NextRequest, NextResponse} from 'next/server'
 //     `credentials: 'include'` to verify the SSO session. CORS is already
 //     allowed on the control-layer side (see
 //     internal/values/control-layer.yaml `allowed_origins`).
+//   - `https://api.doubleword.ai` for the Scalar API-reference "Test Request"
+//     feature at /inference-api/api-reference. The OpenAPI spec's server is
+//     `https://api.doubleword.ai/v1`, so Scalar fires the try-it `fetch` at that
+//     host from the browser; without it CSP blocks the request ("Failed to
+//     fetch"). CORS is already allowed on the api side — docs.doubleword.ai is
+//     in the same `allowed_origins` list (which covers the api proxy too).
 //   - `https://status.doubleword.ai` for the StatusWidget component, which
 //     fetches `/api/v1/summary` from the public status page to render
 //     live incident status inline in docs pages.
@@ -35,7 +41,7 @@ function buildCsp(nonce: string): string {
     "style-src 'self' 'unsafe-inline'",
     "img-src 'self' data: blob: https://cdn.sanity.io",
     "font-src 'self' data:",
-    "connect-src 'self' https://app.doubleword.ai https://status.doubleword.ai",
+    "connect-src 'self' https://app.doubleword.ai https://api.doubleword.ai https://status.doubleword.ai",
     'frame-src https://www.youtube.com https://www.youtube-nocookie.com https://player.vimeo.com',
     "worker-src 'self' blob:",
     "frame-ancestors 'none'",