From 5738a540ab630d65853fbabbbabfd056b62be6ef Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Sun, 29 Mar 2026 19:13:09 +0700 Subject: [PATCH 01/14] refactor: queue chat logs in middleware --- apps/gateway/src/api-individual.e2e.ts | 6 + apps/gateway/src/api.spec.ts | 30 +- apps/gateway/src/chat/chat.ts | 7155 ++++++++--------- .../chat/middleware/chat-completion-log.ts | 144 + .../src/chat/tools/chat-log-context.ts | 126 + apps/gateway/src/test-utils/test-helpers.ts | 4 + apps/gateway/src/vars.ts | 2 + 7 files changed, 3868 insertions(+), 3599 deletions(-) create mode 100644 apps/gateway/src/chat/middleware/chat-completion-log.ts create mode 100644 apps/gateway/src/chat/tools/chat-log-context.ts diff --git a/apps/gateway/src/api-individual.e2e.ts b/apps/gateway/src/api-individual.e2e.ts index a96234bd38..89de64dd5e 100644 --- a/apps/gateway/src/api-individual.e2e.ts +++ b/apps/gateway/src/api-individual.e2e.ts @@ -293,6 +293,12 @@ describe("e2e individual tests", () => { expect((log.errorDetails as { message?: string })?.message).toContain( "the word 'json'", ); + + const matchingLogs = await db + .select() + .from(tables.log) + .where(eq(tables.log.requestId, requestId)); + expect(matchingLogs).toHaveLength(1); }, ); diff --git a/apps/gateway/src/api.spec.ts b/apps/gateway/src/api.spec.ts index fb61959d0d..2bf6fe4d3c 100644 --- a/apps/gateway/src/api.spec.ts +++ b/apps/gateway/src/api.spec.ts @@ -1,11 +1,16 @@ import { afterAll, beforeAll, describe, expect, test, vi } from "vitest"; -import { db, tables } from "@llmgateway/db"; +import { db, eq, tables } from "@llmgateway/db"; import { logger } from "@llmgateway/logger"; import { app } from "./app.js"; import { createGatewayApiTestHarness } from "./test-utils/gateway-api-test-harness.js"; -import { readAll, waitForLogs } from "./test-utils/test-helpers.js"; +import { + readAll, + processPendingLogs, + waitForLogByRequestId, + waitForLogs, +} from "./test-utils/test-helpers.js"; describe("api", () => { const harness = createGatewayApiTestHarness({ @@ -1343,10 +1348,12 @@ describe("api", () => { // test for missing Authorization header test("/v1/chat/completions missing Authorization header", async () => { + const requestId = "missing-auth-request-id"; const res = await app.request("/v1/chat/completions", { method: "POST", headers: { "Content-Type": "application/json", + "x-request-id": requestId, // Intentionally not setting Authorization header }, body: JSON.stringify({ @@ -1360,6 +1367,13 @@ describe("api", () => { }), }); expect(res.status).toBe(401); + + await processPendingLogs(); + const logs = await db + .select() + .from(tables.log) + .where(eq(tables.log.requestId, requestId)); + expect(logs).toHaveLength(0); }); // test for explicitly specifying a provider in the format "provider/model" @@ -1483,6 +1497,7 @@ describe("api", () => { // test for missing provider API key test("/v1/chat/completions with missing provider API key", async () => { + const requestId = "missing-provider-key-request-id"; await db.insert(tables.apiKey).values({ id: "token-id", token: "real-token", @@ -1495,6 +1510,7 @@ describe("api", () => { method: "POST", headers: { "Content-Type": "application/json", + "x-request-id": requestId, Authorization: `Bearer real-token`, }, body: JSON.stringify({ @@ -1512,6 +1528,16 @@ describe("api", () => { expect(errorMessage).toMatchInlineSnapshot( `"{"error":true,"status":400,"message":"No provider key set for any of the providers that support model gpt-4o-mini. 
Please add the provider key in the settings or switch the project mode to credits or hybrid."}"`, ); + + const log = await waitForLogByRequestId(requestId); + expect(log.finishReason).toBe("client_error"); + expect(log.unifiedFinishReason).toBe("client_error"); + + const matchingLogs = await db + .select() + .from(tables.log) + .where(eq(tables.log.requestId, requestId)); + expect(matchingLogs).toHaveLength(1); }); // test for provider error response and error logging diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts index e93aece19a..79ab0dc17a 100644 --- a/apps/gateway/src/chat/chat.ts +++ b/apps/gateway/src/chat/chat.ts @@ -21,7 +21,6 @@ import { throwIamException, validateModelAccess } from "@/lib/iam.js"; import { calculateDataStorageCost, getUnifiedFinishReason, - insertLog as _insertLog, } from "@/lib/logs.js"; import { checkProviderRateLimit, @@ -86,7 +85,14 @@ import { stripRegionFromModelName, } from "@llmgateway/models"; +import { chatCompletionLogMiddleware } from "./middleware/chat-completion-log.js"; import { completionsRequestSchema } from "./schemas/completions.js"; +import { + enqueueChatLog, + finishStreamCompletion, + registerStreamCompletion, + updateBaseLogOptions, +} from "./tools/chat-log-context.js"; import { checkContentFilter, getContentFilterMethod, @@ -95,7 +101,6 @@ import { } from "./tools/check-content-filter.js"; import { convertImagesToBase64 } from "./tools/convert-images-to-base64.js"; import { countInputImages } from "./tools/count-input-images.js"; -import { createLogEntry } from "./tools/create-log-entry.js"; import { estimateTokensFromContent } from "./tools/estimate-tokens-from-content.js"; import { estimateTokens } from "./tools/estimate-tokens.js"; import { @@ -350,6 +355,8 @@ const sharedTextDecoder = new TextDecoder(); export const chat = new OpenAPIHono(); +chat.use("/completions", chatCompletionLogMiddleware); + const completions = createRoute({ operationId: "v1_chat_completions", summary: "Chat Completions", @@ -654,6 +661,7 @@ chat.openapi(completions, async (c) => { // Extract custom X-LLMGateway-* headers const customHeaders = extractCustomHeaders(c); + const requestPluginIds = plugins?.map((plugin) => plugin.id) ?? []; // Check for X-No-Fallback header to disable provider fallback on low uptime const noFallback = @@ -848,6 +856,36 @@ chat.openapi(completions, async (c) => { }); } + updateBaseLogOptions(c, { + requestId, + project, + apiKey, + usedModel: initialRequestedModel, + usedModelMapping: requestedModel, + usedProvider: requestedProvider ?? 
"llmgateway", + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + rawRequest: rawBody, + plugins: requestPluginIds, + }); + // Run guardrails check for enterprise organizations let guardrailResult: Awaited> | undefined; if (organization.plan === "enterprise") { @@ -888,6 +926,9 @@ chat.openapi(completions, async (c) => { messages as Parameters[0], guardrailResult.redactions, ) as typeof messages; + updateBaseLogOptions(c, { + messages, + }); } // Log non-blocking violations (redact/warn) @@ -1121,7 +1162,7 @@ chat.openapi(completions, async (c) => { // Filter by context size requirement, reasoning capability, and deprecation status const suitableProviders = availableModelProviders.filter((provider) => { // Skip deprecated provider mappings - if (provider.deprecatedAt && now > provider.deprecatedAt!) { + if (provider.deprecatedAt && now > provider.deprecatedAt) { return false; } @@ -2157,6 +2198,10 @@ chat.openapi(completions, async (c) => { } } + updateBaseLogOptions(c, { + reasoningEffort: reasoning_effort, + }); + let url: string | undefined; // Get the provider key for the selected provider based on project mode @@ -2462,6 +2507,35 @@ chat.openapi(completions, async (c) => { }); } + updateBaseLogOptions(c, { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + plugins: requestPluginIds, + }); + // Check gateway-level content filter before routing the request upstream. const contentFilterMode = getContentFilterMode(); const contentFilterMethod = getContentFilterMethod(); @@ -2499,50 +2573,27 @@ chat.openapi(completions, async (c) => { .length ? openAIContentFilterResult.responses : null; - const insertLog = (logData: Parameters[0]) => - _insertLog({ - ...logData, - internalContentFilter: shouldTagContentFilter - ? true - : logData.internalContentFilter, - gatewayContentFilterResponse: - logData.gatewayContentFilterResponse ?? 
gatewayContentFilterResponse, - }); + updateBaseLogOptions(c, { + gatewayContentFilterResponse, + }); + const chatCompletionLogState = c.get("chatCompletionLogState"); + if (chatCompletionLogState) { + chatCompletionLogState.internalContentFilter = shouldTagContentFilter; + } if (contentFilterBlocked) { const contentFilterResponseId = `chatcmpl-${Date.now()}`; const contentFilterCreated = Math.floor(Date.now() / 1000); - // Log the filtered request - try { - await insertLog({ - ...createLogEntry( - requestId, - project, - apiKey, - undefined, - "", - undefined, - "llmgateway", - requestedModel, - requestedProvider, - messages as any[], - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - undefined, - undefined, - effort as "low" | "medium" | "high" | undefined, - response_format, - tools, - tool_choice, - source, - customHeaders, - c.req.header("x-debug") === "true", - c.req.header("user-agent"), - ), + enqueueChatLog( + c, + { + providerKeyId: undefined, + usedModel: "", + usedModelMapping: undefined, + usedProvider: "llmgateway", + }, + { content: null, responseSize: 0, finishReason: "llmgateway_content_filter", @@ -2558,6 +2609,7 @@ chat.openapi(completions, async (c) => { errorDetails: null, duration: 0, timeToFirstToken: null, + timeToFirstReasoningToken: null, inputCost: 0, outputCost: 0, cachedInputCost: 0, @@ -2572,31 +2624,36 @@ chat.openapi(completions, async (c) => { discount: null, pricingTier: null, dataStorageCost: "0", - }); - } catch { - // Silently ignore logging failures - } + cached: false, + toolResults: null, + }, + ); if (stream) { + void registerStreamCompletion(c); return streamSSE(c, async (sseStream) => { - const chunk = { - id: contentFilterResponseId, - object: "chat.completion.chunk", - created: contentFilterCreated, - model: requestedModel, - choices: [ - { - index: 0, - delta: {}, - finish_reason: "content_filter", - }, - ], - }; - await sseStream.writeSSE({ - data: JSON.stringify(chunk), - id: "0", - }); - await sseStream.writeSSE({ data: "[DONE]" }); + try { + const chunk = { + id: contentFilterResponseId, + object: "chat.completion.chunk", + created: contentFilterCreated, + model: requestedModel, + choices: [ + { + index: 0, + delta: {}, + finish_reason: "content_filter", + }, + ], + }; + await sseStream.writeSSE({ + data: JSON.stringify(chunk), + id: "0", + }); + await sseStream.writeSSE({ data: "[DONE]" }); + } finally { + finishStreamCompletion(c); + } }); } @@ -2793,46 +2850,6 @@ chat.openapi(completions, async (c) => { } } - // Log the cached streaming request with reconstructed content - // Extract plugin IDs for logging (cached streaming) - const cachedStreamingPluginIds = plugins?.map((p) => p.id) ?? 
[]; - - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - rawCachedResponseData, // Raw SSE data from cached response - null, // No upstream request for cached response - rawCachedResponseData, // Raw SSE data from cached response (same for both) - cachedStreamingPluginIds, - undefined, // No plugin results for cached response - ); - // Calculate costs for cached response const costs = await calculateCosts( usedModel, @@ -2849,82 +2866,121 @@ chat.openapi(completions, async (c) => { project.organizationId, ); - await insertLog({ - ...baseLogEntry, - duration: 0, // No processing time for cached response - timeToFirstToken: null, // Not applicable for cached response - timeToFirstReasoningToken: null, // Not applicable for cached response - responseSize: cachedResponseSize, - content: fullContent || null, - reasoningContent: fullReasoningContent || null, - finishReason: cachedStreamingResponse.metadata.finishReason, - promptTokens: - (costs.promptTokens ?? promptTokens)?.toString() ?? null, - completionTokens: completionTokens?.toString() ?? null, - totalTokens: costs.imageInputTokens - ? ( - (costs.promptTokens ?? promptTokens ?? 0) + - (completionTokens ?? 0) + - (reasoningTokens ?? 0) - ).toString() - : (totalTokens?.toString() ?? null), - reasoningTokens: reasoningTokens?.toString() ?? null, - cachedTokens: cachedTokens?.toString() ?? null, - hasError: false, - streamed: true, - canceled: false, - errorDetails: null, - inputCost: costs.inputCost ?? 0, - outputCost: costs.outputCost ?? 0, - cachedInputCost: costs.cachedInputCost ?? 0, - requestCost: costs.requestCost ?? 0, - webSearchCost: costs.webSearchCost ?? 0, - imageInputTokens: costs.imageInputTokens?.toString() ?? null, - imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, - imageInputCost: costs.imageInputCost ?? null, - imageOutputCost: costs.imageOutputCost ?? null, - cost: costs.totalCost ?? 0, - estimatedCost: costs.estimatedCost, - discount: costs.discount ?? null, - pricingTier: costs.pricingTier ?? null, - dataStorageCost: calculateDataStorageCost( - costs.promptTokens ?? promptTokens, - cachedTokens, - completionTokens, - reasoningTokens, - retentionLevel, - ), - cached: true, - toolResults: - (cachedStreamingResponse.metadata as { toolResults?: any }) - ?.toolResults ?? 
null, - }); + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: rawCachedResponseData, + upstreamRequest: null, + upstreamResponse: rawCachedResponseData, + plugins: requestPluginIds, + pluginResults: undefined, + }, + { + duration: 0, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: cachedResponseSize, + content: fullContent || null, + reasoningContent: fullReasoningContent || null, + finishReason: cachedStreamingResponse.metadata.finishReason, + promptTokens: + (costs.promptTokens ?? promptTokens)?.toString() ?? null, + completionTokens: completionTokens?.toString() ?? null, + totalTokens: costs.imageInputTokens + ? ( + (costs.promptTokens ?? promptTokens ?? 0) + + (completionTokens ?? 0) + + (reasoningTokens ?? 0) + ).toString() + : (totalTokens?.toString() ?? null), + reasoningTokens: reasoningTokens?.toString() ?? null, + cachedTokens: cachedTokens?.toString() ?? null, + hasError: false, + streamed: true, + canceled: false, + errorDetails: null, + inputCost: costs.inputCost ?? 0, + outputCost: costs.outputCost ?? 0, + cachedInputCost: costs.cachedInputCost ?? 0, + requestCost: costs.requestCost ?? 0, + webSearchCost: costs.webSearchCost ?? 0, + imageInputTokens: costs.imageInputTokens?.toString() ?? null, + imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, + imageInputCost: costs.imageInputCost ?? null, + imageOutputCost: costs.imageOutputCost ?? null, + cost: costs.totalCost ?? 0, + estimatedCost: costs.estimatedCost, + discount: costs.discount ?? null, + pricingTier: costs.pricingTier ?? null, + dataStorageCost: calculateDataStorageCost( + costs.promptTokens ?? promptTokens, + cachedTokens, + completionTokens, + reasoningTokens, + retentionLevel, + ), + cached: true, + toolResults: + (cachedStreamingResponse.metadata as { toolResults?: any }) + ?.toolResults ?? 
null, + }, + ); // Return cached streaming response by replaying chunks with original timing + void registerStreamCompletion(c); return streamSSE( c, async (stream) => { - let previousTimestamp = 0; + try { + let previousTimestamp = 0; - for (const chunk of cachedStreamingResponse.chunks) { - // Calculate delay based on original chunk timing - const delay = Math.max(0, chunk.timestamp - previousTimestamp); - // Cap the delay to prevent excessively long waits (max 1 second) - const cappedDelay = Math.min(delay, 1000); + for (const chunk of cachedStreamingResponse.chunks) { + // Calculate delay based on original chunk timing + const delay = Math.max(0, chunk.timestamp - previousTimestamp); + // Cap the delay to prevent excessively long waits (max 1 second) + const cappedDelay = Math.min(delay, 1000); - if (cappedDelay > 0) { - await new Promise((resolve) => { - setTimeout(() => resolve(), cappedDelay); - }); - } + if (cappedDelay > 0) { + await new Promise((resolve) => { + setTimeout(() => resolve(), cappedDelay); + }); + } - await stream.writeSSE({ - data: chunk.data, - id: String(chunk.eventId), - event: chunk.event, - }); + await stream.writeSSE({ + data: chunk.data, + id: String(chunk.eventId), + event: chunk.event, + }); - previousTimestamp = chunk.timestamp; + previousTimestamp = chunk.timestamp; + } + } finally { + finishStreamCompletion(c); } }, async (error) => { @@ -2944,44 +3000,6 @@ chat.openapi(completions, async (c) => { if (cachedResponse) { // Log the cached request const duration = 0; // No processing time needed - // Extract plugin IDs for logging (cached non-streaming) - const cachedPluginIds = plugins?.map((p) => p.id) ?? []; - - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - cachedResponse, - null, // No upstream request for cached response - cachedResponse, // upstream response is same as cached response - cachedPluginIds, - undefined, // No plugin results for cached response - ); // Calculate costs for cached response const cachedCosts = await calculateCosts( @@ -3008,59 +3026,96 @@ chat.openapi(completions, async (c) => { (cachedReasoningContent?.length ?? 0) + 500; // overhead for metadata - await insertLog({ - ...baseLogEntry, - duration, - timeToFirstToken: null, // Not applicable for cached response - timeToFirstReasoningToken: null, // Not applicable for cached response - responseSize: estimatedCachedSize, - content: cachedContent ?? null, - reasoningContent: cachedReasoningContent ?? null, - finishReason: cachedResponse.choices?.[0]?.finish_reason ?? null, - promptTokens: - ( - cachedCosts.promptTokens ?? cachedResponse.usage?.prompt_tokens - )?.toString() ?? null, - completionTokens: cachedResponse.usage?.completion_tokens ?? null, - totalTokens: cachedCosts.imageInputTokens - ? ( - (cachedCosts.promptTokens ?? - cachedResponse.usage?.prompt_tokens ?? - 0) + - (cachedResponse.usage?.completion_tokens ?? 0) + - (cachedResponse.usage?.reasoning_tokens ?? 0) - ).toString() - : (cachedResponse.usage?.total_tokens ?? null), - reasoningTokens: cachedResponse.usage?.reasoning_tokens ?? 
null, - cachedTokens: - cachedResponse.usage?.prompt_tokens_details?.cached_tokens ?? null, - hasError: false, - streamed: false, - canceled: false, - errorDetails: null, - inputCost: cachedCosts.inputCost ?? 0, - outputCost: cachedCosts.outputCost ?? 0, - cachedInputCost: cachedCosts.cachedInputCost ?? 0, - requestCost: cachedCosts.requestCost ?? 0, - webSearchCost: cachedCosts.webSearchCost ?? 0, - imageInputTokens: cachedCosts.imageInputTokens?.toString() ?? null, - imageOutputTokens: cachedCosts.imageOutputTokens?.toString() ?? null, - imageInputCost: cachedCosts.imageInputCost ?? null, - imageOutputCost: cachedCosts.imageOutputCost ?? null, - cost: cachedCosts.totalCost ?? 0, - estimatedCost: cachedCosts.estimatedCost, - discount: cachedCosts.discount ?? null, - pricingTier: cachedCosts.pricingTier ?? null, - dataStorageCost: calculateDataStorageCost( - cachedCosts.promptTokens ?? cachedResponse.usage?.prompt_tokens, - cachedResponse.usage?.prompt_tokens_details?.cached_tokens, - cachedResponse.usage?.completion_tokens, - cachedResponse.usage?.reasoning_tokens, - retentionLevel, - ), - cached: true, - toolResults: cachedResponse.choices?.[0]?.message?.tool_calls ?? null, - }); + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: cachedResponse, + upstreamRequest: null, + upstreamResponse: cachedResponse, + plugins: requestPluginIds, + pluginResults: undefined, + }, + { + duration, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: estimatedCachedSize, + content: cachedContent ?? null, + reasoningContent: cachedReasoningContent ?? null, + finishReason: cachedResponse.choices?.[0]?.finish_reason ?? null, + promptTokens: + ( + cachedCosts.promptTokens ?? cachedResponse.usage?.prompt_tokens + )?.toString() ?? null, + completionTokens: cachedResponse.usage?.completion_tokens ?? null, + totalTokens: cachedCosts.imageInputTokens + ? ( + (cachedCosts.promptTokens ?? + cachedResponse.usage?.prompt_tokens ?? + 0) + + (cachedResponse.usage?.completion_tokens ?? 0) + + (cachedResponse.usage?.reasoning_tokens ?? 0) + ).toString() + : (cachedResponse.usage?.total_tokens ?? null), + reasoningTokens: cachedResponse.usage?.reasoning_tokens ?? null, + cachedTokens: + cachedResponse.usage?.prompt_tokens_details?.cached_tokens ?? + null, + hasError: false, + streamed: false, + canceled: false, + errorDetails: null, + inputCost: cachedCosts.inputCost ?? 0, + outputCost: cachedCosts.outputCost ?? 0, + cachedInputCost: cachedCosts.cachedInputCost ?? 0, + requestCost: cachedCosts.requestCost ?? 0, + webSearchCost: cachedCosts.webSearchCost ?? 0, + imageInputTokens: cachedCosts.imageInputTokens?.toString() ?? null, + imageOutputTokens: + cachedCosts.imageOutputTokens?.toString() ?? null, + imageInputCost: cachedCosts.imageInputCost ?? null, + imageOutputCost: cachedCosts.imageOutputCost ?? null, + cost: cachedCosts.totalCost ?? 0, + estimatedCost: cachedCosts.estimatedCost, + discount: cachedCosts.discount ?? null, + pricingTier: cachedCosts.pricingTier ?? 
null, + dataStorageCost: calculateDataStorageCost( + cachedCosts.promptTokens ?? cachedResponse.usage?.prompt_tokens, + cachedResponse.usage?.prompt_tokens_details?.cached_tokens, + cachedResponse.usage?.completion_tokens, + cachedResponse.usage?.reasoning_tokens, + retentionLevel, + ), + cached: true, + toolResults: + cachedResponse.choices?.[0]?.message?.tool_calls ?? null, + }, + ); return c.json(cachedResponse); } @@ -3316,1934 +3371,1907 @@ chat.openapi(completions, async (c) => { // Handle streaming response if requested // For image generation models, we skip real streaming and use fake streaming later if (effectiveStream) { + void registerStreamCompletion(c); return streamSSE( c, async (stream) => { - let eventId = 0; - let canceled = false; - let streamingError: unknown = null; - let doneSent = false; // Track if [DONE] has been sent downstream - - // Raw logging variables - let streamingRawResponseData = ""; // Raw SSE data sent back to the client - - // Streaming cache variables - const streamingChunks: Array<{ - data: string; - eventId: number; - event?: string; - timestamp: number; - }> = []; - const streamStartTime = Date.now(); - - // SSE keepalive to prevent proxy/load balancer timeouts - // Sends SSE comments (ignored by clients) every 15 seconds to keep connection alive - const KEEPALIVE_INTERVAL_MS = 15000; - const keepaliveInterval = setInterval(() => { - stream.write(": ping\n\n").catch(() => { - // Stream likely closed, cleanup will happen via abort handler or finally - }); - }, KEEPALIVE_INTERVAL_MS); - const clearKeepalive = () => clearInterval(keepaliveInterval); - - // Timing tracking variables - let timeToFirstToken: number | null = null; - let timeToFirstReasoningToken: number | null = null; - let firstTokenReceived = false; - let firstReasoningTokenReceived = false; - - // Helper function to write SSE and capture for cache - const writeSSEAndCache = async (sseData: { - data: string; - event?: string; - id?: string; - }) => { - await stream.writeSSE(sseData); - - // Collect raw response data for logging only in debug mode and within size limit - if ( - debugMode && - streamingRawResponseData.length < MAX_RAW_DATA_SIZE - ) { - const sseString = `${sseData.event ? `event: ${sseData.event}\n` : ""}data: ${sseData.data}${sseData.id ? `\nid: ${sseData.id}` : ""}\n\n`; - streamingRawResponseData += sseString; - } - - // Capture for streaming cache if enabled - if (cachingEnabled && streamingCacheKey) { - streamingChunks.push({ - data: sseData.data, - eventId: sseData.id ? 
parseInt(sseData.id, 10) : eventId, - event: sseData.event, - timestamp: Date.now() - streamStartTime, + return await (async () => { + let eventId = 0; + let canceled = false; + let streamingError: unknown = null; + let doneSent = false; // Track if [DONE] has been sent downstream + + // Raw logging variables + let streamingRawResponseData = ""; // Raw SSE data sent back to the client + + // Streaming cache variables + const streamingChunks: Array<{ + data: string; + eventId: number; + event?: string; + timestamp: number; + }> = []; + const streamStartTime = Date.now(); + + // SSE keepalive to prevent proxy/load balancer timeouts + // Sends SSE comments (ignored by clients) every 15 seconds to keep connection alive + const KEEPALIVE_INTERVAL_MS = 15000; + const keepaliveInterval = setInterval(() => { + stream.write(": ping\n\n").catch(() => { + // Stream likely closed, cleanup will happen via abort handler or finally }); - } - }; + }, KEEPALIVE_INTERVAL_MS); + const clearKeepalive = () => clearInterval(keepaliveInterval); + + // Timing tracking variables + let timeToFirstToken: number | null = null; + let timeToFirstReasoningToken: number | null = null; + let firstTokenReceived = false; + let firstReasoningTokenReceived = false; + + // Helper function to write SSE and capture for cache + const writeSSEAndCache = async (sseData: { + data: string; + event?: string; + id?: string; + }) => { + await stream.writeSSE(sseData); + + // Collect raw response data for logging only in debug mode and within size limit + if ( + debugMode && + streamingRawResponseData.length < MAX_RAW_DATA_SIZE + ) { + const sseString = `${sseData.event ? `event: ${sseData.event}\n` : ""}data: ${sseData.data}${sseData.id ? `\nid: ${sseData.id}` : ""}\n\n`; + streamingRawResponseData += sseString; + } - const writeStreamingContentFilterResponse = async ({ - billingModel, - billingProvider, - responseModel, - metadata, - }: { - billingModel: string; - billingProvider: Provider; - responseModel: string; - metadata?: Record; - }) => { - const { calculatedPromptTokens } = estimateTokens( - billingProvider, - messages, - null, - null, - 0, - ); - const promptTokenCount = Math.max( - 1, - Math.round(calculatedPromptTokens ?? 1), - ); - const streamingCosts = await calculateCosts( + // Capture for streaming cache if enabled + if (cachingEnabled && streamingCacheKey) { + streamingChunks.push({ + data: sseData.data, + eventId: sseData.id ? 
parseInt(sseData.id, 10) : eventId, + event: sseData.event, + timestamp: Date.now() - streamStartTime, + }); + } + }; + + const writeStreamingContentFilterResponse = async ({ billingModel, billingProvider, - promptTokenCount, - 0, - null, - { - prompt: messages - .map((m) => messageContentToString(m.content)) - .join("\n"), - completion: "", - }, - null, - 0, - image_config?.image_size, - inputImageCount, - 0, - project.organizationId, - ); - - await writeSSEAndCache({ - data: JSON.stringify({ - id: `chatcmpl-${Date.now()}`, - object: "chat.completion.chunk", - created: Math.floor(Date.now() / 1000), - model: responseModel, - choices: [ - { - index: 0, - delta: {}, - finish_reason: "content_filter", - }, - ], - ...(metadata && { metadata }), - }), - id: String(eventId++), - }); - - await writeSSEAndCache({ - data: JSON.stringify({ - id: `chatcmpl-${Date.now()}`, - object: "chat.completion.chunk", - created: Math.floor(Date.now() / 1000), - model: responseModel, - choices: [ - { - index: 0, - delta: {}, - finish_reason: null, - }, - ], - usage: { - prompt_tokens: promptTokenCount, - completion_tokens: 0, - total_tokens: promptTokenCount, - cost_usd_total: streamingCosts.totalCost, - cost_usd_input: streamingCosts.inputCost, - cost_usd_output: streamingCosts.outputCost, - cost_usd_cached_input: streamingCosts.cachedInputCost, - cost_usd_request: streamingCosts.requestCost, - cost_usd_image_input: streamingCosts.imageInputCost, - cost_usd_image_output: streamingCosts.imageOutputCost, - }, - }), - id: String(eventId++), - }); - - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); - doneSent = true; - }; - - // Set up cancellation handling - const controller = new AbortController(); - // Set up a listener for the request being aborted - const onAbort = () => { - clearKeepalive(); - if (requestCanBeCanceled) { - canceled = true; - controller.abort(); - } - }; - - // Add event listener for the abort event on the connection - c.req.raw.signal.addEventListener("abort", onAbort); - - // --- Retry loop for provider fallback --- - const routingAttempts: RoutingAttempt[] = []; - const failedProviderIds = new Set(); - let res: Response | undefined; - const finalLogId = shortid(); - for ( - let retryAttempt = 0; - retryAttempt <= MAX_RETRIES; - retryAttempt++ - ) { - const perAttemptStartTime = Date.now(); - - // Type guard: narrow variables that TypeScript widens due to loop reassignment - if ( - !usedProvider || - !usedToken || - !url || - !usedModelFormatted || - !usedModelMapping - ) { - throw new Error("Provider context not initialized"); - } - - if (retryAttempt > 0) { - // Re-add abort listener (catch block removes it on error) - c.req.raw.signal.addEventListener("abort", onAbort); - - const nextProvider = selectNextProvider( - routingMetadata?.providerScores ?? [], - failedProviderIds, - iamFilteredModelProviders, + responseModel, + metadata, + }: { + billingModel: string; + billingProvider: Provider; + responseModel: string; + metadata?: Record; + }) => { + const { calculatedPromptTokens } = estimateTokens( + billingProvider, + messages, + null, + null, + 0, ); - if (!nextProvider) { - break; - } - - // Check if the fallback candidate is rate-limited - const retryRateLimitPeek = await peekProviderRateLimit( + const promptTokenCount = Math.max( + 1, + Math.round(calculatedPromptTokens ?? 
1), + ); + const streamingCosts = await calculateCosts( + billingModel, + billingProvider, + promptTokenCount, + 0, + null, + { + prompt: messages + .map((m) => messageContentToString(m.content)) + .join("\n"), + completion: "", + }, + null, + 0, + image_config?.image_size, + inputImageCount, + 0, project.organizationId, - nextProvider.providerId, - modelInfo.id, - nextProvider.modelName, ); - if (retryRateLimitPeek.rateLimited) { - failedProviderIds.add( - providerRetryKey(nextProvider.providerId, nextProvider.region), - ); - // Mark as rate-limited in routing metadata - const scoreEntry = routingMetadata?.providerScores.find( - (s) => s.providerId === nextProvider.providerId, - ); - if (scoreEntry) { - scoreEntry.rate_limited = true; - } - // Don't consume a retry slot for rate-limit skips - retryAttempt--; - continue; - } - try { - const ctx = await resolveProviderContext( - nextProvider, - { - mode: project.mode, - organizationId: project.organizationId, - }, - { - id: organization.id, - credits: organization.credits, - devPlan: organization.devPlan, - devPlanCreditsLimit: organization.devPlanCreditsLimit, - devPlanCreditsUsed: organization.devPlanCreditsUsed, - devPlanExpiresAt: organization.devPlanExpiresAt, - }, - modelInfo, - originalRequestParams, - { - requestId, - stream: true, - effectiveStream, - messages: messages as BaseMessage[], - response_format, - tools, - tool_choice, - reasoning_effort, - reasoning_max_tokens, - effort, - webSearchTool, - image_config, - sensitive_word_check, - maxImageSizeMB, - userPlan, - hasExistingToolCalls, - customProviderName, - webSearchEnabled: !!webSearchTool, + await writeSSEAndCache({ + data: JSON.stringify({ + id: `chatcmpl-${Date.now()}`, + object: "chat.completion.chunk", + created: Math.floor(Date.now() / 1000), + model: responseModel, + choices: [ + { + index: 0, + delta: {}, + finish_reason: "content_filter", + }, + ], + ...(metadata && { metadata }), + }), + id: String(eventId++), + }); + + await writeSSEAndCache({ + data: JSON.stringify({ + id: `chatcmpl-${Date.now()}`, + object: "chat.completion.chunk", + created: Math.floor(Date.now() / 1000), + model: responseModel, + choices: [ + { + index: 0, + delta: {}, + finish_reason: null, + }, + ], + usage: { + prompt_tokens: promptTokenCount, + completion_tokens: 0, + total_tokens: promptTokenCount, + cost_usd_total: streamingCosts.totalCost, + cost_usd_input: streamingCosts.inputCost, + cost_usd_output: streamingCosts.outputCost, + cost_usd_cached_input: streamingCosts.cachedInputCost, + cost_usd_request: streamingCosts.requestCost, + cost_usd_image_input: streamingCosts.imageInputCost, + cost_usd_image_output: streamingCosts.imageOutputCost, }, - ); - usedProvider = ctx.usedProvider; - usedModel = ctx.usedModel; - usedModelFormatted = ctx.usedModelFormatted; - usedModelMapping = ctx.usedModelMapping; - baseModelName = ctx.baseModelName; - usedToken = ctx.usedToken; - providerKey = ctx.providerKey; - configIndex = ctx.configIndex; - envVarName = ctx.envVarName; - url = ctx.url; - requestBody = ctx.requestBody; - useResponsesApi = ctx.useResponsesApi; - requestCanBeCanceled = ctx.requestCanBeCanceled; - isImageGeneration = ctx.isImageGeneration; - supportsReasoning = ctx.supportsReasoning; - temperature = ctx.temperature; - max_tokens = ctx.max_tokens; - top_p = ctx.top_p; - frequency_penalty = ctx.frequency_penalty; - presence_penalty = ctx.presence_penalty; - usedRegion = ctx.usedRegion; - } catch { - failedProviderIds.add( - providerRetryKey(nextProvider.providerId, 
nextProvider.region), - ); - // Don't consume a retry slot for context-resolution failures - retryAttempt--; - continue; - } - } + }), + id: String(eventId++), + }); - try { - const headers = getProviderHeaders(usedProvider, usedToken, { - webSearchEnabled: !!webSearchTool, + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), }); - headers["Content-Type"] = "application/json"; - - // Add effort beta header for Anthropic if effort parameter is specified - if (usedProvider === "anthropic" && effort !== undefined) { - const currentBeta = headers["anthropic-beta"]; - headers["anthropic-beta"] = currentBeta - ? `${currentBeta},effort-2025-11-24` - : "effort-2025-11-24"; + doneSent = true; + }; + + // Set up cancellation handling + const controller = new AbortController(); + // Set up a listener for the request being aborted + const onAbort = () => { + clearKeepalive(); + if (requestCanBeCanceled) { + canceled = true; + controller.abort(); } + }; + + // Add event listener for the abort event on the connection + c.req.raw.signal.addEventListener("abort", onAbort); + + // --- Retry loop for provider fallback --- + const routingAttempts: RoutingAttempt[] = []; + const failedProviderIds = new Set(); + let res: Response | undefined; + const finalLogId = shortid(); + for ( + let retryAttempt = 0; + retryAttempt <= MAX_RETRIES; + retryAttempt++ + ) { + const perAttemptStartTime = Date.now(); - // Add structured outputs beta header for Anthropic if json_schema response_format is specified + // Type guard: narrow variables that TypeScript widens due to loop reassignment if ( - usedProvider === "anthropic" && - response_format?.type === "json_schema" + !usedProvider || + !usedToken || + !url || + !usedModelFormatted || + !usedModelMapping ) { - const currentBeta = headers["anthropic-beta"]; - headers["anthropic-beta"] = currentBeta - ? `${currentBeta},structured-outputs-2025-11-13` - : "structured-outputs-2025-11-13"; + throw new Error("Provider context not initialized"); } - // Create a combined signal for both timeout and cancellation - const fetchSignal = createStreamingCombinedSignal( - requestCanBeCanceled ? controller : undefined, - ); - - res = await fetch(url, { - method: "POST", - headers, - body: JSON.stringify(requestBody), - signal: fetchSignal, - }); - } catch (error) { - // Clean up the event listeners - c.req.raw.signal.removeEventListener("abort", onAbort); - - // Check for timeout error first (AbortSignal.timeout throws TimeoutError) - if (isTimeoutError(error)) { - // Handle timeout error - const errorMessage = - error instanceof Error ? error.message : "Request timeout"; - const timeoutCause = extractErrorCause(error); - logger.warn("Upstream request timeout", { - error: errorMessage, - cause: timeoutCause, - usedProvider, - requestedProvider, - usedModel, - initialRequestedModel, - unifiedFinishReason: getUnifiedFinishReason( - "upstream_error", - usedProvider, - ), - }); - - // Log the timeout error in the database - const timeoutPluginIds = plugins?.map((p) => p.id) ?? []; - - // Check if we should retry before logging so we can mark the log as retried - const willRetryTimeout = shouldRetryRequest({ - requestedProvider, - noFallback, - statusCode: 0, - retryCount: retryAttempt, - remainingProviders: - (routingMetadata?.providerScores.length ?? 
0) - - failedProviderIds.size - - 1, - usedProvider, - }); + if (retryAttempt > 0) { + // Re-add abort listener (catch block removes it on error) + c.req.raw.signal.addEventListener("abort", onAbort); - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - null, // No response for timeout error - requestBody, - null, // No upstream response for timeout error - timeoutPluginIds, - undefined, // No plugin results for error case + const nextProvider = selectNextProvider( + routingMetadata?.providerScores ?? [], + failedProviderIds, + iamFilteredModelProviders, ); + if (!nextProvider) { + break; + } - await insertLog({ - ...baseLogEntry, - duration: Date.now() - perAttemptStartTime, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "upstream_error", - promptTokens: null, - completionTokens: null, - totalTokens: null, - reasoningTokens: null, - cachedTokens: null, - hasError: true, - streamed: true, - canceled: false, - errorDetails: { - statusCode: 0, - statusText: "TimeoutError", - responseText: errorMessage, - cause: timeoutCause, - }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - retried: willRetryTimeout, - retriedByLogId: willRetryTimeout ? finalLogId : null, - }); - - if (willRetryTimeout) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: 0, - error_type: getErrorType(0), - succeeded: false, - }); + // Check if the fallback candidate is rate-limited + const retryRateLimitPeek = await peekProviderRateLimit( + project.organizationId, + nextProvider.providerId, + modelInfo.id, + nextProvider.modelName, + ); + if (retryRateLimitPeek.rateLimited) { failedProviderIds.add( - providerRetryKey(usedProvider, usedRegion), + providerRetryKey( + nextProvider.providerId, + nextProvider.region, + ), ); + // Mark as rate-limited in routing metadata + const scoreEntry = routingMetadata?.providerScores.find( + (s) => s.providerId === nextProvider.providerId, + ); + if (scoreEntry) { + scoreEntry.rate_limited = true; + } + // Don't consume a retry slot for rate-limit skips + retryAttempt--; continue; } - await stream.writeSSE({ - event: "error", - data: JSON.stringify({ - error: { - message: `Upstream provider timeout: ${errorMessage}`, - type: "upstream_timeout", - code: "timeout", + try { + const ctx = await resolveProviderContext( + nextProvider, + { + mode: project.mode, + organizationId: project.organizationId, }, - }), - id: String(eventId++), - }); - return; - } else if (error instanceof Error && error.name === "AbortError") { - // Log the canceled request - // Extract plugin IDs for logging (canceled request) - const canceledPluginIds = plugins?.map((p) => p.id) ?? 
[]; - - // Calculate costs for cancelled request if billing is enabled - const billCancelled = shouldBillCancelledRequests(); - let cancelledCosts: Awaited< - ReturnType - > | null = null; - let estimatedPromptTokens: number | null = null; - - if (billCancelled) { - // Estimate prompt tokens from messages - const tokenEstimation = estimateTokens( - usedProvider, - messages, - null, - null, - null, - ); - estimatedPromptTokens = tokenEstimation.calculatedPromptTokens; - - // Calculate costs based on prompt tokens only (no completion yet) - // If web search tool was enabled, count it as 1 search for billing - cancelledCosts = await calculateCosts( - usedModel, - usedProvider, - estimatedPromptTokens, - 0, // No completion tokens yet - null, // No cached tokens { - prompt: messages - .map((m) => messageContentToString(m.content)) - .join("\n"), - completion: "", + id: organization.id, + credits: organization.credits, + devPlan: organization.devPlan, + devPlanCreditsLimit: organization.devPlanCreditsLimit, + devPlanCreditsUsed: organization.devPlanCreditsUsed, + devPlanExpiresAt: organization.devPlanExpiresAt, + }, + modelInfo, + originalRequestParams, + { + requestId, + stream: true, + effectiveStream, + messages: messages as BaseMessage[], + response_format, + tools, + tool_choice, + reasoning_effort, + reasoning_max_tokens, + effort, + webSearchTool, + image_config, + sensitive_word_check, + maxImageSizeMB, + userPlan, + hasExistingToolCalls, + customProviderName, + webSearchEnabled: !!webSearchTool, }, - null, // No reasoning tokens - 0, // No output images - undefined, - inputImageCount, - webSearchTool ? 1 : null, // Bill for web search if it was enabled - project.organizationId, ); + usedProvider = ctx.usedProvider; + usedModel = ctx.usedModel; + usedModelFormatted = ctx.usedModelFormatted; + usedModelMapping = ctx.usedModelMapping; + baseModelName = ctx.baseModelName; + usedToken = ctx.usedToken; + providerKey = ctx.providerKey; + configIndex = ctx.configIndex; + envVarName = ctx.envVarName; + url = ctx.url; + requestBody = ctx.requestBody; + useResponsesApi = ctx.useResponsesApi; + requestCanBeCanceled = ctx.requestCanBeCanceled; + isImageGeneration = ctx.isImageGeneration; + supportsReasoning = ctx.supportsReasoning; + temperature = ctx.temperature; + max_tokens = ctx.max_tokens; + top_p = ctx.top_p; + frequency_penalty = ctx.frequency_penalty; + presence_penalty = ctx.presence_penalty; + usedRegion = ctx.usedRegion; + } catch { + failedProviderIds.add( + providerRetryKey( + nextProvider.providerId, + nextProvider.region, + ), + ); + // Don't consume a retry slot for context-resolution failures + retryAttempt--; + continue; } + } - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - null, // No response for canceled request - requestBody, // The request that was sent before cancellation - null, // No upstream response for canceled request - canceledPluginIds, - undefined, // No plugin results for canceled request + try { + const headers = getProviderHeaders(usedProvider, usedToken, { + webSearchEnabled: !!webSearchTool, + }); + headers["Content-Type"] = 
"application/json"; + + // Add effort beta header for Anthropic if effort parameter is specified + if (usedProvider === "anthropic" && effort !== undefined) { + const currentBeta = headers["anthropic-beta"]; + headers["anthropic-beta"] = currentBeta + ? `${currentBeta},effort-2025-11-24` + : "effort-2025-11-24"; + } + + // Add structured outputs beta header for Anthropic if json_schema response_format is specified + if ( + usedProvider === "anthropic" && + response_format?.type === "json_schema" + ) { + const currentBeta = headers["anthropic-beta"]; + headers["anthropic-beta"] = currentBeta + ? `${currentBeta},structured-outputs-2025-11-13` + : "structured-outputs-2025-11-13"; + } + + // Create a combined signal for both timeout and cancellation + const fetchSignal = createStreamingCombinedSignal( + requestCanBeCanceled ? controller : undefined, ); - await insertLog({ - ...baseLogEntry, - duration: Date.now() - perAttemptStartTime, - timeToFirstToken: null, // Not applicable for canceled request - timeToFirstReasoningToken: null, // Not applicable for canceled request - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "canceled", - promptTokens: billCancelled - ? ( - cancelledCosts?.promptTokens ?? estimatedPromptTokens - )?.toString() - : null, - completionTokens: billCancelled ? "0" : null, - totalTokens: billCancelled - ? ( - cancelledCosts?.promptTokens ?? estimatedPromptTokens - )?.toString() - : null, - reasoningTokens: null, - cachedTokens: null, - hasError: false, - streamed: true, - canceled: true, - errorDetails: null, - inputCost: cancelledCosts?.inputCost ?? null, - outputCost: cancelledCosts?.outputCost ?? null, - cachedInputCost: cancelledCosts?.cachedInputCost ?? null, - requestCost: cancelledCosts?.requestCost ?? null, - webSearchCost: cancelledCosts?.webSearchCost ?? null, - imageInputTokens: - cancelledCosts?.imageInputTokens?.toString() ?? null, - imageOutputTokens: - cancelledCosts?.imageOutputTokens?.toString() ?? null, - imageInputCost: cancelledCosts?.imageInputCost ?? null, - imageOutputCost: cancelledCosts?.imageOutputCost ?? null, - cost: cancelledCosts?.totalCost ?? null, - estimatedCost: cancelledCosts?.estimatedCost ?? false, - discount: cancelledCosts?.discount ?? null, - dataStorageCost: billCancelled - ? calculateDataStorageCost( - cancelledCosts?.promptTokens ?? estimatedPromptTokens, - null, - 0, - null, - retentionLevel, - ) - : "0", - cached: false, - toolResults: null, + res = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(requestBody), + signal: fetchSignal, }); + } catch (error) { + // Clean up the event listeners + c.req.raw.signal.removeEventListener("abort", onAbort); + + // Check for timeout error first (AbortSignal.timeout throws TimeoutError) + if (isTimeoutError(error)) { + // Handle timeout error + const errorMessage = + error instanceof Error ? 
error.message : "Request timeout"; + const timeoutCause = extractErrorCause(error); + logger.warn("Upstream request timeout", { + error: errorMessage, + cause: timeoutCause, + usedProvider, + requestedProvider, + usedModel, + initialRequestedModel, + unifiedFinishReason: getUnifiedFinishReason( + "upstream_error", + usedProvider, + ), + }); - // Send a cancellation event to the client - await writeSSEAndCache({ - event: "canceled", - data: JSON.stringify({ - message: "Request canceled by client", - }), - id: String(eventId++), - }); - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); - clearKeepalive(); - return; - } else if (error instanceof Error) { - // Handle fetch errors (timeout, connection failures, etc.) - const errorMessage = error.message; - const fetchCause = extractErrorCause(error); - logger.warn("Fetch error", { - error: errorMessage, - cause: fetchCause, - usedProvider, - requestedProvider, - usedModel, - initialRequestedModel, - unifiedFinishReason: getUnifiedFinishReason( - "upstream_error", + // Check if we should retry before logging so we can mark the log as retried + const willRetryTimeout = shouldRetryRequest({ + requestedProvider, + noFallback, + statusCode: 0, + retryCount: retryAttempt, + remainingProviders: + (routingMetadata?.providerScores.length ?? 0) - + failedProviderIds.size - + 1, usedProvider, - ), - }); + }); - // Log the error in the database - // Extract plugin IDs for logging (fetch error) - const fetchErrorPluginIds = plugins?.map((p) => p.id) ?? []; + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: null, + upstreamRequest: requestBody, + upstreamResponse: null, + plugins: requestPluginIds, + pluginResults: undefined, + }, + { + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: true, + canceled: false, + errorDetails: { + statusCode: 0, + statusText: "TimeoutError", + responseText: errorMessage, + cause: timeoutCause, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryTimeout, + retriedByLogId: willRetryTimeout ? finalLogId : null, + }, + ); - // Check if we should retry before logging so we can mark the log as retried - const willRetryFetch = shouldRetryRequest({ - requestedProvider, - noFallback, - statusCode: 0, - retryCount: retryAttempt, - remainingProviders: - (routingMetadata?.providerScores.length ?? 
0) - - failedProviderIds.size - - 1, - usedProvider, - }); - - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - null, // No response for fetch error - requestBody, // The request that resulted in error - null, // No upstream response for fetch error - fetchErrorPluginIds, - undefined, // No plugin results for error case - ); - - await insertLog({ - ...baseLogEntry, - duration: Date.now() - perAttemptStartTime, - timeToFirstToken: null, // Not applicable for error case - timeToFirstReasoningToken: null, // Not applicable for error case - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "upstream_error", - promptTokens: null, - completionTokens: null, - totalTokens: null, - reasoningTokens: null, - cachedTokens: null, - hasError: true, - streamed: true, - canceled: false, - errorDetails: { - statusCode: 0, - statusText: error.name, - responseText: errorMessage, - cause: fetchCause, - }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - retried: willRetryFetch, - retriedByLogId: willRetryFetch ? finalLogId : null, - }); - - // Report key health for environment-based tokens - if (envVarName !== undefined) { - reportKeyError(envVarName, configIndex, 0); - } + if (willRetryTimeout) { + routingAttempts.push({ + provider: usedProvider, + model: baseModelName, + ...(usedRegion && { region: usedRegion }), + status_code: 0, + error_type: getErrorType(0), + succeeded: false, + }); + failedProviderIds.add( + providerRetryKey(usedProvider, usedRegion), + ); + continue; + } - if (willRetryFetch) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: 0, - error_type: getErrorType(0), - succeeded: false, + await stream.writeSSE({ + event: "error", + data: JSON.stringify({ + error: { + message: `Upstream provider timeout: ${errorMessage}`, + type: "upstream_timeout", + code: "timeout", + }, + }), + id: String(eventId++), }); - failedProviderIds.add( - providerRetryKey(usedProvider, usedRegion), - ); - continue; - } + return; + } else if ( + error instanceof Error && + error.name === "AbortError" + ) { + // Calculate costs for cancelled request if billing is enabled + const billCancelled = shouldBillCancelledRequests(); + let cancelledCosts: Awaited< + ReturnType + > | null = null; + let estimatedPromptTokens: number | null = null; + + if (billCancelled) { + // Estimate prompt tokens from messages + const tokenEstimation = estimateTokens( + usedProvider, + messages, + null, + null, + null, + ); + estimatedPromptTokens = + tokenEstimation.calculatedPromptTokens; - // Send error event to the client - await writeSSEAndCache({ - event: "error", - data: JSON.stringify({ - error: { - message: `Failed to connect to provider: ${errorMessage}`, - type: "upstream_error", - code: "fetch_failed", - }, - }), - id: String(eventId++), - }); - await writeSSEAndCache({ - event: "done", - data: 
"[DONE]", - id: String(eventId++), - }); - clearKeepalive(); - return; - } else { - throw error; - } - } + // Calculate costs based on prompt tokens only (no completion yet) + // If web search tool was enabled, count it as 1 search for billing + cancelledCosts = await calculateCosts( + usedModel, + usedProvider, + estimatedPromptTokens, + 0, // No completion tokens yet + null, // No cached tokens + { + prompt: messages + .map((m) => messageContentToString(m.content)) + .join("\n"), + completion: "", + }, + null, // No reasoning tokens + 0, // No output images + undefined, + inputImageCount, + webSearchTool ? 1 : null, // Bill for web search if it was enabled + project.organizationId, + ); + } - if (!res.ok) { - const rawErrorResponseText = await res.text(); - const errorResponseText = - usedProvider === "aws-bedrock" - ? extractAwsBedrockHttpError(res, rawErrorResponseText) - : rawErrorResponseText; - - // Determine the finish reason for error handling - const finishReason = getFinishReasonFromError( - res.status, - errorResponseText, - ); + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: null, + upstreamRequest: requestBody, + upstreamResponse: null, + plugins: requestPluginIds, + pluginResults: undefined, + }, + { + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "canceled", + promptTokens: billCancelled + ? ( + cancelledCosts?.promptTokens ?? estimatedPromptTokens + )?.toString() + : null, + completionTokens: billCancelled ? "0" : null, + totalTokens: billCancelled + ? ( + cancelledCosts?.promptTokens ?? estimatedPromptTokens + )?.toString() + : null, + reasoningTokens: null, + cachedTokens: null, + hasError: false, + streamed: true, + canceled: true, + errorDetails: null, + inputCost: cancelledCosts?.inputCost ?? null, + outputCost: cancelledCosts?.outputCost ?? null, + cachedInputCost: cancelledCosts?.cachedInputCost ?? null, + requestCost: cancelledCosts?.requestCost ?? null, + webSearchCost: cancelledCosts?.webSearchCost ?? null, + imageInputTokens: + cancelledCosts?.imageInputTokens?.toString() ?? null, + imageOutputTokens: + cancelledCosts?.imageOutputTokens?.toString() ?? null, + imageInputCost: cancelledCosts?.imageInputCost ?? null, + imageOutputCost: cancelledCosts?.imageOutputCost ?? null, + cost: cancelledCosts?.totalCost ?? null, + estimatedCost: cancelledCosts?.estimatedCost ?? false, + discount: cancelledCosts?.discount ?? null, + dataStorageCost: billCancelled + ? calculateDataStorageCost( + cancelledCosts?.promptTokens ?? 
estimatedPromptTokens, + null, + 0, + null, + retentionLevel, + ) + : "0", + cached: false, + toolResults: null, + }, + ); - if ( - finishReason !== "client_error" && - finishReason !== "content_filter" - ) { - logger.warn("Provider error", { - status: res.status, - errorText: errorResponseText, - usedProvider, - requestedProvider, - usedModel, - initialRequestedModel, - organizationId: project.organizationId, - projectId: apiKey.projectId, - apiKeyId: apiKey.id, - unifiedFinishReason: getUnifiedFinishReason( - finishReason, + // Send a cancellation event to the client + await writeSSEAndCache({ + event: "canceled", + data: JSON.stringify({ + message: "Request canceled by client", + }), + id: String(eventId++), + }); + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + clearKeepalive(); + return; + } else if (error instanceof Error) { + // Handle fetch errors (timeout, connection failures, etc.) + const errorMessage = error.message; + const fetchCause = extractErrorCause(error); + logger.warn("Fetch error", { + error: errorMessage, + cause: fetchCause, usedProvider, - ), - }); - } - - // Log the request in the database - // Extract plugin IDs for logging - const streamingErrorPluginIds = plugins?.map((p) => p.id) ?? []; - - // Check if we should retry before logging so we can mark the log as retried - const willRetryHttpError = shouldRetryRequest({ - requestedProvider, - noFallback, - statusCode: res.status, - retryCount: retryAttempt, - remainingProviders: - (routingMetadata?.providerScores.length ?? 0) - - failedProviderIds.size - - 1, - usedProvider, - }); + requestedProvider, + usedModel, + initialRequestedModel, + unifiedFinishReason: getUnifiedFinishReason( + "upstream_error", + usedProvider, + ), + }); - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - null, // No response for error case - requestBody, // The request that was sent and resulted in error - null, // No upstream response for error case - streamingErrorPluginIds, - undefined, // No plugin results for error case - ); + // Check if we should retry before logging so we can mark the log as retried + const willRetryFetch = shouldRetryRequest({ + requestedProvider, + noFallback, + statusCode: 0, + retryCount: retryAttempt, + remainingProviders: + (routingMetadata?.providerScores.length ?? 0) - + failedProviderIds.size - + 1, + usedProvider, + }); - await insertLog({ - ...baseLogEntry, - duration: Date.now() - perAttemptStartTime, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: errorResponseText.length, - content: null, - reasoningContent: null, - finishReason, - promptTokens: - finishReason === "content_filter" - ? ( - estimateTokens(usedProvider, messages, null, null, 0) - .calculatedPromptTokens ?? null - )?.toString() - : null, - completionTokens: null, - totalTokens: - finishReason === "content_filter" - ? ( - estimateTokens(usedProvider, messages, null, null, 0) - .calculatedPromptTokens ?? 
null - )?.toString() - : null, - reasoningTokens: null, - cachedTokens: null, - hasError: finishReason !== "content_filter", // content_filter is not an error - streamed: true, - canceled: false, - errorDetails: - finishReason === "content_filter" - ? null - : { - statusCode: res.status, - statusText: res.statusText, - responseText: errorResponseText, + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: null, + upstreamRequest: requestBody, + upstreamResponse: null, + plugins: requestPluginIds, + pluginResults: undefined, + }, + { + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: true, + canceled: false, + errorDetails: { + statusCode: 0, + statusText: error.name, + responseText: errorMessage, + cause: fetchCause, }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - retried: willRetryHttpError, - retriedByLogId: willRetryHttpError ? finalLogId : null, - }); + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryFetch, + retriedByLogId: willRetryFetch ? 
finalLogId : null, + }, + ); - // Report key health for environment-based tokens - // Don't report content_filter as a key error - it's intentional provider behavior - if (envVarName !== undefined && finishReason !== "content_filter") { - reportKeyError( - envVarName, - configIndex, - res.status, - errorResponseText, - ); - } + // Report key health for environment-based tokens + if (envVarName !== undefined) { + reportKeyError(envVarName, configIndex, 0); + } - if (willRetryHttpError) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: res.status, - error_type: getErrorType(res.status), - succeeded: false, - }); - failedProviderIds.add(providerRetryKey(usedProvider, usedRegion)); - continue; - } + if (willRetryFetch) { + routingAttempts.push({ + provider: usedProvider, + model: baseModelName, + ...(usedRegion && { region: usedRegion }), + status_code: 0, + error_type: getErrorType(0), + succeeded: false, + }); + failedProviderIds.add( + providerRetryKey(usedProvider, usedRegion), + ); + continue; + } - // For content_filter, return a proper completion chunk (not an error) - // This handles Azure ResponsibleAIPolicyViolation and similar content filtering errors - if (finishReason === "content_filter") { - await writeStreamingContentFilterResponse({ - billingModel: usedModel, - billingProvider: usedProvider, - responseModel: `${usedProvider}/${baseModelName}`, - metadata: { - requested_model: initialRequestedModel, - requested_provider: requestedProvider, - used_model: baseModelName, - used_provider: usedProvider, - ...(usedRegion && { used_region: usedRegion }), - underlying_used_model: usedModel, - }, - }); - } else { - // For client errors, return the original provider error response - let errorData; - if (finishReason === "client_error") { - try { - errorData = JSON.parse(errorResponseText); - } catch { - // If we can't parse the original error, fall back to our format - errorData = { + // Send error event to the client + await writeSSEAndCache({ + event: "error", + data: JSON.stringify({ error: { - message: `Error from provider: ${res.status} ${res.statusText} ${errorResponseText}`, - type: finishReason, - param: null, - code: finishReason, - responseText: errorResponseText, + message: `Failed to connect to provider: ${errorMessage}`, + type: "upstream_error", + code: "fetch_failed", }, - }; - } + }), + id: String(eventId++), + }); + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + clearKeepalive(); + return; } else { - errorData = { - error: { - message: `Error from provider: ${res.status} ${res.statusText} ${errorResponseText}`, - type: finishReason, - param: null, - code: finishReason, - responseText: errorResponseText, - }, - }; + throw error; } - - await writeSSEAndCache({ - event: "error", - data: JSON.stringify(errorData), - id: String(eventId++), - }); - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); } - clearKeepalive(); - return; - } + if (!res.ok) { + const rawErrorResponseText = await res.text(); + const errorResponseText = + usedProvider === "aws-bedrock" + ? 
extractAwsBedrockHttpError(res, rawErrorResponseText) + : rawErrorResponseText; - break; // Fetch succeeded, exit retry loop - } // End of retry for loop - - // Add the final attempt (successful or last failed) to routing - if (res && res.ok && usedProvider) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: res.status, - error_type: "none", - succeeded: true, - }); - } + // Determine the finish reason for error handling + const finishReason = getFinishReasonFromError( + res.status, + errorResponseText, + ); - // Update routingMetadata with all routing attempts for DB logging - if (routingMetadata) { - // Enrich providerScores with failure info from routing attempts - const failedMap = new Map( - routingAttempts - .filter((a) => !a.succeeded) - .map((f) => [f.provider, f]), - ); - routingMetadata = { - ...routingMetadata, - routing: routingAttempts, - providerScores: routingMetadata.providerScores.map((score) => { - const failure = failedMap.get(score.providerId); - if (failure) { - return { - ...score, - failed: true, - status_code: failure.status_code, - error_type: failure.error_type, - }; + if ( + finishReason !== "client_error" && + finishReason !== "content_filter" + ) { + logger.warn("Provider error", { + status: res.status, + errorText: errorResponseText, + usedProvider, + requestedProvider, + usedModel, + initialRequestedModel, + organizationId: project.organizationId, + projectId: apiKey.projectId, + apiKeyId: apiKey.id, + unifiedFinishReason: getUnifiedFinishReason( + finishReason, + usedProvider, + ), + }); } - return score; - }), - }; - } - - // If all retries exhausted without a successful response - if (!res || !res.ok) { - await writeSSEAndCache({ - event: "error", - data: JSON.stringify({ - error: { - message: "All provider attempts failed", - type: "upstream_error", - code: "all_providers_failed", - }, - }), - id: String(eventId++), - }); - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); - clearKeepalive(); - return; - } - // After retry loop: narrow provider variables for the rest of the streaming body - if ( - !usedProvider || - !usedToken || - !url || - !usedModelFormatted || - !usedModelMapping - ) { - throw new Error("Provider context not initialized"); - } - - if (!res.body) { - await writeSSEAndCache({ - event: "error", - data: JSON.stringify({ - error: { - message: "No response body from provider", - type: "gateway_error", - param: null, - code: "gateway_error", - }, - }), - id: String(eventId++), - }); - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); - clearKeepalive(); - return; - } + // Check if we should retry before logging so we can mark the log as retried + const willRetryHttpError = shouldRetryRequest({ + requestedProvider, + noFallback, + statusCode: res.status, + retryCount: retryAttempt, + remainingProviders: + (routingMetadata?.providerScores.length ?? 
0) - + failedProviderIds.size - + 1, + usedProvider, + }); - const reader = res.body.getReader(); - let fullContent = ""; - let fullReasoningContent = ""; - let finishReason = null; - let promptTokens = null; - let completionTokens = null; - let totalTokens = null; - let reasoningTokens = null; - let cachedTokens = null; - let streamingToolCalls = null; - let imageByteSize = 0; // Track total image data size for token estimation - let outputImageCount = 0; // Track number of output images for cost calculation - let webSearchCount = 0; // Track web search calls for cost calculation - const serverToolUseIndices = new Set(); // Track Anthropic server_tool_use block indices - let sawUpstreamDoneSentinel = false; - let sawProviderTerminalEvent = false; - let sawOpenAiResponsesDoneEvent = false; - let sawOpenAiResponsesCompletedStatus = false; - let sentDownstreamFinishReasonChunk = false; - let handledTerminalProviderEvent = false; - let buffer = ""; // Buffer for accumulating partial data across chunks (string for SSE) - let binaryBuffer = new Uint8Array(0); // Buffer for binary event streams (AWS Bedrock) - let rawUpstreamData = ""; // Raw data received from upstream provider - const isAwsBedrock = usedProvider === "aws-bedrock"; - let shouldTerminateStream = false; - - // Response healing for streaming mode - const streamingResponseHealingEnabled = plugins?.some( - (p) => p.id === "response-healing", - ); - const streamingIsJsonResponseFormat = - response_format?.type === "json_object" || - response_format?.type === "json_schema"; - const shouldBufferForHealing = - streamingIsJsonResponseFormat && - (streamingResponseHealingEnabled === true || - usedProvider === "novita" || - usedProvider === "minimax"); - - // Buffer for storing chunks when healing is enabled - // We need to buffer content, track last chunk info, and replay healed content at the end - const bufferedContentChunks: string[] = []; - let lastChunkId: string | null = null; - let lastChunkModel: string | null = null; - let lastChunkCreated: number | null = null; - const streamingPluginResults: { - responseHealing?: { - healed: boolean; - healingMethod?: string; - }; - } = {}; + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: null, + upstreamRequest: requestBody, + upstreamResponse: null, + plugins: requestPluginIds, + pluginResults: undefined, + }, + { + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: errorResponseText.length, + content: null, + reasoningContent: null, + finishReason, + promptTokens: + finishReason === "content_filter" + ? ( + estimateTokens(usedProvider, messages, null, null, 0) + .calculatedPromptTokens ?? null + )?.toString() + : null, + completionTokens: null, + totalTokens: + finishReason === "content_filter" + ? ( + estimateTokens(usedProvider, messages, null, null, 0) + .calculatedPromptTokens ?? 
null + )?.toString() + : null, + reasoningTokens: null, + cachedTokens: null, + hasError: finishReason !== "content_filter", + streamed: true, + canceled: false, + errorDetails: + finishReason === "content_filter" + ? null + : { + statusCode: res.status, + statusText: res.statusText, + responseText: errorResponseText, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryHttpError, + retriedByLogId: willRetryHttpError ? finalLogId : null, + }, + ); - try { - while (true) { - const { done, value } = await reader.read(); - if (done) { - break; - } + // Report key health for environment-based tokens + // Don't report content_filter as a key error - it's intentional provider behavior + if ( + envVarName !== undefined && + finishReason !== "content_filter" + ) { + reportKeyError( + envVarName, + configIndex, + res.status, + errorResponseText, + ); + } - // For AWS Bedrock, convert binary event stream to SSE format - let chunk: string; - if (isAwsBedrock) { - // Append binary data to buffer - const newBuffer = new Uint8Array( - binaryBuffer.length + value.length, - ); - newBuffer.set(binaryBuffer); - newBuffer.set(value, binaryBuffer.length); - binaryBuffer = newBuffer; - - // Parse and convert available events - const { sse, bytesConsumed } = - convertAwsEventStreamToSSE(binaryBuffer); - chunk = sse; - - // Remove consumed bytes from binary buffer - if (bytesConsumed > 0) { - binaryBuffer = binaryBuffer.slice(bytesConsumed); + if (willRetryHttpError) { + routingAttempts.push({ + provider: usedProvider, + model: baseModelName, + ...(usedRegion && { region: usedRegion }), + status_code: res.status, + error_type: getErrorType(res.status), + succeeded: false, + }); + failedProviderIds.add( + providerRetryKey(usedProvider, usedRegion), + ); + continue; } - } else { - // Convert the Uint8Array to a string for SSE - chunk = sharedTextDecoder.decode(value, { stream: true }); - } - - // Log error on large chunks (1MB+) - should almost never happen - if (chunk.length > 1024 * 1024) { - logger.error( - `Large chunk received: ${(chunk.length / 1024 / 1024).toFixed(2)}MB`, - ); - } - - buffer += chunk; - // Collect raw upstream data for logging only in debug mode and within size limit - if (debugMode && rawUpstreamData.length < MAX_RAW_DATA_SIZE) { - rawUpstreamData += chunk; - } - // Check buffer size to prevent memory exhaustion - if (buffer.length > MAX_BUFFER_SIZE) { - const bufferSizeMB = MAX_BUFFER_SIZE / 1024 / 1024; - logger.error( - `Buffer size exceeded ${bufferSizeMB}MB limit, aborting stream`, - ); - - // Send error to client - try { - await stream.writeSSE({ - event: "error", - data: JSON.stringify({ + // For content_filter, return a proper completion chunk (not an error) + // This handles Azure ResponsibleAIPolicyViolation and similar content filtering errors + if (finishReason === "content_filter") { + await writeStreamingContentFilterResponse({ + billingModel: usedModel, + billingProvider: usedProvider, + responseModel: `${usedProvider}/${baseModelName}`, + metadata: { + requested_model: initialRequestedModel, + requested_provider: requestedProvider, + used_model: baseModelName, + used_provider: usedProvider, + ...(usedRegion && { used_region: usedRegion }), + underlying_used_model: usedModel, + }, + }); + } else { + // For client errors, return the original provider error 
response + let errorData; + if (finishReason === "client_error") { + try { + errorData = JSON.parse(errorResponseText); + } catch { + // If we can't parse the original error, fall back to our format + errorData = { + error: { + message: `Error from provider: ${res.status} ${res.statusText} ${errorResponseText}`, + type: finishReason, + param: null, + code: finishReason, + responseText: errorResponseText, + }, + }; + } + } else { + errorData = { error: { - message: `Streaming buffer exceeded ${bufferSizeMB}MB limit`, - type: "gateway_error", + message: `Error from provider: ${res.status} ${res.statusText} ${errorResponseText}`, + type: finishReason, param: null, - code: "buffer_overflow", + code: finishReason, + responseText: errorResponseText, }, - }), + }; + } + + await writeSSEAndCache({ + event: "error", + data: JSON.stringify(errorData), id: String(eventId++), }); - await stream.writeSSE({ + await writeSSEAndCache({ event: "done", data: "[DONE]", id: String(eventId++), }); - doneSent = true; - } catch (sseError) { - logger.error( - "Failed to send buffer overflow error SSE", - sseError instanceof Error - ? sseError - : new Error(String(sseError)), - ); } - // Set error for logging - streamingError = { - message: `Streaming buffer exceeded ${bufferSizeMB}MB limit`, - type: "buffer_overflow", - code: "buffer_overflow", - details: { - bufferSize: buffer.length, - maxBufferSize: MAX_BUFFER_SIZE, - provider: usedProvider, - model: usedModel, + clearKeepalive(); + return; + } + + break; // Fetch succeeded, exit retry loop + } // End of retry for loop + + // Add the final attempt (successful or last failed) to routing + if (res && res.ok && usedProvider) { + routingAttempts.push({ + provider: usedProvider, + model: baseModelName, + ...(usedRegion && { region: usedRegion }), + status_code: res.status, + error_type: "none", + succeeded: true, + }); + } + + // Update routingMetadata with all routing attempts for DB logging + if (routingMetadata) { + // Enrich providerScores with failure info from routing attempts + const failedMap = new Map( + routingAttempts + .filter((a) => !a.succeeded) + .map((f) => [f.provider, f]), + ); + routingMetadata = { + ...routingMetadata, + routing: routingAttempts, + providerScores: routingMetadata.providerScores.map((score) => { + const failure = failedMap.get(score.providerId); + if (failure) { + return { + ...score, + failed: true, + status_code: failure.status_code, + error_type: failure.error_type, + }; + } + return score; + }), + }; + } + + // If all retries exhausted without a successful response + if (!res || !res.ok) { + await writeSSEAndCache({ + event: "error", + data: JSON.stringify({ + error: { + message: "All provider attempts failed", + type: "upstream_error", + code: "all_providers_failed", }, - }; + }), + id: String(eventId++), + }); + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + clearKeepalive(); + return; + } - break; - } + // After retry loop: narrow provider variables for the rest of the streaming body + if ( + !usedProvider || + !usedToken || + !url || + !usedModelFormatted || + !usedModelMapping + ) { + throw new Error("Provider context not initialized"); + } + + if (!res.body) { + await writeSSEAndCache({ + event: "error", + data: JSON.stringify({ + error: { + message: "No response body from provider", + type: "gateway_error", + param: null, + code: "gateway_error", + }, + }), + id: String(eventId++), + }); + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), 
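            // Illustrative note (not part of this patch): on this error path the
            // client ultimately observes two terminal SSE frames. Assuming the
            // standard EventSource-style framing produced by writeSSEAndCache,
            // the wire format is roughly:
            //
            //   event: error
            //   data: {"error":{"message":"Error from provider: ...","type":"client_error","code":"client_error","responseText":"..."}}
            //
            //   event: done
            //   data: [DONE]
            //
            // Clients should treat the "[DONE]" sentinel, not connection close,
            // as the end-of-stream signal, since the connection stays open until
            // clearKeepalive() runs below.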
+ }); + clearKeepalive(); + return; + } - // Process SSE events from buffer - let processedLength = 0; - const bufferCopy = buffer; + const reader = res.body.getReader(); + let fullContent = ""; + let fullReasoningContent = ""; + let finishReason = null; + let promptTokens = null; + let completionTokens = null; + let totalTokens = null; + let reasoningTokens = null; + let cachedTokens = null; + let streamingToolCalls = null; + let imageByteSize = 0; // Track total image data size for token estimation + let outputImageCount = 0; // Track number of output images for cost calculation + let webSearchCount = 0; // Track web search calls for cost calculation + const serverToolUseIndices = new Set(); // Track Anthropic server_tool_use block indices + let sawUpstreamDoneSentinel = false; + let sawProviderTerminalEvent = false; + let handledTerminalProviderEvent = false; + let buffer = ""; // Buffer for accumulating partial data across chunks (string for SSE) + let binaryBuffer = new Uint8Array(0); // Buffer for binary event streams (AWS Bedrock) + let rawUpstreamData = ""; // Raw data received from upstream provider + const isAwsBedrock = usedProvider === "aws-bedrock"; + let shouldTerminateStream = false; + + // Response healing for streaming mode + const streamingResponseHealingEnabled = plugins?.some( + (p) => p.id === "response-healing", + ); + const streamingIsJsonResponseFormat = + response_format?.type === "json_object" || + response_format?.type === "json_schema"; + const shouldBufferForHealing = + streamingIsJsonResponseFormat && + (streamingResponseHealingEnabled === true || + usedProvider === "novita" || + usedProvider === "minimax"); + + // Buffer for storing chunks when healing is enabled + // We need to buffer content, track last chunk info, and replay healed content at the end + const bufferedContentChunks: string[] = []; + let lastChunkId: string | null = null; + let lastChunkModel: string | null = null; + let lastChunkCreated: number | null = null; + const streamingPluginResults: { + responseHealing?: { + healed: boolean; + healingMethod?: string; + }; + } = {}; - // Look for complete SSE events, handling events at buffer start - let searchStart = 0; - while (searchStart < bufferCopy.length) { - // Find "data: " - could be at start of buffer or after newline - let dataIndex = -1; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } - if (searchStart === 0 && bufferCopy.startsWith("data: ")) { - // Event at buffer start - dataIndex = 0; - } else { - // Look for "\ndata: " pattern - const newlineDataIndex = bufferCopy.indexOf( - "\ndata: ", - searchStart, + // For AWS Bedrock, convert binary event stream to SSE format + let chunk: string; + if (isAwsBedrock) { + // Append binary data to buffer + const newBuffer = new Uint8Array( + binaryBuffer.length + value.length, ); - if (newlineDataIndex !== -1) { - dataIndex = newlineDataIndex + 1; // Skip the newline + newBuffer.set(binaryBuffer); + newBuffer.set(value, binaryBuffer.length); + binaryBuffer = newBuffer; + + // Parse and convert available events + const { sse, bytesConsumed } = + convertAwsEventStreamToSSE(binaryBuffer); + chunk = sse; + + // Remove consumed bytes from binary buffer + if (bytesConsumed > 0) { + binaryBuffer = binaryBuffer.slice(bytesConsumed); } + } else { + // Convert the Uint8Array to a string for SSE + chunk = sharedTextDecoder.decode(value, { stream: true }); + } + + // Log error on large chunks (1MB+) - should almost never happen + if (chunk.length > 1024 * 
1024) { + logger.error( + `Large chunk received: ${(chunk.length / 1024 / 1024).toFixed(2)}MB`, + ); + } + + buffer += chunk; + // Collect raw upstream data for logging only in debug mode and within size limit + if (debugMode && rawUpstreamData.length < MAX_RAW_DATA_SIZE) { + rawUpstreamData += chunk; } - if (dataIndex === -1) { + // Check buffer size to prevent memory exhaustion + if (buffer.length > MAX_BUFFER_SIZE) { + const bufferSizeMB = MAX_BUFFER_SIZE / 1024 / 1024; + logger.error( + `Buffer size exceeded ${bufferSizeMB}MB limit, aborting stream`, + ); + + // Send error to client + try { + await stream.writeSSE({ + event: "error", + data: JSON.stringify({ + error: { + message: `Streaming buffer exceeded ${bufferSizeMB}MB limit`, + type: "gateway_error", + param: null, + code: "buffer_overflow", + }, + }), + id: String(eventId++), + }); + await stream.writeSSE({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + doneSent = true; + } catch (sseError) { + logger.error( + "Failed to send buffer overflow error SSE", + sseError instanceof Error + ? sseError + : new Error(String(sseError)), + ); + } + + // Set error for logging + streamingError = { + message: `Streaming buffer exceeded ${bufferSizeMB}MB limit`, + type: "buffer_overflow", + code: "buffer_overflow", + details: { + bufferSize: buffer.length, + maxBufferSize: MAX_BUFFER_SIZE, + provider: usedProvider, + model: usedModel, + }, + }; + break; } - // Find the end of this SSE event - // Look for next event or proper event termination - let eventEnd = -1; + // Process SSE events from buffer + let processedLength = 0; + const bufferCopy = buffer; - // First, look for the next "data: " event (after a newline) - const nextEventIndex = bufferCopy.indexOf( - "\ndata: ", - dataIndex + 6, - ); - if (nextEventIndex !== -1) { - // Found next data event, but we still need to check if there are SSE fields in between - // For Anthropic, we might have: data: {...}\n\nevent: something\n\ndata: {...} - const betweenEvents = bufferCopy.slice( + // Look for complete SSE events, handling events at buffer start + let searchStart = 0; + while (searchStart < bufferCopy.length) { + // Find "data: " - could be at start of buffer or after newline + let dataIndex = -1; + + if (searchStart === 0 && bufferCopy.startsWith("data: ")) { + // Event at buffer start + dataIndex = 0; + } else { + // Look for "\ndata: " pattern + const newlineDataIndex = bufferCopy.indexOf( + "\ndata: ", + searchStart, + ); + if (newlineDataIndex !== -1) { + dataIndex = newlineDataIndex + 1; // Skip the newline + } + } + + if (dataIndex === -1) { + break; + } + + // Find the end of this SSE event + // Look for next event or proper event termination + let eventEnd = -1; + + // First, look for the next "data: " event (after a newline) + const nextEventIndex = bufferCopy.indexOf( + "\ndata: ", dataIndex + 6, - nextEventIndex, ); - const firstNewline = betweenEvents.indexOf("\n"); - - if (firstNewline !== -1) { - // Check if JSON up to first newline is valid - const jsonCandidate = betweenEvents - .slice(0, firstNewline) - .trim(); - // Quick heuristic check before expensive JSON.parse - let isValidJson = false; - if (mightBeCompleteJson(jsonCandidate)) { - try { - JSON.parse(jsonCandidate); - isValidJson = true; - } catch { - // JSON is not complete + if (nextEventIndex !== -1) { + // Found next data event, but we still need to check if there are SSE fields in between + // For Anthropic, we might have: data: {...}\n\nevent: something\n\ndata: {...} + const betweenEvents 
= bufferCopy.slice( + dataIndex + 6, + nextEventIndex, + ); + const firstNewline = betweenEvents.indexOf("\n"); + + if (firstNewline !== -1) { + // Check if JSON up to first newline is valid + const jsonCandidate = betweenEvents + .slice(0, firstNewline) + .trim(); + // Quick heuristic check before expensive JSON.parse + let isValidJson = false; + if (mightBeCompleteJson(jsonCandidate)) { + try { + JSON.parse(jsonCandidate); + isValidJson = true; + } catch { + // JSON is not complete + } + } + if (isValidJson) { + // JSON is valid - end at first newline to exclude SSE fields + eventEnd = dataIndex + 6 + firstNewline; + } else { + // JSON is not complete, use the full segment to next data event + eventEnd = nextEventIndex; } - } - if (isValidJson) { - // JSON is valid - end at first newline to exclude SSE fields - eventEnd = dataIndex + 6 + firstNewline; } else { - // JSON is not complete, use the full segment to next data event + // No newline found, use full segment eventEnd = nextEventIndex; } } else { - // No newline found, use full segment - eventEnd = nextEventIndex; - } - } else { - // No next event found - check for proper event termination - // SSE events should end with at least one newline - const eventStartPos = dataIndex + 6; // Start of event data - - // For Anthropic SSE format, we need to be more careful about event boundaries - // Try to find the end of the JSON data by looking for the closing brace - const newlinePos = bufferCopy.indexOf("\n", eventStartPos); - if (newlinePos !== -1) { - // We found a newline - check if the JSON before it is valid - const jsonCandidate = bufferCopy - .slice(eventStartPos, newlinePos) - .trim(); - // Quick heuristic check before expensive JSON.parse - let isValidJson = false; - if (mightBeCompleteJson(jsonCandidate)) { - try { - JSON.parse(jsonCandidate); - isValidJson = true; - } catch { - // JSON is not complete + // No next event found - check for proper event termination + // SSE events should end with at least one newline + const eventStartPos = dataIndex + 6; // Start of event data + + // For Anthropic SSE format, we need to be more careful about event boundaries + // Try to find the end of the JSON data by looking for the closing brace + const newlinePos = bufferCopy.indexOf("\n", eventStartPos); + if (newlinePos !== -1) { + // We found a newline - check if the JSON before it is valid + const jsonCandidate = bufferCopy + .slice(eventStartPos, newlinePos) + .trim(); + // Quick heuristic check before expensive JSON.parse + let isValidJson = false; + if (mightBeCompleteJson(jsonCandidate)) { + try { + JSON.parse(jsonCandidate); + isValidJson = true; + } catch { + // JSON is not complete + } } - } - if (isValidJson) { - // JSON is valid - this newline marks the end of our data - eventEnd = newlinePos; - } else { - // JSON is not valid, check if there's more content after the newline - if (newlinePos + 1 >= bufferCopy.length) { - // Newline is at the end of buffer - event is incomplete - break; + if (isValidJson) { + // JSON is valid - this newline marks the end of our data + eventEnd = newlinePos; } else { - // There's content after the newline - // Check if it's another SSE field (like event:, id:, retry:, etc.) or if the event continues - const restOfBuffer = bufferCopy.slice(newlinePos + 1); - - // Check for SSE field patterns (event:, id:, retry:, etc.) 
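                  // Illustrative sketch (not part of this patch): the boundary
                  // scan above calls mightBeCompleteJson before every JSON.parse
                  // attempt so the gateway avoids repeatedly parsing partial
                  // events on each read. The real helper is defined elsewhere in
                  // this repo and may differ; a minimal stand-in with the same
                  // intent (mightBeCompleteJsonSketch is a hypothetical name)
                  // could look like this:
                  const mightBeCompleteJsonSketch = (
                    candidate: string,
                  ): boolean => {
                    if (candidate.length < 2) {
                      return false;
                    }
                    const first = candidate[0];
                    const last = candidate[candidate.length - 1];
                    // A complete JSON payload must open and close with matching
                    // brackets; truncated streaming events almost always fail
                    // this cheap check, so JSON.parse is skipped for them.
                    return (
                      (first === "{" && last === "}") ||
                      (first === "[" && last === "]")
                    );
                  };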
- // Skip leading newlines efficiently without creating new strings - let trimStart = 0; - while ( - trimStart < restOfBuffer.length && - restOfBuffer[trimStart] === "\n" - ) { - trimStart++; - } + // JSON is not valid, check if there's more content after the newline + if (newlinePos + 1 >= bufferCopy.length) { + // Newline is at the end of buffer - event is incomplete + break; + } else { + // There's content after the newline + // Check if it's another SSE field (like event:, id:, retry:, etc.) or if the event continues + const restOfBuffer = bufferCopy.slice(newlinePos + 1); + + // Check for SSE field patterns (event:, id:, retry:, etc.) + // Skip leading newlines efficiently without creating new strings + let trimStart = 0; + while ( + trimStart < restOfBuffer.length && + restOfBuffer[trimStart] === "\n" + ) { + trimStart++; + } - if ( - restOfBuffer.startsWith("\n") || // Empty line - end of event - restOfBuffer.startsWith("data: ") // Next data field - ) { - // This is the end of our data event - eventEnd = newlinePos; - } else if (trimStart > 0) { - // Had leading newlines - check for SSE fields after them - const afterNewlines = restOfBuffer.substring(trimStart); if ( - afterNewlines.startsWith("event:") || - afterNewlines.startsWith("id:") || - afterNewlines.startsWith("retry:") || - SSE_FIELD_PATTERN.test(afterNewlines) + restOfBuffer.startsWith("\n") || // Empty line - end of event + restOfBuffer.startsWith("data: ") // Next data field ) { + // This is the end of our data event eventEnd = newlinePos; + } else if (trimStart > 0) { + // Had leading newlines - check for SSE fields after them + const afterNewlines = + restOfBuffer.substring(trimStart); + if ( + afterNewlines.startsWith("event:") || + afterNewlines.startsWith("id:") || + afterNewlines.startsWith("retry:") || + SSE_FIELD_PATTERN.test(afterNewlines) + ) { + eventEnd = newlinePos; + } else { + // Content continues on next line - use full buffer + eventEnd = bufferCopy.length; + } } else { - // Content continues on next line - use full buffer - eventEnd = bufferCopy.length; - } - } else { - // No leading newlines - check SSE field directly - if (SSE_FIELD_PATTERN.test(restOfBuffer)) { - eventEnd = newlinePos; - } else { - // Content continues on next line - use full buffer - eventEnd = bufferCopy.length; + // No leading newlines - check SSE field directly + if (SSE_FIELD_PATTERN.test(restOfBuffer)) { + eventEnd = newlinePos; + } else { + // Content continues on next line - use full buffer + eventEnd = bufferCopy.length; + } } } } - } - } else { - // No newline found after event data - event is incomplete - // Try to detect if we have a complete JSON object - const eventDataCandidate = bufferCopy.slice(eventStartPos); - if (eventDataCandidate.length > 0) { - // Quick heuristic check before expensive JSON.parse - const trimmedCandidate = eventDataCandidate.trim(); - if (mightBeCompleteJson(trimmedCandidate)) { - try { - JSON.parse(trimmedCandidate); - // If we can parse it, it's complete - eventEnd = bufferCopy.length; - } catch { - // JSON parsing failed - event is incomplete + } else { + // No newline found after event data - event is incomplete + // Try to detect if we have a complete JSON object + const eventDataCandidate = bufferCopy.slice(eventStartPos); + if (eventDataCandidate.length > 0) { + // Quick heuristic check before expensive JSON.parse + const trimmedCandidate = eventDataCandidate.trim(); + if (mightBeCompleteJson(trimmedCandidate)) { + try { + JSON.parse(trimmedCandidate); + // If we can parse it, it's 
complete + eventEnd = bufferCopy.length; + } catch { + // JSON parsing failed - event is incomplete + break; + } + } else { + // Heuristic says incomplete - don't bother parsing break; } } else { - // Heuristic says incomplete - don't bother parsing + // No event data yet break; } - } else { - // No event data yet - break; } } - } - const eventData = bufferCopy - .slice(dataIndex + 6, eventEnd) - .trim(); - - // Debug logging for troublesome events - // Only scan for SSE field contamination on small events to avoid - // O(n) scans on multi-MB payloads (e.g. base64 image data). - // Large events (>64KB) are almost always valid image/binary data. - if ( - eventData.length < 65536 && - (eventData.includes("event:") || eventData.includes("id:")) - ) { - logger.warn("Event data contains SSE field", { - eventData: - eventData.substring(0, 200) + - (eventData.length > 200 ? "..." : ""), - dataIndex, - eventEnd, - bufferLength: bufferCopy.length, - provider: usedProvider, - }); - } + const eventData = bufferCopy + .slice(dataIndex + 6, eventEnd) + .trim(); - if (eventData === "[DONE]") { - sawUpstreamDoneSentinel = true; - // Set default finish_reason if not provided by the stream - // Some providers (like Novita) don't send finish_reason in streaming chunks - if (finishReason === null) { - // Default to "stop" unless we have tool calls - finishReason = - streamingToolCalls && streamingToolCalls.length > 0 - ? "tool_calls" - : "stop"; + // Debug logging for troublesome events + // Only scan for SSE field contamination on small events to avoid + // O(n) scans on multi-MB payloads (e.g. base64 image data). + // Large events (>64KB) are almost always valid image/binary data. + if ( + eventData.length < 65536 && + (eventData.includes("event:") || eventData.includes("id:")) + ) { + logger.warn("Event data contains SSE field", { + eventData: + eventData.substring(0, 200) + + (eventData.length > 200 ? "..." : ""), + dataIndex, + eventEnd, + bufferLength: bufferCopy.length, + provider: usedProvider, + }); } - // Calculate final usage if we don't have complete data - let finalPromptTokens = promptTokens; - let finalCompletionTokens = completionTokens; - let finalTotalTokens = totalTokens; + if (eventData === "[DONE]") { + sawUpstreamDoneSentinel = true; + // Set default finish_reason if not provided by the stream + // Some providers (like Novita) don't send finish_reason in streaming chunks + if (finishReason === null) { + // Default to "stop" unless we have tool calls + finishReason = + streamingToolCalls && streamingToolCalls.length > 0 + ? 
"tool_calls" + : "stop"; + } - // Estimate missing tokens if needed using helper function - if (finalPromptTokens === null || finalPromptTokens === 0) { - const estimation = estimateTokens( - usedProvider, - messages, - null, - null, - null, - ); - finalPromptTokens = estimation.calculatedPromptTokens; - } + // Calculate final usage if we don't have complete data + let finalPromptTokens = promptTokens; + let finalCompletionTokens = completionTokens; + let finalTotalTokens = totalTokens; - if (finalCompletionTokens === null) { - const textTokens = estimateTokensFromContent(fullContent); - // For images, estimate ~258 tokens per image + 1 token per 750 bytes - // This is based on Google's image token calculation - let imageTokens = 0; - if (imageByteSize > 0) { - // Base tokens per image (258) + additional tokens based on size - imageTokens = 258 + Math.ceil(imageByteSize / 750); + // Estimate missing tokens if needed using helper function + if (finalPromptTokens === null || finalPromptTokens === 0) { + const estimation = estimateTokens( + usedProvider, + messages, + null, + null, + null, + ); + finalPromptTokens = estimation.calculatedPromptTokens; } - finalCompletionTokens = textTokens + imageTokens; - } - - if (finalTotalTokens === null) { - finalTotalTokens = - (finalPromptTokens ?? 0) + - (finalCompletionTokens ?? 0) + - (reasoningTokens ?? 0); - } - // Send final usage chunk before [DONE] if we have any usage data - if ( - finalPromptTokens !== null || - finalCompletionTokens !== null || - finalTotalTokens !== null - ) { - // Calculate costs for streaming response - const streamingCosts = await calculateCosts( - usedModel, - usedProvider, - finalPromptTokens, - finalCompletionTokens, - cachedTokens, - { - prompt: messages - .map((m) => messageContentToString(m.content)) - .join("\n"), - completion: fullContent, - toolResults: streamingToolCalls ?? undefined, - }, - reasoningTokens, - outputImageCount, - image_config?.image_size, - inputImageCount, - webSearchCount, - project.organizationId, - ); + if (finalCompletionTokens === null) { + const textTokens = estimateTokensFromContent(fullContent); + // For images, estimate ~258 tokens per image + 1 token per 750 bytes + // This is based on Google's image token calculation + let imageTokens = 0; + if (imageByteSize > 0) { + // Base tokens per image (258) + additional tokens based on size + imageTokens = 258 + Math.ceil(imageByteSize / 750); + } + finalCompletionTokens = textTokens + imageTokens; + } - // Include costs in response for all users - const shouldIncludeCosts = true; + if (finalTotalTokens === null) { + finalTotalTokens = + (finalPromptTokens ?? 0) + + (finalCompletionTokens ?? 0) + + (reasoningTokens ?? 0); + } - const finalUsageChunk = { - id: `chatcmpl-${Date.now()}`, - object: "chat.completion.chunk", - created: Math.floor(Date.now() / 1000), - model: usedModel, - choices: [ + // Send final usage chunk before [DONE] if we have any usage data + if ( + finalPromptTokens !== null || + finalCompletionTokens !== null || + finalTotalTokens !== null + ) { + // Calculate costs for streaming response + const streamingCosts = await calculateCosts( + usedModel, + usedProvider, + finalPromptTokens, + finalCompletionTokens, + cachedTokens, { - index: 0, - delta: {}, - finish_reason: null, + prompt: messages + .map((m) => messageContentToString(m.content)) + .join("\n"), + completion: fullContent, + toolResults: streamingToolCalls ?? undefined, }, - ], - usage: { - prompt_tokens: Math.max( - 1, - streamingCosts.promptTokens ?? 
finalPromptTokens ?? 1, - ), - completion_tokens: - streamingCosts.completionTokens ?? - finalCompletionTokens ?? - 0, - total_tokens: Math.max( - 1, - (streamingCosts.promptTokens ?? - finalPromptTokens ?? - 0) + - (streamingCosts.completionTokens ?? - finalCompletionTokens ?? + reasoningTokens, + outputImageCount, + image_config?.image_size, + inputImageCount, + webSearchCount, + project.organizationId, + ); + + // Include costs in response for all users + const shouldIncludeCosts = true; + + const finalUsageChunk = { + id: `chatcmpl-${Date.now()}`, + object: "chat.completion.chunk", + created: Math.floor(Date.now() / 1000), + model: usedModel, + choices: [ + { + index: 0, + delta: {}, + finish_reason: null, + }, + ], + usage: { + prompt_tokens: Math.max( + 1, + streamingCosts.promptTokens ?? finalPromptTokens ?? 1, + ), + completion_tokens: + streamingCosts.completionTokens ?? + finalCompletionTokens ?? + 0, + total_tokens: Math.max( + 1, + (streamingCosts.promptTokens ?? + finalPromptTokens ?? 0) + - (reasoningTokens ?? 0), - ), - ...(shouldIncludeCosts && { - cost_usd_total: streamingCosts.totalCost, - cost_usd_input: streamingCosts.inputCost, - cost_usd_output: streamingCosts.outputCost, - cost_usd_cached_input: streamingCosts.cachedInputCost, - cost_usd_request: streamingCosts.requestCost, - cost_usd_image_input: streamingCosts.imageInputCost, - cost_usd_image_output: streamingCosts.imageOutputCost, - }), - }, - }; + (streamingCosts.completionTokens ?? + finalCompletionTokens ?? + 0) + + (reasoningTokens ?? 0), + ), + ...(shouldIncludeCosts && { + cost_usd_total: streamingCosts.totalCost, + cost_usd_input: streamingCosts.inputCost, + cost_usd_output: streamingCosts.outputCost, + cost_usd_cached_input: streamingCosts.cachedInputCost, + cost_usd_request: streamingCosts.requestCost, + cost_usd_image_input: streamingCosts.imageInputCost, + cost_usd_image_output: streamingCosts.imageOutputCost, + }), + }, + }; - await writeSSEAndCache({ - data: JSON.stringify(finalUsageChunk), - id: String(eventId++), - }); - } + await writeSSEAndCache({ + data: JSON.stringify(finalUsageChunk), + id: String(eventId++), + }); + } - if (!shouldBufferForHealing) { - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); - doneSent = true; - } + if (!shouldBufferForHealing) { + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + doneSent = true; + } - processedLength = eventEnd; - } else { - // Try to parse JSON data - it might span multiple lines - let data; - try { - data = JSON.parse(eventData); - } catch (e) { - // If JSON parsing fails, this might be an incomplete event - // Since we already validated JSON completeness above, this is likely a format issue - // Create structured error for logging - streamingError = { - message: e instanceof Error ? e.message : String(e), - type: "json_parse_error", - code: "json_parse_error", - details: { - name: e instanceof Error ? e.name : "ParseError", - eventData: eventData.substring(0, 5000), + processedLength = eventEnd; + } else { + // Try to parse JSON data - it might span multiple lines + let data; + try { + data = JSON.parse(eventData); + } catch (e) { + // If JSON parsing fails, this might be an incomplete event + // Since we already validated JSON completeness above, this is likely a format issue + // Create structured error for logging + streamingError = { + message: e instanceof Error ? 
e.message : String(e), + type: "json_parse_error", + code: "json_parse_error", + details: { + name: e instanceof Error ? e.name : "ParseError", + eventData: eventData.substring(0, 5000), + provider: usedProvider, + model: usedModel, + eventLength: eventData.length, + bufferEnd: eventEnd, + bufferLength: bufferCopy.length, + timestamp: new Date().toISOString(), + }, + }; + logger.warn("Failed to parse streaming JSON", { + error: e instanceof Error ? e.message : String(e), + eventData: + eventData.substring(0, 200) + + (eventData.length > 200 ? "..." : ""), provider: usedProvider, - model: usedModel, eventLength: eventData.length, bufferEnd: eventEnd, bufferLength: bufferCopy.length, - timestamp: new Date().toISOString(), - }, - }; - logger.warn("Failed to parse streaming JSON", { - error: e instanceof Error ? e.message : String(e), - eventData: - eventData.substring(0, 200) + - (eventData.length > 200 ? "..." : ""), - provider: usedProvider, - eventLength: eventData.length, - bufferEnd: eventEnd, - bufferLength: bufferCopy.length, - }); + }); - processedLength = eventEnd; - searchStart = eventEnd; - continue; - } + processedLength = eventEnd; + searchStart = eventEnd; + continue; + } - const awsBedrockStreamError = - usedProvider === "aws-bedrock" - ? extractAwsBedrockStreamError(data) - : null; - if ( - data && - typeof data === "object" && - "response" in data && - data.response && - typeof data.response === "object" && - "status" in data.response && - data.response.status === "completed" - ) { - sawOpenAiResponsesCompletedStatus = true; - } - if ( - data && - typeof data === "object" && - "type" in data && - typeof data.type === "string" && - (data.type === "response.content_part.done" || - data.type === "response.output_item.done" || - data.type === "response.output_text.done") - ) { - sawOpenAiResponsesDoneEvent = true; - } - const openAiCompatibleStreamError = - !awsBedrockStreamError && - data && - typeof data === "object" && - "error" in data && - data.error && - typeof data.error === "object" - ? (data.error as Record) - : null; - if (openAiCompatibleStreamError) { - const errorResponseText = JSON.stringify(data); - if ( - debugMode && - streamingRawResponseData.length < MAX_RAW_DATA_SIZE - ) { - const rawProviderSseEvent = `data: ${errorResponseText}\n\n`; - streamingRawResponseData += rawProviderSseEvent.substring( - 0, - Math.max( + const awsBedrockStreamError = + usedProvider === "aws-bedrock" + ? extractAwsBedrockStreamError(data) + : null; + const openAiCompatibleStreamError = + !awsBedrockStreamError && + data && + typeof data === "object" && + "error" in data && + data.error && + typeof data.error === "object" + ? (data.error as Record) + : null; + if (openAiCompatibleStreamError) { + const errorResponseText = JSON.stringify(data); + if ( + debugMode && + streamingRawResponseData.length < MAX_RAW_DATA_SIZE + ) { + const rawProviderSseEvent = `data: ${errorResponseText}\n\n`; + streamingRawResponseData += rawProviderSseEvent.substring( 0, - MAX_RAW_DATA_SIZE - streamingRawResponseData.length, - ), + Math.max( + 0, + MAX_RAW_DATA_SIZE - streamingRawResponseData.length, + ), + ); + } + const inferredStatusCode = + typeof openAiCompatibleStreamError.status_code === + "number" + ? openAiCompatibleStreamError.status_code + : typeof openAiCompatibleStreamError.status === "number" + ? 
openAiCompatibleStreamError.status + : 400; + const errorType = getFinishReasonFromError( + inferredStatusCode, + errorResponseText, ); - } - const inferredStatusCode = - typeof openAiCompatibleStreamError.status_code === "number" - ? openAiCompatibleStreamError.status_code - : typeof openAiCompatibleStreamError.status === "number" - ? openAiCompatibleStreamError.status - : 400; - const errorType = getFinishReasonFromError( - inferredStatusCode, - errorResponseText, - ); - const errorMessage = - typeof openAiCompatibleStreamError.message === "string" - ? openAiCompatibleStreamError.message - : "Upstream provider returned a streaming error"; - const errorCode = - typeof openAiCompatibleStreamError.code === "string" - ? openAiCompatibleStreamError.code - : typeof openAiCompatibleStreamError.type === "string" - ? openAiCompatibleStreamError.type - : errorType; - - logger.info("[streaming] Provider SSE error received", { - requestId, - provider: usedProvider, - model: usedModel, - errorType, - errorCode, - inferredStatusCode, - errorMessage, - errorPayload: errorResponseText.substring(0, 5000), - }); + const errorMessage = + typeof openAiCompatibleStreamError.message === "string" + ? openAiCompatibleStreamError.message + : "Upstream provider returned a streaming error"; + const errorCode = + typeof openAiCompatibleStreamError.code === "string" + ? openAiCompatibleStreamError.code + : typeof openAiCompatibleStreamError.type === "string" + ? openAiCompatibleStreamError.type + : errorType; + + logger.info("[streaming] Provider SSE error received", { + requestId, + provider: usedProvider, + model: usedModel, + errorType, + errorCode, + inferredStatusCode, + errorMessage, + errorPayload: errorResponseText.substring(0, 5000), + }); - finishReason = errorType; + finishReason = errorType; + + if (errorType === "content_filter") { + await writeStreamingContentFilterResponse({ + billingModel: usedModel, + billingProvider: usedProvider, + responseModel: data.model ?? usedModel, + }); + handledTerminalProviderEvent = true; + } else { + streamingError = { + message: errorMessage, + type: errorType, + code: errorCode, + details: { + statusCode: inferredStatusCode, + statusText: + typeof openAiCompatibleStreamError.type === "string" + ? openAiCompatibleStreamError.type + : "stream_error", + responseText: errorResponseText, + }, + }; + + await writeSSEAndCache({ + event: "error", + data: JSON.stringify({ + error: { + message: errorMessage, + type: errorType, + code: errorCode, + param: + "param" in openAiCompatibleStreamError + ? (openAiCompatibleStreamError.param ?? null) + : null, + responseText: errorResponseText, + }, + }), + id: String(eventId++), + }); + } + + if (!doneSent) { + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + doneSent = true; + } + shouldTerminateStream = true; + processedLength = eventEnd; + searchStart = eventEnd; + break; + } + if (awsBedrockStreamError) { + const errorType = getFinishReasonFromError( + awsBedrockStreamError.statusCode, + awsBedrockStreamError.responseText, + ); - if (errorType === "content_filter") { - await writeStreamingContentFilterResponse({ - billingModel: usedModel, - billingProvider: usedProvider, - responseModel: data.model ?? 
usedModel, - }); - handledTerminalProviderEvent = true; - } else { streamingError = { - message: errorMessage, + message: awsBedrockStreamError.message, type: errorType, - code: errorCode, + code: awsBedrockStreamError.eventType, details: { - statusCode: inferredStatusCode, - statusText: - typeof openAiCompatibleStreamError.type === "string" - ? openAiCompatibleStreamError.type - : "stream_error", - responseText: errorResponseText, + statusCode: awsBedrockStreamError.statusCode, + statusText: awsBedrockStreamError.eventType, + responseText: awsBedrockStreamError.responseText, }, }; + finishReason = errorType; await writeSSEAndCache({ event: "error", data: JSON.stringify({ error: { - message: errorMessage, + message: awsBedrockStreamError.message, type: errorType, - code: errorCode, - param: - "param" in openAiCompatibleStreamError - ? (openAiCompatibleStreamError.param ?? null) - : null, - responseText: errorResponseText, + code: awsBedrockStreamError.eventType, + param: null, + responseText: awsBedrockStreamError.responseText, }, }), id: String(eventId++), }); - } - - if (!doneSent) { await writeSSEAndCache({ event: "done", data: "[DONE]", id: String(eventId++), }); doneSent = true; + shouldTerminateStream = true; + processedLength = eventEnd; + searchStart = eventEnd; + break; } - shouldTerminateStream = true; - processedLength = eventEnd; - searchStart = eventEnd; - break; - } - if (awsBedrockStreamError) { - const errorType = getFinishReasonFromError( - awsBedrockStreamError.statusCode, - awsBedrockStreamError.responseText, - ); - streamingError = { - message: awsBedrockStreamError.message, - type: errorType, - code: awsBedrockStreamError.eventType, - details: { - statusCode: awsBedrockStreamError.statusCode, - statusText: awsBedrockStreamError.eventType, - responseText: awsBedrockStreamError.responseText, - }, - }; - finishReason = errorType; - - await writeSSEAndCache({ - event: "error", - data: JSON.stringify({ - error: { - message: awsBedrockStreamError.message, - type: errorType, - code: awsBedrockStreamError.eventType, - param: null, - responseText: awsBedrockStreamError.responseText, - }, - }), - id: String(eventId++), - }); - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); - doneSent = true; - shouldTerminateStream = true; - processedLength = eventEnd; - searchStart = eventEnd; - break; - } + // Transform streaming responses to OpenAI format for all providers + const transformedData = transformStreamingToOpenai( + usedProvider, + usedModel, + data, + messages, + serverToolUseIndices, + ); - // Transform streaming responses to OpenAI format for all providers - const transformedData = transformStreamingToOpenai( - usedProvider, - usedModel, - data, - messages, - serverToolUseIndices, - ); + // Skip null events (some providers have non-data events) + if (!transformedData) { + processedLength = eventEnd; + searchStart = eventEnd; + continue; + } - // Skip null events (some providers have non-data events) - if (!transformedData) { - processedLength = eventEnd; - searchStart = eventEnd; - continue; - } + // For Anthropic, if we have partial usage data, complete it + if (usedProvider === "anthropic" && transformedData.usage) { + const usage = transformedData.usage; + if ( + usage.output_tokens !== undefined && + usage.prompt_tokens === undefined + ) { + // Estimate prompt tokens if not provided + const estimation = estimateTokens( + usedProvider, + messages, + null, + null, + null, + ); + const estimatedPromptTokens = + 
estimation.calculatedPromptTokens; + transformedData.usage = { + prompt_tokens: estimatedPromptTokens, + completion_tokens: usage.output_tokens, + total_tokens: + estimatedPromptTokens + usage.output_tokens, + }; + } + } - // For Anthropic, if we have partial usage data, complete it - if (usedProvider === "anthropic" && transformedData.usage) { - const usage = transformedData.usage; - if ( - usage.output_tokens !== undefined && - usage.prompt_tokens === undefined - ) { - // Estimate prompt tokens if not provided - const estimation = estimateTokens( + // For Google providers, add usage information when available + if (isGoogleCompatibleProvider(usedProvider)) { + const usage = extractTokenUsage( + data, usedProvider, - messages, - null, - null, - null, + fullContent, + imageByteSize, ); - const estimatedPromptTokens = - estimation.calculatedPromptTokens; - transformedData.usage = { - prompt_tokens: estimatedPromptTokens, - completion_tokens: usage.output_tokens, - total_tokens: estimatedPromptTokens + usage.output_tokens, - }; - } - } - - // For Google providers, add usage information when available - if (isGoogleCompatibleProvider(usedProvider)) { - const usage = extractTokenUsage( - data, - usedProvider, - fullContent, - imageByteSize, - ); - // If we have usage data from Google, add it to the streaming chunk - if ( - usage.promptTokens !== null || - usage.completionTokens !== null || - usage.totalTokens !== null - ) { - transformedData.usage = { - prompt_tokens: usage.promptTokens ?? 0, - completion_tokens: usage.completionTokens ?? 0, - total_tokens: usage.totalTokens ?? 0, - ...(usage.reasoningTokens !== null && { - reasoning_tokens: usage.reasoningTokens, - }), - }; + // If we have usage data from Google, add it to the streaming chunk + if ( + usage.promptTokens !== null || + usage.completionTokens !== null || + usage.totalTokens !== null + ) { + transformedData.usage = { + prompt_tokens: usage.promptTokens ?? 0, + completion_tokens: usage.completionTokens ?? 0, + total_tokens: usage.totalTokens ?? 0, + ...(usage.reasoningTokens !== null && { + reasoning_tokens: usage.reasoningTokens, + }), + }; + } } - } - // Normalize usage.prompt_tokens_details to always include cached_tokens - if (transformedData.usage) { - if (transformedData.usage.prompt_tokens_details) { - // Preserve all existing keys and only default cached_tokens - transformedData.usage.prompt_tokens_details = { - ...transformedData.usage.prompt_tokens_details, - cached_tokens: - transformedData.usage.prompt_tokens_details - .cached_tokens ?? 0, - }; - } else { - // Create prompt_tokens_details with cached_tokens set to 0 - transformedData.usage.prompt_tokens_details = { - cached_tokens: 0, - }; + // Normalize usage.prompt_tokens_details to always include cached_tokens + if (transformedData.usage) { + if (transformedData.usage.prompt_tokens_details) { + // Preserve all existing keys and only default cached_tokens + transformedData.usage.prompt_tokens_details = { + ...transformedData.usage.prompt_tokens_details, + cached_tokens: + transformedData.usage.prompt_tokens_details + .cached_tokens ?? 0, + }; + } else { + // Create prompt_tokens_details with cached_tokens set to 0 + transformedData.usage.prompt_tokens_details = { + cached_tokens: 0, + }; + } } - } - // For Anthropic streaming tool calls, enrich delta chunks with id/type/name - // from the initial content_block_start event. This ensures OpenAI SDK compatibility. 
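          // Illustrative sketch (not part of this patch): the enrichment below
          // matters because Anthropic splits a tool call across events.
          // content_block_start carries id and function.name once; later
          // content_block_delta events carry only argument fragments keyed by
          // block index. Once the gateway re-attaches id/type/name, an
          // OpenAI-style consumer can accumulate the deltas like this (the
          // types and names here are hypothetical):
          interface ToolCallDeltaSketch {
            index: number;
            id?: string;
            type?: "function";
            function?: { name?: string; arguments?: string };
          }

          function accumulateToolCallsSketch(
            acc: Map<number, { id: string; name: string; args: string }>,
            deltas: ToolCallDeltaSketch[],
          ): void {
            for (const d of deltas) {
              const entry = acc.get(d.index) ?? { id: "", name: "", args: "" };
              // id and name arrive once per tool call; the argument JSON
              // arrives as string fragments across many deltas and must be
              // concatenated before parsing.
              if (d.id) {
                entry.id = d.id;
              }
              if (d.function?.name) {
                entry.name = d.function.name;
              }
              if (d.function?.arguments) {
                entry.args += d.function.arguments;
              }
              acc.set(d.index, entry);
            }
          }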
- if (usedProvider === "anthropic") { - const toolCalls = - transformedData.choices?.[0]?.delta?.tool_calls; - if (toolCalls && toolCalls.length > 0) { - // First, extract tool calls to update our tracking - const rawToolCalls = extractToolCalls(data, usedProvider); - if (rawToolCalls && rawToolCalls.length > 0) { - streamingToolCalls ??= []; - for (const newCall of rawToolCalls) { - // For content_block_start events (have id), add to tracking - if (newCall.id) { - const contentBlockIndex: number = - typeof data.index === "number" - ? data.index - : streamingToolCalls.length; - // Store at the content block index position - streamingToolCalls[contentBlockIndex] = { - ...newCall, - _contentBlockIndex: contentBlockIndex, - }; - } - // For content_block_delta events, enrich with stored id/type/name - else if (newCall._contentBlockIndex !== undefined) { - const existingCall = - streamingToolCalls[newCall._contentBlockIndex]; - if (existingCall) { - // Enrich the transformed data with id, type, and function.name - for (const tc of toolCalls) { - if (tc.index === newCall._contentBlockIndex) { - tc.id = existingCall.id; - tc.type = "function"; - tc.function ??= {}; - tc.function.name = existingCall.function.name; + // For Anthropic streaming tool calls, enrich delta chunks with id/type/name + // from the initial content_block_start event. This ensures OpenAI SDK compatibility. + if (usedProvider === "anthropic") { + const toolCalls = + transformedData.choices?.[0]?.delta?.tool_calls; + if (toolCalls && toolCalls.length > 0) { + // First, extract tool calls to update our tracking + const rawToolCalls = extractToolCalls(data, usedProvider); + if (rawToolCalls && rawToolCalls.length > 0) { + streamingToolCalls ??= []; + for (const newCall of rawToolCalls) { + // For content_block_start events (have id), add to tracking + if (newCall.id) { + const contentBlockIndex: number = + typeof data.index === "number" + ? data.index + : streamingToolCalls.length; + // Store at the content block index position + streamingToolCalls[contentBlockIndex] = { + ...newCall, + _contentBlockIndex: contentBlockIndex, + }; + } + // For content_block_delta events, enrich with stored id/type/name + else if (newCall._contentBlockIndex !== undefined) { + const existingCall = + streamingToolCalls[newCall._contentBlockIndex]; + if (existingCall) { + // Enrich the transformed data with id, type, and function.name + for (const tc of toolCalls) { + if (tc.index === newCall._contentBlockIndex) { + tc.id = existingCall.id; + tc.type = "function"; + tc.function ??= {}; + tc.function.name = existingCall.function.name; + } } } } @@ -5251,707 +5279,738 @@ chat.openapi(completions, async (c) => { } } } - } - // When buffering for healing, strip content from chunks and buffer it - // We still send metadata (usage, finish_reason, tool_calls) but buffer text content - if (shouldBufferForHealing) { - const deltaContent = - transformedData.choices?.[0]?.delta?.content; - if (deltaContent) { - bufferedContentChunks.push(deltaContent); - // Store chunk metadata for later use when sending healed content - lastChunkId = transformedData.id ?? lastChunkId; - lastChunkModel = transformedData.model ?? lastChunkModel; - lastChunkCreated = - transformedData.created ?? 
lastChunkCreated; - } + // When buffering for healing, strip content from chunks and buffer it + // We still send metadata (usage, finish_reason, tool_calls) but buffer text content + if (shouldBufferForHealing) { + const deltaContent = + transformedData.choices?.[0]?.delta?.content; + if (deltaContent) { + bufferedContentChunks.push(deltaContent); + // Store chunk metadata for later use when sending healed content + lastChunkId = transformedData.id ?? lastChunkId; + lastChunkModel = transformedData.model ?? lastChunkModel; + lastChunkCreated = + transformedData.created ?? lastChunkCreated; + } - // Create a copy without content in delta for streaming - const chunkWithoutContent = JSON.parse( - JSON.stringify(transformedData), - ); - if (chunkWithoutContent.choices?.[0]?.delta?.content) { - delete chunkWithoutContent.choices[0].delta.content; - } + // Create a copy without content in delta for streaming + const chunkWithoutContent = JSON.parse( + JSON.stringify(transformedData), + ); + if (chunkWithoutContent.choices?.[0]?.delta?.content) { + delete chunkWithoutContent.choices[0].delta.content; + } - // Only send chunk if it has meaningful data (not just empty delta) - const hasUsage = !!chunkWithoutContent.usage; - const hasToolCalls = - !!chunkWithoutContent.choices?.[0]?.delta?.tool_calls; - const hasFinishReason = - !!chunkWithoutContent.choices?.[0]?.finish_reason; - const hasRole = - !!chunkWithoutContent.choices?.[0]?.delta?.role; + // Only send chunk if it has meaningful data (not just empty delta) + const hasUsage = !!chunkWithoutContent.usage; + const hasToolCalls = + !!chunkWithoutContent.choices?.[0]?.delta?.tool_calls; + const hasFinishReason = + !!chunkWithoutContent.choices?.[0]?.finish_reason; + const hasRole = + !!chunkWithoutContent.choices?.[0]?.delta?.role; - if (hasUsage || hasToolCalls || hasFinishReason || hasRole) { + if ( + hasUsage || + hasToolCalls || + hasFinishReason || + hasRole + ) { + await writeSSEAndCache({ + data: JSON.stringify(chunkWithoutContent), + id: String(eventId++), + }); + } + } else { await writeSSEAndCache({ - data: JSON.stringify(chunkWithoutContent), + data: JSON.stringify(transformedData), id: String(eventId++), }); } - } else { - await writeSSEAndCache({ - data: JSON.stringify(transformedData), - id: String(eventId++), - }); - } - // Extract usage data from transformedData to update tracking variables - if ( - transformedData.usage && - (usedProvider === "openai" || usedProvider === "azure") - ) { - const usage = transformedData.usage; - if ( - usage.prompt_tokens !== undefined && - usage.prompt_tokens > 0 - ) { - promptTokens = usage.prompt_tokens; - } + // Extract usage data from transformedData to update tracking variables if ( - usage.completion_tokens !== undefined && - usage.completion_tokens > 0 + transformedData.usage && + (usedProvider === "openai" || usedProvider === "azure") ) { - completionTokens = usage.completion_tokens; - } - if ( - usage.total_tokens !== undefined && - usage.total_tokens > 0 - ) { - totalTokens = usage.total_tokens; - } - if (usage.reasoning_tokens !== undefined) { - reasoningTokens = usage.reasoning_tokens; - } - } - - // Extract finishReason from transformedData to update tracking variable - if (transformedData.choices?.[0]?.finish_reason) { - finishReason = transformedData.choices[0].finish_reason; - sawProviderTerminalEvent = true; - sentDownstreamFinishReasonChunk = true; - } - - // Extract content for logging using helper function - // For providers with custom extraction logic (google-ai-studio, 
anthropic), - // use raw data. For others (like aws-bedrock), use transformed OpenAI format. - const contentChunk = extractContent( - isGoogleCompatibleProvider(usedProvider) || - usedProvider === "anthropic" - ? data - : transformedData, - usedProvider, - ); - if (contentChunk) { - fullContent += contentChunk; - - // Track time to first token if this is the first content chunk - if (!firstTokenReceived) { - timeToFirstToken = Date.now() - startTime; - firstTokenReceived = true; - } - } - - // Track image data size for Google providers (for token estimation) - if (isGoogleCompatibleProvider(usedProvider)) { - const parts = data.candidates?.[0]?.content?.parts ?? []; - for (const part of parts) { - if (part.inlineData?.data) { - // Base64 string length * 0.75 ≈ actual byte size - imageByteSize += Math.ceil( - part.inlineData.data.length * 0.75, - ); - outputImageCount++; + const usage = transformedData.usage; + if ( + usage.prompt_tokens !== undefined && + usage.prompt_tokens > 0 + ) { + promptTokens = usage.prompt_tokens; } - } - } - - // Track web search calls for cost calculation - // Check for web search results based on provider-specific data - if (usedProvider === "anthropic") { - // For Anthropic, count web_search_tool_result blocks - if ( - data.type === "content_block_start" && - data.content_block?.type === "web_search_tool_result" - ) { - webSearchCount++; - } - } else if (isGoogleCompatibleProvider(usedProvider)) { - // For Google, count when grounding metadata is present - if (data.candidates?.[0]?.groundingMetadata) { - const groundingMetadata = - data.candidates[0].groundingMetadata; if ( - groundingMetadata.webSearchQueries && - groundingMetadata.webSearchQueries.length > 0 && - webSearchCount === 0 + usage.completion_tokens !== undefined && + usage.completion_tokens > 0 ) { - // Only count once for the entire response - webSearchCount = - groundingMetadata.webSearchQueries.length; - } else if ( - groundingMetadata.groundingChunks && - webSearchCount === 0 + completionTokens = usage.completion_tokens; + } + if ( + usage.total_tokens !== undefined && + usage.total_tokens > 0 ) { - // Fallback: count once if we have grounding chunks - webSearchCount = 1; + totalTokens = usage.total_tokens; + } + if (usage.reasoning_tokens !== undefined) { + reasoningTokens = usage.reasoning_tokens; } } - } else if (usedProvider === "openai") { - // For OpenAI Responses API, count web_search_call.completed events - if (data.type === "response.web_search_call.completed") { - webSearchCount++; - } - } - - // Extract reasoning content for logging using helper function - // For providers with custom extraction logic (google-ai-studio, anthropic), - // use raw data. For others, use transformed OpenAI format. - const reasoningContentChunk = extractReasoning( - isGoogleCompatibleProvider(usedProvider) || - usedProvider === "anthropic" - ? 
data - : transformedData, - usedProvider, - ); - if (reasoningContentChunk) { - fullReasoningContent += reasoningContentChunk; - // Track time to first reasoning token if this is the first reasoning chunk - if (!firstReasoningTokenReceived) { - timeToFirstReasoningToken = Date.now() - startTime; - firstReasoningTokenReceived = true; + // Extract finishReason from transformedData to update tracking variable + if (transformedData.choices?.[0]?.finish_reason) { + finishReason = transformedData.choices[0].finish_reason; } - } - - const toolCallsChunk = extractToolCalls( - data, - usedProvider, - transformedData, - ); - if (toolCallsChunk && toolCallsChunk.length > 0) { - streamingToolCalls ??= []; - // Merge tool calls (accumulating function arguments) - for (const newCall of toolCallsChunk) { - let existingCall = null; - // For Anthropic content_block_delta events, match by content block index - if ( - usedProvider === "anthropic" && - newCall._contentBlockIndex !== undefined - ) { - existingCall = - streamingToolCalls[newCall._contentBlockIndex]; - } else { - // For other providers and Anthropic content_block_start, match by ID - // Note: Array may have sparse entries due to index-based assignment, so check for null/undefined - existingCall = streamingToolCalls.find( - (call) => call && call.id === newCall.id, - ); + // Extract content for logging using helper function + // For providers with custom extraction logic (google-ai-studio, anthropic), + // use raw data. For others (like aws-bedrock), use transformed OpenAI format. + const contentChunk = extractContent( + isGoogleCompatibleProvider(usedProvider) || + usedProvider === "anthropic" + ? data + : transformedData, + usedProvider, + ); + if (contentChunk) { + fullContent += contentChunk; + + // Track time to first token if this is the first content chunk + if (!firstTokenReceived) { + timeToFirstToken = Date.now() - startTime; + firstTokenReceived = true; } + } - if (existingCall) { - // Accumulate function arguments - if (newCall.function?.arguments) { - existingCall.function.arguments = - (existingCall.function.arguments ?? "") + - newCall.function.arguments; + // Track image data size for Google providers (for token estimation) + if (isGoogleCompatibleProvider(usedProvider)) { + const parts = data.candidates?.[0]?.content?.parts ?? 
[]; + for (const part of parts) { + if (part.inlineData?.data) { + // Base64 string length * 0.75 ≈ actual byte size + imageByteSize += Math.ceil( + part.inlineData.data.length * 0.75, + ); + outputImageCount++; } - } else { - // Clean up temporary fields and add new tool call - const cleanCall = { ...newCall }; - delete cleanCall._contentBlockIndex; - streamingToolCalls.push(cleanCall); } } - } - // Handle provider-specific finish reason extraction - switch (usedProvider) { - case "google-ai-studio": - case "google-vertex": - case "quartz": - case "obsidian": - // Preserve original Google finish reason for logging - if (data.promptFeedback?.blockReason) { - finishReason = data.promptFeedback.blockReason; - sawProviderTerminalEvent = true; - } else if (data.candidates?.[0]?.finishReason) { - finishReason = data.candidates[0].finishReason; - sawProviderTerminalEvent = true; - } - break; - case "anthropic": + // Track web search calls for cost calculation + // Check for web search results based on provider-specific data + if (usedProvider === "anthropic") { + // For Anthropic, count web_search_tool_result blocks if ( - data.type === "message_delta" && - data.delta?.stop_reason + data.type === "content_block_start" && + data.content_block?.type === "web_search_tool_result" ) { - finishReason = data.delta.stop_reason; - sawProviderTerminalEvent = true; - } else if ( - data.type === "message_stop" || - data.stop_reason - ) { - finishReason = data.stop_reason ?? "end_turn"; - sawProviderTerminalEvent = true; - } else if (data.delta?.stop_reason) { - finishReason = data.delta.stop_reason; - sawProviderTerminalEvent = true; + webSearchCount++; } - break; - default: // OpenAI format - if (data.choices && data.choices[0]?.finish_reason) { - finishReason = data.choices[0].finish_reason; + } else if (isGoogleCompatibleProvider(usedProvider)) { + // For Google, count when grounding metadata is present + if (data.candidates?.[0]?.groundingMetadata) { + const groundingMetadata = + data.candidates[0].groundingMetadata; + if ( + groundingMetadata.webSearchQueries && + groundingMetadata.webSearchQueries.length > 0 && + webSearchCount === 0 + ) { + // Only count once for the entire response + webSearchCount = + groundingMetadata.webSearchQueries.length; + } else if ( + groundingMetadata.groundingChunks && + webSearchCount === 0 + ) { + // Fallback: count once if we have grounding chunks + webSearchCount = 1; + } } - break; - } + } else if (usedProvider === "openai") { + // For OpenAI Responses API, count web_search_call.completed events + if (data.type === "response.web_search_call.completed") { + webSearchCount++; + } + } - // Extract token usage using helper function - const usage = extractTokenUsage( - data, - usedProvider, - fullContent, - imageByteSize, - ); - if (usage.promptTokens !== null) { - promptTokens = usage.promptTokens; - } - if (usage.completionTokens !== null) { - completionTokens = usage.completionTokens; - } - if (usage.totalTokens !== null) { - totalTokens = usage.totalTokens; - } - if (usage.reasoningTokens !== null) { - reasoningTokens = usage.reasoningTokens; - } - if (usage.cachedTokens !== null) { - cachedTokens = usage.cachedTokens; - } + // Extract reasoning content for logging using helper function + // For providers with custom extraction logic (google-ai-studio, anthropic), + // use raw data. For others, use transformed OpenAI format. + const reasoningContentChunk = extractReasoning( + isGoogleCompatibleProvider(usedProvider) || + usedProvider === "anthropic" + ? 
data + : transformedData, + usedProvider, + ); + if (reasoningContentChunk) { + fullReasoningContent += reasoningContentChunk; - // Estimate tokens if not provided and we have a finish reason - if (finishReason && (!promptTokens || !completionTokens)) { - if (!promptTokens) { - const estimation = estimateTokens( - usedProvider, - messages, - null, - null, - null, - ); - promptTokens = estimation.calculatedPromptTokens; + // Track time to first reasoning token if this is the first reasoning chunk + if (!firstReasoningTokenReceived) { + timeToFirstReasoningToken = Date.now() - startTime; + firstReasoningTokenReceived = true; + } } - if (!completionTokens) { - const textTokens = estimateTokensFromContent(fullContent); - // For images, estimate ~258 tokens per image + 1 token per 750 bytes - let imageTokens = 0; - if (imageByteSize > 0) { - imageTokens = 258 + Math.ceil(imageByteSize / 750); + const toolCallsChunk = extractToolCalls( + data, + usedProvider, + transformedData, + ); + if (toolCallsChunk && toolCallsChunk.length > 0) { + streamingToolCalls ??= []; + // Merge tool calls (accumulating function arguments) + for (const newCall of toolCallsChunk) { + let existingCall = null; + + // For Anthropic content_block_delta events, match by content block index + if ( + usedProvider === "anthropic" && + newCall._contentBlockIndex !== undefined + ) { + existingCall = + streamingToolCalls[newCall._contentBlockIndex]; + } else { + // For other providers and Anthropic content_block_start, match by ID + // Note: Array may have sparse entries due to index-based assignment, so check for null/undefined + existingCall = streamingToolCalls.find( + (call) => call && call.id === newCall.id, + ); + } + + if (existingCall) { + // Accumulate function arguments + if (newCall.function?.arguments) { + existingCall.function.arguments = + (existingCall.function.arguments ?? "") + + newCall.function.arguments; + } + } else { + // Clean up temporary fields and add new tool call + const cleanCall = { ...newCall }; + delete cleanCall._contentBlockIndex; + streamingToolCalls.push(cleanCall); + } } - completionTokens = textTokens + imageTokens; } - totalTokens = (promptTokens ?? 0) + (completionTokens ?? 0); - } + // Handle provider-specific finish reason extraction + switch (usedProvider) { + case "google-ai-studio": + case "google-vertex": + case "quartz": + case "obsidian": + // Preserve original Google finish reason for logging + if (data.promptFeedback?.blockReason) { + finishReason = data.promptFeedback.blockReason; + sawProviderTerminalEvent = true; + } else if (data.candidates?.[0]?.finishReason) { + finishReason = data.candidates[0].finishReason; + sawProviderTerminalEvent = true; + } + break; + case "anthropic": + if ( + data.type === "message_delta" && + data.delta?.stop_reason + ) { + finishReason = data.delta.stop_reason; + sawProviderTerminalEvent = true; + } else if ( + data.type === "message_stop" || + data.stop_reason + ) { + finishReason = data.stop_reason ?? 
"end_turn"; + sawProviderTerminalEvent = true; + } else if (data.delta?.stop_reason) { + finishReason = data.delta.stop_reason; + sawProviderTerminalEvent = true; + } + break; + default: // OpenAI format + if (data.choices && data.choices[0]?.finish_reason) { + finishReason = data.choices[0].finish_reason; + } + break; + } - processedLength = eventEnd; - } + // Extract token usage using helper function + const usage = extractTokenUsage( + data, + usedProvider, + fullContent, + imageByteSize, + ); + if (usage.promptTokens !== null) { + promptTokens = usage.promptTokens; + } + if (usage.completionTokens !== null) { + completionTokens = usage.completionTokens; + } + if (usage.totalTokens !== null) { + totalTokens = usage.totalTokens; + } + if (usage.reasoningTokens !== null) { + reasoningTokens = usage.reasoningTokens; + } + if (usage.cachedTokens !== null) { + cachedTokens = usage.cachedTokens; + } - searchStart = eventEnd; - } + // Estimate tokens if not provided and we have a finish reason + if (finishReason && (!promptTokens || !completionTokens)) { + if (!promptTokens) { + const estimation = estimateTokens( + usedProvider, + messages, + null, + null, + null, + ); + promptTokens = estimation.calculatedPromptTokens; + } - // Remove processed data from buffer - if (processedLength > 0) { - buffer = bufferCopy.slice(processedLength); - } + if (!completionTokens) { + const textTokens = estimateTokensFromContent(fullContent); + // For images, estimate ~258 tokens per image + 1 token per 750 bytes + let imageTokens = 0; + if (imageByteSize > 0) { + imageTokens = 258 + Math.ceil(imageByteSize / 750); + } + completionTokens = textTokens + imageTokens; + } - if (shouldTerminateStream) { - break; - } - } - } catch (error) { - if (error instanceof Error && error.name === "AbortError") { - canceled = true; - } else if (isTimeoutError(error)) { - const errorMessage = - error instanceof Error ? error.message : "Stream reading timeout"; - logger.warn("Stream reading timeout", { - error: errorMessage, - usedProvider, - requestedProvider, - usedModel, - initialRequestedModel, - unifiedFinishReason: getUnifiedFinishReason( - "upstream_error", - usedProvider, - ), - }); + totalTokens = (promptTokens ?? 0) + (completionTokens ?? 0); + } - try { - await stream.writeSSE({ - event: "error", - data: JSON.stringify({ - error: { - message: `Upstream provider timeout: ${errorMessage}`, - type: "upstream_timeout", - param: null, - code: "timeout", - }, - }), - id: String(eventId++), - }); - await stream.writeSSE({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); - doneSent = true; - } catch (sseError) { - logger.error( - "Failed to send timeout error SSE", - sseError instanceof Error - ? sseError - : new Error(String(sseError)), - ); - } + processedLength = eventEnd; + } - streamingError = { - message: errorMessage, - type: "upstream_timeout", - code: "timeout", - details: { - name: "TimeoutError", - timestamp: new Date().toISOString(), - provider: usedProvider, - model: usedModel, - }, - }; - } else { - const normalizedStreamingError = normalizeStreamingError({ - error, - provider: usedProvider, - model: usedModel, - bufferSnapshot: buffer ? buffer.substring(0, 5000) : undefined, - phase: "upstream_read", - }); + searchStart = eventEnd; + } - logger.error( - "Error reading upstream stream", - error instanceof Error ? 
error : new Error(String(error)), - { - requestId, + // Remove processed data from buffer + if (processedLength > 0) { + buffer = bufferCopy.slice(processedLength); + } + + if (shouldTerminateStream) { + break; + } + } + } catch (error) { + if (error instanceof Error && error.name === "AbortError") { + canceled = true; + } else if (isTimeoutError(error)) { + const errorMessage = + error instanceof Error + ? error.message + : "Stream reading timeout"; + logger.warn("Stream reading timeout", { + error: errorMessage, usedProvider, requestedProvider, usedModel, initialRequestedModel, - upstreamStatus: res?.status ?? null, - upstreamStatusText: res?.statusText ?? null, - upstreamHeaders: res - ? { - contentType: res.headers.get("content-type"), - contentLength: res.headers.get("content-length"), - transferEncoding: res.headers.get("transfer-encoding"), - requestId: - res.headers.get("x-request-id") ?? - res.headers.get("request-id") ?? - res.headers.get("openai-request-id"), - } - : null, - streamingDiagnostics: normalizedStreamingError.log.details, - timeToFirstToken, - timeToFirstReasoningToken, - firstTokenReceived, - firstReasoningTokenReceived, unifiedFinishReason: getUnifiedFinishReason( - normalizedStreamingError.client.type === "gateway_error" - ? "gateway_error" - : "upstream_error", + "upstream_error", usedProvider, ), - }, - ); - - // Forward the error to the client with the buffered content that caused the error - try { - await stream.writeSSE({ - event: "error", - data: JSON.stringify({ - error: normalizedStreamingError.client, - }), - id: String(eventId++), }); - await stream.writeSSE({ - event: "done", - data: "[DONE]", - id: String(eventId++), + + try { + await stream.writeSSE({ + event: "error", + data: JSON.stringify({ + error: { + message: `Upstream provider timeout: ${errorMessage}`, + type: "upstream_timeout", + param: null, + code: "timeout", + }, + }), + id: String(eventId++), + }); + await stream.writeSSE({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + doneSent = true; + } catch (sseError) { + logger.error( + "Failed to send timeout error SSE", + sseError instanceof Error + ? sseError + : new Error(String(sseError)), + ); + } + + streamingError = { + message: errorMessage, + type: "upstream_timeout", + code: "timeout", + details: { + name: "TimeoutError", + timestamp: new Date().toISOString(), + provider: usedProvider, + model: usedModel, + }, + }; + } else { + const normalizedStreamingError = normalizeStreamingError({ + error, + provider: usedProvider, + model: usedModel, + bufferSnapshot: buffer ? buffer.substring(0, 5000) : undefined, + phase: "upstream_read", }); - doneSent = true; - } catch (sseError) { + logger.error( - "Failed to send error SSE", - sseError instanceof Error - ? sseError - : new Error(String(sseError)), + "Error reading upstream stream", + error instanceof Error ? error : new Error(String(error)), + { + requestId, + usedProvider, + requestedProvider, + usedModel, + initialRequestedModel, + upstreamStatus: res?.status ?? null, + upstreamStatusText: res?.statusText ?? null, + upstreamHeaders: res + ? { + contentType: res.headers.get("content-type"), + contentLength: res.headers.get("content-length"), + transferEncoding: res.headers.get("transfer-encoding"), + requestId: + res.headers.get("x-request-id") ?? + res.headers.get("request-id") ?? 
+ res.headers.get("openai-request-id"), + } + : null, + streamingDiagnostics: normalizedStreamingError.log.details, + timeToFirstToken, + timeToFirstReasoningToken, + firstTokenReceived, + firstReasoningTokenReceived, + unifiedFinishReason: getUnifiedFinishReason( + normalizedStreamingError.client.type === "gateway_error" + ? "gateway_error" + : "upstream_error", + usedProvider, + ), + }, ); - } - streamingError = normalizedStreamingError.log; - } - } finally { - // Clean up the reader to prevent file descriptor leaks - try { - await reader.cancel(); - } catch { - // Ignore errors from cancel - the stream may already be aborted due to timeout - } - // Clean up the event listeners - c.req.raw.signal.removeEventListener("abort", onAbort); + // Forward the error to the client with the buffered content that caused the error + try { + await stream.writeSSE({ + event: "error", + data: JSON.stringify({ + error: normalizedStreamingError.client, + }), + id: String(eventId++), + }); + await stream.writeSSE({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + doneSent = true; + } catch (sseError) { + logger.error( + "Failed to send error SSE", + sseError instanceof Error + ? sseError + : new Error(String(sseError)), + ); + } - // Log the streaming request - const duration = Date.now() - startTime; + streamingError = normalizedStreamingError.log; + } + } finally { + // Clean up the reader to prevent file descriptor leaks + try { + await reader.cancel(); + } catch { + // Ignore errors from cancel - the stream may already be aborted due to timeout + } + // Clean up the event listeners + c.req.raw.signal.removeEventListener("abort", onAbort); - // Calculate estimated tokens if not provided - let calculatedPromptTokens = promptTokens; - let calculatedCompletionTokens = completionTokens; - let calculatedTotalTokens = totalTokens; + // Log the streaming request + const duration = Date.now() - startTime; - // Estimate tokens for providers that don't provide them during streaming - if (!promptTokens || !completionTokens) { - if (!promptTokens && messages && messages.length > 0) { - calculatedPromptTokens = encodeChatMessages(messages); - } + // Calculate estimated tokens if not provided + let calculatedPromptTokens = promptTokens; + let calculatedCompletionTokens = completionTokens; + let calculatedTotalTokens = totalTokens; - if (!completionTokens && (fullContent || imageByteSize > 0)) { - // For images, estimate ~258 tokens per image + 1 token per 750 bytes - let imageTokens = 0; - if (imageByteSize > 0) { - imageTokens = 258 + Math.ceil(imageByteSize / 750); + // Estimate tokens for providers that don't provide them during streaming + if (!promptTokens || !completionTokens) { + if (!promptTokens && messages && messages.length > 0) { + calculatedPromptTokens = encodeChatMessages(messages); } - // Skip expensive token encoding for image responses - use simple estimation - // Token encoding on large base64 content causes CPU spikes - if (imageByteSize > 0) { - const textTokens = estimateTokensFromContent(fullContent); - calculatedCompletionTokens = textTokens + imageTokens; - } else { - try { - const textTokens = fullContent - ? encode(JSON.stringify(fullContent)).length - : 0; - calculatedCompletionTokens = textTokens + imageTokens; - } catch (error) { - // Fallback to simple estimation if encoding fails - logger.error( - "Failed to encode completion text in streaming", - error instanceof Error ? 
error : new Error(String(error)), - ); + if (!completionTokens && (fullContent || imageByteSize > 0)) { + // For images, estimate ~258 tokens per image + 1 token per 750 bytes + let imageTokens = 0; + if (imageByteSize > 0) { + imageTokens = 258 + Math.ceil(imageByteSize / 750); + } + + // Skip expensive token encoding for image responses - use simple estimation + // Token encoding on large base64 content causes CPU spikes + if (imageByteSize > 0) { const textTokens = estimateTokensFromContent(fullContent); calculatedCompletionTokens = textTokens + imageTokens; + } else { + try { + const textTokens = fullContent + ? encode(JSON.stringify(fullContent)).length + : 0; + calculatedCompletionTokens = textTokens + imageTokens; + } catch (error) { + // Fallback to simple estimation if encoding fails + logger.error( + "Failed to encode completion text in streaming", + error instanceof Error ? error : new Error(String(error)), + ); + const textTokens = estimateTokensFromContent(fullContent); + calculatedCompletionTokens = textTokens + imageTokens; + } } } + + calculatedTotalTokens = + (calculatedPromptTokens ?? 0) + + (calculatedCompletionTokens ?? 0); + } + + // Estimate reasoning tokens if not provided but reasoning content exists + let calculatedReasoningTokens = reasoningTokens; + if (!reasoningTokens && fullReasoningContent) { + try { + calculatedReasoningTokens = encode(fullReasoningContent).length; + } catch (error) { + // Fallback to simple estimation if encoding fails + logger.error( + "Failed to encode reasoning text in streaming", + error instanceof Error ? error : new Error(String(error)), + ); + calculatedReasoningTokens = + estimateTokensFromContent(fullReasoningContent); + } + } + + const streamHasVerifiedTerminalEvent = + sawUpstreamDoneSentinel || + sawProviderTerminalEvent || + handledTerminalProviderEvent; + const streamEndedWithoutTerminalEvent = + !streamingError && + !canceled && + (!streamHasVerifiedTerminalEvent || finishReason === null); + if (streamEndedWithoutTerminalEvent) { + const hasBufferedNonWhitespace = /\S/u.test(buffer); + const responseText = hasBufferedNonWhitespace + ? buffer.slice(0, 5000) + : "Stream ended before a terminal finish reason or [DONE] event"; + const errorMessage = + "Upstream stream terminated unexpectedly before completion"; + + logger.warn("[streaming] Stream ended without terminal event", { + provider: usedProvider, + model: usedModel, + bufferLength: buffer.length, + fullContentLength: fullContent.length, + hasToolCalls: + !!streamingToolCalls && streamingToolCalls.length > 0, + unifiedFinishReason: getUnifiedFinishReason( + "upstream_error", + usedProvider, + ), + }); + + streamingError = { + message: errorMessage, + type: "upstream_error", + code: "stream_truncated", + details: { + statusCode: 502, + statusText: "Upstream Stream Terminated", + responseText, + timestamp: new Date().toISOString(), + provider: usedProvider, + model: usedModel, + bufferLength: buffer.length, + }, + }; + finishReason = "upstream_error"; + + try { + await writeSSEAndCache({ + event: "error", + data: JSON.stringify({ + error: { + message: errorMessage, + type: "upstream_error", + code: "stream_truncated", + param: null, + responseText, + }, + }), + id: String(eventId++), + }); + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + doneSent = true; + } catch (sseError) { + logger.error( + "Failed to send truncated stream error SSE", + sseError instanceof Error + ? 
sseError + : new Error(String(sseError)), + ); + } } - calculatedTotalTokens = - (calculatedPromptTokens ?? 0) + (calculatedCompletionTokens ?? 0); - } - - // Estimate reasoning tokens if not provided but reasoning content exists - let calculatedReasoningTokens = reasoningTokens; - if (!reasoningTokens && fullReasoningContent) { - try { - calculatedReasoningTokens = encode(fullReasoningContent).length; - } catch (error) { - // Fallback to simple estimation if encoding fails - logger.error( - "Failed to encode reasoning text in streaming", - error instanceof Error ? error : new Error(String(error)), - ); - calculatedReasoningTokens = - estimateTokensFromContent(fullReasoningContent); - } - } - - if ( - !streamingError && - !canceled && - finishReason === null && - sawOpenAiResponsesDoneEvent && - sawOpenAiResponsesCompletedStatus - ) { - sawProviderTerminalEvent = true; - finishReason = - streamingToolCalls && streamingToolCalls.length > 0 - ? "tool_calls" - : "stop"; - } - - const streamHasVerifiedTerminalEvent = - sawUpstreamDoneSentinel || - sawProviderTerminalEvent || - handledTerminalProviderEvent; - const streamEndedWithoutTerminalEvent = - !streamingError && - !canceled && - (!streamHasVerifiedTerminalEvent || finishReason === null); - if (streamEndedWithoutTerminalEvent) { - const hasBufferedNonWhitespace = /\S/u.test(buffer); - const responseText = hasBufferedNonWhitespace - ? buffer.slice(0, 5000) - : "Stream ended before a terminal finish reason or [DONE] event"; - const errorMessage = - "Upstream stream terminated unexpectedly before completion"; - - logger.warn("[streaming] Stream ended without terminal event", { - provider: usedProvider, - model: usedModel, - bufferLength: buffer.length, - fullContentLength: fullContent.length, - hasToolCalls: - !!streamingToolCalls && streamingToolCalls.length > 0, - unifiedFinishReason: getUnifiedFinishReason( - "upstream_error", - usedProvider, - ), - }); - - streamingError = { - message: errorMessage, - type: "upstream_error", - code: "stream_truncated", - details: { - statusCode: 502, - statusText: "Upstream Stream Terminated", - responseText, - timestamp: new Date().toISOString(), + // Check if the response finished successfully but has no content, tokens, or tool calls + // This indicates an empty response which should be marked as an error + // Do this check BEFORE sending usage chunks to ensure proper event ordering + // Exclude content_filter responses as they are intentionally empty (blocked by provider) + // For Google, check for original finish reasons that indicate content filtering + // These include both finishReason values and promptFeedback.blockReason values + const isGoogleContentFilterStreaming = + isGoogleCompatibleProvider(usedProvider) && + (finishReason === "SAFETY" || + finishReason === "PROHIBITED_CONTENT" || + finishReason === "RECITATION" || + finishReason === "BLOCKLIST" || + finishReason === "SPII" || + finishReason === "OTHER"); + const hasEmptyResponse = + !streamingError && + finishReason && + finishReason !== "content_filter" && + finishReason !== "incomplete" && + !isGoogleContentFilterStreaming && + (!calculatedCompletionTokens || + calculatedCompletionTokens === 0) && + (!calculatedReasoningTokens || calculatedReasoningTokens === 0) && + (!fullContent || fullContent.trim() === "") && + (!streamingToolCalls || streamingToolCalls.length === 0); + + let streamingCostsEarly: + | Awaited> + | undefined; + + if (hasEmptyResponse) { + logger.warn("[streaming] Empty response detected", { provider: usedProvider, 
model: usedModel, - bufferLength: buffer.length, - }, - }; - finishReason = "upstream_error"; - - try { - await writeSSEAndCache({ - event: "error", - data: JSON.stringify({ - error: { - message: errorMessage, - type: "upstream_error", - code: "stream_truncated", - param: null, - responseText, - }, - }), - id: String(eventId++), - }); - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), + finishReason, + calculatedCompletionTokens, + calculatedReasoningTokens, + fullContentLength: fullContent?.length ?? 0, + fullContentTrimmed: fullContent?.trim()?.length ?? 0, + streamingToolCallsCount: streamingToolCalls?.length ?? 0, + promptTokens, + completionTokens, + totalTokens, + reasoningTokens, + unifiedFinishReason: getUnifiedFinishReason( + "upstream_error", + usedProvider, + ), }); - doneSent = true; - } catch (sseError) { - logger.error( - "Failed to send truncated stream error SSE", - sseError instanceof Error - ? sseError - : new Error(String(sseError)), - ); - } - } + const errorMessage = + "Response finished successfully but returned no content or tool calls"; + streamingError = errorMessage; + finishReason = "upstream_error"; - // Check if the response finished successfully but has no content, tokens, or tool calls - // This indicates an empty response which should be marked as an error - // Do this check BEFORE sending usage chunks to ensure proper event ordering - // Exclude content_filter responses as they are intentionally empty (blocked by provider) - // For Google, check for original finish reasons that indicate content filtering - // These include both finishReason values and promptFeedback.blockReason values - const isGoogleContentFilterStreaming = - isGoogleCompatibleProvider(usedProvider) && - (finishReason === "SAFETY" || - finishReason === "PROHIBITED_CONTENT" || - finishReason === "RECITATION" || - finishReason === "BLOCKLIST" || - finishReason === "SPII" || - finishReason === "OTHER"); - const hasEmptyResponse = - !streamingError && - finishReason && - finishReason !== "content_filter" && - finishReason !== "incomplete" && - !isGoogleContentFilterStreaming && - (!calculatedCompletionTokens || calculatedCompletionTokens === 0) && - (!calculatedReasoningTokens || calculatedReasoningTokens === 0) && - (!fullContent || fullContent.trim() === "") && - (!streamingToolCalls || streamingToolCalls.length === 0); - - let streamingCostsEarly: - | Awaited> - | undefined; - - if (hasEmptyResponse) { - logger.warn("[streaming] Empty response detected", { - provider: usedProvider, - model: usedModel, - finishReason, - calculatedCompletionTokens, - calculatedReasoningTokens, - fullContentLength: fullContent?.length ?? 0, - fullContentTrimmed: fullContent?.trim()?.length ?? 0, - streamingToolCallsCount: streamingToolCalls?.length ?? 
0, - promptTokens, - completionTokens, - totalTokens, - reasoningTokens, - unifiedFinishReason: getUnifiedFinishReason( - "upstream_error", - usedProvider, - ), - }); - const errorMessage = - "Response finished successfully but returned no content or tool calls"; - streamingError = errorMessage; - finishReason = "upstream_error"; + // Send error event to client using writeSSEAndCache to cache the error + try { + await writeSSEAndCache({ + event: "error", + data: JSON.stringify({ + error: { + message: errorMessage, + type: "upstream_error", + code: "upstream_error", + param: null, + responseText: errorMessage, + }, + }), + id: String(eventId++), + }); + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + doneSent = true; + } catch (sseError) { + logger.error( + "Failed to send upstream error SSE", + sseError instanceof Error + ? sseError + : new Error(String(sseError)), + ); + } + } else if (!streamingError && !doneSent) { + // Calculate costs before sending usage chunk so we can include cost data + const billCancelledRequestsEarly = shouldBillCancelledRequests(); + streamingCostsEarly = + canceled && !billCancelledRequestsEarly + ? { + inputCost: null, + outputCost: null, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + totalCost: null, + promptTokens: null, + completionTokens: null, + cachedTokens: null, + estimatedCost: false, + discount: undefined, + pricingTier: undefined, + } + : await calculateCosts( + usedModel, + usedProvider, + calculatedPromptTokens, + calculatedCompletionTokens, + cachedTokens, + { + prompt: messages + .map((m) => messageContentToString(m.content)) + .join("\n"), + completion: fullContent, + toolResults: streamingToolCalls ?? undefined, + }, + reasoningTokens, + outputImageCount, + image_config?.image_size, + inputImageCount, + webSearchCount, + project.organizationId, + ); - // Send error event to client using writeSSEAndCache to cache the error - try { - await writeSSEAndCache({ - event: "error", - data: JSON.stringify({ - error: { - message: errorMessage, - type: "upstream_error", - code: "upstream_error", - param: null, - responseText: errorMessage, - }, - }), - id: String(eventId++), - }); - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); - doneSent = true; - } catch (sseError) { - logger.error( - "Failed to send upstream error SSE", - sseError instanceof Error - ? sseError - : new Error(String(sseError)), - ); - } - } else if (!streamingError && !doneSent) { - if ( - finishReason && - !sentDownstreamFinishReasonChunk && - !shouldBufferForHealing - ) { + // Always send final usage chunk with cost data for SDK compatibility try { - const finishChunk = { + const finalUsageChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), @@ -5960,28 +6019,204 @@ chat.openapi(completions, async (c) => { { index: 0, delta: {}, - finish_reason: finishReason, + finish_reason: null, }, ], + usage: (() => { + // Only add image input tokens for providers that + // exclude them from upstream usage (Google) + const providerExcludesImageInput = + isGoogleCompatibleProvider(usedProvider); + const imageInputAdj = providerExcludesImageInput + ? inputImageCount * 560 + : 0; + const adjPrompt = Math.max( + 1, + Math.round( + promptTokens && promptTokens > 0 + ? promptTokens + imageInputAdj + : (calculatedPromptTokens ?? 
1) + imageInputAdj, + ), + ); + const adjCompletion = Math.round( + completionTokens ?? calculatedCompletionTokens ?? 0, + ); + return { + prompt_tokens: adjPrompt, + completion_tokens: adjCompletion, + total_tokens: Math.max( + 1, + Math.round(adjPrompt + adjCompletion), + ), + ...(cachedTokens !== null && { + prompt_tokens_details: { + cached_tokens: cachedTokens, + }, + }), + cost_usd_total: streamingCostsEarly.totalCost, + cost_usd_input: streamingCostsEarly.inputCost, + cost_usd_output: streamingCostsEarly.outputCost, + cost_usd_cached_input: + streamingCostsEarly.cachedInputCost, + cost_usd_request: streamingCostsEarly.requestCost, + cost_usd_image_input: streamingCostsEarly.imageInputCost, + cost_usd_image_output: + streamingCostsEarly.imageOutputCost, + }; + })(), }; await writeSSEAndCache({ - data: JSON.stringify(finishChunk), + data: JSON.stringify(finalUsageChunk), id: String(eventId++), }); - sentDownstreamFinishReasonChunk = true; } catch (error) { logger.error( - "Error sending synthesized finish chunk", + "Error sending final usage chunk", error instanceof Error ? error : new Error(String(error)), ); } + + // Send healed content if buffering was enabled + if ( + shouldBufferForHealing && + bufferedContentChunks.length > 0 && + !streamingError + ) { + try { + // Combine buffered content and apply healing + const bufferedContent = bufferedContentChunks.join(""); + const healingResult = healJsonResponse(bufferedContent); + + // Store plugin results for logging + streamingPluginResults.responseHealing = { + healed: healingResult.healed, + healingMethod: healingResult.healingMethod, + }; + + if (healingResult.healed) { + logger.debug("Streaming response healing applied", { + method: healingResult.healingMethod, + originalLength: healingResult.originalContent.length, + healedLength: healingResult.content.length, + }); + // Update fullContent with healed version for logging + fullContent = healingResult.content; + } + + // Send the healed (or original if no healing needed) content as a single chunk + const healedContentChunk = { + id: lastChunkId ?? `chatcmpl-${Date.now()}`, + object: "chat.completion.chunk", + created: lastChunkCreated ?? Math.floor(Date.now() / 1000), + model: lastChunkModel ?? usedModel, + choices: [ + { + index: 0, + delta: { + content: healingResult.content, + }, + finish_reason: null, + }, + ], + }; + + await writeSSEAndCache({ + data: JSON.stringify(healedContentChunk), + id: String(eventId++), + }); + + // Send finish_reason chunk + const finishChunk = { + id: lastChunkId ?? `chatcmpl-${Date.now()}`, + object: "chat.completion.chunk", + created: lastChunkCreated ?? Math.floor(Date.now() / 1000), + model: lastChunkModel ?? usedModel, + choices: [ + { + index: 0, + delta: {}, + finish_reason: finishReason ?? "stop", + }, + ], + }; + + await writeSSEAndCache({ + data: JSON.stringify(finishChunk), + id: String(eventId++), + }); + } catch (error) { + logger.error( + "Error sending healed content chunk", + error instanceof Error ? error : new Error(String(error)), + ); + } + } + + // Send routing metadata for all attempts (including successful) + if (routingAttempts.length > 0 && !doneSent) { + try { + const routingChunk = { + id: `chatcmpl-${Date.now()}`, + object: "chat.completion.chunk", + created: Math.floor(Date.now() / 1000), + model: usedModel, + choices: [ + { + index: 0, + delta: {}, + finish_reason: null, + }, + ], + metadata: { + requested_model: initialRequestedModel, + requested_provider: requestedProvider ?? 
null,
+                      used_model: baseModelName,
+                      used_provider: usedProvider,
+                      ...(usedRegion && { used_region: usedRegion }),
+                      underlying_used_model: usedModel,
+                      routing: routingAttempts,
+                    },
+                  };
+                  await writeSSEAndCache({
+                    data: JSON.stringify(routingChunk),
+                    id: String(eventId++),
+                  });
+                } catch (error) {
+                  logger.error(
+                    "Error sending routing metadata chunk",
+                    error instanceof Error ? error : new Error(String(error)),
+                  );
+                }
+              }
+
+              // Always send [DONE] at the end of streaming if not already sent
+              if (!doneSent) {
+                try {
+                  await writeSSEAndCache({
+                    event: "done",
+                    data: "[DONE]",
+                    id: String(eventId++),
+                  });
+                } catch (error) {
+                  logger.error(
+                    "Error sending [DONE] event",
+                    error instanceof Error ? error : new Error(String(error)),
+                  );
+                }
+              }
+            }
          }

-          // Calculate costs before sending usage chunk so we can include cost data
-          const billCancelledRequestsEarly = shouldBillCancelledRequests();
-          streamingCostsEarly =
-            canceled && !billCancelledRequestsEarly
+          // Clean up keepalive before any potentially-throwing operations (enqueueChatLog, etc.)
+          // clearInterval is idempotent so calling it multiple times is safe
+          clearKeepalive();
+
+          // Reuse costs calculated earlier (before usage chunk was sent)
+          // If we came through the error path (hasEmptyResponse), calculate now
+          const billCancelledRequests = shouldBillCancelledRequests();
+          const costs =
+            streamingCostsEarly ??
+            (canceled && !billCancelledRequests
              ? {
                  inputCost: null,
                  outputCost: null,
@@ -6019,473 +6254,226 @@ chat.openapi(completions, async (c) => {
            inputImageCount,
            webSearchCount,
            project.organizationId,
-          );
+            ));
+
+          // Use costs.promptTokens as canonical value (includes image input
+          // tokens for providers that exclude them from upstream usage)
+          if (
+            costs.promptTokens !== null &&
+            costs.promptTokens !== undefined
+          ) {
+            const promptDelta =
+              (costs.promptTokens ?? 0) - (calculatedPromptTokens ??
0); + if (promptDelta > 0) { + calculatedPromptTokens = costs.promptTokens; + calculatedTotalTokens = + (calculatedTotalTokens ?? 0) + promptDelta; + } + } + + // Determine plugin results for logging (includes healing results if applicable) + const finalPluginResults = + Object.keys(streamingPluginResults).length > 0 + ? streamingPluginResults + : undefined; + + // Enhanced logging for Google models streaming to debug missing responses + if (isGoogleCompatibleProvider(usedProvider)) { + logger.debug("Google model streaming response completed", { + usedProvider, + usedModel, + hasContent: !!fullContent, + contentLength: fullContent.length, + finishReason, + promptTokens: calculatedPromptTokens, + completionTokens: calculatedCompletionTokens, + totalTokens: calculatedTotalTokens, + reasoningTokens, + streamingError: streamingError ? String(streamingError) : null, + canceled, + hasToolCalls: + !!streamingToolCalls && streamingToolCalls.length > 0, + }); + } + + // For cancelled requests, determine if we should include token counts for billing + const shouldIncludeTokensForBilling = + !canceled || (canceled && billCancelledRequests); + + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: streamingError ?? streamingRawResponseData, + upstreamRequest: requestBody, + upstreamResponse: streamingError ?? rawUpstreamData, + plugins: requestPluginIds, + pluginResults: finalPluginResults, + }, + { + id: routingAttempts.length > 0 ? finalLogId : undefined, + duration, + timeToFirstToken, + timeToFirstReasoningToken, + responseSize: fullContent.length, + content: fullContent, + reasoningContent: fullReasoningContent || null, + finishReason: canceled ? "canceled" : finishReason, + promptTokens: shouldIncludeTokensForBilling + ? (calculatedPromptTokens?.toString() ?? null) + : null, + completionTokens: shouldIncludeTokensForBilling + ? (calculatedCompletionTokens?.toString() ?? null) + : null, + totalTokens: shouldIncludeTokensForBilling + ? (calculatedTotalTokens?.toString() ?? null) + : null, + reasoningTokens: shouldIncludeTokensForBilling + ? (calculatedReasoningTokens?.toString() ?? null) + : null, + cachedTokens: shouldIncludeTokensForBilling + ? (cachedTokens?.toString() ?? null) + : null, + hasError: streamingError !== null, + errorDetails: streamingError + ? { + statusCode: + typeof streamingError === "object" && + streamingError !== null && + "details" in streamingError && + typeof streamingError.details === "object" && + streamingError.details !== null && + "statusCode" in streamingError.details && + typeof streamingError.details.statusCode === "number" + ? streamingError.details.statusCode + : 500, + statusText: + typeof streamingError === "object" && + streamingError !== null && + "details" in streamingError && + typeof streamingError.details === "object" && + streamingError.details !== null && + "statusText" in streamingError.details && + typeof streamingError.details.statusText === "string" + ? 
streamingError.details.statusText + : "Streaming Error", + responseText: + typeof streamingError === "object" && + streamingError !== null && + "details" in streamingError && + typeof streamingError.details === "object" && + streamingError.details !== null && + "responseText" in streamingError.details && + typeof streamingError.details.responseText === "string" + ? streamingError.details.responseText + : typeof streamingError === "object" && + streamingError !== null && + "details" in streamingError + ? JSON.stringify(streamingError) + : streamingError instanceof Error + ? streamingError.message + : String(streamingError), + } + : null, + streamed: true, + canceled: canceled, + inputCost: costs.inputCost, + outputCost: costs.outputCost, + cachedInputCost: costs.cachedInputCost, + requestCost: costs.requestCost, + webSearchCost: costs.webSearchCost, + imageInputTokens: costs.imageInputTokens?.toString() ?? null, + imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, + imageInputCost: costs.imageInputCost ?? null, + imageOutputCost: costs.imageOutputCost ?? null, + cost: costs.totalCost, + estimatedCost: costs.estimatedCost, + discount: costs.discount, + pricingTier: costs.pricingTier, + dataStorageCost: shouldIncludeTokensForBilling + ? calculateDataStorageCost( + calculatedPromptTokens, + cachedTokens, + calculatedCompletionTokens, + calculatedReasoningTokens, + retentionLevel, + ) + : "0", + cached: false, + toolResults: streamingToolCalls, + }, + ); - await writeSSEAndCache({ - data: JSON.stringify(finalUsageChunk), - id: String(eventId++), - }); - } catch (error) { - logger.error( - "Error sending final usage chunk", - error instanceof Error ? error : new Error(String(error)), - ); + // Report key health for environment-based tokens + if (envVarName !== undefined) { + if (streamingError !== null) { + reportKeyError(envVarName, configIndex, 500); + } else { + reportKeySuccess(envVarName, configIndex); + } } - // Send healed content if buffering was enabled + // Save streaming cache if enabled and not canceled and no errors if ( - shouldBufferForHealing && - bufferedContentChunks.length > 0 && + cachingEnabled && + streamingCacheKey && + !canceled && + finishReason && !streamingError ) { try { - // Combine buffered content and apply healing - const bufferedContent = bufferedContentChunks.join(""); - const healingResult = healJsonResponse(bufferedContent); - - // Store plugin results for logging - streamingPluginResults.responseHealing = { - healed: healingResult.healed, - healingMethod: healingResult.healingMethod, - }; - - if (healingResult.healed) { - logger.debug("Streaming response healing applied", { - method: healingResult.healingMethod, - originalLength: healingResult.originalContent.length, - healedLength: healingResult.content.length, - }); - // Update fullContent with healed version for logging - fullContent = healingResult.content; - } - - // Send the healed (or original if no healing needed) content as a single chunk - const healedContentChunk = { - id: lastChunkId ?? `chatcmpl-${Date.now()}`, - object: "chat.completion.chunk", - created: lastChunkCreated ?? Math.floor(Date.now() / 1000), - model: lastChunkModel ?? usedModel, - choices: [ - { - index: 0, - delta: { - content: healingResult.content, - }, - finish_reason: null, - }, - ], - }; - - await writeSSEAndCache({ - data: JSON.stringify(healedContentChunk), - id: String(eventId++), - }); - - // Send finish_reason chunk - const finishChunk = { - id: lastChunkId ?? 
`chatcmpl-${Date.now()}`, - object: "chat.completion.chunk", - created: lastChunkCreated ?? Math.floor(Date.now() / 1000), - model: lastChunkModel ?? usedModel, - choices: [ - { - index: 0, - delta: {}, - finish_reason: finishReason ?? "stop", - }, - ], - }; - - await writeSSEAndCache({ - data: JSON.stringify(finishChunk), - id: String(eventId++), - }); - } catch (error) { - logger.error( - "Error sending healed content chunk", - error instanceof Error ? error : new Error(String(error)), - ); - } - } - - // Send routing metadata for all attempts (including successful) - if (routingAttempts.length > 0 && !doneSent) { - try { - const routingChunk = { - id: `chatcmpl-${Date.now()}`, - object: "chat.completion.chunk", - created: Math.floor(Date.now() / 1000), - model: usedModel, - choices: [ - { - index: 0, - delta: {}, - finish_reason: null, - }, - ], + const streamingCacheData = { + chunks: streamingChunks, metadata: { - requested_model: initialRequestedModel, - requested_provider: requestedProvider ?? null, - used_model: baseModelName, - used_provider: usedProvider, - ...(usedRegion && { used_region: usedRegion }), - underlying_used_model: usedModel, - routing: routingAttempts, + model: usedModel, + provider: usedProvider, + finishReason: finishReason, + totalChunks: streamingChunks.length, + duration: duration, + completed: true, }, }; - await writeSSEAndCache({ - data: JSON.stringify(routingChunk), - id: String(eventId++), - }); - } catch (error) { - logger.error( - "Error sending routing metadata chunk", - error instanceof Error ? error : new Error(String(error)), - ); - } - } - // Always send [DONE] at the end of streaming if not already sent - if (!doneSent) { - try { - await writeSSEAndCache({ - event: "done", - data: "[DONE]", - id: String(eventId++), - }); + await setStreamingCache( + streamingCacheKey, + streamingCacheData, + cacheDuration, + ); } catch (error) { logger.error( - "Error sending [DONE] event", + "Error saving streaming cache", error instanceof Error ? error : new Error(String(error)), ); } } } - - // Clean up keepalive before any potentially-throwing operations (insertLog, etc.) - // clearInterval is idempotent so calling it multiple times is safe - clearKeepalive(); - - // Reuse costs calculated earlier (before usage chunk was sent) - // If we came through the error path (hasEmptyResponse), calculate now - const billCancelledRequests = shouldBillCancelledRequests(); - const costs = - streamingCostsEarly ?? - (canceled && !billCancelledRequests - ? { - inputCost: null, - outputCost: null, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - totalCost: null, - promptTokens: null, - completionTokens: null, - cachedTokens: null, - estimatedCost: false, - discount: undefined, - pricingTier: undefined, - } - : await calculateCosts( - usedModel, - usedProvider, - calculatedPromptTokens, - calculatedCompletionTokens, - cachedTokens, - { - prompt: messages - .map((m) => messageContentToString(m.content)) - .join("\n"), - completion: fullContent, - toolResults: streamingToolCalls ?? 
undefined, - }, - reasoningTokens, - outputImageCount, - image_config?.image_size, - inputImageCount, - webSearchCount, - project.organizationId, - )); - - // Use costs.promptTokens as canonical value (includes image input - // tokens for providers that exclude them from upstream usage) - if (costs.promptTokens !== null && costs.promptTokens !== undefined) { - const promptDelta = - (costs.promptTokens ?? 0) - (calculatedPromptTokens ?? 0); - if (promptDelta > 0) { - calculatedPromptTokens = costs.promptTokens; - calculatedTotalTokens = - (calculatedTotalTokens ?? 0) + promptDelta; - } - } - - // Extract plugin IDs for logging - const streamingPluginIds = plugins?.map((p) => p.id) ?? []; - - // Determine plugin results for logging (includes healing results if applicable) - const finalPluginResults = - Object.keys(streamingPluginResults).length > 0 - ? streamingPluginResults - : undefined; - - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - streamingError ?? streamingRawResponseData, // Raw SSE data sent back to the client - requestBody, // The request sent to the provider - streamingError ?? rawUpstreamData, // Raw streaming data received from upstream provider - streamingPluginIds, - finalPluginResults, // Plugin results including healing (if enabled) - ); - - // Enhanced logging for Google models streaming to debug missing responses - if (isGoogleCompatibleProvider(usedProvider)) { - logger.debug("Google model streaming response completed", { - usedProvider, - usedModel, - hasContent: !!fullContent, - contentLength: fullContent.length, - finishReason, - promptTokens: calculatedPromptTokens, - completionTokens: calculatedCompletionTokens, - totalTokens: calculatedTotalTokens, - reasoningTokens, - streamingError: streamingError ? String(streamingError) : null, - canceled, - hasToolCalls: - !!streamingToolCalls && streamingToolCalls.length > 0, - }); - } - - // For cancelled requests, determine if we should include token counts for billing - const shouldIncludeTokensForBilling = - !canceled || (canceled && billCancelledRequests); - - await insertLog({ - ...baseLogEntry, - id: routingAttempts.length > 0 ? finalLogId : undefined, - duration, - timeToFirstToken, - timeToFirstReasoningToken, - responseSize: fullContent.length, - content: fullContent, - reasoningContent: fullReasoningContent || null, - finishReason: canceled ? "canceled" : finishReason, - promptTokens: shouldIncludeTokensForBilling - ? (calculatedPromptTokens?.toString() ?? null) - : null, - completionTokens: shouldIncludeTokensForBilling - ? (calculatedCompletionTokens?.toString() ?? null) - : null, - totalTokens: shouldIncludeTokensForBilling - ? (calculatedTotalTokens?.toString() ?? null) - : null, - reasoningTokens: shouldIncludeTokensForBilling - ? (calculatedReasoningTokens?.toString() ?? null) - : null, - cachedTokens: shouldIncludeTokensForBilling - ? (cachedTokens?.toString() ?? null) - : null, - hasError: streamingError !== null, - errorDetails: streamingError - ? 
{ - statusCode: - typeof streamingError === "object" && - streamingError !== null && - "details" in streamingError && - typeof streamingError.details === "object" && - streamingError.details !== null && - "statusCode" in streamingError.details && - typeof streamingError.details.statusCode === "number" - ? streamingError.details.statusCode - : 500, - statusText: - typeof streamingError === "object" && - streamingError !== null && - "details" in streamingError && - typeof streamingError.details === "object" && - streamingError.details !== null && - "statusText" in streamingError.details && - typeof streamingError.details.statusText === "string" - ? streamingError.details.statusText - : "Streaming Error", - responseText: - typeof streamingError === "object" && - streamingError !== null && - "details" in streamingError && - typeof streamingError.details === "object" && - streamingError.details !== null && - "responseText" in streamingError.details && - typeof streamingError.details.responseText === "string" - ? streamingError.details.responseText - : typeof streamingError === "object" && - streamingError !== null && - "details" in streamingError - ? JSON.stringify(streamingError) - : streamingError instanceof Error - ? streamingError.message - : String(streamingError), - } - : null, - streamed: true, - canceled: canceled, - inputCost: costs.inputCost, - outputCost: costs.outputCost, - cachedInputCost: costs.cachedInputCost, - requestCost: costs.requestCost, - webSearchCost: costs.webSearchCost, - imageInputTokens: costs.imageInputTokens?.toString() ?? null, - imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, - imageInputCost: costs.imageInputCost ?? null, - imageOutputCost: costs.imageOutputCost ?? null, - cost: costs.totalCost, - estimatedCost: costs.estimatedCost, - discount: costs.discount, - pricingTier: costs.pricingTier, - dataStorageCost: shouldIncludeTokensForBilling - ? calculateDataStorageCost( - calculatedPromptTokens, - cachedTokens, - calculatedCompletionTokens, - calculatedReasoningTokens, - retentionLevel, - ) - : "0", - cached: false, - tools, - toolResults: streamingToolCalls, - toolChoice: tool_choice, - }); - - // Report key health for environment-based tokens - if (envVarName !== undefined) { - if (streamingError !== null) { - reportKeyError(envVarName, configIndex, 500); - } else { - reportKeySuccess(envVarName, configIndex); - } - } - - // Save streaming cache if enabled and not canceled and no errors - if ( - cachingEnabled && - streamingCacheKey && - !canceled && - finishReason && - !streamingError - ) { - try { - const streamingCacheData = { - chunks: streamingChunks, - metadata: { - model: usedModel, - provider: usedProvider, - finishReason: finishReason, - totalChunks: streamingChunks.length, - duration: duration, - completed: true, - }, - }; - - await setStreamingCache( - streamingCacheKey, - streamingCacheData, - cacheDuration, - ); - } catch (error) { - logger.error( - "Error saving streaming cache", - error instanceof Error ? error : new Error(String(error)), - ); - } - } - } + })().finally(() => { + finishStreamCompletion(c); + }); }, async (error) => { if (error.name === "TimeoutError") { @@ -6726,10 +6714,6 @@ chat.openapi(completions, async (c) => { ), }); - // Log the error in the database - // Extract plugin IDs for logging (non-streaming fetch error) - const nonStreamingFetchErrorPluginIds = plugins?.map((p) => p.id) ?? 
[]; - // Check if we should retry before logging so we can mark the log as retried const willRetryFetchNonStreaming = shouldRetryRequest({ requestedProvider, @@ -6743,80 +6727,78 @@ chat.openapi(completions, async (c) => { usedProvider, }); - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - null, // No response for fetch error - requestBody, // The request that resulted in error - null, // No upstream response for fetch error - nonStreamingFetchErrorPluginIds, - undefined, // No plugin results for error case - ); - - await insertLog({ - ...baseLogEntry, - duration: perAttemptDuration, - timeToFirstToken: null, // Not applicable for error case - timeToFirstReasoningToken: null, // Not applicable for error case - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "upstream_error", - promptTokens: null, - completionTokens: null, - totalTokens: null, - reasoningTokens: null, - cachedTokens: null, - hasError: true, - streamed: false, - canceled: false, - errorDetails: { - statusCode: 0, - statusText: fetchError.name, - responseText: errorMessage, - cause: nonStreamingFetchCause, + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: null, + upstreamRequest: requestBody, + upstreamResponse: null, + plugins: requestPluginIds, + pluginResults: undefined, }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - estimatedCost: false, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - retried: willRetryFetchNonStreaming, - retriedByLogId: willRetryFetchNonStreaming ? finalLogId : null, - }); + { + duration: perAttemptDuration, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: false, + canceled: false, + errorDetails: { + statusCode: 0, + statusText: fetchError.name, + responseText: errorMessage, + cause: nonStreamingFetchCause, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + estimatedCost: false, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryFetchNonStreaming, + retriedByLogId: willRetryFetchNonStreaming ? 
finalLogId : null, + }, + ); // Report key health for environment-based tokens if (envVarName !== undefined) { @@ -6858,10 +6840,6 @@ chat.openapi(completions, async (c) => { // If the request was canceled, log it and return a response if (canceled) { - // Log the canceled request - // Extract plugin IDs for logging (canceled non-streaming) - const canceledNonStreamingPluginIds = plugins?.map((p) => p.id) ?? []; - // Calculate costs for cancelled request if billing is enabled const billCancelled = shouldBillCancelledRequests(); let cancelledCosts: Awaited> | null = @@ -6902,90 +6880,93 @@ chat.openapi(completions, async (c) => { ); } - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - null, // No response for canceled request - requestBody, // The request that was prepared before cancellation - null, // No upstream response for canceled request - canceledNonStreamingPluginIds, - undefined, // No plugin results for canceled request + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: null, + upstreamRequest: requestBody, + upstreamResponse: null, + plugins: requestPluginIds, + pluginResults: undefined, + }, + { + duration, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "canceled", + promptTokens: billCancelled + ? ( + cancelledCosts?.promptTokens ?? estimatedPromptTokens + )?.toString() + : null, + completionTokens: billCancelled ? "0" : null, + totalTokens: billCancelled + ? ( + cancelledCosts?.promptTokens ?? estimatedPromptTokens + )?.toString() + : null, + reasoningTokens: null, + cachedTokens: null, + hasError: false, + streamed: false, + canceled: true, + errorDetails: null, + inputCost: cancelledCosts?.inputCost ?? null, + outputCost: cancelledCosts?.outputCost ?? null, + cachedInputCost: cancelledCosts?.cachedInputCost ?? null, + requestCost: cancelledCosts?.requestCost ?? null, + webSearchCost: cancelledCosts?.webSearchCost ?? null, + imageInputTokens: + cancelledCosts?.imageInputTokens?.toString() ?? null, + imageOutputTokens: + cancelledCosts?.imageOutputTokens?.toString() ?? null, + imageInputCost: cancelledCosts?.imageInputCost ?? null, + imageOutputCost: cancelledCosts?.imageOutputCost ?? null, + cost: cancelledCosts?.totalCost ?? null, + estimatedCost: cancelledCosts?.estimatedCost ?? false, + discount: cancelledCosts?.discount ?? null, + dataStorageCost: billCancelled + ? calculateDataStorageCost( + cancelledCosts?.promptTokens ?? 
estimatedPromptTokens, + null, + 0, + null, + retentionLevel, + ) + : "0", + cached: false, + toolResults: null, + }, ); - await insertLog({ - ...baseLogEntry, - duration, - timeToFirstToken: null, // Not applicable for canceled request - timeToFirstReasoningToken: null, // Not applicable for canceled request - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "canceled", - promptTokens: billCancelled - ? (cancelledCosts?.promptTokens ?? estimatedPromptTokens)?.toString() - : null, - completionTokens: billCancelled ? "0" : null, - totalTokens: billCancelled - ? (cancelledCosts?.promptTokens ?? estimatedPromptTokens)?.toString() - : null, - reasoningTokens: null, - cachedTokens: null, - hasError: false, - streamed: false, - canceled: true, - errorDetails: null, - inputCost: cancelledCosts?.inputCost ?? null, - outputCost: cancelledCosts?.outputCost ?? null, - cachedInputCost: cancelledCosts?.cachedInputCost ?? null, - requestCost: cancelledCosts?.requestCost ?? null, - webSearchCost: cancelledCosts?.webSearchCost ?? null, - imageInputTokens: cancelledCosts?.imageInputTokens?.toString() ?? null, - imageOutputTokens: - cancelledCosts?.imageOutputTokens?.toString() ?? null, - imageInputCost: cancelledCosts?.imageInputCost ?? null, - imageOutputCost: cancelledCosts?.imageOutputCost ?? null, - cost: cancelledCosts?.totalCost ?? null, - estimatedCost: cancelledCosts?.estimatedCost ?? false, - discount: cancelledCosts?.discount ?? null, - dataStorageCost: billCancelled - ? calculateDataStorageCost( - cancelledCosts?.promptTokens ?? estimatedPromptTokens, - null, - 0, - null, - retentionLevel, - ) - : "0", - cached: false, - toolResults: null, - }); - return c.json( { error: { @@ -7027,79 +7008,76 @@ chat.openapi(completions, async (c) => { ), }); - const bodyTimeoutPluginIds = plugins?.map((p) => p.id) ?? 
[]; - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping!, - usedProvider!, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - null, - requestBody, - null, - bodyTimeoutPluginIds, - undefined, - ); - - await insertLog({ - ...baseLogEntry, - duration: Date.now() - perAttemptStartTime, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "upstream_error", - promptTokens: null, - completionTokens: null, - totalTokens: null, - reasoningTokens: null, - cachedTokens: null, - hasError: true, - streamed: false, - canceled: false, - errorDetails: { - statusCode: res.status, - statusText: "TimeoutError", - responseText: errorMessage, - cause: bodyErrorCause, + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: null, + upstreamRequest: requestBody, + upstreamResponse: null, + plugins: requestPluginIds, + pluginResults: undefined, }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - estimatedCost: false, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - }); + { + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: false, + canceled: false, + errorDetails: { + statusCode: res.status, + statusText: "TimeoutError", + responseText: errorMessage, + cause: bodyErrorCause, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + estimatedCost: false, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + }, + ); return c.json( { @@ -7143,10 +7121,6 @@ chat.openapi(completions, async (c) => { }); } - // Log the request in the database - // Extract plugin IDs for logging - const providerErrorPluginIds = plugins?.map((p) => p.id) ?? 
[]; - // Check if we should retry before logging so we can mark the log as retried const willRetryHttpNonStreaming = shouldRetryRequest({ requestedProvider, @@ -7160,99 +7134,95 @@ chat.openapi(completions, async (c) => { usedProvider, }); - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - errorResponseText, // Our formatted error response - requestBody, // The request that resulted in error - errorResponseText, // Raw upstream error response - providerErrorPluginIds, - undefined, // No plugin results for error case - ); - - await insertLog({ - ...baseLogEntry, - duration: perAttemptDuration, - timeToFirstToken: null, // Not applicable for error case - timeToFirstReasoningToken: null, // Not applicable for error case - responseSize: errorResponseText.length, - content: null, - reasoningContent: null, - finishReason, - promptTokens: null, - completionTokens: null, - totalTokens: null, - reasoningTokens: null, - cachedTokens: null, - hasError: finishReason !== "content_filter", // content_filter is not an error - streamed: false, - canceled: false, - errorDetails: (() => { - // content_filter is not an error, no error details needed - if (finishReason === "content_filter") { - return null; - } - // For client errors, try to parse the original error and include the message - if (finishReason === "client_error") { - try { - const originalError = JSON.parse(errorResponseText); - return { - statusCode: res.status, - statusText: res.statusText, - responseText: errorResponseText, - message: originalError.error?.message ?? 
errorResponseText, - }; - } catch { - // If parsing fails, use default format + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: errorResponseText, + upstreamRequest: requestBody, + upstreamResponse: errorResponseText, + plugins: requestPluginIds, + pluginResults: undefined, + }, + { + duration: perAttemptDuration, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: errorResponseText.length, + content: null, + reasoningContent: null, + finishReason, + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: finishReason !== "content_filter", + streamed: false, + canceled: false, + errorDetails: (() => { + if (finishReason === "content_filter") { + return null; } - } - return { - statusCode: res.status, - statusText: res.statusText, - responseText: errorResponseText, - }; - })(), - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - estimatedCost: false, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - retried: willRetryHttpNonStreaming, - retriedByLogId: willRetryHttpNonStreaming ? finalLogId : null, - }); + if (finishReason === "client_error") { + try { + const originalError = JSON.parse(errorResponseText); + return { + statusCode: res.status, + statusText: res.statusText, + responseText: errorResponseText, + message: originalError.error?.message ?? errorResponseText, + }; + } catch { + // If parsing fails, use default format + } + } + return { + statusCode: res.status, + statusText: res.statusText, + responseText: errorResponseText, + }; + })(), + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + estimatedCost: false, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryHttpNonStreaming, + retriedByLogId: willRetryHttpNonStreaming ? finalLogId : null, + }, + ); // Report key health for environment-based tokens // Don't report content_filter as a key error - it's intentional provider behavior @@ -7416,79 +7386,76 @@ chat.openapi(completions, async (c) => { ), }); - const bodyTimeoutPluginIds = plugins?.map((p) => p.id) ?? 
[]; - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted!, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - null, - requestBody, - null, - bodyTimeoutPluginIds, - undefined, - ); - - await insertLog({ - ...baseLogEntry, - duration: Date.now() - startTime, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "upstream_error", - promptTokens: null, - completionTokens: null, - totalTokens: null, - reasoningTokens: null, - cachedTokens: null, - hasError: true, - streamed: false, - canceled: false, - errorDetails: { - statusCode: res.status, - statusText: "TimeoutError", - responseText: errorMessage, - cause: bodyReadCause, + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: null, + upstreamRequest: requestBody, + upstreamResponse: null, + plugins: requestPluginIds, + pluginResults: undefined, }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - estimatedCost: false, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - }); + { + duration: Date.now() - startTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: false, + canceled: false, + errorDetails: { + statusCode: res.status, + statusText: "TimeoutError", + responseText: errorMessage, + cause: bodyReadCause, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + estimatedCost: false, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + }, + ); return c.json( { @@ -7706,45 +7673,6 @@ chat.openapi(completions, async (c) => { usedRegion, ); - // Extract plugin IDs for logging - const pluginIds = plugins?.map((p) => p.id) ?? 
[]; - - const baseLogEntry = createLogEntry( - requestId, - project, - apiKey, - providerKey?.id, - usedModelFormatted, - usedModelMapping, - usedProvider, - initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoning_effort, - reasoning_max_tokens, - effort, - response_format, - tools, - tool_choice, - source, - customHeaders, - debugMode, - userAgent, - image_config, - routingMetadata, - rawBody, - transformedResponse, // Our formatted response that we return to user - requestBody, // The request sent to the provider - json, // Raw upstream response from provider - pluginIds, - Object.keys(pluginResults).length > 0 ? pluginResults : undefined, - ); - // Check if the non-streaming response is empty (no content, tokens, or tool calls) // Exclude content_filter responses as they are intentionally empty (blocked by provider) // For Google, check for original finish reasons that indicate content filtering @@ -7792,63 +7720,96 @@ chat.openapi(completions, async (c) => { } } - await insertLog({ - ...baseLogEntry, - id: routingAttempts.length > 0 ? finalLogId : undefined, - duration, - timeToFirstToken: null, // Not applicable for non-streaming requests - timeToFirstReasoningToken: null, // Not applicable for non-streaming requests - responseSize, - content: content, - reasoningContent: reasoningContent, - finishReason: hasEmptyNonStreamingResponse - ? "upstream_error" - : finishReason, - promptTokens: calculatedPromptTokens?.toString() ?? null, - completionTokens: calculatedCompletionTokens?.toString() ?? null, - totalTokens: - totalTokens ?? - ( - (calculatedPromptTokens ?? 0) + (calculatedCompletionTokens ?? 0) - ).toString(), - reasoningTokens: calculatedReasoningTokens?.toString() ?? null, - cachedTokens: cachedTokens?.toString() ?? null, - hasError: hasEmptyNonStreamingResponse, - streamed: false, - canceled: false, - errorDetails: hasEmptyNonStreamingResponse - ? { - statusCode: 500, - statusText: "Empty Response", - responseText: - "Response finished successfully but returned no content or tool calls", - } - : null, - inputCost: costs.inputCost, - outputCost: costs.outputCost, - cachedInputCost: costs.cachedInputCost, - requestCost: costs.requestCost, - webSearchCost: costs.webSearchCost, - imageInputTokens: costs.imageInputTokens?.toString() ?? null, - imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, - imageInputCost: costs.imageInputCost ?? null, - imageOutputCost: costs.imageOutputCost ?? 
null, - cost: costs.totalCost, - estimatedCost: costs.estimatedCost, - discount: costs.discount, - pricingTier: costs.pricingTier, - dataStorageCost: calculateDataStorageCost( - calculatedPromptTokens, - cachedTokens, - calculatedCompletionTokens, - calculatedReasoningTokens, - retentionLevel, - ), - cached: false, - tools, - toolResults, - toolChoice: tool_choice, - }); + enqueueChatLog( + c, + { + providerKeyId: providerKey?.id, + usedModel: usedModelFormatted, + usedModelMapping, + usedProvider, + requestedModel: initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoningEffort: reasoning_effort, + reasoningMaxTokens: reasoning_max_tokens, + effort, + responseFormat: response_format, + tools, + toolChoice: tool_choice, + source, + customHeaders, + debugMode, + userAgent, + imageConfig: image_config, + routingMetadata, + rawRequest: rawBody, + rawResponse: transformedResponse, + upstreamRequest: requestBody, + upstreamResponse: json, + plugins: requestPluginIds, + pluginResults: + Object.keys(pluginResults).length > 0 ? pluginResults : undefined, + }, + { + id: routingAttempts.length > 0 ? finalLogId : undefined, + duration, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize, + content: content, + reasoningContent: reasoningContent, + finishReason: hasEmptyNonStreamingResponse + ? "upstream_error" + : finishReason, + promptTokens: calculatedPromptTokens?.toString() ?? null, + completionTokens: calculatedCompletionTokens?.toString() ?? null, + totalTokens: + totalTokens ?? + ( + (calculatedPromptTokens ?? 0) + (calculatedCompletionTokens ?? 0) + ).toString(), + reasoningTokens: calculatedReasoningTokens?.toString() ?? null, + cachedTokens: cachedTokens?.toString() ?? null, + hasError: hasEmptyNonStreamingResponse, + streamed: false, + canceled: false, + errorDetails: hasEmptyNonStreamingResponse + ? { + statusCode: 500, + statusText: "Empty Response", + responseText: + "Response finished successfully but returned no content or tool calls", + } + : null, + inputCost: costs.inputCost, + outputCost: costs.outputCost, + cachedInputCost: costs.cachedInputCost, + requestCost: costs.requestCost, + webSearchCost: costs.webSearchCost, + imageInputTokens: costs.imageInputTokens?.toString() ?? null, + imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, + imageInputCost: costs.imageInputCost ?? null, + imageOutputCost: costs.imageOutputCost ?? 
null,
+				cost: costs.totalCost,
+				estimatedCost: costs.estimatedCost,
+				discount: costs.discount,
+				pricingTier: costs.pricingTier,
+				dataStorageCost: calculateDataStorageCost(
+					calculatedPromptTokens,
+					cachedTokens,
+					calculatedCompletionTokens,
+					calculatedReasoningTokens,
+					retentionLevel,
+				),
+				cached: false,
+				toolResults,
+			},
+		);
 
 		// Report key health for environment-based tokens
 		// Note: We don't report empty responses as key errors since they're not upstream errors
diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts
new file mode 100644
index 0000000000..aee0971676
--- /dev/null
+++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts
@@ -0,0 +1,144 @@
+import { createMiddleware } from "hono/factory";
+import { HTTPException } from "hono/http-exception";
+
+import {
+	buildBaseLogEntry,
+	type ChatCompletionLogState,
+} from "@/chat/tools/chat-log-context.js";
+import { insertLog as _insertLog } from "@/lib/logs.js";
+
+import { logger } from "@llmgateway/logger";
+
+import type { ServerTypes } from "@/vars.js";
+import type { LogInsertData } from "@llmgateway/db";
+
+function getSynthesizedClientErrorLog(
+	baseLogEntry: ReturnType<typeof buildBaseLogEntry>,
+	status: number,
+	error: unknown,
+): LogInsertData | null {
+	if (!baseLogEntry) {
+		return null;
+	}
+
+	const responseText =
+		error instanceof HTTPException
+			? error.message
+			: error instanceof Error
+				? error.message
+				: "Client error";
+
+	return {
+		...baseLogEntry,
+		content: null,
+		responseSize: responseText.length,
+		finishReason: "client_error",
+		unifiedFinishReason: "client_error",
+		promptTokens: null,
+		completionTokens: null,
+		totalTokens: null,
+		reasoningTokens: null,
+		cachedTokens: null,
+		hasError: true,
+		streamed:
+			typeof baseLogEntry.rawRequest === "object" &&
+			baseLogEntry.rawRequest !== null &&
+			"stream" in baseLogEntry.rawRequest
+				? Boolean(baseLogEntry.rawRequest.stream)
+				: false,
+		canceled: false,
+		errorDetails: {
+			statusCode: status,
+			statusText:
+				error instanceof HTTPException
+					? "Client Error"
+					: error instanceof Error
+						? error.name
+						: "Client Error",
+			responseText,
+		},
+		duration: 0,
+		timeToFirstToken: null,
+		timeToFirstReasoningToken: null,
+		inputCost: null,
+		outputCost: null,
+		cachedInputCost: null,
+		requestCost: null,
+		webSearchCost: null,
+		imageInputTokens: null,
+		imageOutputTokens: null,
+		imageInputCost: null,
+		imageOutputCost: null,
+		cost: null,
+		estimatedCost: false,
+		discount: null,
+		pricingTier: null,
+		dataStorageCost: "0",
+		cached: false,
+		toolResults: null,
+	};
+}
+
+export const chatCompletionLogMiddleware = createMiddleware<ServerTypes>(
+	async (c, next) => {
+		const state: ChatCompletionLogState = {
+			pendingLogs: [],
+			clientErrorSynthesized: false,
+		};
+		c.set("chatCompletionLogState", state);
+
+		try {
+			await next();
+		} catch (error) {
+			state.caughtError = error;
+			throw error;
+		} finally {
+			try {
+				await state.streamCompletion;
+			} catch (error) {
+				logger.error(
+					"Error waiting for chat stream completion before flushing logs",
+					error instanceof Error ? error : new Error(String(error)),
+				);
+			}
+
+			const status =
+				state.caughtError instanceof HTTPException
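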
+					? state.caughtError.status
+					: c.res.status;
+			const hasQueuedClientError = state.pendingLogs.some(
+				(log) =>
+					log.finishReason === "client_error" ||
+					log.unifiedFinishReason === "client_error",
+			);
+
+			if (status >= 400 && status < 500 && !hasQueuedClientError) {
+				const synthesizedLog = getSynthesizedClientErrorLog(
+					buildBaseLogEntry(c),
+					status,
+					state.caughtError,
+				);
+				if (synthesizedLog) {
+					state.pendingLogs.push(synthesizedLog);
+					state.clientErrorSynthesized = true;
+				}
+			}
+
+			for (const logData of state.pendingLogs) {
+				try {
+					await _insertLog({
+						...logData,
+						internalContentFilter: state.internalContentFilter
+							? true
+							: logData.internalContentFilter,
+					});
+				} catch (error) {
+					logger.error(
+						"Failed to flush queued chat completion log",
+						error instanceof Error ? error : new Error(String(error)),
+					);
+				}
+			}
+		}
+	},
+);
diff --git a/apps/gateway/src/chat/tools/chat-log-context.ts b/apps/gateway/src/chat/tools/chat-log-context.ts
new file mode 100644
index 0000000000..f914a1f019
--- /dev/null
+++ b/apps/gateway/src/chat/tools/chat-log-context.ts
@@ -0,0 +1,126 @@
+import { logger } from "@llmgateway/logger";
+
+import {
+	createLogEntry,
+	type CreateLogEntryOptions,
+} from "./create-log-entry.js";
+
+import type { ServerTypes } from "@/vars.js";
+import type { LogInsertData } from "@llmgateway/db";
+import type { Context } from "hono";
+
+export interface ChatCompletionLogState {
+	pendingLogs: LogInsertData[];
+	baseLogOptions?: Partial<CreateLogEntryOptions>;
+	streamCompletion?: Promise<void>;
+	resolveStreamCompletion?: () => void;
+	caughtError?: unknown;
+	internalContentFilter?: boolean;
+	clientErrorSynthesized?: boolean;
+}
+
+function getOrCreateChatCompletionLogState(
+	c: Context<ServerTypes>,
+): ChatCompletionLogState {
+	const existingState = c.get("chatCompletionLogState");
+	if (existingState) {
+		return existingState;
+	}
+
+	const nextState: ChatCompletionLogState = {
+		pendingLogs: [],
+		clientErrorSynthesized: false,
+	};
+	c.set("chatCompletionLogState", nextState);
+	return nextState;
+}
+
+export function getChatCompletionLogState(
+	c: Context<ServerTypes>,
+): ChatCompletionLogState | undefined {
+	return c.get("chatCompletionLogState");
+}
+
+export function updateBaseLogOptions(
+	c: Context<ServerTypes>,
+	patch: Partial<CreateLogEntryOptions>,
+) {
+	const state = getOrCreateChatCompletionLogState(c);
+	state.baseLogOptions = {
+		...state.baseLogOptions,
+		...patch,
+	};
+}
+
+function hasCompleteBaseLogOptions(
+	options?: Partial<CreateLogEntryOptions>,
+): options is CreateLogEntryOptions {
+	return Boolean(
+		options &&
+			typeof options.requestId === "string" &&
+			options.project &&
+			options.apiKey &&
+			typeof options.usedModel === "string" &&
+			typeof options.usedProvider === "string" &&
+			typeof options.requestedModel === "string" &&
+			Array.isArray(options.messages) &&
+			options.customHeaders !== undefined &&
+			typeof options.debugMode === "boolean",
+	);
+}
+
+export function buildBaseLogEntry(
+	c: Context<ServerTypes>,
+	patch: Partial<CreateLogEntryOptions> = {},
+) {
+	const state = getOrCreateChatCompletionLogState(c);
+	const mergedOptions = {
+		...state.baseLogOptions,
+		...patch,
+	};
+
+	if (!hasCompleteBaseLogOptions(mergedOptions)) {
+		return null;
+	}
+
+	return createLogEntry(mergedOptions);
+}
+
+export function enqueueChatLog(
+	c: Context<ServerTypes>,
+	basePatch: Partial<CreateLogEntryOptions>,
+	logFields: Omit<LogInsertData, keyof ReturnType<typeof createLogEntry>>,
+) {
+	const state = getOrCreateChatCompletionLogState(c);
+	const baseLogEntry = buildBaseLogEntry(c, basePatch);
+
+	if (!baseLogEntry) {
+		logger.warn(
+			"Skipping chat log enqueue because base log options are incomplete",
+			{
+				requestId: state.baseLogOptions?.requestId,
+			},
+		);
+		return;
+	}
+
+	state.pendingLogs.push({
+		...baseLogEntry,
+		...logFields,
+	});
+}
+
+export function registerStreamCompletion(c: Context<ServerTypes>) {
+	const state = getOrCreateChatCompletionLogState(c);
+	state.streamCompletion ??= new Promise<void>((resolve) => {
+		state.resolveStreamCompletion = resolve;
+	});
+
+	return state.streamCompletion;
+}
+
+export function finishStreamCompletion(c: Context<ServerTypes>) {
+	const state = getOrCreateChatCompletionLogState(c);
+	state.resolveStreamCompletion?.();
+	state.resolveStreamCompletion = undefined;
+}
diff --git a/apps/gateway/src/test-utils/test-helpers.ts b/apps/gateway/src/test-utils/test-helpers.ts
index c4e7a5991e..42cfcc9d04 100644
--- a/apps/gateway/src/test-utils/test-helpers.ts
+++ b/apps/gateway/src/test-utils/test-helpers.ts
@@ -7,6 +7,10 @@ export async function clearCache() {
 	await redisClient.flushdb();
 }
 
+export async function processPendingLogs() {
+	await processLogQueue();
+}
+
 /**
  * Helper function to wait for logs to be processed by the worker
  * @param expectedCount The expected number of logs
diff --git a/apps/gateway/src/vars.ts b/apps/gateway/src/vars.ts
index bb9187e75c..dd4d785c14 100644
--- a/apps/gateway/src/vars.ts
+++ b/apps/gateway/src/vars.ts
@@ -1,8 +1,10 @@
+import type { ChatCompletionLogState } from "@/chat/tools/chat-log-context.js";
 import type { Env } from "hono/types";
 
 export interface ServerTypes extends Env {
 	Variables: {
 		traceId?: string;
 		spanId?: string;
+		chatCompletionLogState?: ChatCompletionLogState;
 	};
 }

From 9d8f7ea8526df4aa316163848942ca84ea89f6f1 Mon Sep 17 00:00:00 2001
From: Luca Steeb
Date: Sun, 29 Mar 2026 19:24:47 +0700
Subject: [PATCH 02/14] fix: avoid chat log stream deadlock

---
 .../chat/middleware/chat-completion-log.ts    | 99 +++++++++++--------
 1 file changed, 56 insertions(+), 43 deletions(-)

diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts
index aee0971676..c6cca72a15 100644
--- a/apps/gateway/src/chat/middleware/chat-completion-log.ts
+++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts
@@ -11,6 +11,7 @@ import { logger } from "@llmgateway/logger";
 
 import type { ServerTypes } from "@/vars.js";
 import type { LogInsertData } from "@llmgateway/db";
+import type { Context } from "hono";
 
 function getSynthesizedClientErrorLog(
 	baseLogEntry: ReturnType<typeof buildBaseLogEntry>,
@@ -79,6 +80,58 @@ function getSynthesizedClientErrorLog(
 	};
 }
 
+async function flushChatCompletionLogs(
+	c: Context<ServerTypes>,
+	state: ChatCompletionLogState,
+) {
+	try {
+		await state.streamCompletion;
+	} catch (error) {
+		logger.error(
+			"Error waiting for chat stream completion before flushing logs",
+			error instanceof Error ? error : new Error(String(error)),
+		);
+	}
+
+	const status =
+		state.caughtError instanceof HTTPException
+			? state.caughtError.status
+			: c.res.status;
+	const hasQueuedClientError = state.pendingLogs.some(
+		(log) =>
+			log.finishReason === "client_error" ||
+			log.unifiedFinishReason === "client_error",
+	);
+
+	if (status >= 400 && status < 500 && !hasQueuedClientError) {
+		const synthesizedLog = getSynthesizedClientErrorLog(
+			buildBaseLogEntry(c),
+			status,
+			state.caughtError,
+		);
+		if (synthesizedLog) {
+			state.pendingLogs.push(synthesizedLog);
+			state.clientErrorSynthesized = true;
+		}
+	}
+
+	for (const logData of state.pendingLogs) {
+		try {
+			await _insertLog({
+				...logData,
+				internalContentFilter: state.internalContentFilter
+					? 
true + : logData.internalContentFilter, + }); + } catch (error) { + logger.error( + "Failed to flush queued chat completion log", + error instanceof Error ? error : new Error(String(error)), + ); + } + } +} + export const chatCompletionLogMiddleware = createMiddleware( async (c, next) => { const state: ChatCompletionLogState = { @@ -93,52 +146,12 @@ export const chatCompletionLogMiddleware = createMiddleware( state.caughtError = error; throw error; } finally { - try { - await state.streamCompletion; - } catch (error) { + void flushChatCompletionLogs(c, state).catch((error) => { logger.error( - "Error waiting for chat stream completion before flushing logs", + "Unexpected failure flushing queued chat completion logs", error instanceof Error ? error : new Error(String(error)), ); - } - - const status = - state.caughtError instanceof HTTPException - ? state.caughtError.status - : c.res.status; - const hasQueuedClientError = state.pendingLogs.some( - (log) => - log.finishReason === "client_error" || - log.unifiedFinishReason === "client_error", - ); - - if (status >= 400 && status < 500 && !hasQueuedClientError) { - const synthesizedLog = getSynthesizedClientErrorLog( - buildBaseLogEntry(c), - status, - state.caughtError, - ); - if (synthesizedLog) { - state.pendingLogs.push(synthesizedLog); - state.clientErrorSynthesized = true; - } - } - - for (const logData of state.pendingLogs) { - try { - await _insertLog({ - ...logData, - internalContentFilter: state.internalContentFilter - ? true - : logData.internalContentFilter, - }); - } catch (error) { - logger.error( - "Failed to flush queued chat completion log", - error instanceof Error ? error : new Error(String(error)), - ); - } - } + }); } }, ); From 4059a4016c22dcf4dc16544034242e6eebbdec6f Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Mon, 30 Mar 2026 00:33:54 +0700 Subject: [PATCH 03/14] fix: flush non-stream chat logs --- apps/gateway/src/api.spec.ts | 12 ++++++++++++ .../src/chat/middleware/chat-completion-log.ts | 16 ++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/apps/gateway/src/api.spec.ts b/apps/gateway/src/api.spec.ts index 2bf6fe4d3c..390fab7f99 100644 --- a/apps/gateway/src/api.spec.ts +++ b/apps/gateway/src/api.spec.ts @@ -1162,6 +1162,7 @@ describe("api", () => { }); test("Reasoning effort error for unsupported model", async () => { + const requestId = "reasoning-effort-unsupported-request-id"; await db.insert(tables.apiKey).values({ id: "token-id", token: "real-token", @@ -1174,6 +1175,7 @@ describe("api", () => { method: "POST", headers: { "Content-Type": "application/json", + "x-request-id": requestId, Authorization: `Bearer real-token`, }, body: JSON.stringify({ @@ -1192,6 +1194,16 @@ describe("api", () => { const json = await res.json(); expect(json.message).toContain("does not support reasoning"); + + const log = await waitForLogByRequestId(requestId); + expect(log.finishReason).toBe("client_error"); + expect(log.unifiedFinishReason).toBe("client_error"); + + const matchingLogs = await db + .select() + .from(tables.log) + .where(eq(tables.log.requestId, requestId)); + expect(matchingLogs).toHaveLength(1); }); test("Max tokens validation error when exceeding model limit", async () => { diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts index c6cca72a15..619d16f78f 100644 --- a/apps/gateway/src/chat/middleware/chat-completion-log.ts +++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts @@ -146,12 +146,16 @@ 
export const chatCompletionLogMiddleware = createMiddleware( state.caughtError = error; throw error; } finally { - void flushChatCompletionLogs(c, state).catch((error) => { - logger.error( - "Unexpected failure flushing queued chat completion logs", - error instanceof Error ? error : new Error(String(error)), - ); - }); + if (state.streamCompletion) { + void flushChatCompletionLogs(c, state).catch((error) => { + logger.error( + "Unexpected failure flushing queued chat completion logs", + error instanceof Error ? error : new Error(String(error)), + ); + }); + } else { + await flushChatCompletionLogs(c, state); + } } }, ); From 13ba95e392a62392ad573a3043c55a60415329d7 Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Mon, 30 Mar 2026 18:22:41 +0700 Subject: [PATCH 04/14] refactor: drop insertLog alias --- apps/gateway/src/chat/middleware/chat-completion-log.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts index 619d16f78f..0dccf0dc3c 100644 --- a/apps/gateway/src/chat/middleware/chat-completion-log.ts +++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts @@ -5,7 +5,7 @@ import { buildBaseLogEntry, type ChatCompletionLogState, } from "@/chat/tools/chat-log-context.js"; -import { insertLog as _insertLog } from "@/lib/logs.js"; +import { insertLog } from "@/lib/logs.js"; import { logger } from "@llmgateway/logger"; @@ -117,7 +117,7 @@ async function flushChatCompletionLogs( for (const logData of state.pendingLogs) { try { - await _insertLog({ + await insertLog({ ...logData, internalContentFilter: state.internalContentFilter ? true From e28767cff14d6801d173b1bf352703046f2a7038 Mon Sep 17 00:00:00 2001 From: steebchen Date: Mon, 30 Mar 2026 15:21:30 +0000 Subject: [PATCH 05/14] fix: handle streaming terminal events --- apps/gateway/src/chat/chat.ts | 8 +- .../tools/transform-streaming-to-openai.ts | 77 ++++++++----------- 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts index 79ab0dc17a..f83553188f 100644 --- a/apps/gateway/src/chat/chat.ts +++ b/apps/gateway/src/chat/chat.ts @@ -5531,8 +5531,12 @@ chat.openapi(completions, async (c) => { } break; default: // OpenAI format - if (data.choices && data.choices[0]?.finish_reason) { - finishReason = data.choices[0].finish_reason; + if ( + transformedData?.choices && + transformedData.choices[0]?.finish_reason + ) { + finishReason = transformedData.choices[0].finish_reason; + sawProviderTerminalEvent = true; } break; } diff --git a/apps/gateway/src/chat/tools/transform-streaming-to-openai.ts b/apps/gateway/src/chat/tools/transform-streaming-to-openai.ts index a04b82e36e..ab9c37277d 100644 --- a/apps/gateway/src/chat/tools/transform-streaming-to-openai.ts +++ b/apps/gateway/src/chat/tools/transform-streaming-to-openai.ts @@ -18,6 +18,26 @@ export function transformStreamingToOpenai( ): any { let transformedData = data; + const mapOpenAIResponsesUsage = (responseUsage: any) => { + if (!responseUsage) { + return null; + } + + return { + prompt_tokens: responseUsage.input_tokens ?? 0, + completion_tokens: responseUsage.output_tokens ?? 0, + total_tokens: responseUsage.total_tokens ?? 
0, + ...(responseUsage.output_tokens_details?.reasoning_tokens && { + reasoning_tokens: responseUsage.output_tokens_details.reasoning_tokens, + }), + ...(responseUsage.input_tokens_details?.cached_tokens && { + prompt_tokens_details: { + cached_tokens: responseUsage.input_tokens_details.cached_tokens, + }, + }), + }; + }; + switch (usedProvider) { case "anthropic": { if (data.type === "content_block_delta" && data.delta?.text) { @@ -769,7 +789,13 @@ export function transformStreamingToOpenai( case "response.output_text.done": case "response.web_search_call.in_progress": case "response.web_search_call.searching": - case "response.web_search_call.completed": + case "response.web_search_call.completed": { + const responseStatus = data.response?.status; + const isCompletedTerminalEvent = + responseStatus === "completed" && + (data.type === "response.content_part.done" || + data.type === "response.output_text.done" || + data.type === "response.output_item.done"); transformedData = { id: data.response?.id ?? `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", @@ -780,12 +806,15 @@ export function transformStreamingToOpenai( { index: 0, delta: { role: "assistant" }, - finish_reason: null, + finish_reason: isCompletedTerminalEvent ? "stop" : null, }, ], - usage: null, + usage: isCompletedTerminalEvent + ? mapOpenAIResponsesUsage(data.response?.usage) + : null, }; break; + } case "response.reasoning_summary_part.added": case "response.reasoning_summary_text.delta": @@ -908,25 +937,6 @@ export function transformStreamingToOpenai( } case "response.completed": { - const responseUsage = data.response?.usage; - let usage = null; - if (responseUsage) { - usage = { - prompt_tokens: responseUsage.input_tokens ?? 0, - completion_tokens: responseUsage.output_tokens ?? 0, - total_tokens: responseUsage.total_tokens ?? 0, - ...(responseUsage.output_tokens_details?.reasoning_tokens && { - reasoning_tokens: - responseUsage.output_tokens_details.reasoning_tokens, - }), - ...(responseUsage.input_tokens_details?.cached_tokens && { - prompt_tokens_details: { - cached_tokens: - responseUsage.input_tokens_details.cached_tokens, - }, - }), - }; - } transformedData = { id: data.response?.id ?? `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", @@ -940,31 +950,12 @@ export function transformStreamingToOpenai( finish_reason: "stop", }, ], - usage, + usage: mapOpenAIResponsesUsage(data.response?.usage), }; break; } case "response.incomplete": { - const incompleteUsage = data.response?.usage; - let usage = null; - if (incompleteUsage) { - usage = { - prompt_tokens: incompleteUsage.input_tokens ?? 0, - completion_tokens: incompleteUsage.output_tokens ?? 0, - total_tokens: incompleteUsage.total_tokens ?? 
0, - ...(incompleteUsage.output_tokens_details?.reasoning_tokens && { - reasoning_tokens: - incompleteUsage.output_tokens_details.reasoning_tokens, - }), - ...(incompleteUsage.input_tokens_details?.cached_tokens && { - prompt_tokens_details: { - cached_tokens: - incompleteUsage.input_tokens_details.cached_tokens, - }, - }), - }; - } const reason = data.response?.incomplete_details?.reason; // Map incomplete reason to appropriate finish_reason const mappedFinishReason = @@ -982,7 +973,7 @@ export function transformStreamingToOpenai( finish_reason: mappedFinishReason, }, ], - usage, + usage: mapOpenAIResponsesUsage(data.response?.usage), }; break; } From cb19acee228dd114baa9f8f2f4662b00a6847bbd Mon Sep 17 00:00:00 2001 From: steebchen Date: Mon, 30 Mar 2026 16:28:27 +0000 Subject: [PATCH 06/14] Revert "fix: handle streaming terminal events" This reverts commit e28767cff14d6801d173b1bf352703046f2a7038. --- apps/gateway/src/chat/chat.ts | 8 +- .../tools/transform-streaming-to-openai.ts | 77 +++++++++++-------- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts index f83553188f..79ab0dc17a 100644 --- a/apps/gateway/src/chat/chat.ts +++ b/apps/gateway/src/chat/chat.ts @@ -5531,12 +5531,8 @@ chat.openapi(completions, async (c) => { } break; default: // OpenAI format - if ( - transformedData?.choices && - transformedData.choices[0]?.finish_reason - ) { - finishReason = transformedData.choices[0].finish_reason; - sawProviderTerminalEvent = true; + if (data.choices && data.choices[0]?.finish_reason) { + finishReason = data.choices[0].finish_reason; } break; } diff --git a/apps/gateway/src/chat/tools/transform-streaming-to-openai.ts b/apps/gateway/src/chat/tools/transform-streaming-to-openai.ts index ab9c37277d..a04b82e36e 100644 --- a/apps/gateway/src/chat/tools/transform-streaming-to-openai.ts +++ b/apps/gateway/src/chat/tools/transform-streaming-to-openai.ts @@ -18,26 +18,6 @@ export function transformStreamingToOpenai( ): any { let transformedData = data; - const mapOpenAIResponsesUsage = (responseUsage: any) => { - if (!responseUsage) { - return null; - } - - return { - prompt_tokens: responseUsage.input_tokens ?? 0, - completion_tokens: responseUsage.output_tokens ?? 0, - total_tokens: responseUsage.total_tokens ?? 0, - ...(responseUsage.output_tokens_details?.reasoning_tokens && { - reasoning_tokens: responseUsage.output_tokens_details.reasoning_tokens, - }), - ...(responseUsage.input_tokens_details?.cached_tokens && { - prompt_tokens_details: { - cached_tokens: responseUsage.input_tokens_details.cached_tokens, - }, - }), - }; - }; - switch (usedProvider) { case "anthropic": { if (data.type === "content_block_delta" && data.delta?.text) { @@ -789,13 +769,7 @@ export function transformStreamingToOpenai( case "response.output_text.done": case "response.web_search_call.in_progress": case "response.web_search_call.searching": - case "response.web_search_call.completed": { - const responseStatus = data.response?.status; - const isCompletedTerminalEvent = - responseStatus === "completed" && - (data.type === "response.content_part.done" || - data.type === "response.output_text.done" || - data.type === "response.output_item.done"); + case "response.web_search_call.completed": transformedData = { id: data.response?.id ?? `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", @@ -806,15 +780,12 @@ export function transformStreamingToOpenai( { index: 0, delta: { role: "assistant" }, - finish_reason: isCompletedTerminalEvent ? 
"stop" : null, + finish_reason: null, }, ], - usage: isCompletedTerminalEvent - ? mapOpenAIResponsesUsage(data.response?.usage) - : null, + usage: null, }; break; - } case "response.reasoning_summary_part.added": case "response.reasoning_summary_text.delta": @@ -937,6 +908,25 @@ export function transformStreamingToOpenai( } case "response.completed": { + const responseUsage = data.response?.usage; + let usage = null; + if (responseUsage) { + usage = { + prompt_tokens: responseUsage.input_tokens ?? 0, + completion_tokens: responseUsage.output_tokens ?? 0, + total_tokens: responseUsage.total_tokens ?? 0, + ...(responseUsage.output_tokens_details?.reasoning_tokens && { + reasoning_tokens: + responseUsage.output_tokens_details.reasoning_tokens, + }), + ...(responseUsage.input_tokens_details?.cached_tokens && { + prompt_tokens_details: { + cached_tokens: + responseUsage.input_tokens_details.cached_tokens, + }, + }), + }; + } transformedData = { id: data.response?.id ?? `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", @@ -950,12 +940,31 @@ export function transformStreamingToOpenai( finish_reason: "stop", }, ], - usage: mapOpenAIResponsesUsage(data.response?.usage), + usage, }; break; } case "response.incomplete": { + const incompleteUsage = data.response?.usage; + let usage = null; + if (incompleteUsage) { + usage = { + prompt_tokens: incompleteUsage.input_tokens ?? 0, + completion_tokens: incompleteUsage.output_tokens ?? 0, + total_tokens: incompleteUsage.total_tokens ?? 0, + ...(incompleteUsage.output_tokens_details?.reasoning_tokens && { + reasoning_tokens: + incompleteUsage.output_tokens_details.reasoning_tokens, + }), + ...(incompleteUsage.input_tokens_details?.cached_tokens && { + prompt_tokens_details: { + cached_tokens: + incompleteUsage.input_tokens_details.cached_tokens, + }, + }), + }; + } const reason = data.response?.incomplete_details?.reason; // Map incomplete reason to appropriate finish_reason const mappedFinishReason = @@ -973,7 +982,7 @@ export function transformStreamingToOpenai( finish_reason: mappedFinishReason, }, ], - usage: mapOpenAIResponsesUsage(data.response?.usage), + usage, }; break; } From b9c865fd938a06d7a605633ec1a092dda388ebce Mon Sep 17 00:00:00 2001 From: "Luca Steeb (bot)" Date: Sun, 5 Apr 2026 08:15:19 +0000 Subject: [PATCH 07/14] chore(autofix): apply diff --- .../chat/middleware/chat-completion-log.ts | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts index 2011dc0aaf..ceaf26d18f 100644 --- a/apps/gateway/src/chat/middleware/chat-completion-log.ts +++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts @@ -117,17 +117,20 @@ async function flushChatCompletionLogs( for (const logData of state.pendingLogs) { try { - await insertLog({ - ...logData, - ...(state.logIdOverride && !logData.retried - ? { id: state.logIdOverride } - : {}), - responsesApiData: - logData.responsesApiData ?? state.responsesApiData ?? null, - internalContentFilter: state.internalContentFilter - ? true - : logData.internalContentFilter, - }, { syncInsert: state.syncInsert }); + await insertLog( + { + ...logData, + ...(state.logIdOverride && !logData.retried + ? { id: state.logIdOverride } + : {}), + responsesApiData: + logData.responsesApiData ?? state.responsesApiData ?? null, + internalContentFilter: state.internalContentFilter + ? 
true + : logData.internalContentFilter, + }, + { syncInsert: state.syncInsert }, + ); } catch (error) { logger.error( "Failed to flush queued chat completion log", From 4ecd85778d2a05356842afbb667cc799af0804f7 Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Sun, 5 Apr 2026 17:57:38 +0700 Subject: [PATCH 08/14] fix: avoid duplicate client error logs --- .../middleware/chat-completion-log.spec.ts | 32 +++++++++++++++++++ .../chat/middleware/chat-completion-log.ts | 14 ++++---- 2 files changed, 40 insertions(+), 6 deletions(-) create mode 100644 apps/gateway/src/chat/middleware/chat-completion-log.spec.ts diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.spec.ts b/apps/gateway/src/chat/middleware/chat-completion-log.spec.ts new file mode 100644 index 0000000000..0591052c5c --- /dev/null +++ b/apps/gateway/src/chat/middleware/chat-completion-log.spec.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from "vitest"; + +import { shouldSynthesizeClientError } from "./chat-completion-log.js"; + +describe("shouldSynthesizeClientError", () => { + it("synthesizes for 4xx responses when no logs are queued", () => { + expect(shouldSynthesizeClientError(400, [])).toBe(true); + expect(shouldSynthesizeClientError(429, [])).toBe(true); + }); + + it("skips synthesis when any terminal log is already queued", () => { + expect( + shouldSynthesizeClientError(400, [ + { + finishReason: "canceled", + } as never, + ]), + ).toBe(false); + expect( + shouldSynthesizeClientError(400, [ + { + finishReason: "content_filter", + } as never, + ]), + ).toBe(false); + }); + + it("skips synthesis for non-4xx responses", () => { + expect(shouldSynthesizeClientError(200, [])).toBe(false); + expect(shouldSynthesizeClientError(500, [])).toBe(false); + }); +}); diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts index ceaf26d18f..1e520c1921 100644 --- a/apps/gateway/src/chat/middleware/chat-completion-log.ts +++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts @@ -80,6 +80,13 @@ function getSynthesizedClientErrorLog( }; } +export function shouldSynthesizeClientError( + status: number, + pendingLogs: LogInsertData[], +): boolean { + return status >= 400 && status < 500 && pendingLogs.length === 0; +} + async function flushChatCompletionLogs( c: Context, state: ChatCompletionLogState, @@ -97,13 +104,8 @@ async function flushChatCompletionLogs( state.caughtError instanceof HTTPException ? 
state.caughtError.status : c.res.status; - const hasQueuedClientError = state.pendingLogs.some( - (log) => - log.finishReason === "client_error" || - log.unifiedFinishReason === "client_error", - ); - if (status >= 400 && status < 500 && !hasQueuedClientError) { + if (shouldSynthesizeClientError(status, state.pendingLogs)) { const synthesizedLog = getSynthesizedClientErrorLog( buildBaseLogEntry(c), status, From b852c5183c87788d74daf0140b10786c49f34991 Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Sun, 5 Apr 2026 18:00:07 +0700 Subject: [PATCH 09/14] fix: warn on client errors in activity --- .../activity/[logId]/log-detail-client.tsx | 30 ++++++++++-- apps/ui/src/components/dashboard/log-card.tsx | 48 +++++++++++++++---- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/apps/ui/src/app/dashboard/[orgId]/[projectId]/activity/[logId]/log-detail-client.tsx b/apps/ui/src/app/dashboard/[orgId]/[projectId]/activity/[logId]/log-detail-client.tsx index 1a4f318e8b..74bdb73571 100644 --- a/apps/ui/src/app/dashboard/[orgId]/[projectId]/activity/[logId]/log-detail-client.tsx +++ b/apps/ui/src/app/dashboard/[orgId]/[projectId]/activity/[logId]/log-detail-client.tsx @@ -113,8 +113,14 @@ function StatusIndicator({ log }: { log: Partial }) { let color = "text-emerald-500"; let bgColor = "bg-emerald-500/10"; let label = "Completed"; + const isClientError = log.unifiedFinishReason === "client_error"; - if (log.hasError || log.unifiedFinishReason === "error") { + if (isClientError) { + StatusIcon = AlertCircle; + color = "text-orange-500"; + bgColor = "bg-orange-500/10"; + label = "Client Error"; + } else if (log.hasError || log.unifiedFinishReason === "error") { StatusIcon = AlertCircle; color = "text-red-500"; bgColor = "bg-red-500/10"; @@ -897,23 +903,37 @@ export function LogDetailClient({ {log.hasError && !!log.errorDetails && (
[JSX hunk garbled during extraction; the surviving text shows the error-details panel's rows being re-marked-up: "Status Code" with {log.errorDetails.statusCode}, "Status Text" with {log.errorDetails.statusText}, and "Error Message" with the value line below]
 									{log.errorDetails.responseText}
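
The JSX in this hunk (and in the card hunk below) did not survive extraction, so here is a minimal sketch of the status branching this commit introduces. The icon names and color classes are taken from the surviving TypeScript lines in these diffs; the "Failed" label and the helper wrapper itself are illustrative assumptions, not the literal component code:

// Sketch of the client_error-aware status branching added to both activity
// views. Colors mirror the surviving diff lines: orange marks a client
// error, red a hard failure, emerald a successful completion.
import { AlertCircle, CheckCircle2 } from "lucide-react";

interface LogStatusInput {
	hasError?: boolean;
	unifiedFinishReason?: string | null;
}

export function getLogStatusPresentation(log: LogStatusInput) {
	// Default: completed successfully.
	let StatusIcon = CheckCircle2;
	let color = "text-emerald-500";
	let bgColor = "bg-emerald-500/10";
	let label = "Completed";

	if (log.unifiedFinishReason === "client_error") {
		// Client errors render as orange warnings rather than red failures.
		StatusIcon = AlertCircle;
		color = "text-orange-500";
		bgColor = "bg-orange-500/10";
		label = "Client Error";
	} else if (log.hasError || log.unifiedFinishReason === "error") {
		StatusIcon = AlertCircle;
		color = "text-red-500";
		bgColor = "bg-red-500/10";
		label = "Failed"; // label assumed; not visible in the extracted diff
	}

	return { StatusIcon, color, bgColor, label };
}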
 								
diff --git a/apps/ui/src/components/dashboard/log-card.tsx b/apps/ui/src/components/dashboard/log-card.tsx index d4678f64fd..6a8bf561eb 100644 --- a/apps/ui/src/components/dashboard/log-card.tsx +++ b/apps/ui/src/components/dashboard/log-card.tsx @@ -101,8 +101,13 @@ export function LogCard({ let StatusIcon = CheckCircle2; let color = "text-green-500"; let bgColor = "bg-green-100"; + const isClientError = log.unifiedFinishReason === "client_error"; - if (log.hasError || log.unifiedFinishReason === "error") { + if (isClientError) { + StatusIcon = AlertCircle; + color = "text-orange-500"; + bgColor = "bg-orange-100"; + } else if (log.hasError || log.unifiedFinishReason === "error") { StatusIcon = AlertCircle; color = "text-red-500"; bgColor = "bg-red-100"; @@ -190,11 +195,18 @@ export function LogCard({ )} {log.unifiedFinishReason} @@ -1061,15 +1073,31 @@ export function LogCard({ )} {log.hasError && !!log.errorDetails && (
[JSX hunk garbled during extraction; the surviving text shows the card's "Error Details" heading plus its "Status Code" ({log.errorDetails.statusCode}) and "Status Text" ({log.errorDetails.statusText}) rows being re-marked-up, followed by the surviving lines below]
From d6cac672c56462ffec1d4adb56cbc8c20fc1d2d4 Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Sun, 5 Apr 2026 18:33:37 +0700 Subject: [PATCH 10/14] fix: log validation client errors --- apps/gateway/src/api.spec.ts | 54 +++++ .../chat/middleware/chat-completion-log.ts | 203 ++++++++++++++++-- .../src/chat/tools/chat-log-context.ts | 1 + 3 files changed, 241 insertions(+), 17 deletions(-) diff --git a/apps/gateway/src/api.spec.ts b/apps/gateway/src/api.spec.ts index 40a170579d..dd780a8aea 100644 --- a/apps/gateway/src/api.spec.ts +++ b/apps/gateway/src/api.spec.ts @@ -1208,6 +1208,60 @@ describe("api", () => { expect(matchingLogs).toHaveLength(1); }); + test("Schema validation errors are logged as client_error", async () => { + const requestId = "schema-validation-client-error-request-id"; + await db.insert(tables.apiKey).values({ + id: "token-id-schema-validation", + token: "real-token-schema-validation", + projectId: "project-id", + description: "Test API Key", + createdBy: "user-id", + }); + + const res = await app.request("/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-request-id": requestId, + Authorization: "Bearer real-token-schema-validation", + }, + body: JSON.stringify({ + model: "gpt-4o-mini", + messages: [ + { + role: "user", + content: 5555, + }, + ], + }), + }); + + expect(res.status).toBe(400); + + const json = await res.json(); + expect(json.success).toBe(false); + expect(JSON.stringify(json)).toContain("invalid_union"); + + const log = await waitForLogByRequestId(requestId); + expect(log.finishReason).toBe("client_error"); + expect(log.unifiedFinishReason).toBe("client_error"); + expect(log.errorDetails?.statusCode).toBe(400); + expect(log.errorDetails?.responseText).toContain("invalid_union"); + expect(log.errorDetails?.responseText).toContain("messages"); + expect(log.messages).toEqual([ + { + role: "user", + content: 5555, + }, + ]); + + const matchingLogs = await db + .select() + .from(tables.log) + .where(eq(tables.log.requestId, requestId)); + expect(matchingLogs).toHaveLength(1); + }); + test("Max tokens validation error when exceeding model limit", async () => { await db.insert(tables.apiKey).values({ id: "token-id", diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts index 1e520c1921..d5742b4d3d 100644 --- a/apps/gateway/src/chat/middleware/chat-completion-log.ts +++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts @@ -4,30 +4,199 @@ import { HTTPException } from "hono/http-exception"; import { buildBaseLogEntry, type ChatCompletionLogState, + updateBaseLogOptions, } from "@/chat/tools/chat-log-context.js"; +import { extractCustomHeaders } from "@/chat/tools/extract-custom-headers.js"; +import { parseModelInput } from "@/chat/tools/parse-model-input.js"; +import { validateSource } from "@/chat/tools/validate-source.js"; +import { assertApiKeyWithinUsageLimits } from "@/lib/api-key-usage-limits.js"; +import { findApiKeyByToken, findProjectById } from "@/lib/cached-queries.js"; +import { parseApiToken } from "@/lib/extract-api-token.js"; import { insertLog } from "@/lib/logs.js"; +import { shortid } from "@llmgateway/db"; import { logger } from "@llmgateway/logger"; import type { ServerTypes } from "@/vars.js"; import type { LogInsertData } from "@llmgateway/db"; import type { Context } from "hono"; -function getSynthesizedClientErrorLog( - baseLogEntry: ReturnType, +function getRequestId(c: Context): string { + return 
c.req.header("x-request-id") ?? shortid(40); +} + +function getDebugMode(c: Context): boolean { + return ( + c.req.header("x-debug") === "true" || + process.env.FORCE_DEBUG_MODE === "true" || + process.env.NODE_ENV !== "production" + ); +} + +function getSource(c: Context): string | undefined { + let source = validateSource( + c.req.header("x-source"), + c.req.header("HTTP-Referer"), + ); + const userAgent = c.req.header("User-Agent"); + + if (!source && userAgent && /^claude-cli\/.+/.test(userAgent)) { + source = "claude.com/claude-code"; + } + + return source; +} + +function getRawRequestDetails(rawRequest: unknown): { + messages: unknown[]; + requestedModel: string; + requestedProvider?: string; + usedModelMapping?: string; + usedProvider: string; +} { + const messages = + typeof rawRequest === "object" && + rawRequest !== null && + "messages" in rawRequest && + Array.isArray(rawRequest.messages) + ? rawRequest.messages + : []; + + const requestedModel = + typeof rawRequest === "object" && + rawRequest !== null && + "model" in rawRequest && + typeof rawRequest.model === "string" + ? rawRequest.model + : "unknown"; + + if (requestedModel === "unknown") { + return { + messages, + requestedModel, + usedProvider: "llmgateway", + }; + } + + try { + const parsedModel = parseModelInput(requestedModel); + return { + messages, + requestedModel, + requestedProvider: parsedModel.requestedProvider, + usedModelMapping: parsedModel.requestedModel, + usedProvider: parsedModel.requestedProvider ?? "llmgateway", + }; + } catch { + return { + messages, + requestedModel, + usedProvider: "llmgateway", + }; + } +} + +async function buildFallbackBaseLogEntry( + c: Context, + state: ChatCompletionLogState, +): Promise | null> { + const existingBaseLogEntry = buildBaseLogEntry(c); + if (existingBaseLogEntry) { + return existingBaseLogEntry; + } + + const token = parseApiToken(c); + if (!token) { + return null; + } + + const apiKey = await findApiKeyByToken(token); + if (!apiKey || apiKey.status !== "active") { + return null; + } + + try { + assertApiKeyWithinUsageLimits(apiKey); + } catch { + return null; + } + + const project = await findProjectById(apiKey.projectId); + if (!project || project.status === "deleted") { + return null; + } + + const rawRequest = await state.rawRequestPreviewPromise?.catch( + () => undefined, + ); + const rawRequestDetails = getRawRequestDetails(rawRequest); + + updateBaseLogOptions(c, { + requestId: getRequestId(c), + project, + apiKey, + usedModel: rawRequestDetails.requestedModel, + usedModelMapping: rawRequestDetails.usedModelMapping, + usedProvider: rawRequestDetails.usedProvider, + requestedModel: rawRequestDetails.requestedModel, + requestedProvider: rawRequestDetails.requestedProvider, + messages: rawRequestDetails.messages, + customHeaders: extractCustomHeaders(c), + debugMode: getDebugMode(c), + userAgent: c.req.header("User-Agent") ?? undefined, + source: getSource(c), + rawRequest, + }); + + return buildBaseLogEntry(c); +} + +async function getSynthesizedClientErrorDetails( + c: Context, + error: unknown, +): Promise<{ + responseText: string; + statusText: string; +}> { + if (error instanceof HTTPException) { + return { + responseText: error.message, + statusText: error.res?.statusText ?? "Client Error", + }; + } + + try { + const responseText = await c.res.clone().text(); + return { + responseText: responseText || "Client error", + statusText: c.res.statusText ?? "Client Error", + }; + } catch { + return { + responseText: error instanceof Error ? 
error.message : "Client error", + statusText: + error instanceof Error + ? error.name + : (c.res.statusText ?? "Client Error"), + }; + } +} + +async function getSynthesizedClientErrorLog( + c: Context, + state: ChatCompletionLogState, status: number, error: unknown, -): LogInsertData | null { +): Promise { + const baseLogEntry = await buildFallbackBaseLogEntry(c, state); if (!baseLogEntry) { return null; } - const responseText = - error instanceof HTTPException - ? error.message - : error instanceof Error - ? error.message - : "Client error"; + const { responseText, statusText } = await getSynthesizedClientErrorDetails( + c, + error, + ); return { ...baseLogEntry, @@ -50,12 +219,7 @@ function getSynthesizedClientErrorLog( canceled: false, errorDetails: { statusCode: status, - statusText: - error instanceof HTTPException - ? "Client Error" - : error instanceof Error - ? error.name - : "Client Error", + statusText, responseText, }, duration: 0, @@ -106,8 +270,9 @@ async function flushChatCompletionLogs( : c.res.status; if (shouldSynthesizeClientError(status, state.pendingLogs)) { - const synthesizedLog = getSynthesizedClientErrorLog( - buildBaseLogEntry(c), + const synthesizedLog = await getSynthesizedClientErrorLog( + c, + state, status, state.caughtError, ); @@ -147,6 +312,10 @@ export const chatCompletionLogMiddleware = createMiddleware( const state: ChatCompletionLogState = { pendingLogs: [], clientErrorSynthesized: false, + rawRequestPreviewPromise: c.req.raw + .clone() + .json() + .catch(() => undefined), }; c.set("chatCompletionLogState", state); diff --git a/apps/gateway/src/chat/tools/chat-log-context.ts b/apps/gateway/src/chat/tools/chat-log-context.ts index 12fd6fd12c..ba507e9355 100644 --- a/apps/gateway/src/chat/tools/chat-log-context.ts +++ b/apps/gateway/src/chat/tools/chat-log-context.ts @@ -12,6 +12,7 @@ import type { Context } from "hono"; export interface ChatCompletionLogState { pendingLogs: LogInsertData[]; baseLogOptions?: Partial; + rawRequestPreviewPromise?: Promise; streamCompletion?: Promise; resolveStreamCompletion?: () => void; caughtError?: unknown; From a664ea420a9fb1fca37b80ed32d308e2fd5cf1fe Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Sun, 5 Apr 2026 18:44:47 +0700 Subject: [PATCH 11/14] fix: lazily parse fallback body --- .../chat/middleware/chat-completion-log.ts | 19 ++++++++++++------- .../src/chat/tools/chat-log-context.ts | 1 + 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts index d5742b4d3d..56c6781e8d 100644 --- a/apps/gateway/src/chat/middleware/chat-completion-log.ts +++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts @@ -96,6 +96,16 @@ function getRawRequestDetails(rawRequest: unknown): { } } +async function getRawRequestPreview( + state: ChatCompletionLogState, +): Promise { + state.rawRequestPreviewPromise ??= state.rawRequestPreview + ?.json() + .catch(() => undefined); + + return state.rawRequestPreviewPromise; +} + async function buildFallbackBaseLogEntry( c: Context, state: ChatCompletionLogState, @@ -126,9 +136,7 @@ async function buildFallbackBaseLogEntry( return null; } - const rawRequest = await state.rawRequestPreviewPromise?.catch( - () => undefined, - ); + const rawRequest = await getRawRequestPreview(state); const rawRequestDetails = getRawRequestDetails(rawRequest); updateBaseLogOptions(c, { @@ -312,10 +320,7 @@ export const chatCompletionLogMiddleware = createMiddleware( const state: 
ChatCompletionLogState = { pendingLogs: [], clientErrorSynthesized: false, - rawRequestPreviewPromise: c.req.raw - .clone() - .json() - .catch(() => undefined), + rawRequestPreview: c.req.raw.clone(), }; c.set("chatCompletionLogState", state); diff --git a/apps/gateway/src/chat/tools/chat-log-context.ts b/apps/gateway/src/chat/tools/chat-log-context.ts index ba507e9355..0e860ebe35 100644 --- a/apps/gateway/src/chat/tools/chat-log-context.ts +++ b/apps/gateway/src/chat/tools/chat-log-context.ts @@ -12,6 +12,7 @@ import type { Context } from "hono"; export interface ChatCompletionLogState { pendingLogs: LogInsertData[]; baseLogOptions?: Partial; + rawRequestPreview?: Request; rawRequestPreviewPromise?: Promise; streamCompletion?: Promise; resolveStreamCompletion?: () => void; From 30e90abb1e8866552df077f93b937a102963ef2d Mon Sep 17 00:00:00 2001 From: "Luca Steeb (bot)" Date: Sun, 5 Apr 2026 11:51:25 +0000 Subject: [PATCH 12/14] chore(autofix): apply diff --- apps/gateway/src/chat/middleware/chat-completion-log.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts index 56c6781e8d..d1da03c180 100644 --- a/apps/gateway/src/chat/middleware/chat-completion-log.ts +++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts @@ -103,7 +103,7 @@ async function getRawRequestPreview( ?.json() .catch(() => undefined); - return state.rawRequestPreviewPromise; + return await state.rawRequestPreviewPromise; } async function buildFallbackBaseLogEntry( From e6c4e6987630875885aff0cdcf1173856ca1748f Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Wed, 22 Apr 2026 18:31:21 +0700 Subject: [PATCH 13/14] fix: add dataStorageCost to cancelled streaming cost stub --- apps/gateway/src/chat/chat.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts index 0d4b0b5b61..6d8f80a32d 100644 --- a/apps/gateway/src/chat/chat.ts +++ b/apps/gateway/src/chat/chat.ts @@ -6523,6 +6523,7 @@ chat.openapi(completions, async (c) => { imageInputCost: null, imageOutputCost: null, totalCost: null, + dataStorageCost: null as number | null, promptTokens: null, completionTokens: null, cachedTokens: null, From 54e3806bb2789ebcd5743ef2ad8fdd013f5b0e49 Mon Sep 17 00:00:00 2001 From: Luca Steeb Date: Mon, 27 Apr 2026 02:52:30 +0700 Subject: [PATCH 14/14] fix: port retry/routing features into log middleware Adopts main's chat.ts wholesale and rewires its insertLog wrappers to push entries onto the middleware's pendingLogs queue, so the branch gets the immediate-SSE-error retry, same-provider alternate-key retry, buildRoutingAttempt logId/apiKeyHash stamping, and request_id metadata flow without losing the middleware-based 4xx synthesis. 
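
A condensed sketch of the lifecycle this builds on, with the control flow assumed from the state fields and the flush loop visible in the earlier patches (the real middleware also synthesizes 4xx client-error logs and applies logIdOverride, responsesApiData, and syncInsert at flush time):

// Assumed shape of chatCompletionLogMiddleware: create the per-request
// queue, run the handler, then flush every queued entry once any SSE
// stream has settled. Error handling and 4xx synthesis are elided.
import { createMiddleware } from "hono/factory";

import { insertLog } from "@/lib/logs.js";

import type { ChatCompletionLogState } from "@/chat/tools/chat-log-context.js";

export const chatCompletionLogSketch = createMiddleware(async (c, next) => {
	const state: ChatCompletionLogState = {
		pendingLogs: [],
		clientErrorSynthesized: false,
		rawRequestPreview: c.req.raw.clone(),
	};
	c.set("chatCompletionLogState", state);

	try {
		await next();
	} catch (error) {
		// Remembered so the flush can derive a status for 4xx synthesis.
		state.caughtError = error;
		throw error;
	} finally {
		// Streamed responses resolve this promise when the SSE loop ends.
		await state.streamCompletion;
		for (const logData of state.pendingLogs) {
			await insertLog(logData, { syncInsert: state.syncInsert });
		}
	}
});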
Co-Authored-By: Claude Opus 4.7 --- apps/gateway/src/chat/chat.ts | 4227 ++++++++++------- .../chat/middleware/chat-completion-log.ts | 6 + .../src/chat/tools/chat-log-context.ts | 1 + 3 files changed, 2577 insertions(+), 1657 deletions(-) diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts index 6d8f80a32d..8615f01669 100644 --- a/apps/gateway/src/chat/chat.ts +++ b/apps/gateway/src/chat/chat.ts @@ -5,7 +5,13 @@ import { streamSSE } from "hono/streaming"; import { extractFirstSseEventData } from "@/chat/tools/extract-first-sse-event-data.js"; import { validateSource } from "@/chat/tools/validate-source.js"; -import { reportKeyError, reportKeySuccess } from "@/lib/api-key-health.js"; +import { getApiKeyFingerprint } from "@/lib/api-key-fingerprint.js"; +import { + reportKeyError, + reportKeySuccess, + reportTrackedKeyError, + reportTrackedKeySuccess, +} from "@/lib/api-key-health.js"; import { assertApiKeyWithinUsageLimits } from "@/lib/api-key-usage-limits.js"; import { findApiKeyByToken, @@ -22,6 +28,8 @@ import { throwIamException, validateModelAccess } from "@/lib/iam.js"; import { calculateDataStorageCost, getUnifiedFinishReason, + isContentFilterFinishReason, + insertLog as _insertLog, } from "@/lib/logs.js"; import { checkProviderRateLimit, @@ -59,6 +67,7 @@ import { type InferSelectModel, isCachingEnabled, metricsKey, + type LogInsertData, shortid, type tables, type ProviderMetrics, @@ -92,11 +101,8 @@ import { import { chatCompletionLogMiddleware } from "./middleware/chat-completion-log.js"; import { completionsRequestSchema } from "./schemas/completions.js"; import { - enqueueChatLog, finishStreamCompletion, registerStreamCompletion, - updateBaseLogOptions, - updateLogInsertOptions, } from "./tools/chat-log-context.js"; import { checkContentFilter, @@ -106,6 +112,7 @@ import { } from "./tools/check-content-filter.js"; import { convertImagesToBase64 } from "./tools/convert-images-to-base64.js"; import { countInputImages } from "./tools/count-input-images.js"; +import { createLogEntry } from "./tools/create-log-entry.js"; import { estimateTokensFromContent } from "./tools/estimate-tokens-from-content.js"; import { estimateTokens } from "./tools/estimate-tokens.js"; import { @@ -123,6 +130,7 @@ import { getProviderEnv } from "./tools/get-provider-env.js"; import { hasMeaningfulAssistantOutput } from "./tools/has-meaningful-assistant-output.js"; import { healJsonResponse } from "./tools/heal-json-response.js"; import { isModelTrulyFree } from "./tools/is-model-truly-free.js"; +import { mapFinishReasonToOpenai } from "./tools/map-finish-reason-to-openai.js"; import { messagesContainImages } from "./tools/messages-contain-images.js"; import { mightBeCompleteJson } from "./tools/might-be-complete-json.js"; import { normalizeStreamingError } from "./tools/normalize-streaming-error.js"; @@ -143,6 +151,7 @@ import { import { type RoutingAttempt, getErrorType, + isRetryableErrorType, MAX_RETRIES, providerRetryKey, selectNextProvider, @@ -152,7 +161,12 @@ import { encodeChatMessages, messageContentToString, } from "./tools/tokenizer.js"; -import { transformResponseToOpenai } from "./tools/transform-response-to-openai.js"; +import { + applyExtendedUsageFields, + stripRequestScopedMetadataFromOpenAiResponse, + transformResponseToOpenai, + withCurrentRequestMetadataOnOpenAiResponse, +} from "./tools/transform-response-to-openai.js"; import { transformStreamingToOpenai } from "./tools/transform-streaming-to-openai.js"; import { validateFreeModelUsage } from 
"./tools/validate-free-model-usage.js"; import { validateModelCapabilities } from "./tools/validate-model-capabilities.js"; @@ -167,6 +181,27 @@ import type { ServerTypes } from "@/vars.js"; * - Non-default regions only pass if a region-specific env key exists * (e.g. LLM_ALIBABA_API_KEY__US_VIRGINIA). */ +function toDataStorageCostNumber( + promptTokens: number | string | null | undefined, + cachedTokens: number | string | null | undefined, + completionTokens: number | string | null | undefined, + reasoningTokens: number | string | null | undefined, + retentionLevel: "retain" | "none" | null, +): number | null { + if (retentionLevel === "none") { + return null; + } + const str = calculateDataStorageCost( + promptTokens, + cachedTokens, + completionTokens, + reasoningTokens, + retentionLevel, + ); + const num = Number(str); + return Number.isFinite(num) ? num : null; +} + function filterRegionsByAvailableKeys( expandedProviders: ProviderModelMapping[], ): ProviderModelMapping[] { @@ -450,6 +485,48 @@ function addContentFilterRoutingMetadata( }; } +function withUsedApiKeyHash( + routingMetadata: RoutingMetadata | undefined, + usedApiKeyHash: string | undefined, +): RoutingMetadata | undefined { + if (!routingMetadata || !usedApiKeyHash) { + return routingMetadata; + } + + if (routingMetadata.usedApiKeyHash === usedApiKeyHash) { + return routingMetadata; + } + + return { + ...routingMetadata, + usedApiKeyHash, + }; +} + +function buildRoutingAttempt( + provider: string, + model: string, + statusCode: number, + errorType: string, + succeeded: boolean, + options?: { + region?: string; + apiKeyHash?: string; + logId?: string; + }, +): RoutingAttempt { + return { + provider, + model, + ...(options?.region && { region: options.region }), + status_code: statusCode, + error_type: errorType, + succeeded, + ...(options?.apiKeyHash && { apiKeyHash: options.apiKeyHash }), + ...(options?.logId && { logId: options.logId }), + }; +} + function usesGoogleQueryToken(provider: string): boolean { return ( provider === "google-ai-studio" || @@ -635,7 +712,7 @@ export async function inspectImmediateStreamingProviderError( try { await reader.cancel(); } catch { - // Ignore cancellation errors once the immediate error is extracted. + // Ignore cancellation errors - the response body is no longer needed. } return { @@ -654,7 +731,7 @@ export async function inspectImmediateStreamingProviderError( try { await reader.cancel(); } catch { - // Ignore cancellation errors when the replay stream setup fails. + // Ignore cancellation errors - the response body is no longer needed. 
} return { @@ -787,16 +864,40 @@ const completions = createRoute({ prompt_tokens_details: z .object({ cached_tokens: z.number(), + cache_write_tokens: z.number().optional(), + cache_creation_tokens: z.number().optional(), + audio_tokens: z.number().optional(), + video_tokens: z.number().optional(), + }) + .optional(), + completion_tokens_details: z + .object({ + reasoning_tokens: z.number().optional(), + image_tokens: z.number().optional(), + audio_tokens: z.number().optional(), + }) + .optional(), + cost: z.number().nullable().optional(), + cost_details: z + .object({ + upstream_inference_cost: z.number(), + upstream_inference_prompt_cost: z.number(), + upstream_inference_completions_cost: z.number(), + total_cost: z.number().nullable().optional(), + input_cost: z.number().nullable().optional(), + output_cost: z.number().nullable().optional(), + cached_input_cost: z.number().nullable().optional(), + request_cost: z.number().nullable().optional(), + web_search_cost: z.number().nullable().optional(), + image_input_cost: z.number().nullable().optional(), + image_output_cost: z.number().nullable().optional(), + data_storage_cost: z.number().nullable().optional(), }) .optional(), - cost_usd_total: z.number().nullable().optional(), - cost_usd_input: z.number().nullable().optional(), - cost_usd_output: z.number().nullable().optional(), - cost_usd_cached_input: z.number().nullable().optional(), info: z.string().optional(), - cost_usd_request: z.number().nullable().optional(), }), metadata: z.object({ + request_id: z.string(), requested_model: z.string(), requested_provider: z.string().nullable(), used_model: z.string(), @@ -811,6 +912,9 @@ const completions = createRoute({ region: z.string().optional(), status_code: z.number(), error_type: z.string(), + succeeded: z.boolean(), + apiKeyHash: z.string().optional(), + logId: z.string().optional(), }), ) .optional(), @@ -846,7 +950,7 @@ const completions = createRoute({ chat.openapi(completions, async (c) => { // Extract or generate request ID - const requestId = c.req.header("x-request-id") ?? shortid(40); + const requestId = c.req.header("x-request-id")?.trim() || shortid(40); // Parse JSON manually even if it's malformed let rawBody: unknown; @@ -1021,17 +1125,38 @@ chat.openapi(completions, async (c) => { // Extract custom X-LLMGateway-* headers const customHeaders = extractCustomHeaders(c); - const requestPluginIds = plugins?.map((plugin) => plugin.id) ?? []; + + // Read Responses API context from in-memory Map (set by /v1/responses proxy). + // Uses a lookup key passed via header; actual data is never in headers. + // External callers cannot exploit this: the key is a resp_ + shortid(24) that + // only exists in the Map for the duration of a single app.request() call, and + // getResponsesContext() deletes on read (one-time use). const responsesContextKey = c.req.header("x-responses-context-key"); const responsesContext = responsesContextKey ? getResponsesContext(responsesContextKey) : undefined; + const syncLogInsert = responsesContext?.syncInsert ?? false; const logIdOverride = responsesContext?.logId; - updateLogInsertOptions(c, { - syncInsert: responsesContext?.syncInsert ?? false, - logIdOverride, - responsesApiData: responsesContext?.responsesApiData ?? null, - }); + const responsesApiData: unknown = responsesContext?.responsesApiData ?? 
null; + + const chatLogState = c.get("chatCompletionLogState"); + if (chatLogState) { + chatLogState.syncInsert = syncLogInsert; + chatLogState.logIdOverride = logIdOverride; + chatLogState.responsesApiData = responsesApiData; + } + + // Queue a log entry for the middleware to flush after the request completes. + // The middleware applies logIdOverride/responsesApiData/syncInsert from state + // at flush time, so we just push the raw log data here. + const insertLogEntry = (logData: LogInsertData): Promise => { + if (chatLogState) { + chatLogState.pendingLogs.push(logData); + } else { + void _insertLog(logData); + } + return Promise.resolve(1); + }; // Check for X-No-Fallback header to disable provider fallback on low uptime const xNoFallbackHeaderSet = @@ -1160,9 +1285,6 @@ chat.openapi(completions, async (c) => { }); } - const validatedApiKey = apiKey; - const validatedProject = project; - // Check if project is deleted (archived) if (project.status === "deleted") { throw new HTTPException(410, { @@ -1232,35 +1354,18 @@ chat.openapi(completions, async (c) => { }); } - updateBaseLogOptions(c, { - requestId, - project, - apiKey, - usedModel: initialRequestedModel, - usedModelMapping: requestedModel, - usedProvider: requestedProvider ?? "llmgateway", - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - rawRequest: rawBody, - plugins: requestPluginIds, - }); + const retryProjectContext = { + mode: project.mode, + organizationId: project.organizationId, + }; + const retryOrganizationContext = { + id: organization.id, + credits: organization.credits, + devPlan: organization.devPlan, + devPlanCreditsLimit: organization.devPlanCreditsLimit, + devPlanCreditsUsed: organization.devPlanCreditsUsed, + devPlanExpiresAt: organization.devPlanExpiresAt, + }; // Run guardrails check for enterprise organizations let guardrailResult: Awaited> | undefined; @@ -1302,9 +1407,6 @@ chat.openapi(completions, async (c) => { messages as Parameters[0], guardrailResult.redactions, ) as typeof messages; - updateBaseLogOptions(c, { - messages, - }); } // Log non-blocking violations (redact/warn) @@ -1481,8 +1583,8 @@ chat.openapi(completions, async (c) => { } } - // Find the cheapest model that meets our context size requirements. - // Only consider hardcoded models for auto selection unless free_models_only is set. 
+ // Find the cheapest model that meets our context size requirements + // Only consider hardcoded models for auto selection const allowedAutoModels = [ "claude-opus-4-6", "claude-sonnet-4-6", @@ -1491,175 +1593,151 @@ chat.openapi(completions, async (c) => { let selectedModel: ModelDefinition | undefined; let selectedProviders: any[] = []; + let lowestPrice = Number.MAX_VALUE; + const now = new Date(); // Cache current time for deprecation checks - async function findBestAutoRoutingCandidate( - candidateModels: ModelDefinition[], - ): Promise<{ - selectedModel: ModelDefinition; - selectedProviders: ProviderModelMapping[]; - } | null> { - let bestModel: ModelDefinition | undefined; - let bestProviders: ProviderModelMapping[] = []; - let lowestPrice = Number.MAX_VALUE; - const now = new Date(); // Cache current time for deprecation checks - - for (const modelDef of candidateModels) { - if (modelDef.id === "auto" || modelDef.id === "custom") { - continue; - } + for (const modelDef of models) { + if (modelDef.id === "auto" || modelDef.id === "custom") { + continue; + } - // When free_models_only is true, only consider models marked as free. - if (free_models_only) { - if (!("free" in modelDef && modelDef.free)) { - continue; - } - } else if (!allowedAutoModels.includes(modelDef.id)) { - continue; - } else if ( - estimatedInputTokens > 10_000 && - modelDef.id === "claude-haiku-4-5" - ) { - // Prefer Sonnet over Haiku for larger prompts once the input crosses 10k tokens. + // When free_models_only is true, only consider models marked as free + // Otherwise, only consider hardcoded allowed models + if (free_models_only) { + if (!("free" in modelDef && modelDef.free)) { continue; } + } else if (!allowedAutoModels.includes(modelDef.id)) { + continue; + } else if ( + estimatedInputTokens > 10_000 && + modelDef.id === "claude-haiku-4-5" + ) { + // Prefer Sonnet over Haiku for larger prompts once the input crosses 10k tokens + continue; + } - // Validate IAM rules for this candidate model and filter providers. - // We must re-evaluate per model because iamAllowedProviders was computed - // for the "auto" model which only has the "llmgateway" provider. - const candidateIam = await validateModelAccess( - validatedApiKey.id, - modelDef.id, - undefined, - modelDef, - ); - if (!candidateIam.allowed) { - continue; - } - const candidateAllowedProviders = candidateIam.allowedProviders; + // Validate IAM rules for this candidate model and filter providers. + // We must re-evaluate per model because iamAllowedProviders was computed + // for the "auto" model which only has the "llmgateway" provider. + const candidateIam = await validateModelAccess( + apiKey.id, + modelDef.id, + undefined, + modelDef, + ); + if (!candidateIam.allowed) { + continue; + } + const candidateAllowedProviders = candidateIam.allowedProviders; - const candidateProviders = preferConcreteRegionalMappings( - validatedProject.mode === "credits" - ? filterRegionsByAvailableKeys( - expandAllProviderRegions( - modelDef.providers as ProviderModelMapping[], - ), - ) - : expandAllProviderRegions( + const candidateProviders = preferConcreteRegionalMappings( + project.mode === "credits" + ? 
filterRegionsByAvailableKeys( + expandAllProviderRegions( modelDef.providers as ProviderModelMapping[], ), - ); - // Check if any of the model's providers are available - const availableModelProviders = candidateProviders.filter( - (provider) => - availableProviders.includes(provider.providerId) && - (!candidateAllowedProviders || - candidateAllowedProviders.includes(provider.providerId)), - ); - - // Filter by context size requirement, reasoning capability, and deprecation status - const suitableProviders = availableModelProviders.filter((provider) => { - // Skip deprecated provider mappings - if (provider.deprecatedAt && now > provider.deprecatedAt) { - return false; - } - - // Use the provider's context size, defaulting to a reasonable value if not specified - const modelContextSize = provider.contextSize ?? 8192; - const contextSizeMet = modelContextSize >= requiredContextSize; + ) + : expandAllProviderRegions( + modelDef.providers as ProviderModelMapping[], + ), + ); + // Check if any of the model's providers are available + const availableModelProviders = candidateProviders.filter( + (provider) => + availableProviders.includes(provider.providerId) && + (!candidateAllowedProviders || + candidateAllowedProviders.includes(provider.providerId)), + ); - // If no_reasoning is true, exclude reasoning models - if (no_reasoning && provider.reasoning === true) { - return false; - } + // Filter by context size requirement, reasoning capability, and deprecation status + const suitableProviders = availableModelProviders.filter((provider) => { + // Skip deprecated provider mappings + if (provider.deprecatedAt && now > provider.deprecatedAt!) { + return false; + } - // Check reasoning capability if reasoning_effort is specified - if (reasoning_effort !== undefined && provider.reasoning !== true) { - return false; - } + // Use the provider's context size, defaulting to a reasonable value if not specified + const modelContextSize = provider.contextSize ?? 
8192; + const contextSizeMet = modelContextSize >= requiredContextSize; - // Check reasoning.max_tokens support if specified - if ( - reasoning_max_tokens !== undefined && - provider.reasoningMaxTokens !== true - ) { - return false; - } + // If no_reasoning is true, exclude reasoning models + if (no_reasoning && provider.reasoning === true) { + return false; + } - // Check tool capability if tools or tool_choice is specified - if ( - (tools !== undefined || tool_choice !== undefined) && - provider.tools !== true - ) { - return false; - } + // Check reasoning capability if reasoning_effort is specified + if (reasoning_effort !== undefined && provider.reasoning !== true) { + return false; + } - // Check web search capability if web search tool is requested - if (webSearchTool && provider.webSearch !== true) { - return false; - } + // Check reasoning.max_tokens support if specified + if ( + reasoning_max_tokens !== undefined && + provider.reasoningMaxTokens !== true + ) { + return false; + } - // Check JSON output capability if json_object or json_schema response format is requested - if ( - response_format?.type === "json_object" || - response_format?.type === "json_schema" - ) { - if (provider.jsonOutput !== true) { - return false; - } - } + // Check tool capability if tools or tool_choice is specified + if ( + (tools !== undefined || tool_choice !== undefined) && + provider.tools !== true + ) { + return false; + } - // Check JSON schema output capability if json_schema response format is requested - if (response_format?.type === "json_schema") { - if (provider.jsonOutputSchema !== true) { - return false; - } - } + // Check web search capability if web search tool is requested + if (webSearchTool && provider.webSearch !== true) { + return false; + } - // Check vision capability if images are present in messages - if (hasImages && provider.vision !== true) { + // Check JSON output capability if json_object or json_schema response format is requested + if ( + response_format?.type === "json_object" || + response_format?.type === "json_schema" + ) { + if (provider.jsonOutput !== true) { return false; } + } - if ( - max_tokens !== undefined && - provider.maxOutput !== undefined && - max_tokens > provider.maxOutput - ) { + // Check JSON schema output capability if json_schema response format is requested + if (response_format?.type === "json_schema") { + if (provider.jsonOutputSchema !== true) { return false; } + } - return contextSizeMet; - }); - - if (suitableProviders.length > 0) { - // Find the cheapest among the suitable providers for this model - for (const provider of suitableProviders) { - const totalPrice = - ((provider.inputPrice ?? 0) + (provider.outputPrice ?? 0)) / 2; + // Check vision capability if images are present in messages + if (hasImages && provider.vision !== true) { + return false; + } - if (totalPrice < lowestPrice) { - lowestPrice = totalPrice; - bestModel = modelDef; - bestProviders = suitableProviders; - } - } + if ( + max_tokens !== undefined && + provider.maxOutput !== undefined && + max_tokens > provider.maxOutput + ) { + return false; } - } - if (!bestModel) { - return null; - } + return contextSizeMet; + }); - return { - selectedModel: bestModel, - selectedProviders: bestProviders, - }; - } + if (suitableProviders.length > 0) { + // Find the cheapest among the suitable providers for this model + for (const provider of suitableProviders) { + const totalPrice = + ((provider.inputPrice ?? 0) + (provider.outputPrice ?? 
0)) / 2; - const autoRoutingCandidate = await findBestAutoRoutingCandidate(models); - if (autoRoutingCandidate) { - selectedModel = autoRoutingCandidate.selectedModel; - selectedProviders = autoRoutingCandidate.selectedProviders; + if (totalPrice < lowestPrice) { + lowestPrice = totalPrice; + selectedModel = modelDef; + selectedProviders = suitableProviders; + } + } + } } let providerAgnosticSelectedProviders = selectedProviders; @@ -2218,6 +2296,7 @@ chat.openapi(completions, async (c) => { if (cheapestResult) { usedProvider = cheapestResult.provider.providerId; usedModel = cheapestResult.provider.modelName; + usedRegion = cheapestResult.provider.region; routingMetadata = { ...cheapestResult.metadata, selectionReason: "low-uptime-fallback", @@ -2677,16 +2756,13 @@ chat.openapi(completions, async (c) => { } } - updateBaseLogOptions(c, { - reasoningEffort: reasoning_effort, - }); - let url: string | undefined; // Get the provider key for the selected provider based on project mode let providerKey: InferSelectModel | undefined; let usedToken: string | undefined; + let usedApiKeyHash: string | undefined; let configIndex = 0; // Index for round-robin environment variables let envVarName: string | undefined; // Environment variable name for health tracking if ( @@ -2986,6 +3062,9 @@ chat.openapi(completions, async (c) => { }); } + usedApiKeyHash = getApiKeyFingerprint(usedToken); + routingMetadata = withUsedApiKeyHash(routingMetadata, usedApiKeyHash); + const contentFilterBlocked = contentFilterMode === "enabled" && contentFilterMatched && @@ -3000,27 +3079,66 @@ chat.openapi(completions, async (c) => { .length ? openAIContentFilterResult.responses : null; - updateBaseLogOptions(c, { - gatewayContentFilterResponse, - }); - const chatCompletionLogState = c.get("chatCompletionLogState"); - if (chatCompletionLogState) { - chatCompletionLogState.internalContentFilter = shouldTagContentFilter; + + if (chatLogState) { + if (shouldTagContentFilter) { + chatLogState.internalContentFilter = true; + } + chatLogState.gatewayContentFilterResponse = gatewayContentFilterResponse; } + const insertLog = ( + logData: Parameters[0], + _options?: Parameters[1], + ): Promise => { + if (chatLogState) { + chatLogState.pendingLogs.push(logData as LogInsertData); + } else { + const enriched = { + ...logData, + gatewayContentFilterResponse: + logData.gatewayContentFilterResponse ?? gatewayContentFilterResponse, + ...(shouldTagContentFilter ? 
{ internalContentFilter: true } : {}), + }; + void _insertLog(enriched); + } + return Promise.resolve(1); + }; + if (contentFilterBlocked) { const contentFilterResponseId = `chatcmpl-${Date.now()}`; const contentFilterCreated = Math.floor(Date.now() / 1000); - enqueueChatLog( - c, - { - providerKeyId: undefined, - usedModel: "", - usedModelMapping: undefined, - usedProvider: "llmgateway", - }, - { + // Log the filtered request + try { + await insertLog({ + ...createLogEntry( + requestId, + project, + apiKey, + undefined, + "", + undefined, + "llmgateway", + requestedModel, + requestedProvider, + messages as any[], + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + undefined, + undefined, + effort as "low" | "medium" | "high" | undefined, + response_format, + tools, + tool_choice, + source, + customHeaders, + c.req.header("x-debug") === "true", + c.req.header("user-agent"), + ), content: null, responseSize: 0, finishReason: "llmgateway_content_filter", @@ -3036,7 +3154,6 @@ chat.openapi(completions, async (c) => { errorDetails: null, duration: 0, timeToFirstToken: null, - timeToFirstReasoningToken: null, inputCost: 0, outputCost: 0, cachedInputCost: 0, @@ -3051,10 +3168,10 @@ chat.openapi(completions, async (c) => { discount: null, pricingTier: null, dataStorageCost: "0", - cached: false, - toolResults: null, - }, - ); + }); + } catch { + // Silently ignore logging failures + } if (stream) { void registerStreamCompletion(c); @@ -3146,6 +3263,7 @@ chat.openapi(completions, async (c) => { configIndex, isImageGeneration, usedRegion, + providerKey !== undefined, ); // If region is still unset but the provider supports regions, resolve the @@ -3279,6 +3397,46 @@ chat.openapi(completions, async (c) => { } } + // Log the cached streaming request with reconstructed content + // Extract plugin IDs for logging (cached streaming) + const cachedStreamingPluginIds = plugins?.map((p) => p.id) ?? 
[]; + + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + rawCachedResponseData, // Raw SSE data from cached response + null, // No upstream request for cached response + rawCachedResponseData, // Raw SSE data from cached response (same for both) + cachedStreamingPluginIds, + undefined, // No plugin results for cached response + ); + // Calculate costs for cached response const costs = await calculateCosts( usedModel, @@ -3295,90 +3453,56 @@ chat.openapi(completions, async (c) => { project.organizationId, ); - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: rawCachedResponseData, - upstreamRequest: null, - upstreamResponse: rawCachedResponseData, - plugins: requestPluginIds, - pluginResults: undefined, - }, - { - duration: 0, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: cachedResponseSize, - content: fullContent || null, - reasoningContent: fullReasoningContent || null, - finishReason: cachedStreamingResponse.metadata.finishReason, - promptTokens: - (costs.promptTokens ?? promptTokens)?.toString() ?? null, - completionTokens: completionTokens?.toString() ?? null, - totalTokens: costs.imageInputTokens - ? ( - (costs.promptTokens ?? promptTokens ?? 0) + - (completionTokens ?? 0) + - (reasoningTokens ?? 0) - ).toString() - : (totalTokens?.toString() ?? null), - reasoningTokens: reasoningTokens?.toString() ?? null, - cachedTokens: cachedTokens?.toString() ?? null, - hasError: false, - streamed: true, - canceled: false, - errorDetails: null, - inputCost: costs.inputCost ?? 0, - outputCost: costs.outputCost ?? 0, - cachedInputCost: costs.cachedInputCost ?? 0, - requestCost: costs.requestCost ?? 0, - webSearchCost: costs.webSearchCost ?? 0, - imageInputTokens: costs.imageInputTokens?.toString() ?? null, - imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, - imageInputCost: costs.imageInputCost ?? null, - imageOutputCost: costs.imageOutputCost ?? null, - cost: costs.totalCost ?? 0, - estimatedCost: costs.estimatedCost, - discount: costs.discount ?? null, - pricingTier: costs.pricingTier ?? null, - dataStorageCost: calculateDataStorageCost( - costs.promptTokens ?? promptTokens, - cachedTokens, - completionTokens, - reasoningTokens, - retentionLevel, - ), - cached: true, - toolResults: - (cachedStreamingResponse.metadata as { toolResults?: any }) - ?.toolResults ?? 
null, - }, - ); + await insertLogEntry({ + ...baseLogEntry, + duration: 0, // No processing time for cached response + timeToFirstToken: null, // Not applicable for cached response + timeToFirstReasoningToken: null, // Not applicable for cached response + responseSize: cachedResponseSize, + content: fullContent || null, + reasoningContent: fullReasoningContent || null, + finishReason: cachedStreamingResponse.metadata.finishReason, + promptTokens: + (costs.promptTokens ?? promptTokens)?.toString() ?? null, + completionTokens: completionTokens?.toString() ?? null, + totalTokens: costs.imageInputTokens + ? ( + (costs.promptTokens ?? promptTokens ?? 0) + + (completionTokens ?? 0) + + (reasoningTokens ?? 0) + ).toString() + : (totalTokens?.toString() ?? null), + reasoningTokens: reasoningTokens?.toString() ?? null, + cachedTokens: cachedTokens?.toString() ?? null, + hasError: false, + streamed: true, + canceled: false, + errorDetails: null, + inputCost: costs.inputCost ?? 0, + outputCost: costs.outputCost ?? 0, + cachedInputCost: costs.cachedInputCost ?? 0, + requestCost: costs.requestCost ?? 0, + webSearchCost: costs.webSearchCost ?? 0, + imageInputTokens: costs.imageInputTokens?.toString() ?? null, + imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, + imageInputCost: costs.imageInputCost ?? null, + imageOutputCost: costs.imageOutputCost ?? null, + cost: costs.totalCost ?? 0, + estimatedCost: costs.estimatedCost, + discount: costs.discount ?? null, + pricingTier: costs.pricingTier ?? null, + dataStorageCost: calculateDataStorageCost( + costs.promptTokens ?? promptTokens, + cachedTokens, + completionTokens, + reasoningTokens, + retentionLevel, + ), + cached: true, + toolResults: + (cachedStreamingResponse.metadata as { toolResults?: any }) + ?.toolResults ?? null, + }); // Return cached streaming response by replaying chunks with original timing void registerStreamCompletion(c); @@ -3420,6 +3544,7 @@ chat.openapi(completions, async (c) => { } else { logger.error("Error replaying cached stream", error); } + finishStreamCompletion(c); }, ); } @@ -3427,18 +3552,59 @@ chat.openapi(completions, async (c) => { cacheKey = generateCacheKey(cachePayload); const cachedResponse = cacheKey ? await getCache(cacheKey) : null; if (cachedResponse) { + const responseForCurrentRequest = + withCurrentRequestMetadataOnOpenAiResponse(cachedResponse, requestId); + // Log the cached request const duration = 0; // No processing time needed + // Extract plugin IDs for logging (cached non-streaming) + const cachedPluginIds = plugins?.map((p) => p.id) ?? []; - // Calculate costs for cached response - const cachedCosts = await calculateCosts( - usedModel, + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, usedProvider, - cachedResponse.usage?.prompt_tokens ?? null, - cachedResponse.usage?.completion_tokens ?? null, - cachedResponse.usage?.prompt_tokens_details?.cached_tokens ?? null, - undefined, - cachedResponse.usage?.reasoning_tokens ?? 
null, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + responseForCurrentRequest, + null, // No upstream request for cached response + responseForCurrentRequest, // upstream response is same as cached response + cachedPluginIds, + undefined, // No plugin results for cached response + ); + + // Calculate costs for cached response + const cachedCosts = await calculateCosts( + usedModel, + usedProvider, + cachedResponse.usage?.prompt_tokens ?? null, + cachedResponse.usage?.completion_tokens ?? null, + cachedResponse.usage?.prompt_tokens_details?.cached_tokens ?? null, + undefined, + cachedResponse.usage?.reasoning_tokens ?? null, 0, // outputImageCount undefined, // imageSize inputImageCount, @@ -3455,98 +3621,61 @@ chat.openapi(completions, async (c) => { (cachedReasoningContent?.length ?? 0) + 500; // overhead for metadata - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: cachedResponse, - upstreamRequest: null, - upstreamResponse: cachedResponse, - plugins: requestPluginIds, - pluginResults: undefined, - }, - { - duration, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: estimatedCachedSize, - content: cachedContent ?? null, - reasoningContent: cachedReasoningContent ?? null, - finishReason: cachedResponse.choices?.[0]?.finish_reason ?? null, - promptTokens: - ( - cachedCosts.promptTokens ?? cachedResponse.usage?.prompt_tokens - )?.toString() ?? null, - completionTokens: cachedResponse.usage?.completion_tokens ?? null, - totalTokens: cachedCosts.imageInputTokens - ? ( - (cachedCosts.promptTokens ?? - cachedResponse.usage?.prompt_tokens ?? - 0) + - (cachedResponse.usage?.completion_tokens ?? 0) + - (cachedResponse.usage?.reasoning_tokens ?? 0) - ).toString() - : (cachedResponse.usage?.total_tokens ?? null), - reasoningTokens: cachedResponse.usage?.reasoning_tokens ?? null, - cachedTokens: - cachedResponse.usage?.prompt_tokens_details?.cached_tokens ?? - null, - hasError: false, - streamed: false, - canceled: false, - errorDetails: null, - inputCost: cachedCosts.inputCost ?? 0, - outputCost: cachedCosts.outputCost ?? 0, - cachedInputCost: cachedCosts.cachedInputCost ?? 0, - requestCost: cachedCosts.requestCost ?? 0, - webSearchCost: cachedCosts.webSearchCost ?? 0, - imageInputTokens: cachedCosts.imageInputTokens?.toString() ?? null, - imageOutputTokens: - cachedCosts.imageOutputTokens?.toString() ?? null, - imageInputCost: cachedCosts.imageInputCost ?? null, - imageOutputCost: cachedCosts.imageOutputCost ?? null, - cost: cachedCosts.totalCost ?? 0, - estimatedCost: cachedCosts.estimatedCost, - discount: cachedCosts.discount ?? null, - pricingTier: cachedCosts.pricingTier ?? null, - dataStorageCost: calculateDataStorageCost( - cachedCosts.promptTokens ?? 
cachedResponse.usage?.prompt_tokens, - cachedResponse.usage?.prompt_tokens_details?.cached_tokens, - cachedResponse.usage?.completion_tokens, - cachedResponse.usage?.reasoning_tokens, - retentionLevel, - ), - cached: true, - toolResults: - cachedResponse.choices?.[0]?.message?.tool_calls ?? null, - }, - ); + await insertLogEntry({ + ...baseLogEntry, + duration, + timeToFirstToken: null, // Not applicable for cached response + timeToFirstReasoningToken: null, // Not applicable for cached response + responseSize: estimatedCachedSize, + content: cachedContent ?? null, + reasoningContent: cachedReasoningContent ?? null, + finishReason: cachedResponse.choices?.[0]?.finish_reason ?? null, + promptTokens: + ( + cachedCosts.promptTokens ?? cachedResponse.usage?.prompt_tokens + )?.toString() ?? null, + completionTokens: cachedResponse.usage?.completion_tokens ?? null, + totalTokens: cachedCosts.imageInputTokens + ? ( + (cachedCosts.promptTokens ?? + cachedResponse.usage?.prompt_tokens ?? + 0) + + (cachedResponse.usage?.completion_tokens ?? 0) + + (cachedResponse.usage?.reasoning_tokens ?? 0) + ).toString() + : (cachedResponse.usage?.total_tokens ?? null), + reasoningTokens: cachedResponse.usage?.reasoning_tokens ?? null, + cachedTokens: + cachedResponse.usage?.prompt_tokens_details?.cached_tokens ?? null, + hasError: false, + streamed: false, + canceled: false, + errorDetails: null, + inputCost: cachedCosts.inputCost ?? 0, + outputCost: cachedCosts.outputCost ?? 0, + cachedInputCost: cachedCosts.cachedInputCost ?? 0, + requestCost: cachedCosts.requestCost ?? 0, + webSearchCost: cachedCosts.webSearchCost ?? 0, + imageInputTokens: cachedCosts.imageInputTokens?.toString() ?? null, + imageOutputTokens: cachedCosts.imageOutputTokens?.toString() ?? null, + imageInputCost: cachedCosts.imageInputCost ?? null, + imageOutputCost: cachedCosts.imageOutputCost ?? null, + cost: cachedCosts.totalCost ?? 0, + estimatedCost: cachedCosts.estimatedCost, + discount: cachedCosts.discount ?? null, + pricingTier: cachedCosts.pricingTier ?? null, + dataStorageCost: calculateDataStorageCost( + cachedCosts.promptTokens ?? cachedResponse.usage?.prompt_tokens, + cachedResponse.usage?.prompt_tokens_details?.cached_tokens, + cachedResponse.usage?.completion_tokens, + cachedResponse.usage?.reasoning_tokens, + retentionLevel, + ), + cached: true, + toolResults: cachedResponse.choices?.[0]?.message?.tool_calls ?? null, + }); - return c.json(cachedResponse); + return c.json(responseForCurrentRequest); } } } @@ -3577,13 +3706,20 @@ chat.openapi(completions, async (c) => { // Check if streaming is requested and if the model/provider combination supports it // For image generation models, we'll fake streaming by converting the response const fakeStreamingForImageGen = stream && isImageGeneration; - const effectiveStream = fakeStreamingForImageGen ? false : stream; + const streamingSupport = getModelStreamingSupport( + baseModelName, + usedProvider, + usedRegion, + ); + // When the provider only supports streaming, force it even if the client didn't request it. + // The upstream request uses effectiveStream; the client response uses stream. + const forceStream = streamingSupport === "only" && !stream; + const effectiveStream = fakeStreamingForImageGen + ? 
false
+			: stream || forceStream;

 	if (stream) {
-		if (
-			!isImageGeneration &&
-			getModelStreamingSupport(baseModelName, usedProvider) === false
-		) {
+		if (!isImageGeneration && streamingSupport === false) {
 			throw new HTTPException(400, {
 				message: `Model ${usedModel} with provider ${usedProvider} does not support streaming`,
 			});
 		}
@@ -3700,39 +3836,7 @@ chat.openapi(completions, async (c) => {
 		}
 	}

-	// For Moonshot provider, enrich assistant messages with cached reasoning_content
-	// This is needed for multi-turn tool call conversations with thinking models
-	// Moonshot requires reasoning_content in assistant messages with tool_calls
-	if (usedProvider === "moonshot") {
-		const { redisClient } = await import("@llmgateway/cache");
-		for (const message of messages) {
-			if (
-				message.role === "assistant" &&
-				message.tool_calls &&
-				Array.isArray(message.tool_calls) &&
-				message.tool_calls.length > 0 &&
-				!(message as any).reasoning_content // Only add if not already present
-			) {
-				// Get reasoning_content from the first tool call (all tool calls share the same reasoning)
-				const firstToolCall = message.tool_calls[0];
-				if (firstToolCall?.id) {
-					try {
-						const cachedReasoningContent = await redisClient.get(
-							`reasoning_content:${firstToolCall.id}`,
-						);
-						if (cachedReasoningContent) {
-							// Add reasoning_content to the message for Moonshot
-							(message as any).reasoning_content = cachedReasoningContent;
-						}
-					} catch {
-						// Silently fail - reasoning_content caching is optional
-					}
-				}
-			}
-		}
-	}
-
-	let requestBody: ProviderRequestBody = await prepareRequestBody(
+	let requestBody: ProviderRequestBody | FormData = await prepareRequestBody(
 		usedProvider,
 		upstreamModelName,
 		messages as BaseMessage[],
@@ -3761,6 +3865,7 @@ chat.openapi(completions, async (c) => {

 	// Validate effective max_tokens value after prepareRequestBody
 	if (
+		!(requestBody instanceof FormData) &&
 		hasMaxTokens(requestBody) &&
 		requestBody.max_tokens !== undefined &&
 		finalModelInfo
@@ -3790,16 +3895,171 @@ chat.openapi(completions, async (c) => {
 		isImageGeneration &&
 		usedProvider === "xai" &&
 		url &&
+		!(requestBody instanceof FormData) &&
 		("image" in requestBody || "images" in requestBody)
 	) {
 		url = url.replace("/v1/images/generations", "/v1/images/edits");
 	}

+	// Switch OpenAI image generation endpoint to /edits when input images are present.
+	// prepareRequestBody returns a FormData (multipart/form-data) only for this edits flow.
+	if (
+		isImageGeneration &&
+		usedProvider === "openai" &&
+		url &&
+		requestBody instanceof FormData
+	) {
+		url = url.replace("/v1/images/generations", "/v1/images/edits");
+	}
+
 	const startTime = Date.now();
+	const failedEnvKeyIndicesByProvider = new Map<string, Set<number>>();
+	const failedTrackedKeyIdsByProvider = new Map<string, Set<string>>();
+
+	function rememberFailedKey(
+		providerId: string,
+		region: string | undefined,
+		options: {
+			envVarName?: string;
+			configIndex?: number;
+			providerKeyId?: string;
+		},
+	): void {
+		const retryKey = providerRetryKey(providerId, region);
+
+		if (options.envVarName !== undefined && options.configIndex !== undefined) {
+			const failedIndices =
+				failedEnvKeyIndicesByProvider.get(retryKey) ?? new Set<number>();
+			failedIndices.add(options.configIndex);
+			failedEnvKeyIndicesByProvider.set(retryKey, failedIndices);
+		}
+
+		if (options.providerKeyId) {
+			const failedKeyIds =
+				failedTrackedKeyIdsByProvider.get(retryKey) ?? new Set<string>();
+			failedKeyIds.add(options.providerKeyId);
+			failedTrackedKeyIdsByProvider.set(retryKey, failedKeyIds);
+		}
+	}
+
+	async function resolveProviderContextForRetry(
+		providerMapping: {
+			providerId: string;
+			modelName: string;
+			region?: string;
+		},
+		streamValue: boolean,
+	) {
+		const retryKey = providerRetryKey(
+			providerMapping.providerId,
+			providerMapping.region,
+		);
+		return await resolveProviderContext(
+			providerMapping,
+			retryProjectContext,
+			retryOrganizationContext,
+			modelInfo,
+			originalRequestParams,
+			{
+				requestId,
+				stream: streamValue,
+				effectiveStream,
+				messages: messages as BaseMessage[],
+				response_format,
+				tools,
+				tool_choice,
+				reasoning_effort,
+				reasoning_max_tokens,
+				effort,
+				webSearchTool,
+				image_config,
+				sensitive_word_check,
+				maxImageSizeMB,
+				userPlan,
+				hasExistingToolCalls,
+				customProviderName,
+				webSearchEnabled: !!webSearchTool,
+				excludedEnvKeyIndices: failedEnvKeyIndicesByProvider.get(retryKey),
+				excludedProviderKeyIds: failedTrackedKeyIdsByProvider.get(retryKey),
+			},
+		);
+	}
+
+	function applyResolvedProviderContext(
+		ctx: Awaited<ReturnType<typeof resolveProviderContextForRetry>>,
+	): void {
+		usedProvider = ctx.usedProvider;
+		usedModel = ctx.usedModel;
+		usedModelFormatted = ctx.usedModelFormatted;
+		usedModelMapping = ctx.usedModelMapping;
+		baseModelName = ctx.baseModelName;
+		usedToken = ctx.usedToken;
+		usedApiKeyHash = ctx.usedApiKeyHash;
+		providerKey = ctx.providerKey;
+		configIndex = ctx.configIndex;
+		envVarName = ctx.envVarName;
+		url = ctx.url;
+		requestBody = ctx.requestBody;
+		useResponsesApi = ctx.useResponsesApi;
+		requestCanBeCanceled = ctx.requestCanBeCanceled;
+		isImageGeneration = ctx.isImageGeneration;
+		supportsReasoning = ctx.supportsReasoning;
+		splitTaggedReasoning = ctx.splitTaggedReasoning ?? false;
+		temperature = ctx.temperature;
+		max_tokens = ctx.max_tokens;
+		top_p = ctx.top_p;
+		frequency_penalty = ctx.frequency_penalty;
+		presence_penalty = ctx.presence_penalty;
+		usedRegion = ctx.usedRegion;
+		routingMetadata = withUsedApiKeyHash(routingMetadata, usedApiKeyHash);
+	}
+
+	async function tryResolveAlternateKeyForCurrentProvider(
+		streamValue: boolean,
+	): Promise<Awaited<ReturnType<typeof resolveProviderContextForRetry>> | null> {
+		if (!usedProvider || !usedModel) {
+			return null;
+		}
+
+		const currentProviderKeyId = providerKey?.id;
+		const currentEnvVarName = envVarName;
+		const currentConfigIndex = configIndex;
+		const currentToken = usedToken;
+
+		try {
+			const nextContext = await resolveProviderContextForRetry(
+				{
+					providerId: usedProvider,
+					modelName: usedModel,
+					region: usedRegion,
+				},
+				streamValue,
+			);
+
+			const isDifferentTrackedKey =
+				nextContext.providerKey?.id !== undefined &&
+				nextContext.providerKey.id !== currentProviderKeyId;
+			const isDifferentEnvKey =
+				nextContext.envVarName !== undefined &&
+				(nextContext.envVarName !== currentEnvVarName ||
+					nextContext.configIndex !== currentConfigIndex);
+			const isDifferentToken = nextContext.usedToken !== currentToken;
+
+			if (!isDifferentTrackedKey && !isDifferentEnvKey && !isDifferentToken) {
+				return null;
+			}
+
+			return nextContext;
+		} catch {
+			return null;
+		}
+	}

 	// Handle streaming response if requested
 	// For image generation models, we skip real streaming and use fake streaming later
-	if (effectiveStream) {
+	// For stream-only models where the client didn't request streaming, use the non-streaming path
+	// (effectiveStream forces streaming upstream, but the client gets a regular JSON response)
+	if (effectiveStream && !forceStream) {
 		void registerStreamCompletion(c);
 		return streamSSE(
 			c,
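// ---- Editor's note (not part of the patch) ---------------------------------
// The hunk above introduces same-provider key failover: every failed env-key
// index and tracked provider-key id is remembered per provider+region, and
// the next context resolution excludes those keys. A minimal standalone
// sketch of the pattern; the names below are illustrative only, the real
// code threads the exclusion sets through resolveProviderContext instead.
const failedKeyIdsSketch = new Map<string, Set<string>>();

function rememberFailureSketch(retryKey: string, keyId: string): void {
	const failed = failedKeyIdsSketch.get(retryKey) ?? new Set<string>();
	failed.add(keyId);
	failedKeyIdsSketch.set(retryKey, failed);
}

// Pick the first key that has not failed yet for this provider+region.
// Returning null means "no alternate key, fall through to provider fallback",
// mirroring tryResolveAlternateKeyForCurrentProvider's contract.
function pickAlternateKeySketch(
	retryKey: string,
	candidateKeyIds: string[],
): string | null {
	const failed = failedKeyIdsSketch.get(retryKey) ?? new Set<string>();
	return candidateKeyIds.find((id) => !failed.has(id)) ?? null;
}
// -----------------------------------------------------------------------------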
@@ -3907,6 +4167,13 @@ chat.openapi(completions, async (c) => {
 							0,
 							project.organizationId,
 						);
+					streamingCosts.dataStorageCost = toDataStorageCostNumber(
+						streamingCosts.promptTokens ?? promptTokenCount,
+						null,
+						0,
+						null,
+						retentionLevel,
+					);

 					await writeSSEAndCache({
 						data: JSON.stringify({
@@ -3926,6 +4193,27 @@ chat.openapi(completions, async (c) => {
 						id: String(eventId++),
 					});

+					const contentFilterUsage: Record<string, unknown> = {
+						prompt_tokens: promptTokenCount,
+						completion_tokens: 0,
+						total_tokens: promptTokenCount,
+					};
+					applyExtendedUsageFields(contentFilterUsage, {
+						costs: {
+							inputCost: streamingCosts.inputCost,
+							outputCost: streamingCosts.outputCost,
+							cachedInputCost: streamingCosts.cachedInputCost,
+							requestCost: streamingCosts.requestCost,
+							webSearchCost: streamingCosts.webSearchCost,
+							imageInputCost: streamingCosts.imageInputCost,
+							imageOutputCost: streamingCosts.imageOutputCost,
+							totalCost: streamingCosts.totalCost,
+							dataStorageCost: streamingCosts.dataStorageCost,
+						},
+						cachedTokens: null,
+						cacheCreationTokens: null,
+						reasoningTokens: null,
+					});
 					await writeSSEAndCache({
 						data: JSON.stringify({
 							id: `chatcmpl-${Date.now()}`,
@@ -3939,18 +4227,7 @@ chat.openapi(completions, async (c) => {
 									finish_reason: null,
 								},
 							],
-							usage: {
-								prompt_tokens: promptTokenCount,
-								completion_tokens: 0,
-								total_tokens: promptTokenCount,
-								cost_usd_total: streamingCosts.totalCost,
-								cost_usd_input: streamingCosts.inputCost,
-								cost_usd_output: streamingCosts.outputCost,
-								cost_usd_cached_input: streamingCosts.cachedInputCost,
-								cost_usd_request: streamingCosts.requestCost,
-								cost_usd_image_input: streamingCosts.imageInputCost,
-								cost_usd_image_output: streamingCosts.imageOutputCost,
-							},
+							usage: contentFilterUsage,
 						}),
 						id: String(eventId++),
 					});
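// ---- Editor's note (not part of the patch) ---------------------------------
// applyExtendedUsageFields is defined elsewhere in this series; only its call
// shape is visible here. A sketch of what such a helper would plausibly do,
// based on the inline `cost_usd_*` fields it replaces above; any behavior
// beyond mirroring those fields is an assumption.
interface UsageCostsSketch {
	inputCost: number | null;
	outputCost: number | null;
	cachedInputCost: number | null;
	requestCost: number | null;
	webSearchCost: number | null;
	imageInputCost: number | null;
	imageOutputCost: number | null;
	totalCost: number | null;
	dataStorageCost: number | null;
}

function applyExtendedUsageFieldsSketch(
	usage: Record<string, unknown>,
	opts: {
		costs: UsageCostsSketch | null;
		cachedTokens: number | null;
		cacheCreationTokens: number | null;
		reasoningTokens: number | null;
	},
): void {
	if (opts.costs) {
		// Attach gateway cost accounting under the same keys the removed
		// inline usage object used.
		usage.cost_usd_total = opts.costs.totalCost;
		usage.cost_usd_input = opts.costs.inputCost;
		usage.cost_usd_output = opts.costs.outputCost;
		usage.cost_usd_cached_input = opts.costs.cachedInputCost;
		usage.cost_usd_request = opts.costs.requestCost;
		usage.cost_usd_image_input = opts.costs.imageInputCost;
		usage.cost_usd_image_output = opts.costs.imageOutputCost;
	}
	if (opts.reasoningTokens !== null && opts.reasoningTokens > 0) {
		usage.reasoning_tokens = opts.reasoningTokens;
	}
}
// -----------------------------------------------------------------------------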
@@ -4040,65 +4317,11 @@ chat.openapi(completions, async (c) => {
 				}

 				try {
-					const ctx = await resolveProviderContext(
+					const ctx = await resolveProviderContextForRetry(
 						nextProvider,
-						{
-							mode: project.mode,
-							organizationId: project.organizationId,
-						},
-						{
-							id: organization.id,
-							credits: organization.credits,
-							devPlan: organization.devPlan,
-							devPlanCreditsLimit: organization.devPlanCreditsLimit,
-							devPlanCreditsUsed: organization.devPlanCreditsUsed,
-							devPlanExpiresAt: organization.devPlanExpiresAt,
-						},
-						modelInfo,
-						originalRequestParams,
-						{
-							requestId,
-							stream: true,
-							effectiveStream,
-							messages: messages as BaseMessage[],
-							response_format,
-							tools,
-							tool_choice,
-							reasoning_effort,
-							reasoning_max_tokens,
-							effort,
-							webSearchTool,
-							image_config,
-							sensitive_word_check,
-							maxImageSizeMB,
-							userPlan,
-							hasExistingToolCalls,
-							customProviderName,
-							webSearchEnabled: !!webSearchTool,
-						},
+						true,
 					);
-					usedProvider = ctx.usedProvider;
-					usedModel = ctx.usedModel;
-					usedModelFormatted = ctx.usedModelFormatted;
-					usedModelMapping = ctx.usedModelMapping;
-					baseModelName = ctx.baseModelName;
-					usedToken = ctx.usedToken;
-					providerKey = ctx.providerKey;
-					configIndex = ctx.configIndex;
-					envVarName = ctx.envVarName;
-					url = ctx.url;
-					requestBody = ctx.requestBody;
-					useResponsesApi = ctx.useResponsesApi;
-					requestCanBeCanceled = ctx.requestCanBeCanceled;
-					isImageGeneration = ctx.isImageGeneration;
-					supportsReasoning = ctx.supportsReasoning;
-					splitTaggedReasoning = ctx.splitTaggedReasoning ?? false;
-					temperature = ctx.temperature;
-					max_tokens = ctx.max_tokens;
-					top_p = ctx.top_p;
-					frequency_penalty = ctx.frequency_penalty;
-					presence_penalty = ctx.presence_penalty;
-					usedRegion = ctx.usedRegion;
+					applyResolvedProviderContext(ctx);
 				} catch {
 					failedProviderIds.add(
 						providerRetryKey(
@@ -4114,6 +4337,7 @@ chat.openapi(completions, async (c) => {

 				try {
 					const headers = getProviderHeaders(usedProvider, usedToken, {
+						requestId,
 						webSearchEnabled: !!webSearchTool,
 					});
 					headers["Content-Type"] = "application/json";
@@ -4171,6 +4395,20 @@ chat.openapi(completions, async (c) => {
 							),
 						});

+						// Log the timeout error in the database
+						const timeoutPluginIds = plugins?.map((p) => p.id) ?? [];
+
+						let sameProviderRetryContext: Awaited<
+							ReturnType<typeof tryResolveAlternateKeyForCurrentProvider>
+						> | null = null;
+						rememberFailedKey(usedProvider, usedRegion, {
+							envVarName,
+							configIndex,
+							providerKeyId: providerKey?.id,
+						});
+						sameProviderRetryContext =
+							await tryResolveAlternateKeyForCurrentProvider(true);
+
 						// Check if we should retry before logging so we can mark the log as retried
 						const willRetryTimeout = shouldRetryRequest({
 							requestedProvider,
@@ -4183,88 +4421,121 @@ chat.openapi(completions, async (c) => {
 								1,
 							usedProvider,
 						});
+						const willRetrySameProvider = sameProviderRetryContext !== null;
+						const willRetryRequest =
+							willRetrySameProvider || willRetryTimeout;

-						enqueueChatLog(
-							c,
-							{
-								providerKeyId: providerKey?.id,
-								usedModel: usedModelFormatted,
-								usedModelMapping,
-								usedProvider,
-								requestedModel: initialRequestedModel,
-								requestedProvider,
-								messages,
-								temperature,
-								max_tokens,
-								top_p,
-								frequency_penalty,
-								presence_penalty,
-								reasoningEffort: reasoning_effort,
-								reasoningMaxTokens: reasoning_max_tokens,
-								effort,
-								responseFormat: response_format,
-								tools,
-								toolChoice: tool_choice,
-								source,
-								customHeaders,
-								debugMode,
-								userAgent,
-								imageConfig: image_config,
-								routingMetadata,
-								rawRequest: rawBody,
-								rawResponse: null,
-								upstreamRequest: requestBody,
-								upstreamResponse: null,
-								plugins: requestPluginIds,
-								pluginResults: undefined,
-							},
-							{
-								duration: Date.now() - perAttemptStartTime,
-								timeToFirstToken: null,
-								timeToFirstReasoningToken: null,
-								responseSize: 0,
-								content: null,
-								reasoningContent: null,
-								finishReason: "upstream_error",
-								promptTokens: null,
-								completionTokens: null,
-								totalTokens: null,
-								reasoningTokens: null,
-								cachedTokens: null,
-								hasError: true,
-								streamed: true,
-								canceled: false,
-								errorDetails: {
-									statusCode: 0,
-									statusText: "TimeoutError",
-									responseText: errorMessage,
-									cause: timeoutCause,
-								},
-								cachedInputCost: null,
-								requestCost: null,
-								webSearchCost: null,
-								imageInputTokens: null,
-								imageOutputTokens: null,
-								imageInputCost: null,
-								imageOutputCost: null,
-								discount: null,
-								dataStorageCost: "0",
-								cached: false,
-								toolResults: null,
-								retried: willRetryTimeout,
-								retriedByLogId: willRetryTimeout ?
finalLogId : null, - }, + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, // No response for timeout error + requestBody, + null, // No upstream response for timeout error + timeoutPluginIds, + undefined, // No plugin results for error case ); + const attemptLogId = shortid(); + + await insertLogEntry({ + ...baseLogEntry, + id: attemptLogId, + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: true, + canceled: false, + errorDetails: { + statusCode: 0, + statusText: "TimeoutError", + responseText: errorMessage, + cause: timeoutCause, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryRequest, + retriedByLogId: willRetryRequest ? finalLogId : null, + }); + + if (willRetrySameProvider && sameProviderRetryContext) { + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + 0, + getErrorType(0), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); + applyResolvedProviderContext(sameProviderRetryContext); + retryAttempt--; + continue; + } if (willRetryTimeout) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: 0, - error_type: getErrorType(0), - succeeded: false, - }); + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + 0, + getErrorType(0), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); failedProviderIds.add( providerRetryKey(usedProvider, usedRegion), ); @@ -4287,6 +4558,10 @@ chat.openapi(completions, async (c) => { error instanceof Error && error.name === "AbortError" ) { + // Log the canceled request + // Extract plugin IDs for logging (canceled request) + const canceledPluginIds = plugins?.map((p) => p.id) ?? 
[]; + // Calculate costs for cancelled request if billing is enabled const billCancelled = shouldBillCancelledRequests(); let cancelledCosts: Awaited< @@ -4329,95 +4604,97 @@ chat.openapi(completions, async (c) => { ); } - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: null, - upstreamRequest: requestBody, - upstreamResponse: null, - plugins: requestPluginIds, - pluginResults: undefined, - }, - { - duration: Date.now() - perAttemptStartTime, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "canceled", - promptTokens: billCancelled - ? ( - cancelledCosts?.promptTokens ?? estimatedPromptTokens - )?.toString() - : null, - completionTokens: billCancelled ? "0" : null, - totalTokens: billCancelled - ? ( - cancelledCosts?.promptTokens ?? estimatedPromptTokens - )?.toString() - : null, - reasoningTokens: null, - cachedTokens: null, - hasError: false, - streamed: true, - canceled: true, - errorDetails: null, - inputCost: cancelledCosts?.inputCost ?? null, - outputCost: cancelledCosts?.outputCost ?? null, - cachedInputCost: cancelledCosts?.cachedInputCost ?? null, - requestCost: cancelledCosts?.requestCost ?? null, - webSearchCost: cancelledCosts?.webSearchCost ?? null, - imageInputTokens: - cancelledCosts?.imageInputTokens?.toString() ?? null, - imageOutputTokens: - cancelledCosts?.imageOutputTokens?.toString() ?? null, - imageInputCost: cancelledCosts?.imageInputCost ?? null, - imageOutputCost: cancelledCosts?.imageOutputCost ?? null, - cost: cancelledCosts?.totalCost ?? null, - estimatedCost: cancelledCosts?.estimatedCost ?? false, - discount: cancelledCosts?.discount ?? null, - dataStorageCost: billCancelled - ? calculateDataStorageCost( - cancelledCosts?.promptTokens ?? 
estimatedPromptTokens, - null, - 0, - null, - retentionLevel, - ) - : "0", - cached: false, - toolResults: null, - }, + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, // No response for canceled request + requestBody, // The request that was sent before cancellation + null, // No upstream response for canceled request + canceledPluginIds, + undefined, // No plugin results for canceled request ); - // Send a cancellation event to the client - await writeSSEAndCache({ + await insertLogEntry({ + ...baseLogEntry, + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, // Not applicable for canceled request + timeToFirstReasoningToken: null, // Not applicable for canceled request + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "canceled", + promptTokens: billCancelled + ? ( + cancelledCosts?.promptTokens ?? estimatedPromptTokens + )?.toString() + : null, + completionTokens: billCancelled ? "0" : null, + totalTokens: billCancelled + ? ( + cancelledCosts?.promptTokens ?? estimatedPromptTokens + )?.toString() + : null, + reasoningTokens: null, + cachedTokens: null, + hasError: false, + streamed: true, + canceled: true, + errorDetails: null, + inputCost: cancelledCosts?.inputCost ?? null, + outputCost: cancelledCosts?.outputCost ?? null, + cachedInputCost: cancelledCosts?.cachedInputCost ?? null, + requestCost: cancelledCosts?.requestCost ?? null, + webSearchCost: cancelledCosts?.webSearchCost ?? null, + imageInputTokens: + cancelledCosts?.imageInputTokens?.toString() ?? null, + imageOutputTokens: + cancelledCosts?.imageOutputTokens?.toString() ?? null, + imageInputCost: cancelledCosts?.imageInputCost ?? null, + imageOutputCost: cancelledCosts?.imageOutputCost ?? null, + cost: cancelledCosts?.totalCost ?? null, + estimatedCost: cancelledCosts?.estimatedCost ?? false, + discount: cancelledCosts?.discount ?? null, + dataStorageCost: billCancelled + ? calculateDataStorageCost( + cancelledCosts?.promptTokens ?? estimatedPromptTokens, + null, + 0, + null, + retentionLevel, + ) + : "0", + cached: false, + toolResults: null, + }); + + // Send a cancellation event to the client + await writeSSEAndCache({ event: "canceled", data: JSON.stringify({ message: "Request canceled by client", @@ -4448,6 +4725,23 @@ chat.openapi(completions, async (c) => { ), }); + // Log the error in the database + // Extract plugin IDs for logging (fetch error) + const fetchErrorPluginIds = plugins?.map((p) => p.id) ?? 
[]; + + let sameProviderRetryContext: Awaited< + ReturnType + > | null = null; + if (isRetryableErrorType("network_error")) { + rememberFailedKey(usedProvider, usedRegion, { + envVarName, + configIndex, + providerKeyId: providerKey?.id, + }); + sameProviderRetryContext = + await tryResolveAlternateKeyForCurrentProvider(true); + } + // Check if we should retry before logging so we can mark the log as retried const willRetryFetch = shouldRetryRequest({ requestedProvider, @@ -4460,93 +4754,129 @@ chat.openapi(completions, async (c) => { 1, usedProvider, }); + const willRetrySameProvider = sameProviderRetryContext !== null; + const willRetryRequest = + willRetrySameProvider || willRetryFetch; - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: null, - upstreamRequest: requestBody, - upstreamResponse: null, - plugins: requestPluginIds, - pluginResults: undefined, - }, - { - duration: Date.now() - perAttemptStartTime, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "upstream_error", - promptTokens: null, - completionTokens: null, - totalTokens: null, - reasoningTokens: null, - cachedTokens: null, - hasError: true, - streamed: true, - canceled: false, - errorDetails: { - statusCode: 0, - statusText: error.name, - responseText: errorMessage, - cause: fetchCause, - }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - retried: willRetryFetch, - retriedByLogId: willRetryFetch ? 
finalLogId : null, - }, + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, // No response for fetch error + requestBody, // The request that resulted in error + null, // No upstream response for fetch error + fetchErrorPluginIds, + undefined, // No plugin results for error case ); + const attemptLogId = shortid(); - // Report key health for environment-based tokens + await insertLogEntry({ + ...baseLogEntry, + id: attemptLogId, + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, // Not applicable for error case + timeToFirstReasoningToken: null, // Not applicable for error case + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: true, + canceled: false, + errorDetails: { + statusCode: 0, + statusText: error.name, + responseText: errorMessage, + cause: fetchCause, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryRequest, + retriedByLogId: willRetryRequest ? finalLogId : null, + }); + + // Report key health for the selected token source if (envVarName !== undefined) { reportKeyError(envVarName, configIndex, 0); } + if (providerKey?.id) { + reportTrackedKeyError(providerKey.id, 0); + } + + if (willRetrySameProvider && sameProviderRetryContext) { + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + 0, + getErrorType(0), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); + applyResolvedProviderContext(sameProviderRetryContext); + retryAttempt--; + continue; + } if (willRetryFetch) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: 0, - error_type: getErrorType(0), - succeeded: false, - }); + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + 0, + getErrorType(0), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); failedProviderIds.add( providerRetryKey(usedProvider, usedRegion), ); @@ -4611,6 +4941,23 @@ chat.openapi(completions, async (c) => { }); } + // Log the request in the database + // Extract plugin IDs for logging + const streamingErrorPluginIds = plugins?.map((p) => p.id) ?? 
[]; + + let sameProviderRetryContext: Awaited< + ReturnType + > | null = null; + if (isRetryableErrorType(finishReason)) { + rememberFailedKey(usedProvider, usedRegion, { + envVarName, + configIndex, + providerKeyId: providerKey?.id, + }); + sameProviderRetryContext = + await tryResolveAlternateKeyForCurrentProvider(true); + } + // Check if we should retry before logging so we can mark the log as retried const willRetryHttpError = shouldRetryRequest({ requestedProvider, @@ -4623,94 +4970,101 @@ chat.openapi(completions, async (c) => { 1, usedProvider, }); - - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: null, - upstreamRequest: requestBody, - upstreamResponse: null, - plugins: requestPluginIds, - pluginResults: undefined, - }, - { - duration: Date.now() - perAttemptStartTime, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: errorResponseText.length, - content: null, - reasoningContent: null, - finishReason, - promptTokens: - finishReason === "content_filter" - ? ( - estimateTokens(usedProvider, messages, null, null, 0) - .calculatedPromptTokens ?? null - )?.toString() - : null, - completionTokens: null, - totalTokens: - finishReason === "content_filter" - ? ( - estimateTokens(usedProvider, messages, null, null, 0) - .calculatedPromptTokens ?? null - )?.toString() - : null, - reasoningTokens: null, - cachedTokens: null, - hasError: finishReason !== "content_filter", - streamed: true, - canceled: false, - errorDetails: - finishReason === "content_filter" - ? null - : { - statusCode: res.status, - statusText: res.statusText, - responseText: errorResponseText, - }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - retried: willRetryHttpError, - retriedByLogId: willRetryHttpError ? 
finalLogId : null, - }, + const willRetrySameProvider = sameProviderRetryContext !== null; + const willRetryRequest = + willRetrySameProvider || willRetryHttpError; + + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, // No response for error case + requestBody, // The request that was sent and resulted in error + null, // No upstream response for error case + streamingErrorPluginIds, + undefined, // No plugin results for error case ); + const attemptLogId = shortid(); + + await insertLogEntry({ + ...baseLogEntry, + id: attemptLogId, + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: errorResponseText.length, + content: null, + reasoningContent: null, + finishReason, + promptTokens: + finishReason === "content_filter" + ? ( + estimateTokens(usedProvider, messages, null, null, 0) + .calculatedPromptTokens ?? null + )?.toString() + : null, + completionTokens: null, + totalTokens: + finishReason === "content_filter" + ? ( + estimateTokens(usedProvider, messages, null, null, 0) + .calculatedPromptTokens ?? null + )?.toString() + : null, + reasoningTokens: null, + cachedTokens: null, + hasError: finishReason !== "content_filter", // content_filter is not an error + streamed: true, + canceled: false, + errorDetails: + finishReason === "content_filter" + ? null + : { + statusCode: res.status, + statusText: res.statusText, + responseText: errorResponseText, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryRequest, + retriedByLogId: willRetryRequest ? 
finalLogId : null, + }); - // Report key health for environment-based tokens + // Report key health for the selected token source // Don't report content_filter as a key error - it's intentional provider behavior if ( envVarName !== undefined && @@ -4723,16 +5077,49 @@ chat.openapi(completions, async (c) => { errorResponseText, ); } + if (providerKey?.id && finishReason !== "content_filter") { + reportTrackedKeyError( + providerKey.id, + res.status, + errorResponseText, + ); + } + + if (willRetrySameProvider && sameProviderRetryContext) { + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + res.status, + getErrorType(res.status), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); + applyResolvedProviderContext(sameProviderRetryContext); + retryAttempt--; + continue; + } if (willRetryHttpError) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: res.status, - error_type: getErrorType(res.status), - succeeded: false, - }); + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + res.status, + getErrorType(res.status), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); failedProviderIds.add( providerRetryKey(usedProvider, usedRegion), ); @@ -4801,19 +5188,241 @@ chat.openapi(completions, async (c) => { return; } + const inspectedStreamingResponse = + await inspectImmediateStreamingProviderError(res, usedProvider); + res = inspectedStreamingResponse.response; + if (inspectedStreamingResponse.immediateError) { + const { + errorCode, + errorMessage, + errorResponseText, + errorType, + inferredStatusCode, + statusText, + } = inspectedStreamingResponse.immediateError; + + logger.warn("Immediate streaming provider error", { + status: inferredStatusCode, + errorText: errorResponseText, + usedProvider, + requestedProvider, + usedModel, + initialRequestedModel, + organizationId: project.organizationId, + projectId: apiKey.projectId, + apiKeyId: apiKey.id, + unifiedFinishReason: getUnifiedFinishReason( + errorType, + usedProvider, + ), + }); + + const streamingErrorPluginIds = plugins?.map((p) => p.id) ?? []; + + let sameProviderRetryContext: Awaited< + ReturnType + > | null = null; + if (isRetryableErrorType(errorType)) { + rememberFailedKey(usedProvider, usedRegion, { + envVarName, + configIndex, + providerKeyId: providerKey?.id, + }); + sameProviderRetryContext = + await tryResolveAlternateKeyForCurrentProvider(true); + } + + const willRetryStreamingError = shouldRetryRequest({ + requestedProvider, + noFallback, + errorType, + retryCount: retryAttempt, + remainingProviders: + (routingMetadata?.providerScores.length ?? 
0) - + failedProviderIds.size - + 1, + usedProvider, + }); + const willRetrySameProvider = sameProviderRetryContext !== null; + const willRetryRequest = + willRetrySameProvider || willRetryStreamingError; + + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, + requestBody, + null, + streamingErrorPluginIds, + undefined, + ); + const attemptLogId = shortid(); + + await insertLogEntry({ + ...baseLogEntry, + id: attemptLogId, + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: errorResponseText.length, + content: null, + reasoningContent: null, + finishReason: errorType, + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: errorType !== "content_filter", + streamed: true, + canceled: false, + errorDetails: + errorType === "content_filter" + ? null + : { + statusCode: inferredStatusCode, + statusText, + responseText: errorResponseText, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryRequest, + retriedByLogId: willRetryRequest ? finalLogId : null, + }); + + if (envVarName !== undefined && errorType !== "content_filter") { + reportKeyError( + envVarName, + configIndex, + inferredStatusCode, + errorResponseText, + ); + } + if (providerKey?.id && errorType !== "content_filter") { + reportTrackedKeyError( + providerKey.id, + inferredStatusCode, + errorResponseText, + ); + } + + if (willRetrySameProvider && sameProviderRetryContext) { + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + inferredStatusCode, + getErrorType(inferredStatusCode), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); + applyResolvedProviderContext(sameProviderRetryContext); + retryAttempt--; + continue; + } + + if (willRetryStreamingError) { + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + inferredStatusCode, + getErrorType(inferredStatusCode), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); + failedProviderIds.add( + providerRetryKey(usedProvider, usedRegion), + ); + continue; + } + + await writeSSEAndCache({ + event: "error", + data: JSON.stringify({ + error: { + message: errorMessage, + type: errorType, + code: errorCode, + param: null, + responseText: errorResponseText, + }, + }), + id: String(eventId++), + }); + await writeSSEAndCache({ + event: "done", + data: "[DONE]", + id: String(eventId++), + }); + clearKeepalive(); + return; + } + break; // Fetch succeeded, exit retry loop } // End of retry for loop // Add the final attempt (successful or last failed) to routing if (res && res.ok && usedProvider) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: res.status, - 
error_type: "none", - succeeded: true, - }); + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + res.status, + "none", + true, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: finalLogId, + }, + ), + ); } // Update routingMetadata with all routing attempts for DB logging @@ -4906,6 +5515,7 @@ chat.openapi(completions, async (c) => { let totalTokens = null; let reasoningTokens = null; let cachedTokens = null; + let cacheCreationTokens: number | null = null; let streamingToolCalls = null; let imageByteSize = 0; // Track total image data size for token estimation let outputImageCount = 0; // Track number of output images for cost calculation @@ -4915,6 +5525,7 @@ chat.openapi(completions, async (c) => { let sawProviderTerminalEvent = false; let sawOpenAiResponsesDoneEvent = false; let sawOpenAiResponsesCompletedStatus = false; + let sentDownstreamFinishReasonChunk = false; let handledTerminalProviderEvent = false; let buffer = ""; // Buffer for accumulating partial data across chunks (string for SSE) let binaryBuffer = new Uint8Array(0); // Buffer for binary event streams (AWS Bedrock) @@ -5328,10 +5939,70 @@ chat.openapi(completions, async (c) => { webSearchCount, project.organizationId, ); + streamingCosts.dataStorageCost = toDataStorageCostNumber( + streamingCosts.promptTokens ?? finalPromptTokens, + cachedTokens, + streamingCosts.completionTokens ?? finalCompletionTokens, + reasoningTokens, + retentionLevel, + ); // Include costs in response for all users const shouldIncludeCosts = true; + const finalStreamUsage: Record = { + prompt_tokens: Math.max( + 1, + streamingCosts.promptTokens ?? finalPromptTokens ?? 1, + ), + completion_tokens: + streamingCosts.completionTokens ?? + finalCompletionTokens ?? + 0, + total_tokens: Math.max( + 1, + (streamingCosts.promptTokens ?? + finalPromptTokens ?? + 0) + + (streamingCosts.completionTokens ?? + finalCompletionTokens ?? + 0) + + (reasoningTokens ?? 0), + ), + ...(reasoningTokens !== null && + reasoningTokens > 0 && { + reasoning_tokens: reasoningTokens, + }), + ...((cachedTokens !== null || + (cacheCreationTokens !== null && + cacheCreationTokens > 0)) && { + prompt_tokens_details: { + cached_tokens: cachedTokens ?? 0, + ...(cacheCreationTokens !== null && + cacheCreationTokens > 0 && { + cache_creation_tokens: cacheCreationTokens, + }), + }, + }), + }; + applyExtendedUsageFields(finalStreamUsage, { + costs: shouldIncludeCosts + ? { + inputCost: streamingCosts.inputCost, + outputCost: streamingCosts.outputCost, + cachedInputCost: streamingCosts.cachedInputCost, + requestCost: streamingCosts.requestCost, + webSearchCost: streamingCosts.webSearchCost, + imageInputCost: streamingCosts.imageInputCost, + imageOutputCost: streamingCosts.imageOutputCost, + totalCost: streamingCosts.totalCost, + dataStorageCost: streamingCosts.dataStorageCost, + } + : null, + cachedTokens, + cacheCreationTokens, + reasoningTokens, + }); const finalUsageChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", @@ -5344,35 +6015,7 @@ chat.openapi(completions, async (c) => { finish_reason: null, }, ], - usage: { - prompt_tokens: Math.max( - 1, - streamingCosts.promptTokens ?? finalPromptTokens ?? 1, - ), - completion_tokens: - streamingCosts.completionTokens ?? - finalCompletionTokens ?? - 0, - total_tokens: Math.max( - 1, - (streamingCosts.promptTokens ?? - finalPromptTokens ?? - 0) + - (streamingCosts.completionTokens ?? - finalCompletionTokens ?? - 0) + - (reasoningTokens ?? 
0), - ), - ...(shouldIncludeCosts && { - cost_usd_total: streamingCosts.totalCost, - cost_usd_input: streamingCosts.inputCost, - cost_usd_output: streamingCosts.outputCost, - cost_usd_cached_input: streamingCosts.cachedInputCost, - cost_usd_request: streamingCosts.requestCost, - cost_usd_image_input: streamingCosts.imageInputCost, - cost_usd_image_output: streamingCosts.imageOutputCost, - }), - }, + usage: finalStreamUsage, }; await writeSSEAndCache({ @@ -5468,6 +6111,28 @@ chat.openapi(completions, async (c) => { usedProvider === "aws-bedrock" ? extractAwsBedrockStreamError(data) : null; + if ( + data && + typeof data === "object" && + "response" in data && + data.response && + typeof data.response === "object" && + "status" in data.response && + data.response.status === "completed" + ) { + sawOpenAiResponsesCompletedStatus = true; + } + if ( + data && + typeof data === "object" && + "type" in data && + typeof data.type === "string" && + (data.type === "response.content_part.done" || + data.type === "response.output_item.done" || + data.type === "response.output_text.done") + ) { + sawOpenAiResponsesDoneEvent = true; + } const openAiCompatibleStreamError = !awsBedrockStreamError && data && @@ -5492,13 +6157,10 @@ chat.openapi(completions, async (c) => { ), ); } - const inferredStatusCode = - typeof openAiCompatibleStreamError.status_code === - "number" - ? openAiCompatibleStreamError.status_code - : typeof openAiCompatibleStreamError.status === "number" - ? openAiCompatibleStreamError.status - : 400; + const inferredStatusCode = inferStreamingErrorStatusCode( + openAiCompatibleStreamError, + errorResponseText, + ); const errorType = getFinishReasonFromError( inferredStatusCode, errorResponseText, @@ -5640,29 +6302,6 @@ chat.openapi(completions, async (c) => { continue; } - if ( - data && - typeof data === "object" && - "response" in data && - data.response && - typeof data.response === "object" && - "status" in data.response && - data.response.status === "completed" - ) { - sawOpenAiResponsesCompletedStatus = true; - } - if ( - data && - typeof data === "object" && - "type" in data && - typeof data.type === "string" && - (data.type === "response.content_part.done" || - data.type === "response.output_item.done" || - data.type === "response.output_text.done") - ) { - sawOpenAiResponsesDoneEvent = true; - } - if (splitTaggedReasoning) { const deltaContent = transformedData.choices?.[0]?.delta?.content; @@ -5886,6 +6525,8 @@ chat.openapi(completions, async (c) => { // Extract finishReason from transformedData to update tracking variable if (transformedData.choices?.[0]?.finish_reason) { finishReason = transformedData.choices[0].finish_reason; + sawProviderTerminalEvent = true; + sentDownstreamFinishReasonChunk = true; } // Extract content for logging using helper function @@ -6056,12 +6697,8 @@ chat.openapi(completions, async (c) => { } break; default: // OpenAI format - if ( - transformedData?.choices && - transformedData.choices[0]?.finish_reason - ) { - finishReason = transformedData.choices[0].finish_reason; - sawProviderTerminalEvent = true; + if (data.choices && data.choices[0]?.finish_reason) { + finishReason = data.choices[0].finish_reason; } break; } @@ -6088,6 +6725,9 @@ chat.openapi(completions, async (c) => { if (usage.cachedTokens !== null) { cachedTokens = usage.cachedTokens; } + if (usage.cacheCreationTokens !== null) { + cacheCreationTokens = usage.cacheCreationTokens; + } // Estimate tokens if not provided and we have a finish reason if (finishReason && (!promptTokens || 
!completionTokens)) { @@ -6353,10 +6993,18 @@ chat.openapi(completions, async (c) => { sawUpstreamDoneSentinel || sawProviderTerminalEvent || handledTerminalProviderEvent; + // A terminal finish reason (stop, tool_calls, length) also counts + // as a valid stream completion — some providers (e.g. MiniMax) + // send finish_reason but omit the [DONE] sentinel. + const hasTerminalFinishReason = + finishReason !== null && + finishReason !== "upstream_error" && + finishReason !== "gateway_error"; const streamEndedWithoutTerminalEvent = !streamingError && !canceled && - (!streamHasVerifiedTerminalEvent || finishReason === null); + !streamHasVerifiedTerminalEvent && + !hasTerminalFinishReason; if (streamEndedWithoutTerminalEvent) { const hasBufferedNonWhitespace = /\S/u.test(buffer); const responseText = hasBufferedNonWhitespace @@ -6427,23 +7075,14 @@ chat.openapi(completions, async (c) => { // Check if the response finished successfully but has no content, tokens, or tool calls // This indicates an empty response which should be marked as an error // Do this check BEFORE sending usage chunks to ensure proper event ordering - // Exclude content_filter responses as they are intentionally empty (blocked by provider) - // For Google, check for original finish reasons that indicate content filtering - // These include both finishReason values and promptFeedback.blockReason values - const isGoogleContentFilterStreaming = - isGoogleCompatibleProvider(usedProvider) && - (finishReason === "SAFETY" || - finishReason === "PROHIBITED_CONTENT" || - finishReason === "RECITATION" || - finishReason === "BLOCKLIST" || - finishReason === "SPII" || - finishReason === "OTHER"); + // Exclude content filter responses as they are intentionally empty. + const isContentFilterStreamingResponse = + isContentFilterFinishReason(finishReason, usedProvider); const hasEmptyResponse = !streamingError && finishReason && - finishReason !== "content_filter" && finishReason !== "incomplete" && - !isGoogleContentFilterStreaming && + !isContentFilterStreamingResponse && (!calculatedCompletionTokens || calculatedCompletionTokens === 0) && (!calculatedReasoningTokens || calculatedReasoningTokens === 0) && @@ -6507,7 +7146,44 @@ chat.openapi(completions, async (c) => { : new Error(String(sseError)), ); } - } else if (!streamingError && !doneSent) { + } else if (!streamingError && !doneSent) { + if ( + finishReason && + !sentDownstreamFinishReasonChunk && + !shouldBufferForHealing + ) { + try { + const finishChunk = { + id: `chatcmpl-${Date.now()}`, + object: "chat.completion.chunk", + created: Math.floor(Date.now() / 1000), + model: usedModel, + choices: [ + { + index: 0, + delta: {}, + finish_reason: mapFinishReasonToOpenai( + finishReason, + usedProvider, + !!streamingToolCalls && streamingToolCalls.length > 0, + ), + }, + ], + }; + + await writeSSEAndCache({ + data: JSON.stringify(finishChunk), + id: String(eventId++), + }); + sentDownstreamFinishReasonChunk = true; + } catch (error) { + logger.error( + "Error sending synthesized finish chunk", + error instanceof Error ? 
error : new Error(String(error)), + ); + } + } + // Calculate costs before sending usage chunk so we can include cost data const billCancelledRequestsEarly = shouldBillCancelledRequests(); streamingCostsEarly = @@ -6523,13 +7199,13 @@ chat.openapi(completions, async (c) => { imageInputCost: null, imageOutputCost: null, totalCost: null, - dataStorageCost: null as number | null, promptTokens: null, completionTokens: null, cachedTokens: null, estimatedCost: false, discount: undefined, pricingTier: undefined, + dataStorageCost: null as number | null, } : await calculateCosts( usedModel, @@ -6551,6 +7227,16 @@ chat.openapi(completions, async (c) => { webSearchCount, project.organizationId, ); + if (streamingCostsEarly.totalCost !== null) { + streamingCostsEarly.dataStorageCost = toDataStorageCostNumber( + streamingCostsEarly.promptTokens ?? calculatedPromptTokens, + cachedTokens, + streamingCostsEarly.completionTokens ?? + calculatedCompletionTokens, + reasoningTokens, + retentionLevel, + ); + } // Always send final usage chunk with cost data for SDK compatibility try { @@ -6585,28 +7271,46 @@ chat.openapi(completions, async (c) => { const adjCompletion = Math.round( completionTokens ?? calculatedCompletionTokens ?? 0, ); - return { + const earlyUsage: Record = { prompt_tokens: adjPrompt, completion_tokens: adjCompletion, total_tokens: Math.max( 1, Math.round(adjPrompt + adjCompletion), ), - ...(cachedTokens !== null && { + ...(reasoningTokens !== null && + reasoningTokens > 0 && { + reasoning_tokens: reasoningTokens, + }), + ...((cachedTokens !== null || + (cacheCreationTokens !== null && + cacheCreationTokens > 0)) && { prompt_tokens_details: { - cached_tokens: cachedTokens, + cached_tokens: cachedTokens ?? 0, + ...(cacheCreationTokens !== null && + cacheCreationTokens > 0 && { + cache_creation_tokens: cacheCreationTokens, + }), }, }), - cost_usd_total: streamingCostsEarly.totalCost, - cost_usd_input: streamingCostsEarly.inputCost, - cost_usd_output: streamingCostsEarly.outputCost, - cost_usd_cached_input: - streamingCostsEarly.cachedInputCost, - cost_usd_request: streamingCostsEarly.requestCost, - cost_usd_image_input: streamingCostsEarly.imageInputCost, - cost_usd_image_output: - streamingCostsEarly.imageOutputCost, }; + applyExtendedUsageFields(earlyUsage, { + costs: { + inputCost: streamingCostsEarly.inputCost, + outputCost: streamingCostsEarly.outputCost, + cachedInputCost: streamingCostsEarly.cachedInputCost, + requestCost: streamingCostsEarly.requestCost, + webSearchCost: streamingCostsEarly.webSearchCost, + imageInputCost: streamingCostsEarly.imageInputCost, + imageOutputCost: streamingCostsEarly.imageOutputCost, + totalCost: streamingCostsEarly.totalCost, + dataStorageCost: streamingCostsEarly.dataStorageCost, + }, + cachedTokens, + cacheCreationTokens, + reasoningTokens, + }); + return earlyUsage; })(), }; @@ -6680,7 +7384,11 @@ chat.openapi(completions, async (c) => { { index: 0, delta: {}, - finish_reason: finishReason ?? "stop", + finish_reason: mapFinishReasonToOpenai( + finishReason, + usedProvider, + !!streamingToolCalls && streamingToolCalls.length > 0, + ), }, ], }; @@ -6786,6 +7494,7 @@ chat.openapi(completions, async (c) => { estimatedCost: false, discount: undefined, pricingTier: undefined, + dataStorageCost: null as number | null, } : await calculateCosts( usedModel, @@ -6823,12 +7532,51 @@ chat.openapi(completions, async (c) => { } } + // Extract plugin IDs for logging + const streamingPluginIds = plugins?.map((p) => p.id) ?? 
[]; + // Determine plugin results for logging (includes healing results if applicable) const finalPluginResults = Object.keys(streamingPluginResults).length > 0 ? streamingPluginResults : undefined; + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + streamingError ?? streamingRawResponseData, // Raw SSE data sent back to the client + requestBody, // The request sent to the provider + streamingError ?? rawUpstreamData, // Raw streaming data received from upstream provider + streamingPluginIds, + finalPluginResults, // Plugin results including healing (if enabled) + ); + // Enhanced logging for Google models streaming to debug missing responses if (isGoogleCompatibleProvider(usedProvider)) { logger.debug("Google model streaming response completed", { @@ -6852,142 +7600,123 @@ chat.openapi(completions, async (c) => { const shouldIncludeTokensForBilling = !canceled || (canceled && billCancelledRequests); - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: streamingError ?? streamingRawResponseData, - upstreamRequest: requestBody, - upstreamResponse: streamingError ?? rawUpstreamData, - plugins: requestPluginIds, - pluginResults: finalPluginResults, - }, - { - id: routingAttempts.length > 0 ? finalLogId : undefined, - duration, - timeToFirstToken, - timeToFirstReasoningToken, - responseSize: fullContent.length, - content: fullContent, - reasoningContent: fullReasoningContent || null, - finishReason: canceled ? "canceled" : finishReason, - promptTokens: shouldIncludeTokensForBilling - ? (calculatedPromptTokens?.toString() ?? null) - : null, - completionTokens: shouldIncludeTokensForBilling - ? (calculatedCompletionTokens?.toString() ?? null) - : null, - totalTokens: shouldIncludeTokensForBilling - ? (calculatedTotalTokens?.toString() ?? null) - : null, - reasoningTokens: shouldIncludeTokensForBilling - ? (calculatedReasoningTokens?.toString() ?? null) - : null, - cachedTokens: shouldIncludeTokensForBilling - ? (cachedTokens?.toString() ?? null) - : null, - hasError: streamingError !== null, - errorDetails: streamingError - ? { - statusCode: - typeof streamingError === "object" && - streamingError !== null && - "details" in streamingError && - typeof streamingError.details === "object" && - streamingError.details !== null && - "statusCode" in streamingError.details && - typeof streamingError.details.statusCode === "number" - ? 
streamingError.details.statusCode - : 500, - statusText: - typeof streamingError === "object" && - streamingError !== null && - "details" in streamingError && - typeof streamingError.details === "object" && - streamingError.details !== null && - "statusText" in streamingError.details && - typeof streamingError.details.statusText === "string" - ? streamingError.details.statusText - : "Streaming Error", - responseText: - typeof streamingError === "object" && - streamingError !== null && - "details" in streamingError && - typeof streamingError.details === "object" && - streamingError.details !== null && - "responseText" in streamingError.details && - typeof streamingError.details.responseText === "string" - ? streamingError.details.responseText - : typeof streamingError === "object" && - streamingError !== null && - "details" in streamingError - ? JSON.stringify(streamingError) - : streamingError instanceof Error - ? streamingError.message - : String(streamingError), - } - : null, - streamed: true, - canceled: canceled, - inputCost: costs.inputCost, - outputCost: costs.outputCost, - cachedInputCost: costs.cachedInputCost, - requestCost: costs.requestCost, - webSearchCost: costs.webSearchCost, - imageInputTokens: costs.imageInputTokens?.toString() ?? null, - imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, - imageInputCost: costs.imageInputCost ?? null, - imageOutputCost: costs.imageOutputCost ?? null, - cost: costs.totalCost, - estimatedCost: costs.estimatedCost, - discount: costs.discount, - pricingTier: costs.pricingTier, - dataStorageCost: shouldIncludeTokensForBilling - ? calculateDataStorageCost( - calculatedPromptTokens, - cachedTokens, - calculatedCompletionTokens, - calculatedReasoningTokens, - retentionLevel, - ) - : "0", - cached: false, - toolResults: streamingToolCalls, - }, - ); + const streamingErrorStatusCode = + typeof streamingError === "object" && + streamingError !== null && + "details" in streamingError && + typeof streamingError.details === "object" && + streamingError.details !== null && + "statusCode" in streamingError.details && + typeof streamingError.details.statusCode === "number" + ? streamingError.details.statusCode + : 500; + + await insertLogEntry({ + ...baseLogEntry, + id: routingAttempts.length > 0 ? finalLogId : undefined, + duration, + timeToFirstToken, + timeToFirstReasoningToken, + responseSize: fullContent.length, + content: fullContent, + reasoningContent: fullReasoningContent || null, + finishReason: canceled ? "canceled" : finishReason, + promptTokens: shouldIncludeTokensForBilling + ? (calculatedPromptTokens?.toString() ?? null) + : null, + completionTokens: shouldIncludeTokensForBilling + ? (calculatedCompletionTokens?.toString() ?? null) + : null, + totalTokens: shouldIncludeTokensForBilling + ? (calculatedTotalTokens?.toString() ?? null) + : null, + reasoningTokens: shouldIncludeTokensForBilling + ? (calculatedReasoningTokens?.toString() ?? null) + : null, + cachedTokens: shouldIncludeTokensForBilling + ? (cachedTokens?.toString() ?? null) + : null, + hasError: streamingError !== null, + errorDetails: streamingError + ? { + statusCode: streamingErrorStatusCode, + statusText: + typeof streamingError === "object" && + streamingError !== null && + "details" in streamingError && + typeof streamingError.details === "object" && + streamingError.details !== null && + "statusText" in streamingError.details && + typeof streamingError.details.statusText === "string" + ? 
streamingError.details.statusText + : "Streaming Error", + responseText: + typeof streamingError === "object" && + streamingError !== null && + "details" in streamingError && + typeof streamingError.details === "object" && + streamingError.details !== null && + "responseText" in streamingError.details && + typeof streamingError.details.responseText === "string" + ? streamingError.details.responseText + : typeof streamingError === "object" && + streamingError !== null && + "details" in streamingError + ? JSON.stringify(streamingError) + : streamingError instanceof Error + ? streamingError.message + : String(streamingError), + } + : null, + streamed: true, + canceled: canceled, + inputCost: costs.inputCost, + outputCost: costs.outputCost, + cachedInputCost: costs.cachedInputCost, + requestCost: costs.requestCost, + webSearchCost: costs.webSearchCost, + imageInputTokens: costs.imageInputTokens?.toString() ?? null, + imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, + imageInputCost: costs.imageInputCost ?? null, + imageOutputCost: costs.imageOutputCost ?? null, + cost: costs.totalCost, + estimatedCost: costs.estimatedCost, + discount: costs.discount, + pricingTier: costs.pricingTier, + dataStorageCost: shouldIncludeTokensForBilling + ? calculateDataStorageCost( + calculatedPromptTokens, + cachedTokens, + calculatedCompletionTokens, + calculatedReasoningTokens, + retentionLevel, + ) + : "0", + cached: false, + tools, + toolResults: streamingToolCalls, + toolChoice: tool_choice, + }); - // Report key health for environment-based tokens + // Report key health for the selected token source if (envVarName !== undefined) { if (streamingError !== null) { - reportKeyError(envVarName, configIndex, 500); + reportKeyError( + envVarName, + configIndex, + streamingErrorStatusCode, + ); } else { reportKeySuccess(envVarName, configIndex); } } + if (providerKey?.id) { + if (streamingError !== null) { + reportTrackedKeyError(providerKey.id, streamingErrorStatusCode); + } else { + reportTrackedKeySuccess(providerKey.id); + } + } // Save streaming cache if enabled and not canceled and no errors if ( @@ -7041,6 +7770,7 @@ chat.openapi(completions, async (c) => { } else { logger.error("Streaming request error (escaped handler)", error); } + finishStreamCompletion(c); }, ); } @@ -7115,65 +7845,8 @@ chat.openapi(completions, async (c) => { } try { - const ctx = await resolveProviderContext( - nextProvider, - { - mode: project.mode, - organizationId: project.organizationId, - }, - { - id: organization.id, - credits: organization.credits, - devPlan: organization.devPlan, - devPlanCreditsLimit: organization.devPlanCreditsLimit, - devPlanCreditsUsed: organization.devPlanCreditsUsed, - devPlanExpiresAt: organization.devPlanExpiresAt, - }, - modelInfo, - originalRequestParams, - { - requestId, - stream, - effectiveStream, - messages: messages as BaseMessage[], - response_format, - tools, - tool_choice, - reasoning_effort, - reasoning_max_tokens, - effort, - webSearchTool, - image_config, - sensitive_word_check, - maxImageSizeMB, - userPlan, - hasExistingToolCalls, - customProviderName, - webSearchEnabled: !!webSearchTool, - }, - ); - usedProvider = ctx.usedProvider; - usedModel = ctx.usedModel; - usedModelFormatted = ctx.usedModelFormatted; - usedModelMapping = ctx.usedModelMapping; - baseModelName = ctx.baseModelName; - usedToken = ctx.usedToken; - providerKey = ctx.providerKey; - configIndex = ctx.configIndex; - envVarName = ctx.envVarName; - url = ctx.url; - requestBody = ctx.requestBody; - 
useResponsesApi = ctx.useResponsesApi;
-			requestCanBeCanceled = ctx.requestCanBeCanceled;
-			isImageGeneration = ctx.isImageGeneration;
-			supportsReasoning = ctx.supportsReasoning;
-			splitTaggedReasoning = ctx.splitTaggedReasoning ?? false;
-			temperature = ctx.temperature;
-			max_tokens = ctx.max_tokens;
-			top_p = ctx.top_p;
-			frequency_penalty = ctx.frequency_penalty;
-			presence_penalty = ctx.presence_penalty;
-			usedRegion = ctx.usedRegion;
+			const ctx = await resolveProviderContextForRetry(nextProvider, stream);
+			applyResolvedProviderContext(ctx);
 		} catch {
 			failedProviderIds.add(
 				providerRetryKey(nextProvider.providerId, nextProvider.region),
@@ -7192,9 +7865,12 @@ chat.openapi(completions, async (c) => {

 		try {
 			const headers = getProviderHeaders(usedProvider, usedToken, {
+				requestId,
 				webSearchEnabled: !!webSearchTool,
 			});
-			headers["Content-Type"] = "application/json";
+			if (!(requestBody instanceof FormData)) {
+				headers["Content-Type"] = "application/json";
+			}

 			// Add effort beta header for Anthropic if effort parameter is specified
 			if (usedProvider === "anthropic" && effort !== undefined) {
@@ -7224,7 +7900,10 @@ chat.openapi(completions, async (c) => {
 				res = await fetch(url, {
 					method: "POST",
 					headers,
-					body: JSON.stringify(requestBody),
+					body:
+						requestBody instanceof FormData
+							? requestBody
+							: JSON.stringify(requestBody),
 					signal: fetchSignal,
 				});
 			} catch (error) {
@@ -7267,7 +7946,24 @@ chat.openapi(completions, async (c) => {
 					),
 				});

+				// Log the error in the database
+				// Extract plugin IDs for logging (non-streaming fetch error)
+				const nonStreamingFetchErrorPluginIds = plugins?.map((p) => p.id) ?? [];
+
 				// Check if we should retry before logging so we can mark the log as retried
+				let sameProviderRetryContext: Awaited<
+					ReturnType<typeof tryResolveAlternateKeyForCurrentProvider>
+				> | null = null;
+				if (isRetryableErrorType("network_error")) {
+					rememberFailedKey(usedProvider, usedRegion, {
+						envVarName,
+						configIndex,
+						providerKeyId: providerKey?.id,
+					});
+					sameProviderRetryContext =
+						await tryResolveAlternateKeyForCurrentProvider(stream);
+				}
+
 				const willRetryFetchNonStreaming = shouldRetryRequest({
 					requestedProvider,
 					noFallback,
@@ -7279,94 +7975,130 @@ chat.openapi(completions, async (c) => {
 					1,
 					usedProvider,
 				});
+				const willRetrySameProvider = sameProviderRetryContext !== null;
+				const willRetryRequest =
+					willRetrySameProvider || willRetryFetchNonStreaming;

-				enqueueChatLog(
-					c,
-					{
-						providerKeyId: providerKey?.id,
-						usedModel: usedModelFormatted,
-						usedModelMapping,
-						usedProvider,
-						requestedModel: initialRequestedModel,
-						requestedProvider,
-						messages,
-						temperature,
-						max_tokens,
-						top_p,
-						frequency_penalty,
-						presence_penalty,
-						reasoningEffort: reasoning_effort,
-						reasoningMaxTokens: reasoning_max_tokens,
-						effort,
-						responseFormat: response_format,
-						tools,
-						toolChoice: tool_choice,
-						source,
-						customHeaders,
-						debugMode,
-						userAgent,
-						imageConfig: image_config,
-						routingMetadata,
-						rawRequest: rawBody,
-						rawResponse: null,
-						upstreamRequest: requestBody,
-						upstreamResponse: null,
-						plugins: requestPluginIds,
-						pluginResults: undefined,
-					},
-					{
-						duration: perAttemptDuration,
-						timeToFirstToken: null,
-						timeToFirstReasoningToken: null,
-						responseSize: 0,
-						content: null,
-						reasoningContent: null,
-						finishReason: "upstream_error",
-						promptTokens: null,
-						completionTokens: null,
-						totalTokens: null,
-						reasoningTokens: null,
-						cachedTokens: null,
-						hasError: true,
-						streamed: false,
-						canceled: false,
-						errorDetails: {
-							statusCode: 0,
-							statusText: fetchError.name,
-							responseText: errorMessage,
-							cause:
nonStreamingFetchCause, - }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - estimatedCost: false, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - retried: willRetryFetchNonStreaming, - retriedByLogId: willRetryFetchNonStreaming ? finalLogId : null, - }, + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, // No response for fetch error + requestBody, // The request that resulted in error + null, // No upstream response for fetch error + nonStreamingFetchErrorPluginIds, + undefined, // No plugin results for error case ); + const attemptLogId = shortid(); + + await insertLogEntry({ + ...baseLogEntry, + id: attemptLogId, + duration: perAttemptDuration, + timeToFirstToken: null, // Not applicable for error case + timeToFirstReasoningToken: null, // Not applicable for error case + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: false, + canceled: false, + errorDetails: { + statusCode: 0, + statusText: fetchError.name, + responseText: errorMessage, + cause: nonStreamingFetchCause, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + estimatedCost: false, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryRequest, + retriedByLogId: willRetryRequest ? 
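+					// tie this failed attempt to the final log entry the retry will write under finalLogId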
finalLogId : null, + }); - // Report key health for environment-based tokens + // Report key health for the selected token source if (envVarName !== undefined) { reportKeyError(envVarName, configIndex, 0); } + if (providerKey?.id) { + reportTrackedKeyError(providerKey.id, 0); + } + + if (willRetrySameProvider && sameProviderRetryContext) { + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + 0, + getErrorType(0), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); + applyResolvedProviderContext(sameProviderRetryContext); + retryAttempt--; + continue; + } if (willRetryFetchNonStreaming) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: 0, - error_type: getErrorType(0), - succeeded: false, - }); + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + 0, + getErrorType(0), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); failedProviderIds.add(providerRetryKey(usedProvider, usedRegion)); continue; } @@ -7393,6 +8125,10 @@ chat.openapi(completions, async (c) => { // If the request was canceled, log it and return a response if (canceled) { + // Log the canceled request + // Extract plugin IDs for logging (canceled non-streaming) + const canceledNonStreamingPluginIds = plugins?.map((p) => p.id) ?? []; + // Calculate costs for cancelled request if billing is enabled const billCancelled = shouldBillCancelledRequests(); let cancelledCosts: Awaited> | null = @@ -7433,93 +8169,90 @@ chat.openapi(completions, async (c) => { ); } - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: null, - upstreamRequest: requestBody, - upstreamResponse: null, - plugins: requestPluginIds, - pluginResults: undefined, - }, - { - duration, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "canceled", - promptTokens: billCancelled - ? ( - cancelledCosts?.promptTokens ?? estimatedPromptTokens - )?.toString() - : null, - completionTokens: billCancelled ? "0" : null, - totalTokens: billCancelled - ? ( - cancelledCosts?.promptTokens ?? estimatedPromptTokens - )?.toString() - : null, - reasoningTokens: null, - cachedTokens: null, - hasError: false, - streamed: false, - canceled: true, - errorDetails: null, - inputCost: cancelledCosts?.inputCost ?? null, - outputCost: cancelledCosts?.outputCost ?? null, - cachedInputCost: cancelledCosts?.cachedInputCost ?? null, - requestCost: cancelledCosts?.requestCost ?? null, - webSearchCost: cancelledCosts?.webSearchCost ?? null, - imageInputTokens: - cancelledCosts?.imageInputTokens?.toString() ?? null, - imageOutputTokens: - cancelledCosts?.imageOutputTokens?.toString() ?? null, - imageInputCost: cancelledCosts?.imageInputCost ?? null, - imageOutputCost: cancelledCosts?.imageOutputCost ?? null, - cost: cancelledCosts?.totalCost ?? 
null, - estimatedCost: cancelledCosts?.estimatedCost ?? false, - discount: cancelledCosts?.discount ?? null, - dataStorageCost: billCancelled - ? calculateDataStorageCost( - cancelledCosts?.promptTokens ?? estimatedPromptTokens, - null, - 0, - null, - retentionLevel, - ) - : "0", - cached: false, - toolResults: null, - }, + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, // No response for canceled request + requestBody, // The request that was prepared before cancellation + null, // No upstream response for canceled request + canceledNonStreamingPluginIds, + undefined, // No plugin results for canceled request ); + await insertLogEntry({ + ...baseLogEntry, + duration, + timeToFirstToken: null, // Not applicable for canceled request + timeToFirstReasoningToken: null, // Not applicable for canceled request + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "canceled", + promptTokens: billCancelled + ? (cancelledCosts?.promptTokens ?? estimatedPromptTokens)?.toString() + : null, + completionTokens: billCancelled ? "0" : null, + totalTokens: billCancelled + ? (cancelledCosts?.promptTokens ?? estimatedPromptTokens)?.toString() + : null, + reasoningTokens: null, + cachedTokens: null, + hasError: false, + streamed: false, + canceled: true, + errorDetails: null, + inputCost: cancelledCosts?.inputCost ?? null, + outputCost: cancelledCosts?.outputCost ?? null, + cachedInputCost: cancelledCosts?.cachedInputCost ?? null, + requestCost: cancelledCosts?.requestCost ?? null, + webSearchCost: cancelledCosts?.webSearchCost ?? null, + imageInputTokens: cancelledCosts?.imageInputTokens?.toString() ?? null, + imageOutputTokens: + cancelledCosts?.imageOutputTokens?.toString() ?? null, + imageInputCost: cancelledCosts?.imageInputCost ?? null, + imageOutputCost: cancelledCosts?.imageOutputCost ?? null, + cost: cancelledCosts?.totalCost ?? null, + estimatedCost: cancelledCosts?.estimatedCost ?? false, + discount: cancelledCosts?.discount ?? null, + dataStorageCost: billCancelled + ? calculateDataStorageCost( + cancelledCosts?.promptTokens ?? 
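+							// fall back to the locally estimated prompt tokens when cancelled-cost calculation was skipped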
estimatedPromptTokens, + null, + 0, + null, + retentionLevel, + ) + : "0", + cached: false, + toolResults: null, + }); + return c.json( { error: { @@ -7561,77 +8294,80 @@ chat.openapi(completions, async (c) => { ), }); - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: null, - upstreamRequest: requestBody, - upstreamResponse: null, - plugins: requestPluginIds, - pluginResults: undefined, - }, - { - duration: Date.now() - perAttemptStartTime, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "upstream_error", - promptTokens: null, - completionTokens: null, - totalTokens: null, - reasoningTokens: null, - cachedTokens: null, - hasError: true, - streamed: false, - canceled: false, - errorDetails: { - statusCode: res.status, - statusText: "TimeoutError", - responseText: errorMessage, - cause: bodyErrorCause, - }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - estimatedCost: false, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - }, + const bodyTimeoutPluginIds = plugins?.map((p) => p.id) ?? []; + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping!, + usedProvider!, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, + requestBody, + null, + bodyTimeoutPluginIds, + undefined, ); + await insertLogEntry({ + ...baseLogEntry, + duration: Date.now() - perAttemptStartTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: false, + canceled: false, + errorDetails: { + statusCode: res.status, + statusText: "TimeoutError", + responseText: errorMessage, + cause: bodyErrorCause, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + estimatedCost: false, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + }); + return c.json( { error: { @@ -7674,6 +8410,23 @@ chat.openapi(completions, async (c) => { }); } + // Log the request in the database + // Extract plugin IDs for logging + const providerErrorPluginIds = plugins?.map((p) => p.id) ?? 
[];
+
+				let sameProviderRetryContext: Awaited<
+					ReturnType<typeof tryResolveAlternateKeyForCurrentProvider>
+				> | null = null;
+				if (isRetryableErrorType(finishReason)) {
+					rememberFailedKey(usedProvider, usedRegion, {
+						envVarName,
+						configIndex,
+						providerKeyId: providerKey?.id,
+					});
+					sameProviderRetryContext =
+						await tryResolveAlternateKeyForCurrentProvider(stream);
+				}
+
 				// Check if we should retry before logging so we can mark the log as retried
 				const willRetryHttpNonStreaming = shouldRetryRequest({
 					requestedProvider,
@@ -7686,112 +8439,150 @@ chat.openapi(completions, async (c) => {
 					1,
 					usedProvider,
 				});
+				const willRetrySameProvider = sameProviderRetryContext !== null;
+				const willRetryRequest =
+					willRetrySameProvider || willRetryHttpNonStreaming;

-				enqueueChatLog(
-					c,
-					{
-						providerKeyId: providerKey?.id,
-						usedModel: usedModelFormatted,
-						usedModelMapping,
-						usedProvider,
-						requestedModel: initialRequestedModel,
-						requestedProvider,
-						messages,
-						temperature,
-						max_tokens,
-						top_p,
-						frequency_penalty,
-						presence_penalty,
-						reasoningEffort: reasoning_effort,
-						reasoningMaxTokens: reasoning_max_tokens,
-						effort,
-						responseFormat: response_format,
-						tools,
-						toolChoice: tool_choice,
-						source,
-						customHeaders,
-						debugMode,
-						userAgent,
-						imageConfig: image_config,
-						routingMetadata,
-						rawRequest: rawBody,
-						rawResponse: errorResponseText,
-						upstreamRequest: requestBody,
-						upstreamResponse: errorResponseText,
-						plugins: requestPluginIds,
-						pluginResults: undefined,
-					},
-					{
-						duration: perAttemptDuration,
-						timeToFirstToken: null,
-						timeToFirstReasoningToken: null,
-						responseSize: errorResponseText.length,
-						content: null,
-						reasoningContent: null,
-						finishReason,
-						promptTokens: null,
-						completionTokens: null,
-						totalTokens: null,
-						reasoningTokens: null,
-						cachedTokens: null,
-						hasError: finishReason !== "content_filter",
-						streamed: false,
-						canceled: false,
-						errorDetails: (() => {
-							if (finishReason === "content_filter") {
-								return null;
-							}
-							if (finishReason === "client_error") {
-								try {
-									const originalError = JSON.parse(errorResponseText);
-									return {
-										statusCode: res.status,
-										statusText: res.statusText,
-										responseText: errorResponseText,
-										message: originalError.error?.message ?? errorResponseText,
-									};
-								} catch {
-									// If parsing fails, use default format
-								}
-							}
-							return {
-								statusCode: res.status,
-								statusText: res.statusText,
-								responseText: errorResponseText,
-							};
-						})(),
-						cachedInputCost: null,
-						requestCost: null,
-						webSearchCost: null,
-						imageInputTokens: null,
-						imageOutputTokens: null,
-						imageInputCost: null,
-						imageOutputCost: null,
-						estimatedCost: false,
-						discount: null,
-						dataStorageCost: "0",
-						cached: false,
-						toolResults: null,
-						retried: willRetryHttpNonStreaming,
-						retriedByLogId: willRetryHttpNonStreaming ?
finalLogId : null, - }, + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + errorResponseText, // Our formatted error response + requestBody, // The request that resulted in error + errorResponseText, // Raw upstream error response + providerErrorPluginIds, + undefined, // No plugin results for error case ); + const attemptLogId = shortid(); + + await insertLogEntry({ + ...baseLogEntry, + id: attemptLogId, + duration: perAttemptDuration, + timeToFirstToken: null, // Not applicable for error case + timeToFirstReasoningToken: null, // Not applicable for error case + responseSize: errorResponseText.length, + content: null, + reasoningContent: null, + finishReason, + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: finishReason !== "content_filter", // content_filter is not an error + streamed: false, + canceled: false, + errorDetails: (() => { + // content_filter is not an error, no error details needed + if (finishReason === "content_filter") { + return null; + } + // For client errors, try to parse the original error and include the message + if (finishReason === "client_error") { + try { + const originalError = JSON.parse(errorResponseText); + return { + statusCode: res.status, + statusText: res.statusText, + responseText: errorResponseText, + message: originalError.error?.message ?? errorResponseText, + }; + } catch { + // If parsing fails, use default format + } + } + return { + statusCode: res.status, + statusText: res.statusText, + responseText: errorResponseText, + }; + })(), + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + estimatedCost: false, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + retried: willRetryRequest, + retriedByLogId: willRetryRequest ? 
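+					// willRetryRequest covers both same-provider key rotation and cross-provider fallback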
finalLogId : null, + }); - // Report key health for environment-based tokens + // Report key health for the selected token source // Don't report content_filter as a key error - it's intentional provider behavior if (envVarName !== undefined && finishReason !== "content_filter") { reportKeyError(envVarName, configIndex, res.status, errorResponseText); } + if (providerKey?.id && finishReason !== "content_filter") { + reportTrackedKeyError(providerKey.id, res.status, errorResponseText); + } + + if (willRetrySameProvider && sameProviderRetryContext) { + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + res.status, + getErrorType(res.status), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); + applyResolvedProviderContext(sameProviderRetryContext); + retryAttempt--; + continue; + } if (willRetryHttpNonStreaming) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: res.status, - error_type: getErrorType(res.status), - succeeded: false, - }); + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + res.status, + getErrorType(res.status), + false, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: attemptLogId, + }, + ), + ); failedProviderIds.add(providerRetryKey(usedProvider, usedRegion)); continue; } @@ -7820,6 +8611,7 @@ chat.openapi(completions, async (c) => { total_tokens: 0, }, metadata: { + request_id: requestId, requested_model: initialRequestedModel, requested_provider: requestedProvider, used_model: baseModelName, @@ -7864,14 +8656,20 @@ chat.openapi(completions, async (c) => { // Add the final attempt (successful or last failed) to routing if (res && res.ok && usedProvider) { - routingAttempts.push({ - provider: usedProvider, - model: baseModelName, - ...(usedRegion && { region: usedRegion }), - status_code: res.status, - error_type: "none", - succeeded: true, - }); + routingAttempts.push( + buildRoutingAttempt( + usedProvider, + baseModelName, + res.status, + "none", + true, + { + region: usedRegion, + apiKeyHash: usedApiKeyHash, + logId: finalLogId, + }, + ), + ); } // Update routingMetadata with all routing attempts for DB logging @@ -7920,7 +8718,92 @@ chat.openapi(completions, async (c) => { let json: any; try { - json = await res.json(); + if (forceStream && res.body) { + // Stream-only model: upstream returned SSE but client expects JSON. + // Read the full stream and assemble a non-streaming response. + const text = await res.text(); + const lines = text.split("\n"); + let content = ""; + const toolCalls: any[] = []; + let finishReason: string | null = null; + let usage: any = null; + let responseId = ""; + let model = ""; + let created = 0; + + for (const line of lines) { + if (!line.startsWith("data: ") || line === "data: [DONE]") { + continue; + } + try { + const chunk = JSON.parse(line.slice(6)); + if (!responseId && chunk.id) { + responseId = chunk.id; + } + if (!model && chunk.model) { + model = chunk.model; + } + if (!created && chunk.created) { + created = chunk.created; + } + const delta = chunk.choices?.[0]?.delta; + if (delta?.content) { + content += delta.content; + } + if (delta?.tool_calls) { + for (const tc of delta.tool_calls) { + const idx = tc.index ?? 0; + if (!toolCalls[idx]) { + toolCalls[idx] = { + id: tc.id ?? "", + type: tc.type ?? "function", + function: { name: tc.function?.name ?? 
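+										// the function name may only arrive in a later delta; start empty and patch it below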
"", arguments: "" }, + }; + } else { + if (tc.id) { + toolCalls[idx].id = tc.id; + } + if (tc.function?.name) { + toolCalls[idx].function.name = tc.function.name; + } + } + if (tc.function?.arguments) { + toolCalls[idx].function.arguments += tc.function.arguments; + } + } + } + if (chunk.choices?.[0]?.finish_reason) { + finishReason = chunk.choices[0].finish_reason; + } + if (chunk.usage) { + usage = chunk.usage; + } + } catch { + // skip unparseable lines + } + } + + json = { + id: responseId, + object: "chat.completion", + created, + model, + choices: [ + { + index: 0, + message: { + role: "assistant", + content: content || null, + ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}), + }, + finish_reason: finishReason ?? "stop", + }, + ], + ...(usage ? { usage } : {}), + }; + } else { + json = await res.json(); + } } catch (bodyError) { if (isTimeoutError(bodyError)) { const errorMessage = @@ -7939,77 +8822,80 @@ chat.openapi(completions, async (c) => { ), }); - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: null, - upstreamRequest: requestBody, - upstreamResponse: null, - plugins: requestPluginIds, - pluginResults: undefined, - }, - { - duration: Date.now() - startTime, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize: 0, - content: null, - reasoningContent: null, - finishReason: "upstream_error", - promptTokens: null, - completionTokens: null, - totalTokens: null, - reasoningTokens: null, - cachedTokens: null, - hasError: true, - streamed: false, - canceled: false, - errorDetails: { - statusCode: res.status, - statusText: "TimeoutError", - responseText: errorMessage, - cause: bodyReadCause, - }, - cachedInputCost: null, - requestCost: null, - webSearchCost: null, - imageInputTokens: null, - imageOutputTokens: null, - imageInputCost: null, - imageOutputCost: null, - estimatedCost: false, - discount: null, - dataStorageCost: "0", - cached: false, - toolResults: null, - }, + const bodyTimeoutPluginIds = plugins?.map((p) => p.id) ?? 
[]; + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted!, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, + requestBody, + null, + bodyTimeoutPluginIds, + undefined, ); + await insertLogEntry({ + ...baseLogEntry, + duration: Date.now() - startTime, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: false, + canceled: false, + errorDetails: { + statusCode: res.status, + statusText: "TimeoutError", + responseText: errorMessage, + cause: bodyReadCause, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + estimatedCost: false, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + }); + return c.json( { error: { @@ -8050,6 +8936,7 @@ chat.openapi(completions, async (c) => { completionTokens, reasoningTokens, cachedTokens, + cacheCreationTokens, toolResults, images, annotations, @@ -8176,6 +9063,13 @@ chat.openapi(completions, async (c) => { webSearchCount, project.organizationId, ); + costs.dataStorageCost = toDataStorageCostNumber( + costs.promptTokens ?? calculatedPromptTokens, + cachedTokens, + costs.completionTokens ?? calculatedCompletionTokens, + calculatedReasoningTokens, + retentionLevel, + ); // Use costs.promptTokens as canonical value (includes image input // tokens for providers that exclude them from upstream usage) @@ -8224,31 +9118,66 @@ chat.openapi(completions, async (c) => { imageInputCost: costs.imageInputCost, imageOutputCost: costs.imageOutputCost, totalCost: costs.totalCost, + dataStorageCost: costs.dataStorageCost, } : null, false, // showUpgradeMessage - never show since Pro plan is removed annotations, routingAttempts.length > 0 ? routingAttempts : null, + requestId, usedRegion, + cacheCreationTokens, + ); + + // Extract plugin IDs for logging + const pluginIds = plugins?.map((p) => p.id) ?? []; + + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + transformedResponse, // Our formatted response that we return to user + requestBody, // The request sent to the provider + json, // Raw upstream response from provider + pluginIds, + Object.keys(pluginResults).length > 0 ? 
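+			// persist plugin results only when at least one plugin produced output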
pluginResults : undefined, ); // Check if the non-streaming response is empty (no content, tokens, or tool calls) - // Exclude content_filter responses as they are intentionally empty (blocked by provider) - // For Google, check for original finish reasons that indicate content filtering - // These include both finishReason values and promptFeedback.blockReason values - const isGoogleContentFilter = - isGoogleCompatibleProvider(usedProvider) && - (finishReason === "SAFETY" || - finishReason === "PROHIBITED_CONTENT" || - finishReason === "RECITATION" || - finishReason === "BLOCKLIST" || - finishReason === "SPII" || - finishReason === "OTHER"); + // Exclude content filter responses as they are intentionally empty. + const isContentFilterResponse = isContentFilterFinishReason( + finishReason, + usedProvider, + ); const hasEmptyNonStreamingResponse = !!finishReason && - finishReason !== "content_filter" && finishReason !== "incomplete" && - !isGoogleContentFilter && + !isContentFilterResponse && !hasMeaningfulAssistantOutput({ completionTokens: calculatedCompletionTokens, reasoningTokens: calculatedReasoningTokens, @@ -8283,105 +9212,89 @@ chat.openapi(completions, async (c) => { } } - enqueueChatLog( - c, - { - providerKeyId: providerKey?.id, - usedModel: usedModelFormatted, - usedModelMapping, - usedProvider, - requestedModel: initialRequestedModel, - requestedProvider, - messages, - temperature, - max_tokens, - top_p, - frequency_penalty, - presence_penalty, - reasoningEffort: reasoning_effort, - reasoningMaxTokens: reasoning_max_tokens, - effort, - responseFormat: response_format, - tools, - toolChoice: tool_choice, - source, - customHeaders, - debugMode, - userAgent, - imageConfig: image_config, - routingMetadata, - rawRequest: rawBody, - rawResponse: transformedResponse, - upstreamRequest: requestBody, - upstreamResponse: json, - plugins: requestPluginIds, - pluginResults: - Object.keys(pluginResults).length > 0 ? pluginResults : undefined, - }, - { - id: routingAttempts.length > 0 ? finalLogId : undefined, - duration, - timeToFirstToken: null, - timeToFirstReasoningToken: null, - responseSize, - content: content, - reasoningContent: reasoningContent, - finishReason: hasEmptyNonStreamingResponse - ? "upstream_error" - : finishReason, - promptTokens: calculatedPromptTokens?.toString() ?? null, - completionTokens: calculatedCompletionTokens?.toString() ?? null, - totalTokens: - totalTokens ?? - ( - (calculatedPromptTokens ?? 0) + (calculatedCompletionTokens ?? 0) - ).toString(), - reasoningTokens: calculatedReasoningTokens?.toString() ?? null, - cachedTokens: cachedTokens?.toString() ?? null, - hasError: hasEmptyNonStreamingResponse, - streamed: false, - canceled: false, - errorDetails: hasEmptyNonStreamingResponse - ? { - statusCode: 500, - statusText: "Empty Response", - responseText: - "Response finished successfully but returned no content or tool calls", - } - : null, - inputCost: costs.inputCost, - outputCost: costs.outputCost, - cachedInputCost: costs.cachedInputCost, - requestCost: costs.requestCost, - webSearchCost: costs.webSearchCost, - imageInputTokens: costs.imageInputTokens?.toString() ?? null, - imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, - imageInputCost: costs.imageInputCost ?? null, - imageOutputCost: costs.imageOutputCost ?? 
null, - cost: costs.totalCost, - estimatedCost: costs.estimatedCost, - discount: costs.discount, - pricingTier: costs.pricingTier, - dataStorageCost: calculateDataStorageCost( - calculatedPromptTokens, - cachedTokens, - calculatedCompletionTokens, - calculatedReasoningTokens, - retentionLevel, - ), - cached: false, - toolResults, - }, - ); + // For image generation, store the base64 data URLs in content + // so the activity detail page can render the images + const base64Images = + convertedImages?.filter((img) => img.image_url.url.startsWith("data:")) ?? + []; + const logContent = + base64Images.length > 0 + ? base64Images.map((img) => img.image_url.url).join("\n") + : content; + + await insertLogEntry({ + ...baseLogEntry, + id: routingAttempts.length > 0 ? finalLogId : undefined, + duration, + timeToFirstToken: null, // Not applicable for non-streaming requests + timeToFirstReasoningToken: null, // Not applicable for non-streaming requests + responseSize, + content: logContent, + reasoningContent: reasoningContent, + finishReason: hasEmptyNonStreamingResponse + ? "upstream_error" + : finishReason, + promptTokens: calculatedPromptTokens?.toString() ?? null, + completionTokens: calculatedCompletionTokens?.toString() ?? null, + totalTokens: + totalTokens ?? + ( + (calculatedPromptTokens ?? 0) + (calculatedCompletionTokens ?? 0) + ).toString(), + reasoningTokens: calculatedReasoningTokens?.toString() ?? null, + cachedTokens: cachedTokens?.toString() ?? null, + hasError: hasEmptyNonStreamingResponse, + streamed: false, + canceled: false, + errorDetails: hasEmptyNonStreamingResponse + ? { + statusCode: 500, + statusText: "Empty Response", + responseText: + "Response finished successfully but returned no content or tool calls", + } + : null, + inputCost: costs.inputCost, + outputCost: costs.outputCost, + cachedInputCost: costs.cachedInputCost, + requestCost: costs.requestCost, + webSearchCost: costs.webSearchCost, + imageInputTokens: costs.imageInputTokens?.toString() ?? null, + imageOutputTokens: costs.imageOutputTokens?.toString() ?? null, + imageInputCost: costs.imageInputCost ?? null, + imageOutputCost: costs.imageOutputCost ?? 
null, + cost: costs.totalCost, + estimatedCost: costs.estimatedCost, + discount: costs.discount, + pricingTier: costs.pricingTier, + dataStorageCost: calculateDataStorageCost( + calculatedPromptTokens, + cachedTokens, + calculatedCompletionTokens, + calculatedReasoningTokens, + retentionLevel, + ), + cached: false, + tools, + toolResults, + toolChoice: tool_choice, + }); - // Report key health for environment-based tokens + // Report key health for the selected token source // Note: We don't report empty responses as key errors since they're not upstream errors if (envVarName !== undefined) { reportKeySuccess(envVarName, configIndex); } + if (providerKey?.id) { + reportTrackedKeySuccess(providerKey.id); + } if (cachingEnabled && cacheKey && !stream && !hasEmptyNonStreamingResponse) { - await setCache(cacheKey, transformedResponse, cacheDuration); + await setCache( + cacheKey, + stripRequestScopedMetadataFromOpenAiResponse(transformedResponse), + cacheDuration, + ); } // For image generation models with streaming requested, convert to SSE format diff --git a/apps/gateway/src/chat/middleware/chat-completion-log.ts b/apps/gateway/src/chat/middleware/chat-completion-log.ts index d1da03c180..f69e6f0f76 100644 --- a/apps/gateway/src/chat/middleware/chat-completion-log.ts +++ b/apps/gateway/src/chat/middleware/chat-completion-log.ts @@ -303,6 +303,12 @@ async function flushChatCompletionLogs( internalContentFilter: state.internalContentFilter ? true : logData.internalContentFilter, + gatewayContentFilterResponse: + logData.gatewayContentFilterResponse ?? + (state.gatewayContentFilterResponse as + | LogInsertData["gatewayContentFilterResponse"] + | undefined) ?? + null, }, { syncInsert: state.syncInsert }, ); diff --git a/apps/gateway/src/chat/tools/chat-log-context.ts b/apps/gateway/src/chat/tools/chat-log-context.ts index 0e860ebe35..c9d14082f7 100644 --- a/apps/gateway/src/chat/tools/chat-log-context.ts +++ b/apps/gateway/src/chat/tools/chat-log-context.ts @@ -18,6 +18,7 @@ export interface ChatCompletionLogState { resolveStreamCompletion?: () => void; caughtError?: unknown; internalContentFilter?: boolean; + gatewayContentFilterResponse?: unknown; clientErrorSynthesized?: boolean; syncInsert?: boolean; logIdOverride?: string;
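
// Aside: the flush fallback in chat-completion-log.ts above chains three sources
// for gatewayContentFilterResponse. A minimal TypeScript sketch of that precedence,
// assuming the LogInsertData and ChatCompletionLogState types from this patch and
// that the import paths below are correct (illustrative only, not part of the change):
//
//   import type { LogInsertData } from "@llmgateway/db";
//   import type { ChatCompletionLogState } from "./chat-log-context.js";
//
//   type GatewayFilterResponse = LogInsertData["gatewayContentFilterResponse"];
//
//   function resolveGatewayContentFilterResponse(
//   	logData: Partial<Pick<LogInsertData, "gatewayContentFilterResponse">>,
//   	state: ChatCompletionLogState,
//   ): GatewayFilterResponse | null {
//   	// per-request log data wins, then middleware state, then an explicit null
//   	return (
//   		logData.gatewayContentFilterResponse ??
//   		(state.gatewayContentFilterResponse as GatewayFilterResponse | undefined) ??
//   		null
//   	);
//   }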