Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions apps/gateway/src/chat/tools/extract-token-usage.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,28 @@ describe("extractTokenUsage", () => {
expect(result.totalTokens).toBe(350);
});

// Verifies that Bedrock per-TTL cache-creation details are split into the
// 5m/1h buckets while the aggregate cacheWriteInputTokens is kept as-is.
it("extracts cache creation tokens from cacheDetails by TTL", () => {
  const bedrockPayload = {
    usage: {
      inputTokens: 100,
      cacheReadInputTokens: 0,
      cacheWriteInputTokens: 1000,
      cacheDetails: [
        { ttl: "1h", inputTokens: 700 },
        { ttl: "5m", inputTokens: 300 },
      ],
      outputTokens: 200,
      totalTokens: 1300,
    },
  };

  const usage = extractTokenUsage(bedrockPayload, "aws-bedrock");

  expect(usage.cacheCreationTokens).toBe(1000);
  expect(usage.cacheCreation5mTokens).toBe(300);
  expect(usage.cacheCreation1hTokens).toBe(700);
});

it("returns cachedTokens with correct value when cacheReadInputTokens > 0", () => {
const data = {
usage: {
Expand Down
28 changes: 28 additions & 0 deletions apps/gateway/src/chat/tools/extract-token-usage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,31 @@ export function adjustGoogleCandidateTokens(
return candidatesTokenCount;
}

/**
 * Sums AWS Bedrock cache-creation ("cache write") token counts by TTL bucket.
 *
 * Bedrock reports per-TTL cache-write details in `usage.cacheDetails`, e.g.
 * `[{ ttl: "1h", inputTokens: 700 }, { ttl: "5m", inputTokens: 300 }]`
 * (shape as exercised by the specs in this change — confirm against the
 * Bedrock Converse API docs if new TTL values appear).
 *
 * @param usage - raw provider `usage` object; may be null/undefined or lack
 *   `cacheDetails` entirely, in which case both buckets come back `null`.
 * @returns per-TTL totals, with `null` (not 0) for an empty bucket so callers
 *   can distinguish "no detail reported" from "reported as zero".
 */
export function extractBedrockCacheCreationDetails(usage: any): {
  cacheCreation5mTokens: number | null;
  cacheCreation1hTokens: number | null;
} {
  let fiveMinuteTokens = 0;
  let oneHourTokens = 0;

  const cacheDetails = Array.isArray(usage?.cacheDetails)
    ? usage.cacheDetails
    : [];
  for (const detail of cacheDetails) {
    // Guard against non-numeric / non-finite inputTokens so one malformed
    // entry cannot poison the running totals (e.g. "700" would otherwise
    // turn `+=` into string concatenation).
    const rawTokens = detail?.inputTokens;
    const inputTokens =
      typeof rawTokens === "number" && Number.isFinite(rawTokens)
        ? rawTokens
        : 0;
    if (detail?.ttl === "1h") {
      oneHourTokens += inputTokens;
    } else if (detail?.ttl === "5m") {
      fiveMinuteTokens += inputTokens;
    }
    // Entries with an unrecognized TTL are intentionally ignored.
  }

  return {
    cacheCreation5mTokens: fiveMinuteTokens > 0 ? fiveMinuteTokens : null,
    cacheCreation1hTokens: oneHourTokens > 0 ? oneHourTokens : null,
  };
}

/**
* Extracts token usage information from streaming data based on provider format
*/
Expand Down Expand Up @@ -106,13 +131,16 @@ export function extractTokenUsage(
const inputTokens = data.usage.inputTokens ?? 0;
const cacheReadTokens = data.usage.cacheReadInputTokens ?? 0;
const cacheWriteTokens = data.usage.cacheWriteInputTokens ?? 0;
const cacheDetails = extractBedrockCacheCreationDetails(data.usage);

// Total prompt tokens = regular input + cache read + cache write
promptTokens = inputTokens + cacheReadTokens + cacheWriteTokens;
completionTokens = data.usage.outputTokens ?? null;
// Cached tokens are the tokens read from cache (discount applies to these)
cachedTokens = cacheReadTokens;
cacheCreationTokens = cacheWriteTokens;
cacheCreation5mTokens = cacheDetails.cacheCreation5mTokens;
cacheCreation1hTokens = cacheDetails.cacheCreation1hTokens;
totalTokens = data.usage.totalTokens ?? null;
}
break;
Expand Down
33 changes: 33 additions & 0 deletions apps/gateway/src/chat/tools/parse-provider-response.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,39 @@ describe("parseProviderResponse", () => {
expect(result.promptTokens).toBe(150); // 100 + 0 + 50
});

// Verifies that a non-streaming Bedrock response carries its per-TTL
// cache-creation breakdown through parseProviderResponse.
it("extracts cache creation tokens from cacheDetails by TTL", () => {
  const bedrockResponse = {
    output: {
      message: {
        content: [{ text: "Hello" }],
        role: "assistant",
      },
    },
    stopReason: "end_turn",
    usage: {
      inputTokens: 100,
      cacheReadInputTokens: 0,
      cacheWriteInputTokens: 1000,
      cacheDetails: [
        { ttl: "1h", inputTokens: 700 },
        { ttl: "5m", inputTokens: 300 },
      ],
      outputTokens: 200,
      totalTokens: 1300,
    },
  };

  const parsed = parseProviderResponse(
    "aws-bedrock",
    "anthropic.claude-sonnet-4-5-20250929-v1:0",
    bedrockResponse,
  );

  expect(parsed.cacheCreationTokens).toBe(1000);
  expect(parsed.cacheCreation5mTokens).toBe(300);
  expect(parsed.cacheCreation1hTokens).toBe(700);
});

it("returns cachedTokens with correct value when cacheReadInputTokens > 0", () => {
const json = {
output: {
Expand Down
8 changes: 7 additions & 1 deletion apps/gateway/src/chat/tools/parse-provider-response.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ import { redisClient } from "@llmgateway/cache";
import { logger } from "@llmgateway/logger";

import { estimateTokens } from "./estimate-tokens.js";
import { adjustGoogleCandidateTokens } from "./extract-token-usage.js";
import {
adjustGoogleCandidateTokens,
extractBedrockCacheCreationDetails,
} from "./extract-token-usage.js";
import {
extractReasoningDetailsText,
splitReasoningFromTaggedContent,
Expand Down Expand Up @@ -104,6 +107,7 @@ export function parseProviderResponse(
const inputTokens = json.usage.inputTokens ?? 0;
const cacheReadTokens = json.usage.cacheReadInputTokens ?? 0;
const cacheWriteTokens = json.usage.cacheWriteInputTokens ?? 0;
const cacheDetails = extractBedrockCacheCreationDetails(json.usage);

// Total prompt tokens = regular input + cache read + cache write
promptTokens = inputTokens + cacheReadTokens + cacheWriteTokens;
Expand All @@ -112,6 +116,8 @@ export function parseProviderResponse(
// Cached tokens are the tokens read from cache (discount applies to these)
cachedTokens = cacheReadTokens;
cacheCreationTokens = cacheWriteTokens;
cacheCreation5mTokens = cacheDetails.cacheCreation5mTokens;
cacheCreation1hTokens = cacheDetails.cacheCreation1hTokens;
}

// Extract tool calls if present
Expand Down
44 changes: 44 additions & 0 deletions apps/gateway/src/chat/tools/transform-streaming-to-openai.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,50 @@ describe("transformStreamingToOpenai", () => {
expect(warn).not.toHaveBeenCalled();
});

// Verifies that a Bedrock streaming `metadata` event is mapped to an OpenAI
// usage chunk, including the nested cache_creation TTL breakdown.
it("maps AWS Bedrock metadata cache creation details", () => {
  warn.mockClear();

  const metadataEvent = {
    __aws_event_type: "metadata",
    usage: {
      inputTokens: 10,
      cacheReadInputTokens: 0,
      cacheWriteInputTokens: 1000,
      cacheDetails: [
        { ttl: "1h", inputTokens: 700 },
        { ttl: "5m", inputTokens: 300 },
      ],
      outputTokens: 1,
      totalTokens: 1011,
    },
  };

  const chunk = transformStreamingToOpenai(
    "aws-bedrock",
    "anthropic.claude-sonnet-4-5-20250929-v1:0",
    metadataEvent,
    [],
  );

  expect(chunk).toMatchObject({
    object: "chat.completion.chunk",
    model: "anthropic.claude-sonnet-4-5-20250929-v1:0",
    usage: {
      prompt_tokens: 1010,
      completion_tokens: 1,
      total_tokens: 1011,
      prompt_tokens_details: {
        cached_tokens: 0,
        cache_write_tokens: 1000,
        cache_creation_tokens: 1000,
        cache_creation: {
          ephemeral_5m_input_tokens: 300,
          ephemeral_1h_input_tokens: 700,
        },
      },
    },
  });
  expect(warn).not.toHaveBeenCalled();
});

it("treats non-text AWS Bedrock contentBlockDelta members as handled", () => {
warn.mockClear();

Expand Down
29 changes: 27 additions & 2 deletions apps/gateway/src/chat/tools/transform-streaming-to-openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ import { logger } from "@llmgateway/logger";

import { calculatePromptTokensFromMessages } from "./calculate-prompt-tokens.js";
import { extractImages } from "./extract-images.js";
import { adjustGoogleCandidateTokens } from "./extract-token-usage.js";
import {
adjustGoogleCandidateTokens,
extractBedrockCacheCreationDetails,
} from "./extract-token-usage.js";
import { mapFinishReasonToOpenai } from "./map-finish-reason-to-openai.js";
import { transformOpenaiStreaming } from "./transform-openai-streaming.js";

Expand Down Expand Up @@ -1224,7 +1227,11 @@ export function transformStreamingToOpenai(
const inputTokens = data.usage.inputTokens ?? 0;
const cacheReadTokens = data.usage.cacheReadInputTokens ?? 0;
const cacheWriteTokens = data.usage.cacheWriteInputTokens ?? 0;
const cacheDetails = extractBedrockCacheCreationDetails(data.usage);
const promptTokens = inputTokens + cacheReadTokens + cacheWriteTokens;
const hasCacheCreationDetails =
cacheDetails.cacheCreation5mTokens !== null ||
cacheDetails.cacheCreation1hTokens !== null;

transformedData = {
id: `chatcmpl-${Date.now()}`,
Expand All @@ -1242,9 +1249,27 @@ export function transformStreamingToOpenai(
prompt_tokens: promptTokens,
completion_tokens: data.usage.outputTokens ?? 0,
total_tokens: data.usage.totalTokens ?? 0,
...(cacheReadTokens > 0 && {
...((cacheReadTokens > 0 || cacheWriteTokens > 0) && {
prompt_tokens_details: {
cached_tokens: cacheReadTokens,
...(cacheWriteTokens > 0 && {
cache_write_tokens: cacheWriteTokens,
cache_creation_tokens: cacheWriteTokens,
}),
...(cacheWriteTokens > 0 &&
hasCacheCreationDetails && {
cache_creation: {
ephemeral_5m_input_tokens:
cacheDetails.cacheCreation5mTokens ??
Math.max(
0,
cacheWriteTokens -
(cacheDetails.cacheCreation1hTokens ?? 0),
),
ephemeral_1h_input_tokens:
cacheDetails.cacheCreation1hTokens ?? 0,
},
}),
},
}),
},
Expand Down
88 changes: 88 additions & 0 deletions apps/gateway/src/lib/anthropic-pricing.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,91 @@ describe("Anthropic model pricing", () => {
},
);
});

// Pricing invariants for Anthropic models served through AWS Bedrock:
// read/write cache prices must be declared together, 1h-TTL write pricing may
// only appear on models documented to support the 1h cache TTL, and all cache
// prices must follow the standard multipliers relative to the base input price.
describe("AWS Bedrock Anthropic model pricing", () => {
  const bedrockProviderEntries = models.flatMap((model) => {
    if (model.family !== "anthropic") {
      return [];
    }
    return model.providers
      .filter((provider) => provider.providerId === "aws-bedrock")
      .map((provider) => ({
        modelId: model.id,
        provider: provider as ProviderModelMapping,
      }));
  });

  it("has at least one AWS Bedrock Anthropic provider mapping to validate", () => {
    expect(bedrockProviderEntries.length).toBeGreaterThan(0);
  });

  it.each(bedrockProviderEntries)(
    "$modelId defines cacheWriteInputPrice whenever cachedInputPrice is set",
    ({ provider }) => {
      if (provider.cachedInputPrice !== undefined) {
        expect(
          provider.cacheWriteInputPrice,
          `${provider.modelName}: cachedInputPrice is set but cacheWriteInputPrice is missing`,
        ).toBeDefined();
      }
    },
  );

  // Bedrock model-name prefixes documented as supporting the 1h cache TTL.
  const ONE_HOUR_BEDROCK_PREFIXES = [
    "anthropic.claude-opus-4-5",
    "anthropic.claude-opus-4-6",
    "anthropic.claude-opus-4-7",
    "anthropic.claude-haiku-4-5",
    "anthropic.claude-sonnet-4-5",
    "anthropic.claude-sonnet-4-6",
  ];

  function supportsBedrock1h(modelName: string): boolean {
    return ONE_HOUR_BEDROCK_PREFIXES.some((prefix) =>
      modelName.startsWith(prefix),
    );
  }

  it.each(bedrockProviderEntries)(
    "$modelId only sets cacheWriteInputPrice1h on bedrock models that support 1h TTL",
    ({ provider }) => {
      if (provider.cacheWriteInputPrice1h !== undefined) {
        expect(
          supportsBedrock1h(provider.modelName),
          `${provider.modelName}: cacheWriteInputPrice1h is set but bedrock does not document 1h TTL support for this model`,
        ).toBe(true);
      }
    },
  );

  it.each(bedrockProviderEntries)(
    "$modelId cache prices follow the standard 1.25x/2x/0.1x ratios",
    ({ provider }) => {
      const base = provider.inputPrice;
      if (base === undefined) {
        return;
      }
      // Label / actual price / expected price, in the canonical check order:
      // 5m write, 1h write, cache read.
      const ratioChecks: Array<[string, number | undefined, number]> = [
        [
          "cacheWriteInputPrice (5m)",
          provider.cacheWriteInputPrice,
          base * FIVE_MIN_WRITE_MULTIPLIER,
        ],
        [
          "cacheWriteInputPrice1h",
          provider.cacheWriteInputPrice1h,
          base * ONE_HOUR_WRITE_MULTIPLIER,
        ],
        [
          "cachedInputPrice",
          provider.cachedInputPrice,
          base * CACHE_READ_MULTIPLIER,
        ],
      ];
      for (const [label, actual, expected] of ratioChecks) {
        if (actual !== undefined) {
          assertRatio(provider.modelName, label, actual, expected);
        }
      }
    },
  );
});
36 changes: 36 additions & 0 deletions apps/gateway/src/lib/costs.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,42 @@ describe("calculateCosts", () => {
expect(result.cacheWriteInputCost).toBeCloseTo(1000 * (3.75 / 1e6));
});

it("should calculate AWS Bedrock Claude cache write costs", async () => {
  // Bedrock Claude Haiku 4.5 input is 1.0/1M; 5m write 1.25/1M; 1h write 2.0/1M.
  const costs = await calculateCosts(
    "claude-haiku-4-5",
    "aws-bedrock",
    1004,
    50,
    0,
    undefined,
    null,
    0,
    undefined,
    0,
    null,
    null,
    undefined,
    { cacheWriteTokens: 1000, cacheWrite1hTokens: 700 },
  );

  // 1000 total write tokens, of which 700 are 1h TTL and the remaining 300
  // are billed at the 5m write rate; a 20% discount applies throughout.
  const discountMultiplier = 0.8;
  const expectedWriteCost = 300 * (1.25 / 1e6) + 700 * (2.0 / 1e6);

  expect(costs.inputCost).toBeCloseTo(4 * (1.0 / 1e6) * discountMultiplier);
  expect(costs.outputCost).toBeCloseTo(50 * (5.0 / 1e6) * discountMultiplier);
  expect(costs.cacheWriteInputCost).toBeCloseTo(
    expectedWriteCost * discountMultiplier,
  );
  expect(costs.discount).toBeCloseTo(0.2);
  expect(costs.cacheWriteTokens).toBe(1000);
});

it("should calculate costs with cached tokens for Anthropic (subsequent request - cache read)", async () => {
// For Anthropic subsequent request: 4 non-cached + 1659 cache read = 1663 total tokens, 1659 cache reads
const result = await calculateCosts(
Expand Down
Loading
Loading