Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions apps/gateway/src/chat/tools/extract-token-usage.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,28 @@ describe("extractTokenUsage", () => {
expect(result.totalTokens).toBe(350);
});

// Verifies that Bedrock per-TTL cache-creation details are split into the
// 5m/1h buckets while the aggregate cacheWriteInputTokens is kept as-is.
it("extracts cache creation tokens from cacheDetails by TTL", () => {
  const bedrockPayload = {
    usage: {
      inputTokens: 100,
      cacheReadInputTokens: 0,
      cacheWriteInputTokens: 1000,
      cacheDetails: [
        { ttl: "1h", inputTokens: 700 },
        { ttl: "5m", inputTokens: 300 },
      ],
      outputTokens: 200,
      totalTokens: 1300,
    },
  };

  const usage = extractTokenUsage(bedrockPayload, "aws-bedrock");

  expect(usage.cacheCreationTokens).toBe(1000);
  expect(usage.cacheCreation5mTokens).toBe(300);
  expect(usage.cacheCreation1hTokens).toBe(700);
});

it("returns cachedTokens with correct value when cacheReadInputTokens > 0", () => {
const data = {
usage: {
Expand Down
28 changes: 28 additions & 0 deletions apps/gateway/src/chat/tools/extract-token-usage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,31 @@ export function adjustGoogleCandidateTokens(
return candidatesTokenCount;
}

/**
 * Sums AWS Bedrock cache-creation ("cache write") token counts by TTL bucket.
 *
 * Bedrock reports per-TTL cache-write details in `usage.cacheDetails`, e.g.
 * `[{ ttl: "1h", inputTokens: 700 }, { ttl: "5m", inputTokens: 300 }]`
 * (shape as exercised by the specs in this change — confirm against the
 * Bedrock Converse API docs if new TTL values appear).
 *
 * @param usage - raw provider `usage` object; may be null/undefined or lack
 *   `cacheDetails` entirely, in which case both buckets come back `null`.
 * @returns per-TTL totals, with `null` (not 0) for an empty bucket so callers
 *   can distinguish "no detail reported" from "reported as zero".
 */
export function extractBedrockCacheCreationDetails(usage: any): {
  cacheCreation5mTokens: number | null;
  cacheCreation1hTokens: number | null;
} {
  let fiveMinuteTokens = 0;
  let oneHourTokens = 0;

  const cacheDetails = Array.isArray(usage?.cacheDetails)
    ? usage.cacheDetails
    : [];
  for (const detail of cacheDetails) {
    // Guard against non-numeric / non-finite inputTokens so one malformed
    // entry cannot poison the running totals (e.g. "700" would otherwise
    // turn `+=` into string concatenation).
    const rawTokens = detail?.inputTokens;
    const inputTokens =
      typeof rawTokens === "number" && Number.isFinite(rawTokens)
        ? rawTokens
        : 0;
    if (detail?.ttl === "1h") {
      oneHourTokens += inputTokens;
    } else if (detail?.ttl === "5m") {
      fiveMinuteTokens += inputTokens;
    }
    // Entries with an unrecognized TTL are intentionally ignored.
  }

  return {
    cacheCreation5mTokens: fiveMinuteTokens > 0 ? fiveMinuteTokens : null,
    cacheCreation1hTokens: oneHourTokens > 0 ? oneHourTokens : null,
  };
}

/**
* Extracts token usage information from streaming data based on provider format
*/
Expand Down Expand Up @@ -106,13 +131,16 @@ export function extractTokenUsage(
const inputTokens = data.usage.inputTokens ?? 0;
const cacheReadTokens = data.usage.cacheReadInputTokens ?? 0;
const cacheWriteTokens = data.usage.cacheWriteInputTokens ?? 0;
const cacheDetails = extractBedrockCacheCreationDetails(data.usage);

// Total prompt tokens = regular input + cache read + cache write
promptTokens = inputTokens + cacheReadTokens + cacheWriteTokens;
completionTokens = data.usage.outputTokens ?? null;
// Cached tokens are the tokens read from cache (discount applies to these)
cachedTokens = cacheReadTokens;
cacheCreationTokens = cacheWriteTokens;
cacheCreation5mTokens = cacheDetails.cacheCreation5mTokens;
cacheCreation1hTokens = cacheDetails.cacheCreation1hTokens;
totalTokens = data.usage.totalTokens ?? null;
}
break;
Expand Down
33 changes: 33 additions & 0 deletions apps/gateway/src/chat/tools/parse-provider-response.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,39 @@ describe("parseProviderResponse", () => {
expect(result.promptTokens).toBe(150); // 100 + 0 + 50
});

// Verifies that a non-streaming Bedrock response carries its per-TTL
// cache-creation breakdown through parseProviderResponse.
it("extracts cache creation tokens from cacheDetails by TTL", () => {
  const bedrockResponse = {
    output: {
      message: {
        content: [{ text: "Hello" }],
        role: "assistant",
      },
    },
    stopReason: "end_turn",
    usage: {
      inputTokens: 100,
      cacheReadInputTokens: 0,
      cacheWriteInputTokens: 1000,
      cacheDetails: [
        { ttl: "1h", inputTokens: 700 },
        { ttl: "5m", inputTokens: 300 },
      ],
      outputTokens: 200,
      totalTokens: 1300,
    },
  };

  const parsed = parseProviderResponse(
    "aws-bedrock",
    "anthropic.claude-sonnet-4-5-20250929-v1:0",
    bedrockResponse,
  );

  expect(parsed.cacheCreationTokens).toBe(1000);
  expect(parsed.cacheCreation5mTokens).toBe(300);
  expect(parsed.cacheCreation1hTokens).toBe(700);
});

it("returns cachedTokens with correct value when cacheReadInputTokens > 0", () => {
const json = {
output: {
Expand Down
8 changes: 7 additions & 1 deletion apps/gateway/src/chat/tools/parse-provider-response.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ import { redisClient } from "@llmgateway/cache";
import { logger } from "@llmgateway/logger";

import { estimateTokens } from "./estimate-tokens.js";
import { adjustGoogleCandidateTokens } from "./extract-token-usage.js";
import {
adjustGoogleCandidateTokens,
extractBedrockCacheCreationDetails,
} from "./extract-token-usage.js";
import {
extractReasoningDetailsText,
splitReasoningFromTaggedContent,
Expand Down Expand Up @@ -104,6 +107,7 @@ export function parseProviderResponse(
const inputTokens = json.usage.inputTokens ?? 0;
const cacheReadTokens = json.usage.cacheReadInputTokens ?? 0;
const cacheWriteTokens = json.usage.cacheWriteInputTokens ?? 0;
const cacheDetails = extractBedrockCacheCreationDetails(json.usage);

// Total prompt tokens = regular input + cache read + cache write
promptTokens = inputTokens + cacheReadTokens + cacheWriteTokens;
Expand All @@ -112,6 +116,8 @@ export function parseProviderResponse(
// Cached tokens are the tokens read from cache (discount applies to these)
cachedTokens = cacheReadTokens;
cacheCreationTokens = cacheWriteTokens;
cacheCreation5mTokens = cacheDetails.cacheCreation5mTokens;
cacheCreation1hTokens = cacheDetails.cacheCreation1hTokens;
}

// Extract tool calls if present
Expand Down
44 changes: 44 additions & 0 deletions apps/gateway/src/chat/tools/transform-streaming-to-openai.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,50 @@ describe("transformStreamingToOpenai", () => {
expect(warn).not.toHaveBeenCalled();
});

// Verifies that a Bedrock streaming `metadata` event is mapped to an OpenAI
// usage chunk, including the nested cache_creation TTL breakdown.
it("maps AWS Bedrock metadata cache creation details", () => {
  warn.mockClear();

  const metadataEvent = {
    __aws_event_type: "metadata",
    usage: {
      inputTokens: 10,
      cacheReadInputTokens: 0,
      cacheWriteInputTokens: 1000,
      cacheDetails: [
        { ttl: "1h", inputTokens: 700 },
        { ttl: "5m", inputTokens: 300 },
      ],
      outputTokens: 1,
      totalTokens: 1011,
    },
  };

  const chunk = transformStreamingToOpenai(
    "aws-bedrock",
    "anthropic.claude-sonnet-4-5-20250929-v1:0",
    metadataEvent,
    [],
  );

  expect(chunk).toMatchObject({
    object: "chat.completion.chunk",
    model: "anthropic.claude-sonnet-4-5-20250929-v1:0",
    usage: {
      prompt_tokens: 1010,
      completion_tokens: 1,
      total_tokens: 1011,
      prompt_tokens_details: {
        cached_tokens: 0,
        cache_write_tokens: 1000,
        cache_creation_tokens: 1000,
        cache_creation: {
          ephemeral_5m_input_tokens: 300,
          ephemeral_1h_input_tokens: 700,
        },
      },
    },
  });
  expect(warn).not.toHaveBeenCalled();
});

it("treats non-text AWS Bedrock contentBlockDelta members as handled", () => {
warn.mockClear();

Expand Down
29 changes: 27 additions & 2 deletions apps/gateway/src/chat/tools/transform-streaming-to-openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ import { logger } from "@llmgateway/logger";

import { calculatePromptTokensFromMessages } from "./calculate-prompt-tokens.js";
import { extractImages } from "./extract-images.js";
import { adjustGoogleCandidateTokens } from "./extract-token-usage.js";
import {
adjustGoogleCandidateTokens,
extractBedrockCacheCreationDetails,
} from "./extract-token-usage.js";
import { mapFinishReasonToOpenai } from "./map-finish-reason-to-openai.js";
import { transformOpenaiStreaming } from "./transform-openai-streaming.js";

Expand Down Expand Up @@ -1224,7 +1227,11 @@ export function transformStreamingToOpenai(
const inputTokens = data.usage.inputTokens ?? 0;
const cacheReadTokens = data.usage.cacheReadInputTokens ?? 0;
const cacheWriteTokens = data.usage.cacheWriteInputTokens ?? 0;
const cacheDetails = extractBedrockCacheCreationDetails(data.usage);
const promptTokens = inputTokens + cacheReadTokens + cacheWriteTokens;
const hasCacheCreationDetails =
cacheDetails.cacheCreation5mTokens !== null ||
cacheDetails.cacheCreation1hTokens !== null;

transformedData = {
id: `chatcmpl-${Date.now()}`,
Expand All @@ -1242,9 +1249,27 @@ export function transformStreamingToOpenai(
prompt_tokens: promptTokens,
completion_tokens: data.usage.outputTokens ?? 0,
total_tokens: data.usage.totalTokens ?? 0,
...(cacheReadTokens > 0 && {
...((cacheReadTokens > 0 || cacheWriteTokens > 0) && {
prompt_tokens_details: {
cached_tokens: cacheReadTokens,
...(cacheWriteTokens > 0 && {
cache_write_tokens: cacheWriteTokens,
cache_creation_tokens: cacheWriteTokens,
}),
...(cacheWriteTokens > 0 &&
hasCacheCreationDetails && {
cache_creation: {
ephemeral_5m_input_tokens:
cacheDetails.cacheCreation5mTokens ??
Math.max(
0,
cacheWriteTokens -
(cacheDetails.cacheCreation1hTokens ?? 0),
),
ephemeral_1h_input_tokens:
cacheDetails.cacheCreation1hTokens ?? 0,
},
}),
},
}),
},
Expand Down
88 changes: 88 additions & 0 deletions apps/gateway/src/lib/anthropic-pricing.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,91 @@ describe("Anthropic model pricing", () => {
},
);
});

// Pricing invariants for Anthropic models served through AWS Bedrock:
// read/write cache prices must be declared together, 1h-TTL write pricing may
// only appear on models documented to support the 1h cache TTL, and all cache
// prices must follow the standard multipliers relative to the base input price.
describe("AWS Bedrock Anthropic model pricing", () => {
  const bedrockProviderEntries = models.flatMap((model) => {
    if (model.family !== "anthropic") {
      return [];
    }
    return model.providers
      .filter((provider) => provider.providerId === "aws-bedrock")
      .map((provider) => ({
        modelId: model.id,
        provider: provider as ProviderModelMapping,
      }));
  });

  it("has at least one AWS Bedrock Anthropic provider mapping to validate", () => {
    expect(bedrockProviderEntries.length).toBeGreaterThan(0);
  });

  it.each(bedrockProviderEntries)(
    "$modelId defines cacheWriteInputPrice whenever cachedInputPrice is set",
    ({ provider }) => {
      if (provider.cachedInputPrice !== undefined) {
        expect(
          provider.cacheWriteInputPrice,
          `${provider.modelName}: cachedInputPrice is set but cacheWriteInputPrice is missing`,
        ).toBeDefined();
      }
    },
  );

  // Bedrock model-name prefixes documented as supporting the 1h cache TTL.
  const ONE_HOUR_BEDROCK_PREFIXES = [
    "anthropic.claude-opus-4-5",
    "anthropic.claude-opus-4-6",
    "anthropic.claude-opus-4-7",
    "anthropic.claude-haiku-4-5",
    "anthropic.claude-sonnet-4-5",
    "anthropic.claude-sonnet-4-6",
  ];

  function supportsBedrock1h(modelName: string): boolean {
    return ONE_HOUR_BEDROCK_PREFIXES.some((prefix) =>
      modelName.startsWith(prefix),
    );
  }

  it.each(bedrockProviderEntries)(
    "$modelId only sets cacheWriteInputPrice1h on bedrock models that support 1h TTL",
    ({ provider }) => {
      if (provider.cacheWriteInputPrice1h !== undefined) {
        expect(
          supportsBedrock1h(provider.modelName),
          `${provider.modelName}: cacheWriteInputPrice1h is set but bedrock does not document 1h TTL support for this model`,
        ).toBe(true);
      }
    },
  );

  it.each(bedrockProviderEntries)(
    "$modelId cache prices follow the standard 1.25x/2x/0.1x ratios",
    ({ provider }) => {
      const base = provider.inputPrice;
      if (base === undefined) {
        return;
      }
      // Label / actual price / expected price, in the canonical check order:
      // 5m write, 1h write, cache read.
      const ratioChecks: Array<[string, number | undefined, number]> = [
        [
          "cacheWriteInputPrice (5m)",
          provider.cacheWriteInputPrice,
          base * FIVE_MIN_WRITE_MULTIPLIER,
        ],
        [
          "cacheWriteInputPrice1h",
          provider.cacheWriteInputPrice1h,
          base * ONE_HOUR_WRITE_MULTIPLIER,
        ],
        [
          "cachedInputPrice",
          provider.cachedInputPrice,
          base * CACHE_READ_MULTIPLIER,
        ],
      ];
      for (const [label, actual, expected] of ratioChecks) {
        if (actual !== undefined) {
          assertRatio(provider.modelName, label, actual, expected);
        }
      }
    },
  );
});
36 changes: 36 additions & 0 deletions apps/gateway/src/lib/costs.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,42 @@ describe("calculateCosts", () => {
expect(result.cacheWriteInputCost).toBeCloseTo(1000 * (3.75 / 1e6));
});

it("should calculate AWS Bedrock Claude cache write costs", async () => {
  // Bedrock Claude Haiku 4.5 input is 1.0/1M; 5m write 1.25/1M; 1h write 2.0/1M.
  const costs = await calculateCosts(
    "claude-haiku-4-5",
    "aws-bedrock",
    1004,
    50,
    0,
    undefined,
    null,
    0,
    undefined,
    0,
    null,
    null,
    undefined,
    { cacheWriteTokens: 1000, cacheWrite1hTokens: 700 },
  );

  // 1000 total write tokens, of which 700 are 1h TTL and the remaining 300
  // are billed at the 5m write rate; a 20% discount applies throughout.
  const discountMultiplier = 0.8;
  const expectedWriteCost = 300 * (1.25 / 1e6) + 700 * (2.0 / 1e6);

  expect(costs.inputCost).toBeCloseTo(4 * (1.0 / 1e6) * discountMultiplier);
  expect(costs.outputCost).toBeCloseTo(50 * (5.0 / 1e6) * discountMultiplier);
  expect(costs.cacheWriteInputCost).toBeCloseTo(
    expectedWriteCost * discountMultiplier,
  );
  expect(costs.discount).toBeCloseTo(0.2);
  expect(costs.cacheWriteTokens).toBe(1000);
});

it("should calculate costs with cached tokens for Anthropic (subsequent request - cache read)", async () => {
// For Anthropic subsequent request: 4 non-cached + 1659 cache read = 1663 total tokens, 1659 cache reads
const result = await calculateCosts(
Expand Down
Loading
Loading