Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions apps/gateway/src/api.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2783,11 +2783,11 @@ describe("api", () => {
expect(logs[0].hasError).toBe(true);
expect(logs[0].errorDetails?.statusCode).toBe(401);

expect(isTrackedKeyHealthy("provider-key-id-stream-auth-error")).toBe(
false,
);
expect(
getTrackedKeyMetrics("provider-key-id-stream-auth-error"),
isTrackedKeyHealthy("provider-key-id-stream-auth-error", "custom"),
).toBe(false);
expect(
getTrackedKeyMetrics("provider-key-id-stream-auth-error", "custom"),
).toMatchObject({
permanentlyBlacklisted: true,
totalRequests: 1,
Expand Down
176 changes: 135 additions & 41 deletions apps/gateway/src/chat/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ import {
type WebSearchTool,
expandAllProviderRegions,
getProviderDefinition,
getRegionSpecificEnvValue,
getRegionSpecificEnvVarName,
stripRegionFromModelName,
} from "@llmgateway/models";

Expand Down Expand Up @@ -155,6 +155,7 @@ import {
MAX_RETRIES,
providerRetryKey,
selectNextProvider,
shouldRetryAlternateKey,
shouldRetryRequest,
} from "./tools/retry-with-fallback.js";
import {
Expand Down Expand Up @@ -1577,7 +1578,6 @@ chat.openapi(completions, async (c) => {
const customProviderKey = await findCustomProviderKey(
project.organizationId,
customProviderName,
requestId,
);
if (!customProviderKey) {
throw new HTTPException(400, {
Expand Down Expand Up @@ -1960,7 +1960,7 @@ chat.openapi(completions, async (c) => {
const providerKey = await findProviderKey(
project.organizationId,
usedProvider,
requestId,
modelInfo.id || stripRegionFromModelName(usedModel, usedRegion),
);
lockedRegion = providerKey
? resolveExplicitRegionFromProviderKey(providerKey)
Expand Down Expand Up @@ -2666,7 +2666,7 @@ chat.openapi(completions, async (c) => {
const providerKey = await findProviderKey(
project.organizationId,
requestedProvider,
requestId,
modelInfo.id || stripRegionFromModelName(usedModel, usedRegion),
);
explicitDirectRegion = providerKey
? resolveExplicitRegionFromProviderKey(providerKey)
Expand Down Expand Up @@ -2911,13 +2911,13 @@ chat.openapi(completions, async (c) => {
providerKey = await findCustomProviderKey(
project.organizationId,
customProviderName,
requestId,
baseModelName,
);
} else {
providerKey = await findProviderKey(
project.organizationId,
usedProvider,
requestId,
baseModelName,
);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

Expand All @@ -2933,11 +2933,22 @@ chat.openapi(completions, async (c) => {

usedToken = providerKey.token;
usedRegion ??= resolveRegionFromProviderKey(providerKey);
// Override with region-specific env var if the DB key doesn't match the requested region
// Override with region-specific env var if the DB key doesn't match the requested region.
// When we do override, route health attribution to the regional env credential
// (clear providerKey so reportTrackedKey* doesn't blame the unused DB key).
if (usedRegion) {
const regionToken = getRegionSpecificEnvValue(usedProvider, usedRegion);
if (regionToken && regionToken !== usedToken) {
usedToken = regionToken;
const regionEnvVarName = getRegionSpecificEnvVarName(
usedProvider,
usedRegion,
);
if (regionEnvVarName) {
const regionToken = process.env[regionEnvVarName];
if (regionToken && regionToken !== usedToken) {
usedToken = regionToken;
envVarName = regionEnvVarName;
configIndex = 0;
providerKey = undefined;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P2] Preserve BYOK endpoint context when attributing env health

When a BYOK project has a provider key with a `baseUrl` or provider options and a regional env var is also configured, this change clears `providerKey` after swapping to the regional token. The later `getProviderEndpoint(...)` call uses `providerKey?.baseUrl`, `providerKey?.options`, and `providerKey !== undefined`, so those requests stop using the BYOK endpoint/deployment/options and are routed as if they were credits/env traffic. Keep the provider key for endpoint construction and track the actual credential source separately for health attribution.

Useful? React with 👍 / 👎.

}
}
}
} else if (project.mode === "credits") {
Expand Down Expand Up @@ -2977,16 +2988,27 @@ chat.openapi(completions, async (c) => {
});
}

const envResult = getProviderEnv(usedProvider);
const envResult = getProviderEnv(usedProvider, {
selectionScope: baseModelName,
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.
usedToken = envResult.token;
configIndex = envResult.configIndex;
envVarName = envResult.envVarName;

// Override with region-specific env var if a non-default region is selected
// Override with region-specific env var if a non-default region is selected.
// Health attribution must follow the credential we actually send.
if (usedRegion) {
const regionToken = getRegionSpecificEnvValue(usedProvider, usedRegion);
if (regionToken) {
usedToken = regionToken;
const regionEnvVarName = getRegionSpecificEnvVarName(
usedProvider,
usedRegion,
);
if (regionEnvVarName) {
const regionToken = process.env[regionEnvVarName];
if (regionToken) {
usedToken = regionToken;
envVarName = regionEnvVarName;
configIndex = 0;
}
}
}
} else if (project.mode === "hybrid") {
Expand All @@ -2995,24 +3017,34 @@ chat.openapi(completions, async (c) => {
providerKey = await findCustomProviderKey(
project.organizationId,
customProviderName,
requestId,
baseModelName,
);
} else {
providerKey = await findProviderKey(
project.organizationId,
usedProvider,
requestId,
baseModelName,
);
}

if (providerKey) {
usedToken = providerKey.token;
usedRegion ??= resolveRegionFromProviderKey(providerKey);
// Override with region-specific env var if the DB key doesn't match the requested region
// Override with region-specific env var if the DB key doesn't match the requested region.
// Route health attribution to whichever credential is actually sent.
if (usedRegion) {
const regionToken = getRegionSpecificEnvValue(usedProvider, usedRegion);
if (regionToken && regionToken !== usedToken) {
usedToken = regionToken;
const regionEnvVarName = getRegionSpecificEnvVarName(
usedProvider,
usedRegion,
);
if (regionEnvVarName) {
const regionToken = process.env[regionEnvVarName];
if (regionToken && regionToken !== usedToken) {
usedToken = regionToken;
envVarName = regionEnvVarName;
configIndex = 0;
providerKey = undefined;
}
}
}
} else {
Expand Down Expand Up @@ -3051,16 +3083,27 @@ chat.openapi(completions, async (c) => {
});
}

const envResult = getProviderEnv(usedProvider);
const envResult = getProviderEnv(usedProvider, {
selectionScope: baseModelName,
});
usedToken = envResult.token;
configIndex = envResult.configIndex;
envVarName = envResult.envVarName;

// Override with region-specific env var if a non-default region is selected
// Override with region-specific env var if a non-default region is selected.
// Health attribution must follow the credential we actually send.
if (usedRegion) {
const regionToken = getRegionSpecificEnvValue(usedProvider, usedRegion);
if (regionToken) {
usedToken = regionToken;
const regionEnvVarName = getRegionSpecificEnvVarName(
usedProvider,
usedRegion,
);
if (regionEnvVarName) {
const regionToken = process.env[regionEnvVarName];
if (regionToken) {
usedToken = regionToken;
envVarName = regionEnvVarName;
configIndex = 0;
}
}
}
}
Expand Down Expand Up @@ -5003,10 +5046,21 @@ chat.openapi(completions, async (c) => {

// Report key health for the selected token source
if (envVarName !== undefined) {
reportKeyError(envVarName, configIndex, 0);
reportKeyError(
envVarName,
configIndex,
0,
undefined,
baseModelName,
);
}
if (providerKey?.id) {
reportTrackedKeyError(providerKey.id, 0);
reportTrackedKeyError(
providerKey.id,
0,
undefined,
baseModelName,
);
}

if (willRetrySameProvider && sameProviderRetryContext) {
Expand Down Expand Up @@ -5115,7 +5169,13 @@ chat.openapi(completions, async (c) => {
let sameProviderRetryContext: Awaited<
ReturnType<typeof resolveProviderContext>
> | null = null;
if (isRetryableErrorType(finishReason)) {
if (
shouldRetryAlternateKey(
finishReason,
res.status,
errorResponseText,
)
) {
rememberFailedKey(usedProvider, usedRegion, {
envVarName,
configIndex,
Expand Down Expand Up @@ -5239,13 +5299,15 @@ chat.openapi(completions, async (c) => {
configIndex,
res.status,
errorResponseText,
baseModelName,
);
}
if (providerKey?.id && finishReason !== "content_filter") {
reportTrackedKeyError(
providerKey.id,
res.status,
errorResponseText,
baseModelName,
);
}

Expand Down Expand Up @@ -5384,7 +5446,13 @@ chat.openapi(completions, async (c) => {
let sameProviderRetryContext: Awaited<
ReturnType<typeof resolveProviderContext>
> | null = null;
if (isRetryableErrorType(errorType)) {
if (
shouldRetryAlternateKey(
errorType,
inferredStatusCode,
errorResponseText,
)
) {
rememberFailedKey(usedProvider, usedRegion, {
envVarName,
configIndex,
Expand Down Expand Up @@ -5493,13 +5561,15 @@ chat.openapi(completions, async (c) => {
configIndex,
inferredStatusCode,
errorResponseText,
baseModelName,
);
}
if (providerKey?.id && errorType !== "content_filter") {
reportTrackedKeyError(
providerKey.id,
inferredStatusCode,
errorResponseText,
baseModelName,
);
}

Expand Down Expand Up @@ -7822,16 +7892,27 @@ chat.openapi(completions, async (c) => {
// Report key health for the selected token source
if (envVarName !== undefined) {
if (streamingError !== null) {
reportKeyError(envVarName, configIndex, streamingErrorStatusCode);
reportKeyError(
envVarName,
configIndex,
streamingErrorStatusCode,
undefined,
baseModelName,
);
} else {
reportKeySuccess(envVarName, configIndex);
reportKeySuccess(envVarName, configIndex, baseModelName);
}
}
if (providerKey?.id) {
if (streamingError !== null) {
reportTrackedKeyError(providerKey.id, streamingErrorStatusCode);
reportTrackedKeyError(
providerKey.id,
streamingErrorStatusCode,
undefined,
baseModelName,
);
} else {
reportTrackedKeySuccess(providerKey.id);
reportTrackedKeySuccess(providerKey.id, baseModelName);
}
}

Expand Down Expand Up @@ -8175,10 +8256,10 @@ chat.openapi(completions, async (c) => {

// Report key health for the selected token source
if (envVarName !== undefined) {
reportKeyError(envVarName, configIndex, 0);
reportKeyError(envVarName, configIndex, 0, undefined, baseModelName);
}
if (providerKey?.id) {
reportTrackedKeyError(providerKey.id, 0);
reportTrackedKeyError(providerKey.id, 0, undefined, baseModelName);
}

if (willRetrySameProvider && sameProviderRetryContext) {
Expand Down Expand Up @@ -8534,7 +8615,9 @@ chat.openapi(completions, async (c) => {
let sameProviderRetryContext: Awaited<
ReturnType<typeof resolveProviderContext>
> | null = null;
if (isRetryableErrorType(finishReason)) {
if (
shouldRetryAlternateKey(finishReason, res.status, errorResponseText)
) {
rememberFailedKey(usedProvider, usedRegion, {
envVarName,
configIndex,
Expand Down Expand Up @@ -8659,10 +8742,21 @@ chat.openapi(completions, async (c) => {
// Report key health for the selected token source
// Don't report content_filter as a key error - it's intentional provider behavior
if (envVarName !== undefined && finishReason !== "content_filter") {
reportKeyError(envVarName, configIndex, res.status, errorResponseText);
reportKeyError(
envVarName,
configIndex,
res.status,
errorResponseText,
baseModelName,
);
}
if (providerKey?.id && finishReason !== "content_filter") {
reportTrackedKeyError(providerKey.id, res.status, errorResponseText);
reportTrackedKeyError(
providerKey.id,
res.status,
errorResponseText,
baseModelName,
);
}

if (willRetrySameProvider && sameProviderRetryContext) {
Expand Down Expand Up @@ -9542,10 +9636,10 @@ chat.openapi(completions, async (c) => {
// Report key health for the selected token source
// Note: We don't report empty responses as key errors since they're not upstream errors
if (envVarName !== undefined) {
reportKeySuccess(envVarName, configIndex);
reportKeySuccess(envVarName, configIndex, baseModelName);
}
if (providerKey?.id) {
reportTrackedKeySuccess(providerKey.id);
reportTrackedKeySuccess(providerKey.id, baseModelName);
}

if (cachingEnabled && cacheKey && !stream && !hasEmptyNonStreamingResponse) {
Expand Down
Loading
Loading