Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions apps/gateway/src/api.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2783,11 +2783,11 @@ describe("api", () => {
expect(logs[0].hasError).toBe(true);
expect(logs[0].errorDetails?.statusCode).toBe(401);

expect(isTrackedKeyHealthy("provider-key-id-stream-auth-error")).toBe(
false,
);
expect(
getTrackedKeyMetrics("provider-key-id-stream-auth-error"),
isTrackedKeyHealthy("provider-key-id-stream-auth-error", "custom"),
).toBe(false);
expect(
getTrackedKeyMetrics("provider-key-id-stream-auth-error", "custom"),
).toMatchObject({
permanentlyBlacklisted: true,
totalRequests: 1,
Expand Down
103 changes: 79 additions & 24 deletions apps/gateway/src/chat/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ import {
MAX_RETRIES,
providerRetryKey,
selectNextProvider,
shouldRetryAlternateKey,
shouldRetryRequest,
} from "./tools/retry-with-fallback.js";
import {
Expand Down Expand Up @@ -1484,7 +1485,6 @@ chat.openapi(completions, async (c) => {
const customProviderKey = await findCustomProviderKey(
project.organizationId,
customProviderName,
requestId,
);
if (!customProviderKey) {
throw new HTTPException(400, {
Expand Down Expand Up @@ -1867,7 +1867,7 @@ chat.openapi(completions, async (c) => {
const providerKey = await findProviderKey(
project.organizationId,
usedProvider,
requestId,
modelInfo.id || stripRegionFromModelName(usedModel, usedRegion),
);
lockedRegion = providerKey
? resolveExplicitRegionFromProviderKey(providerKey)
Expand Down Expand Up @@ -2528,7 +2528,7 @@ chat.openapi(completions, async (c) => {
const providerKey = await findProviderKey(
project.organizationId,
requestedProvider,
requestId,
modelInfo.id || stripRegionFromModelName(usedModel, usedRegion),
);
explicitDirectRegion = providerKey
? resolveExplicitRegionFromProviderKey(providerKey)
Expand Down Expand Up @@ -2766,13 +2766,13 @@ chat.openapi(completions, async (c) => {
providerKey = await findCustomProviderKey(
project.organizationId,
customProviderName,
requestId,
baseModelName,
);
} else {
providerKey = await findProviderKey(
project.organizationId,
usedProvider,
requestId,
baseModelName,
);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

Expand Down Expand Up @@ -2830,7 +2830,9 @@ chat.openapi(completions, async (c) => {
});
}

const envResult = getProviderEnv(usedProvider);
const envResult = getProviderEnv(usedProvider, {
selectionScope: baseModelName,
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.
usedToken = envResult.token;
configIndex = envResult.configIndex;
envVarName = envResult.envVarName;
Expand All @@ -2848,13 +2850,13 @@ chat.openapi(completions, async (c) => {
providerKey = await findCustomProviderKey(
project.organizationId,
customProviderName,
requestId,
baseModelName,
);
} else {
providerKey = await findProviderKey(
project.organizationId,
usedProvider,
requestId,
baseModelName,
);
}

Expand Down Expand Up @@ -2905,7 +2907,9 @@ chat.openapi(completions, async (c) => {
});
}

const envResult = getProviderEnv(usedProvider);
const envResult = getProviderEnv(usedProvider, {
selectionScope: baseModelName,
});
usedToken = envResult.token;
configIndex = envResult.configIndex;
envVarName = envResult.envVarName;
Expand Down Expand Up @@ -4787,10 +4791,21 @@ chat.openapi(completions, async (c) => {

// Report key health for the selected token source
if (envVarName !== undefined) {
reportKeyError(envVarName, configIndex, 0);
reportKeyError(
envVarName,
configIndex,
0,
undefined,
baseModelName,
);
}
if (providerKey?.id) {
reportTrackedKeyError(providerKey.id, 0);
reportTrackedKeyError(
providerKey.id,
0,
undefined,
baseModelName,
);
}

if (willRetrySameProvider && sameProviderRetryContext) {
Expand Down Expand Up @@ -4899,7 +4914,13 @@ chat.openapi(completions, async (c) => {
let sameProviderRetryContext: Awaited<
ReturnType<typeof resolveProviderContext>
> | null = null;
if (isRetryableErrorType(finishReason)) {
if (
shouldRetryAlternateKey(
finishReason,
res.status,
errorResponseText,
)
) {
rememberFailedKey(usedProvider, usedRegion, {
envVarName,
configIndex,
Expand Down Expand Up @@ -5023,13 +5044,15 @@ chat.openapi(completions, async (c) => {
configIndex,
res.status,
errorResponseText,
baseModelName,
);
}
if (providerKey?.id && finishReason !== "content_filter") {
reportTrackedKeyError(
providerKey.id,
res.status,
errorResponseText,
baseModelName,
);
}

Expand Down Expand Up @@ -5168,7 +5191,13 @@ chat.openapi(completions, async (c) => {
let sameProviderRetryContext: Awaited<
ReturnType<typeof resolveProviderContext>
> | null = null;
if (isRetryableErrorType(errorType)) {
if (
shouldRetryAlternateKey(
errorType,
inferredStatusCode,
errorResponseText,
)
) {
rememberFailedKey(usedProvider, usedRegion, {
envVarName,
configIndex,
Expand Down Expand Up @@ -5277,13 +5306,15 @@ chat.openapi(completions, async (c) => {
configIndex,
inferredStatusCode,
errorResponseText,
baseModelName,
);
}
if (providerKey?.id && errorType !== "content_filter") {
reportTrackedKeyError(
providerKey.id,
inferredStatusCode,
errorResponseText,
baseModelName,
);
}

Expand Down Expand Up @@ -7631,16 +7662,27 @@ chat.openapi(completions, async (c) => {
// Report key health for the selected token source
if (envVarName !== undefined) {
if (streamingError !== null) {
reportKeyError(envVarName, configIndex, streamingErrorStatusCode);
reportKeyError(
envVarName,
configIndex,
streamingErrorStatusCode,
undefined,
baseModelName,
);
} else {
reportKeySuccess(envVarName, configIndex);
reportKeySuccess(envVarName, configIndex, baseModelName);
}
}
if (providerKey?.id) {
if (streamingError !== null) {
reportTrackedKeyError(providerKey.id, streamingErrorStatusCode);
reportTrackedKeyError(
providerKey.id,
streamingErrorStatusCode,
undefined,
baseModelName,
);
} else {
reportTrackedKeySuccess(providerKey.id);
reportTrackedKeySuccess(providerKey.id, baseModelName);
}
}

Expand Down Expand Up @@ -7980,10 +8022,10 @@ chat.openapi(completions, async (c) => {

// Report key health for the selected token source
if (envVarName !== undefined) {
reportKeyError(envVarName, configIndex, 0);
reportKeyError(envVarName, configIndex, 0, undefined, baseModelName);
}
if (providerKey?.id) {
reportTrackedKeyError(providerKey.id, 0);
reportTrackedKeyError(providerKey.id, 0, undefined, baseModelName);
}

if (willRetrySameProvider && sameProviderRetryContext) {
Expand Down Expand Up @@ -8339,7 +8381,9 @@ chat.openapi(completions, async (c) => {
let sameProviderRetryContext: Awaited<
ReturnType<typeof resolveProviderContext>
> | null = null;
if (isRetryableErrorType(finishReason)) {
if (
shouldRetryAlternateKey(finishReason, res.status, errorResponseText)
) {
rememberFailedKey(usedProvider, usedRegion, {
envVarName,
configIndex,
Expand Down Expand Up @@ -8464,10 +8508,21 @@ chat.openapi(completions, async (c) => {
// Report key health for the selected token source
// Don't report content_filter as a key error - it's intentional provider behavior
if (envVarName !== undefined && finishReason !== "content_filter") {
reportKeyError(envVarName, configIndex, res.status, errorResponseText);
reportKeyError(
envVarName,
configIndex,
res.status,
errorResponseText,
baseModelName,
);
}
if (providerKey?.id && finishReason !== "content_filter") {
reportTrackedKeyError(providerKey.id, res.status, errorResponseText);
reportTrackedKeyError(
providerKey.id,
res.status,
errorResponseText,
baseModelName,
);
}

if (willRetrySameProvider && sameProviderRetryContext) {
Expand Down Expand Up @@ -9205,10 +9260,10 @@ chat.openapi(completions, async (c) => {
// Report key health for the selected token source
// Note: We don't report empty responses as key errors since they're not upstream errors
if (envVarName !== undefined) {
reportKeySuccess(envVarName, configIndex);
reportKeySuccess(envVarName, configIndex, baseModelName);
}
if (providerKey?.id) {
reportTrackedKeySuccess(providerKey.id);
reportTrackedKeySuccess(providerKey.id, baseModelName);
}

if (cachingEnabled && cacheKey && !stream && !hasEmptyNonStreamingResponse) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ describe("getFinishReasonFromError", () => {
expect(getFinishReasonFromError(403)).toBe("gateway_error");
});

it("returns gateway_error for 400 invalid API key payloads", () => {
expect(
getFinishReasonFromError(
400,
'{"error":{"message":"API key not valid. Please pass a valid API key.","type":"authentication_error","code":"invalid_api_key"}}',
),
).toBe("gateway_error");
});

it("returns client_error when no error text provided for other 4xx", () => {
expect(getFinishReasonFromError(400)).toBe("client_error");
expect(getFinishReasonFromError(422)).toBe("client_error");
Expand Down
10 changes: 8 additions & 2 deletions apps/gateway/src/chat/tools/get-finish-reason-from-error.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { hasInvalidProviderCredentialError } from "@/lib/provider-auth-errors.js";

/**
* Determines the appropriate finish reason based on HTTP status code and error message
* 5xx status codes indicate upstream provider errors
Expand Down Expand Up @@ -55,8 +57,12 @@ export function getFinishReasonFromError(
return "content_filter";
}

// 401/403 usually indicate invalid or unauthorized provider credentials
if (statusCode === 401 || statusCode === 403) {
// 401/403 and known provider credential payloads indicate bad provider keys.
if (
statusCode === 401 ||
statusCode === 403 ||
hasInvalidProviderCredentialError(errorText)
) {
return "gateway_error";
}

Expand Down
18 changes: 18 additions & 0 deletions apps/gateway/src/chat/tools/get-provider-env.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { afterEach, beforeEach, describe, expect, it } from "vitest";

import { reportKeyError, resetKeyHealth } from "@/lib/api-key-health.js";
import { resetRoundRobinCounters } from "@/lib/round-robin-env.js";

import { getProviderEnv } from "./get-provider-env.js";
Expand All @@ -9,6 +10,7 @@ describe("getProviderEnv", () => {

beforeEach(() => {
resetRoundRobinCounters();
resetKeyHealth();
process.env.LLM_OPENAI_API_KEY = "sk-openai-a,sk-openai-b,sk-openai-c";
});

Expand Down Expand Up @@ -55,4 +57,20 @@ describe("getProviderEnv", () => {
expect(thirdKey.token).toBe("sk-openai-c");
expect(thirdKey.configIndex).toBe(2);
});

it("passes selection scope through to env key health", () => {
reportKeyError("LLM_OPENAI_API_KEY", 0, 500, undefined, "gpt-4");
reportKeyError("LLM_OPENAI_API_KEY", 0, 500, undefined, "gpt-4");
reportKeyError("LLM_OPENAI_API_KEY", 0, 500, undefined, "gpt-4");

const gpt4Selection = getProviderEnv("openai", {
selectionScope: "gpt-4",
});
const claudeSelection = getProviderEnv("openai", {
selectionScope: "claude-3-5-sonnet",
});

expect(gpt4Selection.configIndex).toBe(1);
expect(claudeSelection.configIndex).toBe(0);
});
});
6 changes: 4 additions & 2 deletions apps/gateway/src/chat/tools/get-provider-env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export interface ProviderEnvResult {
interface GetProviderEnvOptions {
advanceRoundRobin?: boolean;
excludedIndices?: ReadonlySet<number>;
selectionScope?: string;
}

/**
Expand Down Expand Up @@ -62,9 +63,10 @@ export function getProviderEnv(

const advanceRoundRobin = options.advanceRoundRobin ?? true;
const excludedIndices = options.excludedIndices;
const selectionScope = options.selectionScope;
const result = advanceRoundRobin
? getRoundRobinValue(envVar, envValue, excludedIndices)
: peekRoundRobinValue(envVar, envValue, excludedIndices);
? getRoundRobinValue(envVar, envValue, selectionScope, excludedIndices)
: peekRoundRobinValue(envVar, envValue, selectionScope, excludedIndices);

return { token: result.value, configIndex: result.index, envVarName: envVar };
}
Loading
Loading