-
Notifications
You must be signed in to change notification settings - Fork 134
fix: scope retry health by model and region #1977
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 15 commits
b8579f3
8391b63
b49cce3
b45c134
86a36d2
d48275a
f4de254
90b4a5f
3e208ad
17e7932
2cb5875
551fea0
3808c0e
8670ffa
403759b
0e0ce47
344ef27
81ef46b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -98,7 +98,7 @@ import { | |
| type WebSearchTool, | ||
| expandAllProviderRegions, | ||
| getProviderDefinition, | ||
| getRegionSpecificEnvValue, | ||
| getRegionSpecificEnvVarName, | ||
| stripRegionFromModelName, | ||
| } from "@llmgateway/models"; | ||
|
|
||
|
|
@@ -155,6 +155,7 @@ import { | |
| MAX_RETRIES, | ||
| providerRetryKey, | ||
| selectNextProvider, | ||
| shouldRetryAlternateKey, | ||
| shouldRetryRequest, | ||
| } from "./tools/retry-with-fallback.js"; | ||
| import { | ||
|
|
@@ -1577,7 +1578,6 @@ chat.openapi(completions, async (c) => { | |
| const customProviderKey = await findCustomProviderKey( | ||
| project.organizationId, | ||
| customProviderName, | ||
| requestId, | ||
| ); | ||
| if (!customProviderKey) { | ||
| throw new HTTPException(400, { | ||
|
|
@@ -1960,7 +1960,7 @@ chat.openapi(completions, async (c) => { | |
| const providerKey = await findProviderKey( | ||
| project.organizationId, | ||
| usedProvider, | ||
| requestId, | ||
| modelInfo.id || stripRegionFromModelName(usedModel, usedRegion), | ||
| ); | ||
| lockedRegion = providerKey | ||
| ? resolveExplicitRegionFromProviderKey(providerKey) | ||
|
|
@@ -2666,7 +2666,7 @@ chat.openapi(completions, async (c) => { | |
| const providerKey = await findProviderKey( | ||
| project.organizationId, | ||
| requestedProvider, | ||
| requestId, | ||
| modelInfo.id || stripRegionFromModelName(usedModel, usedRegion), | ||
| ); | ||
| explicitDirectRegion = providerKey | ||
| ? resolveExplicitRegionFromProviderKey(providerKey) | ||
|
|
@@ -2911,13 +2911,13 @@ chat.openapi(completions, async (c) => { | |
| providerKey = await findCustomProviderKey( | ||
| project.organizationId, | ||
| customProviderName, | ||
| requestId, | ||
| baseModelName, | ||
| ); | ||
| } else { | ||
| providerKey = await findProviderKey( | ||
| project.organizationId, | ||
| usedProvider, | ||
| requestId, | ||
| baseModelName, | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -2933,11 +2933,22 @@ chat.openapi(completions, async (c) => { | |
|
|
||
| usedToken = providerKey.token; | ||
| usedRegion ??= resolveRegionFromProviderKey(providerKey); | ||
| // Override with region-specific env var if the DB key doesn't match the requested region | ||
| // Override with region-specific env var if the DB key doesn't match the requested region. | ||
| // When we do override, route health attribution to the regional env credential | ||
| // (clear providerKey so reportTrackedKey* doesn't blame the unused DB key). | ||
| if (usedRegion) { | ||
| const regionToken = getRegionSpecificEnvValue(usedProvider, usedRegion); | ||
| if (regionToken && regionToken !== usedToken) { | ||
| usedToken = regionToken; | ||
| const regionEnvVarName = getRegionSpecificEnvVarName( | ||
| usedProvider, | ||
| usedRegion, | ||
| ); | ||
| if (regionEnvVarName) { | ||
| const regionToken = process.env[regionEnvVarName]; | ||
| if (regionToken && regionToken !== usedToken) { | ||
| usedToken = regionToken; | ||
| envVarName = regionEnvVarName; | ||
| configIndex = 0; | ||
| providerKey = undefined; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When a BYOK project has a provider key with […] Useful? React with 👍 / 👎. |
||
| } | ||
| } | ||
| } | ||
| } else if (project.mode === "credits") { | ||
|
|
@@ -2977,16 +2988,27 @@ chat.openapi(completions, async (c) => { | |
| }); | ||
| } | ||
|
|
||
| const envResult = getProviderEnv(usedProvider); | ||
| const envResult = getProviderEnv(usedProvider, { | ||
| selectionScope: baseModelName, | ||
| }); | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| usedToken = envResult.token; | ||
| configIndex = envResult.configIndex; | ||
| envVarName = envResult.envVarName; | ||
|
|
||
| // Override with region-specific env var if a non-default region is selected | ||
| // Override with region-specific env var if a non-default region is selected. | ||
| // Health attribution must follow the credential we actually send. | ||
| if (usedRegion) { | ||
| const regionToken = getRegionSpecificEnvValue(usedProvider, usedRegion); | ||
| if (regionToken) { | ||
| usedToken = regionToken; | ||
| const regionEnvVarName = getRegionSpecificEnvVarName( | ||
| usedProvider, | ||
| usedRegion, | ||
| ); | ||
| if (regionEnvVarName) { | ||
| const regionToken = process.env[regionEnvVarName]; | ||
| if (regionToken) { | ||
| usedToken = regionToken; | ||
| envVarName = regionEnvVarName; | ||
| configIndex = 0; | ||
| } | ||
| } | ||
| } | ||
| } else if (project.mode === "hybrid") { | ||
|
|
@@ -2995,24 +3017,34 @@ chat.openapi(completions, async (c) => { | |
| providerKey = await findCustomProviderKey( | ||
| project.organizationId, | ||
| customProviderName, | ||
| requestId, | ||
| baseModelName, | ||
| ); | ||
| } else { | ||
| providerKey = await findProviderKey( | ||
| project.organizationId, | ||
| usedProvider, | ||
| requestId, | ||
| baseModelName, | ||
| ); | ||
| } | ||
|
|
||
| if (providerKey) { | ||
| usedToken = providerKey.token; | ||
| usedRegion ??= resolveRegionFromProviderKey(providerKey); | ||
| // Override with region-specific env var if the DB key doesn't match the requested region | ||
| // Override with region-specific env var if the DB key doesn't match the requested region. | ||
| // Route health attribution to whichever credential is actually sent. | ||
| if (usedRegion) { | ||
| const regionToken = getRegionSpecificEnvValue(usedProvider, usedRegion); | ||
| if (regionToken && regionToken !== usedToken) { | ||
| usedToken = regionToken; | ||
| const regionEnvVarName = getRegionSpecificEnvVarName( | ||
| usedProvider, | ||
| usedRegion, | ||
| ); | ||
| if (regionEnvVarName) { | ||
| const regionToken = process.env[regionEnvVarName]; | ||
| if (regionToken && regionToken !== usedToken) { | ||
| usedToken = regionToken; | ||
| envVarName = regionEnvVarName; | ||
| configIndex = 0; | ||
| providerKey = undefined; | ||
| } | ||
| } | ||
| } | ||
| } else { | ||
|
|
@@ -3051,16 +3083,27 @@ chat.openapi(completions, async (c) => { | |
| }); | ||
| } | ||
|
|
||
| const envResult = getProviderEnv(usedProvider); | ||
| const envResult = getProviderEnv(usedProvider, { | ||
| selectionScope: baseModelName, | ||
| }); | ||
| usedToken = envResult.token; | ||
| configIndex = envResult.configIndex; | ||
| envVarName = envResult.envVarName; | ||
|
|
||
| // Override with region-specific env var if a non-default region is selected | ||
| // Override with region-specific env var if a non-default region is selected. | ||
| // Health attribution must follow the credential we actually send. | ||
| if (usedRegion) { | ||
| const regionToken = getRegionSpecificEnvValue(usedProvider, usedRegion); | ||
| if (regionToken) { | ||
| usedToken = regionToken; | ||
| const regionEnvVarName = getRegionSpecificEnvVarName( | ||
| usedProvider, | ||
| usedRegion, | ||
| ); | ||
| if (regionEnvVarName) { | ||
| const regionToken = process.env[regionEnvVarName]; | ||
| if (regionToken) { | ||
| usedToken = regionToken; | ||
| envVarName = regionEnvVarName; | ||
| configIndex = 0; | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -5003,10 +5046,21 @@ chat.openapi(completions, async (c) => { | |
|
|
||
| // Report key health for the selected token source | ||
| if (envVarName !== undefined) { | ||
| reportKeyError(envVarName, configIndex, 0); | ||
| reportKeyError( | ||
| envVarName, | ||
| configIndex, | ||
| 0, | ||
| undefined, | ||
| baseModelName, | ||
| ); | ||
| } | ||
| if (providerKey?.id) { | ||
| reportTrackedKeyError(providerKey.id, 0); | ||
| reportTrackedKeyError( | ||
| providerKey.id, | ||
| 0, | ||
| undefined, | ||
| baseModelName, | ||
| ); | ||
| } | ||
|
|
||
| if (willRetrySameProvider && sameProviderRetryContext) { | ||
|
|
@@ -5115,7 +5169,13 @@ chat.openapi(completions, async (c) => { | |
| let sameProviderRetryContext: Awaited< | ||
| ReturnType<typeof resolveProviderContext> | ||
| > | null = null; | ||
| if (isRetryableErrorType(finishReason)) { | ||
| if ( | ||
| shouldRetryAlternateKey( | ||
| finishReason, | ||
| res.status, | ||
| errorResponseText, | ||
| ) | ||
| ) { | ||
| rememberFailedKey(usedProvider, usedRegion, { | ||
| envVarName, | ||
| configIndex, | ||
|
|
@@ -5239,13 +5299,15 @@ chat.openapi(completions, async (c) => { | |
| configIndex, | ||
| res.status, | ||
| errorResponseText, | ||
| baseModelName, | ||
| ); | ||
| } | ||
| if (providerKey?.id && finishReason !== "content_filter") { | ||
| reportTrackedKeyError( | ||
| providerKey.id, | ||
| res.status, | ||
| errorResponseText, | ||
| baseModelName, | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -5384,7 +5446,13 @@ chat.openapi(completions, async (c) => { | |
| let sameProviderRetryContext: Awaited< | ||
| ReturnType<typeof resolveProviderContext> | ||
| > | null = null; | ||
| if (isRetryableErrorType(errorType)) { | ||
| if ( | ||
| shouldRetryAlternateKey( | ||
| errorType, | ||
| inferredStatusCode, | ||
| errorResponseText, | ||
| ) | ||
| ) { | ||
| rememberFailedKey(usedProvider, usedRegion, { | ||
| envVarName, | ||
| configIndex, | ||
|
|
@@ -5493,13 +5561,15 @@ chat.openapi(completions, async (c) => { | |
| configIndex, | ||
| inferredStatusCode, | ||
| errorResponseText, | ||
| baseModelName, | ||
| ); | ||
| } | ||
| if (providerKey?.id && errorType !== "content_filter") { | ||
| reportTrackedKeyError( | ||
| providerKey.id, | ||
| inferredStatusCode, | ||
| errorResponseText, | ||
| baseModelName, | ||
| ); | ||
| } | ||
|
|
||
|
|
@@ -7822,16 +7892,27 @@ chat.openapi(completions, async (c) => { | |
| // Report key health for the selected token source | ||
| if (envVarName !== undefined) { | ||
| if (streamingError !== null) { | ||
| reportKeyError(envVarName, configIndex, streamingErrorStatusCode); | ||
| reportKeyError( | ||
| envVarName, | ||
| configIndex, | ||
| streamingErrorStatusCode, | ||
| undefined, | ||
| baseModelName, | ||
| ); | ||
| } else { | ||
| reportKeySuccess(envVarName, configIndex); | ||
| reportKeySuccess(envVarName, configIndex, baseModelName); | ||
| } | ||
| } | ||
| if (providerKey?.id) { | ||
| if (streamingError !== null) { | ||
| reportTrackedKeyError(providerKey.id, streamingErrorStatusCode); | ||
| reportTrackedKeyError( | ||
| providerKey.id, | ||
| streamingErrorStatusCode, | ||
| undefined, | ||
| baseModelName, | ||
| ); | ||
| } else { | ||
| reportTrackedKeySuccess(providerKey.id); | ||
| reportTrackedKeySuccess(providerKey.id, baseModelName); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -8175,10 +8256,10 @@ chat.openapi(completions, async (c) => { | |
|
|
||
| // Report key health for the selected token source | ||
| if (envVarName !== undefined) { | ||
| reportKeyError(envVarName, configIndex, 0); | ||
| reportKeyError(envVarName, configIndex, 0, undefined, baseModelName); | ||
| } | ||
| if (providerKey?.id) { | ||
| reportTrackedKeyError(providerKey.id, 0); | ||
| reportTrackedKeyError(providerKey.id, 0, undefined, baseModelName); | ||
| } | ||
|
|
||
| if (willRetrySameProvider && sameProviderRetryContext) { | ||
|
|
@@ -8534,7 +8615,9 @@ chat.openapi(completions, async (c) => { | |
| let sameProviderRetryContext: Awaited< | ||
| ReturnType<typeof resolveProviderContext> | ||
| > | null = null; | ||
| if (isRetryableErrorType(finishReason)) { | ||
| if ( | ||
| shouldRetryAlternateKey(finishReason, res.status, errorResponseText) | ||
| ) { | ||
| rememberFailedKey(usedProvider, usedRegion, { | ||
| envVarName, | ||
| configIndex, | ||
|
|
@@ -8659,10 +8742,21 @@ chat.openapi(completions, async (c) => { | |
| // Report key health for the selected token source | ||
| // Don't report content_filter as a key error - it's intentional provider behavior | ||
| if (envVarName !== undefined && finishReason !== "content_filter") { | ||
| reportKeyError(envVarName, configIndex, res.status, errorResponseText); | ||
| reportKeyError( | ||
| envVarName, | ||
| configIndex, | ||
| res.status, | ||
| errorResponseText, | ||
| baseModelName, | ||
| ); | ||
| } | ||
| if (providerKey?.id && finishReason !== "content_filter") { | ||
| reportTrackedKeyError(providerKey.id, res.status, errorResponseText); | ||
| reportTrackedKeyError( | ||
| providerKey.id, | ||
| res.status, | ||
| errorResponseText, | ||
| baseModelName, | ||
| ); | ||
| } | ||
|
|
||
| if (willRetrySameProvider && sameProviderRetryContext) { | ||
|
|
@@ -9542,10 +9636,10 @@ chat.openapi(completions, async (c) => { | |
| // Report key health for the selected token source | ||
| // Note: We don't report empty responses as key errors since they're not upstream errors | ||
| if (envVarName !== undefined) { | ||
| reportKeySuccess(envVarName, configIndex); | ||
| reportKeySuccess(envVarName, configIndex, baseModelName); | ||
| } | ||
| if (providerKey?.id) { | ||
| reportTrackedKeySuccess(providerKey.id); | ||
| reportTrackedKeySuccess(providerKey.id, baseModelName); | ||
| } | ||
|
|
||
| if (cachingEnabled && cacheKey && !stream && !hasEmptyNonStreamingResponse) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.