Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 45 additions & 58 deletions apps/gateway/src/chat/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -254,18 +254,18 @@ function filterRegionsByAvailableKeys(
});
}

function preferConcreteRegionalMappings(
function preferProviderRootMappings(
providers: ProviderModelMapping[],
): ProviderModelMapping[] {
const providersWithRegions = new Set(
const providersWithRootMappings = new Set(
providers
.filter((mapping) => mapping.region)
.filter((mapping) => !mapping.region)
.map((mapping) => mapping.providerId),
);

return providers.filter(
(mapping) =>
!providersWithRegions.has(mapping.providerId) || Boolean(mapping.region),
!providersWithRootMappings.has(mapping.providerId) || !mapping.region,
);
Comment on lines 266 to 269
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve concrete mapping before resolving key region

This filter now drops concrete regional rows whenever a provider has a root row, so routing selects a root modelName (no :region) even for regional providers. In API-key/hybrid mode, usedRegion is later populated from the provider key, but downstream validation still looks mappings up by the exact (providerId, modelName, region) triple (e.g. the finalModelInfo.providers.find(...) call used for the max-token and supported-parameter checks). That lookup no longer finds a match, so those guards are silently skipped. For regional providers like Alibaba, this can let an invalid max_tokens value or unsupported parameters through to the upstream provider instead of being rejected by the gateway.

Useful? React with 👍 / 👎.

}

Expand Down Expand Up @@ -1691,7 +1691,7 @@ chat.openapi(completions, async (c) => {
}
const candidateAllowedProviders = candidateIam.allowedProviders;

const candidateProviders = preferConcreteRegionalMappings(
const candidateProviders = preferProviderRootMappings(
project.mode === "credits"
? filterRegionsByAvailableKeys(
expandAllProviderRegions(
Expand Down Expand Up @@ -1834,6 +1834,7 @@ chat.openapi(completions, async (c) => {
{
metricsMap,
isStreaming: stream,
includeProviderScoreRegions: false,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve region in provider scores used for retries

Passing includeProviderScoreRegions: false here strips region data from routingMetadata.providerScores before the retry loop runs. In selectNextProvider (apps/gateway/src/chat/tools/retry-with-fallback.ts), the next mapping is resolved with p.providerId === score.providerId && p.region === score.region; when the chosen mappings are regional (for example alibaba with cn-beijing), score.region becomes undefined, no mapping matches, and fallback retries stop after the first retryable failure even when alternative providers exist.

Useful? React with 👍 / 👎.

promptTokens: routingPromptTokens,
},
);
Expand Down Expand Up @@ -2108,9 +2109,7 @@ chat.openapi(completions, async (c) => {

// Attempt to re-route to alternative providers (same pattern as low-uptime fallback)
const providerIds = modelInfo.providers
.filter(
(p) => !(p.providerId === usedProvider && p.region === usedRegion),
)
.filter((p) => p.providerId !== usedProvider)
.map((p) => p.providerId);

if (providerIds.length > 0) {
Expand All @@ -2127,39 +2126,18 @@ chat.openapi(completions, async (c) => {
.filter((p) => hasProviderEnvironmentToken(p.id as Provider))
.map((p) => p.id);

const availableModelProviders = preferConcreteRegionalMappings(
iamFilteredModelProviders,
).filter((provider) => {
if (!availableProviders.includes(provider.providerId)) {
return false;
}
if (
provider.providerId === usedProvider &&
provider.region === usedRegion
) {
return false;
}
if (webSearchTool && provider.webSearch !== true) {
return false;
}
if (
response_format?.type === "json_object" ||
response_format?.type === "json_schema"
) {
if (provider.jsonOutput !== true) {
return false;
}
}
if (response_format?.type === "json_schema") {
if (provider.jsonOutputSchema !== true) {
return false;
}
}
if (hasImages && provider.vision !== true) {
return false;
}
return true;
});
const availableModelProviders = filterEligibleModelProviders(
preferProviderRootMappings(expandedIamFilteredModelProviders),
{
allProviderVariants: modelInfo.providers,
availableProviders,
webSearchTool,
responseFormatType: response_format?.type,
hasImages,
maxTokens: max_tokens,
reasoningEffort: reasoning_effort,
},
).filter((provider) => provider.providerId !== usedProvider);

Comment on lines +2129 to 2141
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Provider-key region locks can be bypassed after root-mapping preference.

At Line 1781 (and similarly at Line 1478 and Line 1624), root mappings are preferred before eligibility filtering. Since filterEligibleModelProviders only rejects mismatched non-empty regions, a locked provider can still pass with region: undefined, which weakens explicit region pinning from provider keys.

💡 Suggested fix
diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts
@@
 		const lockedRegion = options.providerLockedRegions?.get(
 			provider.providerId,
 		);
-		if (lockedRegion && provider.region && provider.region !== lockedRegion) {
-			return false;
-		}
+		if (lockedRegion) {
+			// Enforce explicit provider-key region pin.
+			if (!provider.region || provider.region !== lockedRegion) {
+				return false;
+			}
+		}

Also applies to: 1624-1634, 1781-1791

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@apps/gateway/src/chat/chat.ts` around lines 1477 - 1489, Root-mapping is
currently applied before eligibility filtering which lets providers with locked
regions be returned with region undefined; fix by ensuring region-locks are
enforced before/while root-mapping: either call filterEligibleModelProviders on
expandedIamFilteredModelProviders (and related calls at the other sites) before
preferProviderRootMappings, or modify preferProviderRootMappings to
preserve/propagate any explicit region property from provider keys so that
filterEligibleModelProviders still rejects mismatched regions; update the call
sites using filterEligibleModelProviders and preferProviderRootMappings
(referencing expandedIamFilteredModelProviders, modelInfo.providers,
availableProviders, response_format, hasImages, max_tokens, reasoning_effort)
accordingly so region locks cannot be bypassed.

// Also filter out rate-limited alternatives
const rateLimitedAlternatives = await filterRateLimitedProviders(
Expand Down Expand Up @@ -2206,6 +2184,7 @@ chat.openapi(completions, async (c) => {
{
metricsMap: allMetricsMap,
isStreaming: stream,
includeProviderScoreRegions: false,
promptTokens: routingPromptTokens,
},
);
Expand Down Expand Up @@ -2285,9 +2264,7 @@ chat.openapi(completions, async (c) => {
const currentUptime = metrics.uptime;
// Get available providers for routing
const providerIds = modelInfo.providers
.filter(
(p) => !(p.providerId === usedProvider && p.region === usedRegion),
) // Exclude the exact low-uptime provider+region pair
.filter((p) => p.providerId !== usedProvider)
.map((p) => p.providerId);
Comment on lines 2266 to 2268
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Exclude only the failed provider-region in uptime fallback

This change drops all mappings for usedProvider during low-uptime rerouting, rather than excluding only the failing provider+region pair. In a model that has multiple regions for one provider, a low-uptime region can no longer fail over to a healthier region of the same provider, and if no other providers exist this path now silently skips fallback even though a viable regional alternative is available.

Useful? React with 👍 / 👎.


if (providerIds.length > 0) {
Expand All @@ -2308,7 +2285,7 @@ chat.openapi(completions, async (c) => {
// If web search is requested, also filter to providers that support it
// If JSON output is requested, also filter to providers that support it
const availableModelProviders = filterEligibleModelProviders(
preferConcreteRegionalMappings(expandedIamFilteredModelProviders),
preferProviderRootMappings(expandedIamFilteredModelProviders),
{
allProviderVariants: modelInfo.providers,
availableProviders,
Expand All @@ -2318,13 +2295,7 @@ chat.openapi(completions, async (c) => {
maxTokens: max_tokens,
reasoningEffort: reasoning_effort,
},
).filter(
(provider) =>
!(
provider.providerId === usedProvider &&
provider.region === usedRegion
),
);
).filter((provider) => provider.providerId !== usedProvider);

if (availableModelProviders.length > 0) {
const rawModelForFallback = models.find((m) => m.id === baseModelId);
Expand Down Expand Up @@ -2388,6 +2359,7 @@ chat.openapi(completions, async (c) => {
{
metricsMap: allMetricsMap,
isStreaming: stream,
includeProviderScoreRegions: false,
promptTokens: routingPromptTokens,
},
);
Expand Down Expand Up @@ -2486,7 +2458,7 @@ chat.openapi(completions, async (c) => {

// Filter model providers to only those eligible for this request
const availableModelProviders = filterEligibleModelProviders(
preferConcreteRegionalMappings(expandedIamFilteredModelProviders),
preferProviderRootMappings(expandedIamFilteredModelProviders),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Preserve regional rows when applying provider locks

Passing preferProviderRootMappings(expandedIamFilteredModelProviders) into filterEligibleModelProviders removes concrete region mappings before routing in the API-key/hybrid path, which makes providerLockedRegions effectively inert because that lock check only applies when provider.region is present. As a result, scoring and eligibility checks can run against the synthetic root row instead of the user’s locked region (e.g., cn-beijing), so routing decisions and pricing/capability filtering are based on the wrong region until execution-time token resolution.

Useful? React with 👍 / 👎.

{
allProviderVariants: modelInfo.providers,
availableProviders,
Expand Down Expand Up @@ -2583,6 +2555,7 @@ chat.openapi(completions, async (c) => {
{
metricsMap,
isStreaming: stream,
includeProviderScoreRegions: false,
promptTokens: routingPromptTokens,
},
);
Expand Down Expand Up @@ -2657,7 +2630,10 @@ chat.openapi(completions, async (c) => {
selectionReason = "fallback-first-available";
}

let routingMetadataProviders = allModelProviders;
let routingMetadataProviders =
selectionReason === "direct-provider-specified"
? allModelProviders
: preferProviderRootMappings(allModelProviders);
let directProviderRegionWasExplicit = false;

if (
Expand Down Expand Up @@ -2752,6 +2728,8 @@ chat.openapi(completions, async (c) => {
{
metricsMap,
isStreaming: stream,
includeProviderScoreRegions:
selectionReason === "direct-provider-specified",
promptTokens: routingPromptTokens,
},
Comment thread
coderabbitai[bot] marked this conversation as resolved.
);
Expand Down Expand Up @@ -2781,13 +2759,18 @@ chat.openapi(completions, async (c) => {
throughput: metrics?.throughput ?? 0,
};
});
const includeRoutingScoreRegions =
selectionReason === "direct-provider-specified";

routingMetadata = addContentFilterRoutingMetadata(
{
availableProviders: routingMetadataProviders.map((p) => p.providerId),
selectedProvider: usedProvider,
selectionReason,
providerScores: allProviderScores,
providerScores: allProviderScores.map((score) => ({
...score,
region: includeRoutingScoreRegions ? score.region : undefined,
})),
...getNoFallbackRoutingMetadata(noFallback, xNoFallbackHeaderSet),
},
contentFilterMatched,
Expand Down Expand Up @@ -2864,9 +2847,13 @@ chat.openapi(completions, async (c) => {

// Create the model mapping values according to new schema
let usedModelMapping = usedModel; // Store the original provider model name
const includeUsedModelRegion =
routingMetadata?.selectionReason === "direct-provider-specified";
let usedModelFormatted = formatUsedModelForDisplay(
usedProvider,
usedRegion ? `${baseModelName}:${usedRegion}` : baseModelName,
includeUsedModelRegion && usedRegion
? `${baseModelName}:${usedRegion}`
: baseModelName,
Comment on lines +2850 to +2856
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Keep usedModelFormatted regionless when the region was inferred.

This boolean now depends only on selectionReason being direct-provider-specified, regardless of whether the region was explicit or inferred. As a result, once usedRegion is filled in from a default/implicit region, logs still show model:region. That reintroduces the hidden-region leak even if providerScores get normalized.

💡 Possible fix
-	const includeUsedModelRegion =
-		routingMetadata?.selectionReason === "direct-provider-specified";
+	const includeUsedModelRegion =
+		routingMetadata?.selectionReason === "direct-provider-specified" &&
+		routingMetadata.providerScores.some(
+			(score) => score.providerId === usedProvider && Boolean(score.region),
+		);

Also applies to: 3397-3402

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@apps/gateway/src/chat/chat.ts` around lines 2850 - 2856, The formatted model
string is including a region even when that region was inferred; change the
includeUsedModelRegion condition so it is true only when the selection was
explicitly provider-specified and the region itself is explicitly set (not
inferred). Concretely, update includeUsedModelRegion to check both
routingMetadata?.selectionReason === "direct-provider-specified" and a
flag/property indicating the region was explicit (e.g.,
routingMetadata?.regionIsExplicit or an equivalent boolean you add when
populating usedRegion), then use that boolean when calling
formatUsedModelForDisplay (same change also at the second occurrence around
usedModelFormatted at the other location).

customProviderName,
); // Store in LLMGateway format

Expand Down Expand Up @@ -3396,7 +3383,7 @@ chat.openapi(completions, async (c) => {
);

// If region is still unset but the provider supports regions, resolve the
// default region so it appears in logs and metadata.
// default region for request execution.
if (!usedRegion) {
const providerDef = providers.find((p) => p.id === usedProvider) as
| { regionConfig?: { defaultRegion: string } }
Expand All @@ -3407,7 +3394,7 @@ chat.openapi(completions, async (c) => {
}

// Re-compute usedModelFormatted now that region may have been resolved
if (usedRegion) {
if (includeUsedModelRegion && usedRegion) {
usedModelFormatted = formatUsedModelForDisplay(
usedProvider,
`${baseModelName}:${usedRegion}`,
Expand Down
36 changes: 22 additions & 14 deletions apps/gateway/src/fallback.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1045,16 +1045,17 @@ describe("fallback and error status code handling", () => {
const logs = await waitForLogs(1);
expect(logs).toHaveLength(1);
expect(logs[0].usedProvider).toBe("alibaba");
expect(logs[0].usedModel).toBe("alibaba/glm-4.6:cn-beijing");
expect(logs[0].usedModel).toBe("alibaba/glm-4.6");
expect(logs[0].routingMetadata?.selectedProvider).toBe("alibaba");
expect(logs[0].routingMetadata?.selectionReason).toBe(
"low-uptime-fallback",
);
expect(
logs[0].routingMetadata?.providerScores?.some(
(score) => score.providerId === "alibaba" && !score.region,
),
).toBe(false);
const alibabaScores =
logs[0].routingMetadata?.providerScores?.filter(
(score) => score.providerId === "alibaba",
) ?? [];
expect(alibabaScores).toHaveLength(1);
expect(alibabaScores[0]?.region).toBeUndefined();
expect(
logs[0].routingMetadata?.providerScores?.some(
(score) =>
Expand Down Expand Up @@ -1172,12 +1173,13 @@ describe("fallback and error status code handling", () => {
logs.find((entry) => entry.requestedModel === "glm-4.6") ?? logs.at(-1);
expect(log).toBeTruthy();
expect(log?.usedProvider).toBe("alibaba");
expect(log?.usedModel).toBe("alibaba/glm-4.6:cn-beijing");
expect(
log?.routingMetadata?.providerScores?.some(
(score) => score.providerId === "alibaba" && !score.region,
),
).toBe(false);
expect(log?.usedModel).toBe("alibaba/glm-4.6");
const alibabaScores =
log?.routingMetadata?.providerScores?.filter(
(score) => score.providerId === "alibaba",
) ?? [];
expect(alibabaScores).toHaveLength(1);
expect(alibabaScores[0]?.region).toBeUndefined();
expect(
log?.routingMetadata?.providerScores?.some(
(score) =>
Expand Down Expand Up @@ -1465,13 +1467,19 @@ describe("fallback and error status code handling", () => {
expect(res.status).toBe(200);

const logs = await waitForLogs(1);
expect(logs[0].routingMetadata?.providerScores).toContainEqual(
expect(logs[0].usedModel).toBe("alibaba/deepseek-v3.2");
const alibabaScores =
logs[0].routingMetadata?.providerScores?.filter(
(score) => score.providerId === "alibaba",
) ?? [];
expect(alibabaScores).toHaveLength(1);
expect(alibabaScores[0]).toEqual(
expect.objectContaining({
providerId: "alibaba",
region: "cn-beijing",
score: expect.any(Number),
}),
);
expect(alibabaScores[0]?.region).toBeUndefined();
expect(
logs[0].routingMetadata?.providerScores?.some(
(score) =>
Expand Down
15 changes: 12 additions & 3 deletions packages/actions/src/get-cheapest-from-available-providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ export interface ProviderSelectionOptions {
metricsMap?: Map<string, ProviderMetrics>;
isStreaming?: boolean;
videoPricing?: VideoPricingContext;
includeProviderScoreRegions?: boolean;
/**
* Estimated prompt tokens for the request. When provided and at or above
* CACHE_PROMPT_TOKEN_THRESHOLD, cache support is factored into the
Expand Down Expand Up @@ -335,6 +336,8 @@ export function getCheapestFromAvailableProviders<
const metricsMap = options?.metricsMap;
const isStreaming = options?.isStreaming ?? false;
const videoPricing = options?.videoPricing;
const includeProviderScoreRegions =
options?.includeProviderScoreRegions ?? true;
const promptTokens = options?.promptTokens;
// Use higher price weight for image generation models
const isImageModel = modelWithPricing.output?.includes("image") ?? false;
Expand Down Expand Up @@ -416,7 +419,12 @@ export function getCheapestFromAvailableProviders<

// If no metrics provided, fall back to price-only selection
if (!metricsMap || metricsMap.size === 0) {
return selectByPriceOnly(stableProviders, modelWithPricing, videoPricing);
return selectByPriceOnly(
stableProviders,
modelWithPricing,
videoPricing,
includeProviderScoreRegions,
);
}

// Calculate scores for each provider
Expand Down Expand Up @@ -559,7 +567,7 @@ export function getCheapestFromAvailableProviders<
const priority = providerDef?.priority ?? 1;
return {
providerId: p.provider.providerId,
region: p.provider.region,
region: includeProviderScoreRegions ? p.provider.region : undefined,
score: Number(p.score.toFixed(3)),
uptime: p.uptime,
latency: p.latency,
Expand All @@ -584,6 +592,7 @@ function selectByPriceOnly<T extends AvailableModelProvider>(
stableProviders: T[],
modelWithPricing: ModelWithPricing & { id: string; output?: string[] },
videoPricing?: VideoPricingContext,
includeProviderScoreRegions = true,
): ProviderSelectionResult<T> {
let cheapestProvider = stableProviders[0];
let lowestEffectivePrice = Number.MAX_VALUE;
Expand Down Expand Up @@ -628,7 +637,7 @@ function selectByPriceOnly<T extends AvailableModelProvider>(
selectionReason: "price-only-no-metrics",
providerScores: providerPrices.map((p) => ({
providerId: p.providerId,
region: p.region,
region: includeProviderScoreRegions ? p.region : undefined,
score: 0,
price: p.price,
priority: p.priority,
Expand Down
Loading
Loading