diff --git a/apps/api/src/routes/beacon.spec.ts b/apps/api/src/routes/beacon.spec.ts index 77d13bd7fc..8c9440f632 100644 --- a/apps/api/src/routes/beacon.spec.ts +++ b/apps/api/src/routes/beacon.spec.ts @@ -15,6 +15,10 @@ describe("beacon endpoint", () => { vi.clearAllMocks(); }); + function getExpectedVersion(clientVersion: string) { + return process.env.APP_VERSION ?? clientVersion; + } + it("should accept valid beacon data", async () => { const beaconData = { uuid: "123e4567-e89b-12d3-a456-426614174000", @@ -46,7 +50,7 @@ describe("beacon endpoint", () => { installation: beaconData.type, timestamp: beaconData.timestamp, source: "self_hosted_api", - version: beaconData.version, + version: getExpectedVersion(beaconData.version), client_ip: null, // No IP headers provided country: undefined, region: undefined, @@ -86,7 +90,7 @@ describe("beacon endpoint", () => { installation: beaconData.type, timestamp: beaconData.timestamp, source: "self_hosted_api", - version: beaconData.version, + version: getExpectedVersion(beaconData.version), client_ip: "203.0.113.42", country: "US", region: "California", @@ -125,7 +129,7 @@ describe("beacon endpoint", () => { installation: beaconData.type, timestamp: beaconData.timestamp, source: "self_hosted_api", - version: beaconData.version, + version: getExpectedVersion(beaconData.version), client_ip: "198.51.100.25", // First IP from X-Forwarded-For country: undefined, // GCP doesn't provide country in standard headers region: "us-central1", @@ -162,7 +166,7 @@ describe("beacon endpoint", () => { installation: beaconData.type, timestamp: beaconData.timestamp, source: "self_hosted_api", - version: beaconData.version, + version: getExpectedVersion(beaconData.version), client_ip: "192.0.2.123", country: undefined, region: undefined, @@ -199,7 +203,7 @@ describe("beacon endpoint", () => { installation: beaconData.type, timestamp: beaconData.timestamp, source: "self_hosted_api", - version: beaconData.version, + version: getExpectedVersion(beaconData.version), client_ip: null, country: undefined, region: undefined, diff --git a/apps/api/src/routes/beacon.ts b/apps/api/src/routes/beacon.ts index cb3724d2e1..b322399d1d 100644 --- a/apps/api/src/routes/beacon.ts +++ b/apps/api/src/routes/beacon.ts @@ -122,7 +122,7 @@ beacon.openapi(beaconRoute, async (c) => { installation: beaconData.type, timestamp: beaconData.timestamp, source: "self_hosted_api", - version: beaconData.version, + version: process.env.APP_VERSION ?? beaconData.version, client_ip: clientIP, country: regionInfo.country, region: regionInfo.region, diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts index de20ea3a5d..863d5f70b7 100644 --- a/apps/gateway/src/chat/chat.ts +++ b/apps/gateway/src/chat/chat.ts @@ -364,9 +364,9 @@ function isContentFilterProvider(providerId: string): boolean { function getContentFilterRoutingDecision( availableModelProviders: ProviderModelMapping[], - contentFilterMatched: boolean, + shouldAvoidContentFilterProviders: boolean, ): ContentFilterRoutingDecision { - if (!contentFilterMatched) { + if (!shouldAvoidContentFilterProviders) { return { candidates: availableModelProviders, excludedProviders: [], @@ -408,11 +408,12 @@ function getContentFilterRoutingDecision( function addContentFilterRoutingMetadata( routingMetadata: RoutingMetadata, contentFilterMatched: boolean, + contentFilterUnavailable: boolean, excludedProviders: ProviderModelMapping[], modelId: string | undefined, metricsMap: Map, ): RoutingMetadata { - if (!contentFilterMatched) { + if (!contentFilterMatched && !contentFilterUnavailable) { return routingMetadata; } @@ -438,7 +439,9 @@ function addContentFilterRoutingMetadata( throughput: metrics?.throughput ?? 0, price: getProviderSelectionPrice(provider), contentFilterProvider: true, - excludedByContentFilter: true, + ...(contentFilterMatched + ? { excludedByContentFilter: true } + : { excludedByModerationFailure: true }), }; }), ...routingMetadata.providerScores, @@ -446,7 +449,8 @@ function addContentFilterRoutingMetadata( return { ...routingMetadata, - contentFilterMatched: true, + ...(contentFilterMatched ? { contentFilterMatched: true } : {}), + ...(contentFilterUnavailable ? { contentFilterUnavailable: true } : {}), contentFilterRerouted: contentFilterExcludedProviders.length > 0, contentFilterExcludedProviders: contentFilterExcludedProviders.length > 0 @@ -456,6 +460,71 @@ function addContentFilterRoutingMetadata( }; } +function finalizeContentFilterRoutingMetadata( + routingMetadata: RoutingMetadata, + contentFilterMatched: boolean, + contentFilterUnavailable: boolean, + excludedProviders: ProviderModelMapping[], + modelId: string | undefined, + metricsMap: Map, + routingApplied: boolean, +): RoutingMetadata { + const metadata = addContentFilterRoutingMetadata( + routingMetadata, + contentFilterMatched, + contentFilterUnavailable, + excludedProviders, + modelId, + metricsMap, + ); + + if (!routingApplied || metadata.contentFilterRerouted) { + return metadata; + } + + const contentFilterExcludedProviders = [ + ...new Set(excludedProviders.map((provider) => provider.providerId)), + ]; + + return { + ...metadata, + contentFilterRerouted: true, + contentFilterExcludedProviders: + contentFilterExcludedProviders.length > 0 + ? contentFilterExcludedProviders + : undefined, + }; +} + +function mergeContentFilterRoutingDecision( + existingExcludedProviders: ProviderModelMapping[], + existingRoutingApplied: boolean, + decision: ContentFilterRoutingDecision, +): { + excludedProviders: ProviderModelMapping[]; + routingApplied: boolean; +} { + const excludedProviders = [...existingExcludedProviders]; + + for (const provider of decision.excludedProviders) { + const alreadyExcluded = excludedProviders.some( + (existingProvider) => + existingProvider.providerId === provider.providerId && + existingProvider.modelName === provider.modelName && + existingProvider.region === provider.region, + ); + + if (!alreadyExcluded) { + excludedProviders.push(provider); + } + } + + return { + excludedProviders, + routingApplied: existingRoutingApplied || decision.rerouted, + }; +} + function withUsedApiKeyHash( routingMetadata: RoutingMetadata | undefined, usedApiKeyHash: string | undefined, @@ -1445,6 +1514,39 @@ chat.openapi(completions, async (c) => { } } + const contentFilterMode = getContentFilterMode(); + const contentFilterMethod = getContentFilterMethod(); + const shouldApplyGatewayContentFilter = + contentFilterMode !== "disabled" && + shouldApplyContentFilterToModel(requestedModel); + const keywordContentFilterMatch = + shouldApplyGatewayContentFilter && contentFilterMethod === "keywords" + ? checkContentFilter(messages as BaseMessage[]) + : null; + const openAIContentFilterResult = + shouldApplyGatewayContentFilter && contentFilterMethod === "openai" + ? await checkOpenAIContentFilter( + messages as BaseMessage[], + { + requestId, + organizationId: project.organizationId, + projectId: project.id, + apiKeyId: apiKey.id, + }, + c.req.raw.signal, + ) + : null; + const contentFilterMatched = + keywordContentFilterMatch !== null || + openAIContentFilterResult?.flagged === true; + const contentFilterUnavailable = + openAIContentFilterResult?.unavailable === true; + const shouldAvoidContentFilterProviders = + contentFilterMode === "enabled" && + (contentFilterMatched || contentFilterUnavailable); + let contentFilterRoutingExcludedProviders: ProviderModelMapping[] = []; + let contentFilterRoutingApplied = false; + // Apply routing logic after apiKey and project are available if ( (usedProvider === "llmgateway" && usedModel === "auto") || @@ -1529,9 +1631,15 @@ chat.openapi(completions, async (c) => { ]; let selectedModel: ModelDefinition | undefined; - let selectedProviders: any[] = []; + let selectedProviders: ProviderModelMapping[] = []; + let selectedExcludedProviders: ProviderModelMapping[] = []; let lowestPrice = Number.MAX_VALUE; const now = new Date(); // Cache current time for deprecation checks + const autoRoutingCandidates: Array<{ + model: ModelDefinition; + providers: ProviderModelMapping[]; + excludedProviders: ProviderModelMapping[]; + }> = []; for (const modelDef of models) { if (modelDef.id === "auto" || modelDef.id === "custom") { @@ -1663,16 +1771,64 @@ chat.openapi(completions, async (c) => { }); if (suitableProviders.length > 0) { - // Find the cheapest among the suitable providers for this model - for (const provider of suitableProviders) { - const totalPrice = - ((provider.inputPrice ?? 0) + (provider.outputPrice ?? 0)) / 2; - - if (totalPrice < lowestPrice) { - lowestPrice = totalPrice; - selectedModel = modelDef; - selectedProviders = suitableProviders; - } + autoRoutingCandidates.push({ + model: modelDef, + providers: suitableProviders, + excludedProviders: [], + }); + } + } + + if (shouldAvoidContentFilterProviders) { + const hasNonSensitiveAutoCandidate = autoRoutingCandidates.some( + (candidate) => + candidate.providers.some( + (provider) => !isContentFilterProvider(provider.providerId), + ), + ); + + if (hasNonSensitiveAutoCandidate) { + for (const candidate of autoRoutingCandidates) { + const excludedProviders = candidate.providers.filter((provider) => + isContentFilterProvider(provider.providerId), + ); + candidate.excludedProviders = excludedProviders; + candidate.providers = candidate.providers.filter( + (provider) => !isContentFilterProvider(provider.providerId), + ); + } + + const mergedAutoRoutingDecision = mergeContentFilterRoutingDecision( + contentFilterRoutingExcludedProviders, + contentFilterRoutingApplied, + { + candidates: autoRoutingCandidates.flatMap( + (candidate) => candidate.providers, + ), + excludedProviders: autoRoutingCandidates.flatMap( + (candidate) => candidate.excludedProviders, + ), + rerouted: autoRoutingCandidates.some( + (candidate) => candidate.excludedProviders.length > 0, + ), + }, + ); + contentFilterRoutingExcludedProviders = + mergedAutoRoutingDecision.excludedProviders; + contentFilterRoutingApplied = mergedAutoRoutingDecision.routingApplied; + } + } + + for (const candidate of autoRoutingCandidates) { + for (const provider of candidate.providers) { + const totalPrice = + ((provider.inputPrice ?? 0) + (provider.outputPrice ?? 0)) / 2; + + if (totalPrice < lowestPrice) { + lowestPrice = totalPrice; + selectedModel = candidate.model; + selectedProviders = candidate.providers; + selectedExcludedProviders = candidate.excludedProviders; } } } @@ -1709,10 +1865,18 @@ chat.openapi(completions, async (c) => { usedProvider = cheapestResult.provider.providerId; usedModel = cheapestResult.provider.modelName; usedRegion = cheapestResult.provider.region; - routingMetadata = { - ...cheapestResult.metadata, - ...(noFallback ? { noFallback: true } : {}), - }; + routingMetadata = finalizeContentFilterRoutingMetadata( + { + ...cheapestResult.metadata, + ...(noFallback ? { noFallback: true } : {}), + }, + contentFilterMatched, + contentFilterUnavailable, + selectedExcludedProviders, + selectedModel.id, + metricsMap, + contentFilterRoutingApplied, + ); } else { // Fallback to first available provider if price comparison fails usedProvider = selectedProviders[0].providerId; @@ -1887,36 +2051,6 @@ chat.openapi(completions, async (c) => { } } - const contentFilterMode = getContentFilterMode(); - const contentFilterMethod = getContentFilterMethod(); - const shouldApplyGatewayContentFilter = - contentFilterMode !== "disabled" && - shouldApplyContentFilterToModel(requestedModel); - const keywordContentFilterMatch = - shouldApplyGatewayContentFilter && contentFilterMethod === "keywords" - ? checkContentFilter(messages as BaseMessage[]) - : null; - const openAIContentFilterResult = - shouldApplyGatewayContentFilter && contentFilterMethod === "openai" - ? await checkOpenAIContentFilter( - messages as BaseMessage[], - { - requestId, - organizationId: project.organizationId, - projectId: project.id, - apiKeyId: apiKey.id, - }, - c.req.raw.signal, - ) - : null; - const contentFilterMatched = - keywordContentFilterMatch !== null || - openAIContentFilterResult?.flagged === true; - const shouldRerouteContentFilter = - contentFilterMode === "enabled" && contentFilterMatched; - let contentFilterRoutingExcludedProviders: ProviderModelMapping[] = []; - let contentFilterRoutingApplied = false; - // Check provider RPM caps for specifically requested providers // If rate-limited, route to an alternative (or 429 if no-fallback) if ( @@ -2015,10 +2149,35 @@ chat.openapi(completions, async (c) => { (p) => !rateLimitedAlternatives.has(p.providerId), ); + const contentFilterRoutingDecision = getContentFilterRoutingDecision( + availableModelProviders, + shouldAvoidContentFilterProviders, + ); + const mergedContentFilterRoutingDecision = + mergeContentFilterRoutingDecision( + contentFilterRoutingExcludedProviders, + contentFilterRoutingApplied, + contentFilterRoutingDecision, + ); + contentFilterRoutingExcludedProviders = + mergedContentFilterRoutingDecision.excludedProviders; + contentFilterRoutingApplied = + mergedContentFilterRoutingDecision.routingApplied; + const contentFilterPreferredProviders = + contentFilterRoutingDecision.candidates; + const contentFilterPreferredNonRateLimitedAlternatives = + contentFilterPreferredProviders.filter( + (p) => !rateLimitedAlternatives.has(p.providerId), + ); + const candidatesForRouting = - nonRateLimitedAlternatives.length > 0 - ? nonRateLimitedAlternatives - : availableModelProviders; + contentFilterPreferredNonRateLimitedAlternatives.length > 0 + ? contentFilterPreferredNonRateLimitedAlternatives + : contentFilterPreferredProviders.length > 0 + ? contentFilterPreferredProviders + : nonRateLimitedAlternatives.length > 0 + ? nonRateLimitedAlternatives + : availableModelProviders; if (candidatesForRouting.length > 0) { const rawModelForFallback = models.find((m) => m.id === baseModelId); @@ -2068,16 +2227,24 @@ chat.openapi(completions, async (c) => { usedProvider = cheapestResult.provider.providerId; usedModel = cheapestResult.provider.modelName; usedRegion = cheapestResult.provider.region; - routingMetadata = { - ...cheapestResult.metadata, - selectionReason: "rate-limit-fallback", - originalProvider: requestedProvider, - originalProviderRateLimited: true, - providerScores: [ - originalProviderScore, - ...cheapestResult.metadata.providerScores, - ], - }; + routingMetadata = finalizeContentFilterRoutingMetadata( + { + ...cheapestResult.metadata, + selectionReason: "rate-limit-fallback", + originalProvider: requestedProvider, + originalProviderRateLimited: true, + providerScores: [ + originalProviderScore, + ...cheapestResult.metadata.providerScores, + ], + }, + contentFilterMatched, + contentFilterUnavailable, + contentFilterRoutingExcludedProviders, + modelWithPricing.id, + allMetricsMap, + contentFilterRoutingApplied, + ); } } } @@ -2201,8 +2368,24 @@ chat.openapi(completions, async (c) => { // Only proceed with fallback if there are providers with better uptime // Otherwise stick with the original provider if (betterUptimeProviders.length > 0) { + const contentFilterRoutingDecision = + getContentFilterRoutingDecision( + betterUptimeProviders, + shouldAvoidContentFilterProviders, + ); + const mergedContentFilterRoutingDecision = + mergeContentFilterRoutingDecision( + contentFilterRoutingExcludedProviders, + contentFilterRoutingApplied, + contentFilterRoutingDecision, + ); + contentFilterRoutingExcludedProviders = + mergedContentFilterRoutingDecision.excludedProviders; + contentFilterRoutingApplied = + mergedContentFilterRoutingDecision.routingApplied; + const cheapestResult = getCheapestFromAvailableProviders( - betterUptimeProviders, + contentFilterRoutingDecision.candidates, modelWithPricing, { metricsMap: allMetricsMap, isStreaming: stream }, ); @@ -2229,17 +2412,25 @@ chat.openapi(completions, async (c) => { if (cheapestResult) { usedProvider = cheapestResult.provider.providerId; usedModel = cheapestResult.provider.modelName; - routingMetadata = { - ...cheapestResult.metadata, - selectionReason: "low-uptime-fallback", - originalProvider: requestedProvider, - originalProviderUptime: currentUptime, - // Add the original provider's score to the scores array - providerScores: [ - originalProviderScore, - ...cheapestResult.metadata.providerScores, - ], - }; + routingMetadata = finalizeContentFilterRoutingMetadata( + { + ...cheapestResult.metadata, + selectionReason: "low-uptime-fallback", + originalProvider: requestedProvider, + originalProviderUptime: currentUptime, + // Add the original provider's score to the scores array + providerScores: [ + originalProviderScore, + ...cheapestResult.metadata.providerScores, + ], + }, + contentFilterMatched, + contentFilterUnavailable, + contentFilterRoutingExcludedProviders, + modelWithPricing.id, + allMetricsMap, + contentFilterRoutingApplied, + ); } } } @@ -2249,6 +2440,159 @@ chat.openapi(completions, async (c) => { } } + // When moderation is unavailable, reroute direct requests away from + // content-filter-sensitive providers before the outage block below. + if ( + !noFallback && + usedProvider && + requestedProvider && + requestedProvider !== "llmgateway" && + requestedProvider !== "custom" && + contentFilterMode === "enabled" && + contentFilterUnavailable && + isContentFilterProvider(usedProvider) + ) { + const baseModelId = (modelInfo as ModelDefinition).id; + const providerIds = modelInfo.providers + .filter( + (provider) => + !( + provider.providerId === usedProvider && + provider.region === usedRegion + ), + ) + .map((provider) => provider.providerId); + + if (providerIds.length > 0) { + const providerKeys = await findProviderKeysByProviders( + project.organizationId, + providerIds, + ); + + const availableProviders = + project.mode === "api-keys" + ? providerKeys.map((key) => key.provider) + : providers + .filter( + (provider) => + provider.id !== "llmgateway" && provider.id !== usedProvider, + ) + .filter((provider) => + hasProviderEnvironmentToken(provider.id as Provider), + ) + .map((provider) => provider.id); + + const availableModelProviders = filterEligibleModelProviders( + preferConcreteRegionalMappings(expandedIamFilteredModelProviders), + { + allProviderVariants: modelInfo.providers, + availableProviders, + webSearchTool, + responseFormatType: response_format?.type, + hasImages, + maxTokens: max_tokens, + reasoningEffort: reasoning_effort, + }, + ).filter( + (provider) => + !( + provider.providerId === usedProvider && + provider.region === usedRegion + ), + ); + + const nonSensitiveAlternatives = availableModelProviders.filter( + (provider) => !isContentFilterProvider(provider.providerId), + ); + if (nonSensitiveAlternatives.length > 0) { + const selectedProviderMapping = + preferConcreteRegionalMappings( + expandedIamFilteredModelProviders, + ).find( + (provider) => + provider.providerId === usedProvider && + provider.modelName === usedModel && + provider.region === usedRegion, + ) ?? + (modelInfo.providers.find( + (provider) => + provider.providerId === usedProvider && + provider.modelName === usedModel && + (provider as ProviderModelMapping).region === usedRegion, + ) as ProviderModelMapping | undefined); + const mergedContentFilterRoutingDecision = + mergeContentFilterRoutingDecision( + contentFilterRoutingExcludedProviders, + contentFilterRoutingApplied, + { + candidates: nonSensitiveAlternatives, + excludedProviders: selectedProviderMapping + ? [selectedProviderMapping] + : [], + rerouted: true, + }, + ); + contentFilterRoutingExcludedProviders = + mergedContentFilterRoutingDecision.excludedProviders; + contentFilterRoutingApplied = + mergedContentFilterRoutingDecision.routingApplied; + + const rawModelForFallback = models.find((m) => m.id === baseModelId); + const modelWithPricing = rawModelForFallback + ? { + ...rawModelForFallback, + providers: expandAllProviderRegions( + rawModelForFallback.providers as ProviderModelMapping[], + ), + } + : undefined; + + if (modelWithPricing) { + const metricsCombinations = [ + ...nonSensitiveAlternatives, + ...contentFilterRoutingExcludedProviders, + ].map((provider) => ({ + modelId: modelWithPricing.id, + providerId: provider.providerId, + region: provider.region, + })); + const metricsMap = + await getProviderMetricsForCombinations(metricsCombinations); + const providerAgnosticCandidates = + collapseProvidersToBestRegionPerProvider( + nonSensitiveAlternatives, + modelWithPricing, + { metricsMap, isStreaming: stream }, + ); + const cheapestResult = getCheapestFromAvailableProviders( + providerAgnosticCandidates, + modelWithPricing, + { metricsMap, isStreaming: stream }, + ); + + if (cheapestResult) { + usedProvider = cheapestResult.provider.providerId; + usedModel = cheapestResult.provider.modelName; + usedRegion = cheapestResult.provider.region; + routingMetadata = finalizeContentFilterRoutingMetadata( + { + ...cheapestResult.metadata, + selectionReason: "moderation-outage-fallback", + originalProvider: requestedProvider, + }, + contentFilterMatched, + contentFilterUnavailable, + contentFilterRoutingExcludedProviders, + modelWithPricing.id, + metricsMap, + contentFilterRoutingApplied, + ); + } + } + } + } + } + if (!usedProvider) { if (iamFilteredModelProviders.length === 0) { throw new HTTPException(403, { @@ -2324,13 +2668,20 @@ chat.openapi(completions, async (c) => { const contentFilterRoutingDecision = getContentFilterRoutingDecision( availableModelProviders, - shouldRerouteContentFilter, + shouldAvoidContentFilterProviders, ); + const mergedContentFilterRoutingDecision = + mergeContentFilterRoutingDecision( + contentFilterRoutingExcludedProviders, + contentFilterRoutingApplied, + contentFilterRoutingDecision, + ); + contentFilterRoutingExcludedProviders = + mergedContentFilterRoutingDecision.excludedProviders; + contentFilterRoutingApplied = + mergedContentFilterRoutingDecision.routingApplied; const contentFilterPreferredProviders = contentFilterRoutingDecision.candidates; - contentFilterRoutingExcludedProviders = - contentFilterRoutingDecision.excludedProviders; - contentFilterRoutingApplied = contentFilterRoutingDecision.rerouted; // Filter out rate-limited providers during routing const rateLimitedProviderIds = await filterRateLimitedProviders( @@ -2389,15 +2740,17 @@ chat.openapi(completions, async (c) => { usedProvider = cheapestResult.provider.providerId; usedModel = cheapestResult.provider.modelName; usedRegion = cheapestResult.provider.region; - routingMetadata = addContentFilterRoutingMetadata( + routingMetadata = finalizeContentFilterRoutingMetadata( { ...cheapestResult.metadata, ...(noFallback ? { noFallback: true } : {}), }, contentFilterMatched, + contentFilterUnavailable, contentFilterRoutingExcludedProviders, modelWithPricing.id, metricsMap, + contentFilterRoutingApplied, ); // Annotate rate-limited providers in routing metadata if (rateLimitedProviderIds.size > 0) { @@ -2573,7 +2926,7 @@ chat.openapi(completions, async (c) => { }; }); - routingMetadata = addContentFilterRoutingMetadata( + routingMetadata = finalizeContentFilterRoutingMetadata( { availableProviders: routingMetadataProviders.map((p) => p.providerId), selectedProvider: usedProvider, @@ -2582,9 +2935,11 @@ chat.openapi(completions, async (c) => { ...(noFallback ? { noFallback: true } : {}), }, contentFilterMatched, + contentFilterUnavailable, contentFilterRoutingExcludedProviders, baseModelId, metricsMap, + contentFilterRoutingApplied, ); } @@ -2881,6 +3236,158 @@ chat.openapi(completions, async (c) => { ); } + if (!usedToken) { + throw new HTTPException(500, { + message: `No token`, + }); + } + + usedApiKeyHash = getApiKeyFingerprint(usedToken); + routingMetadata = withUsedApiKeyHash(routingMetadata, usedApiKeyHash); + + const contentFilterBlocked = + contentFilterMode === "enabled" && + contentFilterMatched && + !contentFilterRoutingApplied; + const contentFilterSensitiveProviderBlocked = + contentFilterMode === "enabled" && + contentFilterUnavailable && + isContentFilterProvider(usedProvider); + + // Preserve monitor tagging, and also tag successful reroutes triggered by a + // gateway content-filter match so the decision remains visible in logs. + const shouldTagContentFilter = + (contentFilterMode === "monitor" && contentFilterMatched) || + contentFilterRoutingApplied; + const gatewayContentFilterResponse = openAIContentFilterResult?.responses + .length + ? openAIContentFilterResult.responses + : null; + const insertLog = ( + logData: Parameters[0], + options?: Parameters[1], + ) => + _insertLog( + { + ...logData, + internalContentFilter: shouldTagContentFilter + ? true + : logData.internalContentFilter, + gatewayContentFilterResponse: + logData.gatewayContentFilterResponse ?? gatewayContentFilterResponse, + }, + options, + ); + + if (contentFilterSensitiveProviderBlocked) { + const moderationOutageMessage = + "OpenAI moderation is unavailable and no eligible provider without provider-side content filtering is available."; + const baseLogEntry = createLogEntry( + requestId, + project, + apiKey, + providerKey?.id, + usedModelFormatted, + usedModelMapping, + usedProvider, + initialRequestedModel, + requestedProvider, + messages, + temperature, + max_tokens, + top_p, + frequency_penalty, + presence_penalty, + reasoning_effort, + reasoning_max_tokens, + effort, + response_format, + tools, + tool_choice, + source, + customHeaders, + debugMode, + userAgent, + image_config, + routingMetadata, + rawBody, + null, + null, + null, + undefined, + undefined, + ); + + logger.warn( + "Blocking request because OpenAI moderation is unavailable and selected provider is content-filter-sensitive", + { + requestId, + organizationId: project.organizationId, + projectId: project.id, + apiKeyId: apiKey.id, + usedProvider, + usedModel, + requestedModel: initialRequestedModel, + routingMetadata, + }, + ); + + try { + await insertLogEntry({ + ...baseLogEntry, + duration: 0, + timeToFirstToken: null, + timeToFirstReasoningToken: null, + responseSize: 0, + content: null, + reasoningContent: null, + finishReason: "upstream_error", + promptTokens: null, + completionTokens: null, + totalTokens: null, + reasoningTokens: null, + cachedTokens: null, + hasError: true, + streamed: !!stream, + canceled: false, + errorDetails: { + statusCode: 503, + statusText: "Service Unavailable", + responseText: moderationOutageMessage, + }, + cachedInputCost: null, + requestCost: null, + webSearchCost: null, + imageInputTokens: null, + imageOutputTokens: null, + imageInputCost: null, + imageOutputCost: null, + estimatedCost: false, + discount: null, + dataStorageCost: "0", + cached: false, + toolResults: null, + unifiedFinishReason: "upstream_error", + }); + } catch (error) { + logger.error( + "Failed to persist moderation outage block log", + { + requestId, + organizationId: project.organizationId, + projectId: project.id, + apiKeyId: apiKey.id, + usedProvider, + }, + error as Error, + ); + } + + throw new HTTPException(503, { + message: moderationOutageMessage, + }); + } + // Consume a rate-limit slot for the chosen provider (routing already filtered rate-limited ones) { const providerRateLimitResult = await checkProviderRateLimit( @@ -2984,52 +3491,13 @@ chat.openapi(completions, async (c) => { } } - if (!usedToken) { - throw new HTTPException(500, { - message: `No token`, - }); - } - - usedApiKeyHash = getApiKeyFingerprint(usedToken); - routingMetadata = withUsedApiKeyHash(routingMetadata, usedApiKeyHash); - - const contentFilterBlocked = - contentFilterMode === "enabled" && - contentFilterMatched && - !contentFilterRoutingApplied; - - // Preserve monitor tagging, and also tag successful reroutes triggered by a - // gateway content-filter match so the decision remains visible in logs. - const shouldTagContentFilter = - (contentFilterMode === "monitor" && contentFilterMatched) || - contentFilterRoutingApplied; - const gatewayContentFilterResponse = openAIContentFilterResult?.responses - .length - ? openAIContentFilterResult.responses - : null; - const insertLog = ( - logData: Parameters[0], - options?: Parameters[1], - ) => - _insertLog( - { - ...logData, - internalContentFilter: shouldTagContentFilter - ? true - : logData.internalContentFilter, - gatewayContentFilterResponse: - logData.gatewayContentFilterResponse ?? gatewayContentFilterResponse, - }, - options, - ); - if (contentFilterBlocked) { const contentFilterResponseId = `chatcmpl-${Date.now()}`; const contentFilterCreated = Math.floor(Date.now() / 1000); // Log the filtered request try { - await insertLog({ + await insertLogEntry({ ...createLogEntry( requestId, project, diff --git a/apps/gateway/src/chat/tools/openai-content-filter.spec.ts b/apps/gateway/src/chat/tools/openai-content-filter.spec.ts index 4c60c9532f..de7d4f103e 100644 --- a/apps/gateway/src/chat/tools/openai-content-filter.spec.ts +++ b/apps/gateway/src/chat/tools/openai-content-filter.spec.ts @@ -582,6 +582,7 @@ describe("checkOpenAIContentFilter", () => { expect(result).toEqual({ flagged: false, + unavailable: true, model: "omni-moderation-latest", upstreamRequestId: null, results: [], @@ -631,6 +632,7 @@ describe("checkOpenAIContentFilter", () => { expect(result).toEqual({ flagged: false, + unavailable: true, model: "omni-moderation-latest", upstreamRequestId: null, results: [], @@ -715,6 +717,7 @@ describe("checkOpenAIContentFilter", () => { expect(result).toEqual({ flagged: false, + unavailable: true, model: "omni-moderation-latest", upstreamRequestId: null, results: [], diff --git a/apps/gateway/src/chat/tools/openai-content-filter.ts b/apps/gateway/src/chat/tools/openai-content-filter.ts index 1a88b51ca3..c5db9aead7 100644 --- a/apps/gateway/src/chat/tools/openai-content-filter.ts +++ b/apps/gateway/src/chat/tools/openai-content-filter.ts @@ -53,6 +53,7 @@ interface OpenAIModerationResult { export interface OpenAIContentFilterCheckResult { flagged: boolean; + unavailable: boolean; model: string; upstreamRequestId: string | null; results: OpenAIModerationResult[]; @@ -339,9 +340,11 @@ function buildModerationErrorDetails(error: unknown): Record { function createFailedOpenAIContentFilterResult( upstreamRequestId: string | null = null, + unavailable = true, ): OpenAIContentFilterCheckResult { return { flagged: false, + unavailable, model: OPENAI_MODERATION_MODEL, upstreamRequestId, results: [], @@ -446,6 +449,7 @@ async function runOpenAIContentFilterRequest( flagged: (moderationResponse.results ?? []).some((result) => isOpenAIModerationResultFlagged(result), ), + unavailable: false, model: moderationResponse.model ?? OPENAI_MODERATION_MODEL, upstreamRequestId, results: moderationResponse.results ?? [], @@ -476,7 +480,7 @@ export async function checkOpenAIContentFilter( results: [], }); - return createFailedOpenAIContentFilterResult(); + return createFailedOpenAIContentFilterResult(null, false); } const signal = requestSignal @@ -530,6 +534,7 @@ export async function checkOpenAIContentFilter( return { flagged, + unavailable: moderationResults.some((result) => !result.success), model, upstreamRequestId, results, diff --git a/apps/gateway/src/chat/tools/retry-with-fallback.ts b/apps/gateway/src/chat/tools/retry-with-fallback.ts index 58b5b60104..6d8754981a 100644 --- a/apps/gateway/src/chat/tools/retry-with-fallback.ts +++ b/apps/gateway/src/chat/tools/retry-with-fallback.ts @@ -85,6 +85,7 @@ export function selectNextProvider( score: number; region?: string; excludedByContentFilter?: boolean; + excludedByModerationFailure?: boolean; }>, failedProviders: Set, modelProviders: Array<{ @@ -95,7 +96,7 @@ export function selectNextProvider( ): { providerId: string; modelName: string; region?: string } | null { const sorted = [...providerScores].sort((a, b) => a.score - b.score); for (const score of sorted) { - if (score.excludedByContentFilter) { + if (score.excludedByContentFilter || score.excludedByModerationFailure) { continue; } diff --git a/apps/gateway/src/fallback.spec.ts b/apps/gateway/src/fallback.spec.ts index 60044fa3ed..eee1d8cfcf 100644 --- a/apps/gateway/src/fallback.spec.ts +++ b/apps/gateway/src/fallback.spec.ts @@ -10,7 +10,8 @@ import { } from "vitest"; import { and, db, eq, tables, type Log } from "@llmgateway/db"; -import { getProviderDefinition } from "@llmgateway/models"; +import { logger } from "@llmgateway/logger"; +import * as modelsModule from "@llmgateway/models"; import { app } from "./app.js"; import { getApiKeyFingerprint } from "./lib/api-key-fingerprint.js"; @@ -23,6 +24,53 @@ import { clearCache, waitForLogs, readAll } from "./test-utils/test-helpers.js"; describe("fallback and error status code handling", () => { let mockServerUrl: string; + type FetchCall = [ + input: Parameters[0], + init?: Parameters[1], + ]; + + function getFetchCallUrl(input: FetchCall[0]) { + if (typeof input === "string") { + return input; + } + if (input instanceof URL) { + return input.toString(); + } + return input.url; + } + + function getFetchCallHeaders(input: FetchCall[0], init?: FetchCall[1]) { + const headers = new Headers(); + if (!(typeof input === "string" || input instanceof URL)) { + for (const [key, value] of input.headers.entries()) { + headers.set(key, value); + } + } + if (init?.headers) { + for (const [key, value] of new Headers(init.headers).entries()) { + headers.set(key, value); + } + } + return headers; + } + + function getMockServerCalls(calls: FetchCall[]) { + return calls.filter(([input]) => + getFetchCallUrl(input).startsWith(mockServerUrl), + ); + } + + function getMockServerTokens(calls: FetchCall[]) { + return getMockServerCalls(calls).map(([input, init]) => + getFetchCallHeaders(input, init).get("authorization"), + ); + } + + function getMockServerBodies(calls: FetchCall[]) { + return getMockServerCalls(calls).map(([, init]) => + typeof init?.body === "string" ? init.body : null, + ); + } async function ensureBaseFixtures() { await db @@ -68,7 +116,7 @@ describe("fallback and error status code handling", () => { async function ensureProviders(providerIds: string[]) { for (const providerId of providerIds) { - const providerDefinition = getProviderDefinition(providerId); + const providerDefinition = modelsModule.getProviderDefinition(providerId); await db .insert(tables.provider) .values({ @@ -102,6 +150,7 @@ describe("fallback and error status code handling", () => { db.delete(tables.apiKeyIamRule), db.delete(tables.apiKey), db.delete(tables.providerKey), + db.delete(tables.rateLimit), ]); await Promise.all([ @@ -209,6 +258,29 @@ describe("fallback and error status code handling", () => { ]); } + async function setupProviderKeys(providerIds: string[]) { + await ensureBaseFixtures(); + await ensureProviders(providerIds); + + await db.insert(tables.apiKey).values({ + id: "token-id", + token: "real-token", + projectId: "project-id", + description: "Test API Key", + createdBy: "user-id", + }); + + await db.insert(tables.providerKey).values( + providerIds.map((providerId) => ({ + id: `provider-key-${providerId}`, + token: `sk-${providerId}-key`, + provider: providerId, + organizationId: "org-id", + baseUrl: mockServerUrl, + })), + ); + } + async function setupSingleProviderWithMultipleKeys(provider = "together.ai") { await ensureBaseFixtures(); @@ -798,7 +870,7 @@ describe("fallback and error status code handling", () => { }), }); - expect(res.status).toBe(200); + expect(res.status).not.toBe(503); const json = await res.json(); // Verify response metadata shows correct fallback @@ -1566,7 +1638,8 @@ describe("fallback and error status code handling", () => { test("content filter hit reroutes away from content-filter providers and records it in routing metadata", async () => { await setupMultiProviderKeys(); - const togetherProvider = getProviderDefinition("together.ai"); + const togetherProvider = + modelsModule.getProviderDefinition("together.ai"); expect(togetherProvider).toBeDefined(); if (!togetherProvider) { throw new Error("Missing together.ai provider fixture"); @@ -1598,7 +1671,7 @@ describe("fallback and error status code handling", () => { }), }); - expect(res.status).toBe(200); + expect(res.status).not.toBe(503); const logs = await waitForLogs(1); expect(logs.length).toBe(1); @@ -1653,10 +1726,696 @@ describe("fallback and error status code handling", () => { } }); + test("openai moderation failure reroutes away from content-filter providers", async () => { + await setupMultiProviderKeys(); + + const togetherProvider = + modelsModule.getProviderDefinition("together.ai"); + expect(togetherProvider).toBeDefined(); + if (!togetherProvider) { + throw new Error("Missing together.ai provider fixture"); + } + + const originalContentFilterFlag = togetherProvider.contentFilter; + const previousContentFilterMode = process.env.LLM_CONTENT_FILTER_MODE; + const previousContentFilterMethod = process.env.LLM_CONTENT_FILTER_METHOD; + const previousContentFilterModels = process.env.LLM_CONTENT_FILTER_MODELS; + const previousOpenAIKey = process.env.LLM_OPENAI_API_KEY; + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockImplementation(async (input, init) => { + const url = + typeof input === "string" + ? input + : input instanceof URL + ? input.toString() + : input.url; + + if (url === "https://api.openai.com/v1/moderations") { + throw new Error("moderation fetch failed"); + } + + return new Response( + JSON.stringify({ + id: "chatcmpl-rate-limit-fallback", + object: "chat.completion", + created: Math.floor(Date.now() / 1000), + model: "gpt-oss-120b", + choices: [ + { + index: 0, + message: { role: "assistant", content: "ok" }, + finish_reason: "stop", + }, + ], + usage: { + prompt_tokens: 1, + completion_tokens: 1, + total_tokens: 2, + }, + }), + { + status: 200, + headers: { + "Content-Type": "application/json", + }, + }, + ); + }); + + togetherProvider.contentFilter = true; + process.env.LLM_CONTENT_FILTER_MODE = "enabled"; + process.env.LLM_CONTENT_FILTER_METHOD = "openai"; + process.env.LLM_CONTENT_FILTER_MODELS = "glm-4.7"; + process.env.LLM_OPENAI_API_KEY = "sk-openai-test"; + + try { + const res = await app.request("/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer real-token", + }, + body: JSON.stringify({ + model: "glm-4.7", + messages: [{ role: "user", content: "hello" }], + }), + }); + + expect(res.status).not.toBe(503); + expect(fetchSpy).toHaveBeenCalled(); + expect(getMockServerCalls(fetchSpy.mock.calls)).toHaveLength(1); + expect(getMockServerTokens(fetchSpy.mock.calls)).toContain( + "Bearer sk-cerebras-key", + ); + expect(getMockServerTokens(fetchSpy.mock.calls)).not.toContain( + "Bearer sk-together-key", + ); + + const logs = await waitForLogs(1); + expect(logs.length).toBe(1); + + const log = logs[0]; + expect(log.usedProvider).toBe("cerebras"); + expect(log.internalContentFilter).toBe(true); + expect(log.routingMetadata).toMatchObject({ + selectedProvider: "cerebras", + contentFilterUnavailable: true, + contentFilterRerouted: true, + contentFilterExcludedProviders: ["together.ai"], + }); + expect(log.routingMetadata?.providerScores).toContainEqual( + expect.objectContaining({ + providerId: "together.ai", + contentFilterProvider: true, + excludedByModerationFailure: true, + }), + ); + } finally { + fetchSpy.mockRestore(); + + if (originalContentFilterFlag === undefined) { + delete togetherProvider.contentFilter; + } else { + togetherProvider.contentFilter = originalContentFilterFlag; + } + + if (previousContentFilterMode === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODE; + } else { + process.env.LLM_CONTENT_FILTER_MODE = previousContentFilterMode; + } + + if (previousContentFilterMethod === undefined) { + delete process.env.LLM_CONTENT_FILTER_METHOD; + } else { + process.env.LLM_CONTENT_FILTER_METHOD = previousContentFilterMethod; + } + + if (previousContentFilterModels === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODELS; + } else { + process.env.LLM_CONTENT_FILTER_MODELS = previousContentFilterModels; + } + + if (previousOpenAIKey === undefined) { + delete process.env.LLM_OPENAI_API_KEY; + } else { + process.env.LLM_OPENAI_API_KEY = previousOpenAIKey; + } + } + }); + + test("openai moderation failure reroutes direct provider requests away from content-filter providers", async () => { + await setupMultiProviderKeys(); + + const togetherProvider = + modelsModule.getProviderDefinition("together.ai"); + expect(togetherProvider).toBeDefined(); + if (!togetherProvider) { + throw new Error("Missing together.ai provider fixture"); + } + + const originalContentFilterFlag = togetherProvider.contentFilter; + const previousContentFilterMode = process.env.LLM_CONTENT_FILTER_MODE; + const previousContentFilterMethod = process.env.LLM_CONTENT_FILTER_METHOD; + const previousContentFilterModels = process.env.LLM_CONTENT_FILTER_MODELS; + const previousOpenAIKey = process.env.LLM_OPENAI_API_KEY; + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockImplementation(async (input, init) => { + const url = + typeof input === "string" + ? input + : input instanceof URL + ? input.toString() + : input.url; + + if (url === "https://api.openai.com/v1/moderations") { + throw new Error("moderation fetch failed"); + } + + return new Response( + JSON.stringify({ + id: "chatcmpl-direct-provider-fallback", + object: "chat.completion", + created: Math.floor(Date.now() / 1000), + model: "gpt-oss-120b", + choices: [ + { + index: 0, + message: { role: "assistant", content: "ok" }, + finish_reason: "stop", + }, + ], + usage: { + prompt_tokens: 1, + completion_tokens: 1, + total_tokens: 2, + }, + }), + { + status: 200, + headers: { + "Content-Type": "application/json", + }, + }, + ); + }); + + togetherProvider.contentFilter = true; + process.env.LLM_CONTENT_FILTER_MODE = "enabled"; + process.env.LLM_CONTENT_FILTER_METHOD = "openai"; + delete process.env.LLM_CONTENT_FILTER_MODELS; + process.env.LLM_OPENAI_API_KEY = "sk-openai-test"; + + try { + const res = await app.request("/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer real-token", + }, + body: JSON.stringify({ + model: "together.ai/glm-4.7", + messages: [{ role: "user", content: "hello" }], + }), + }); + + expect(res.status).toBe(200); + expect(fetchSpy).toHaveBeenCalled(); + expect(getMockServerCalls(fetchSpy.mock.calls)).toHaveLength(1); + expect(getMockServerTokens(fetchSpy.mock.calls)).toContain( + "Bearer sk-cerebras-key", + ); + expect(getMockServerTokens(fetchSpy.mock.calls)).not.toContain( + "Bearer sk-together-key", + ); + + const logs = await waitForLogs(1); + expect(logs).toHaveLength(1); + expect(logs[0]?.usedProvider).toBe("cerebras"); + expect(logs[0]?.routingMetadata).toMatchObject({ + selectedProvider: "cerebras", + selectionReason: "moderation-outage-fallback", + originalProvider: "together.ai", + contentFilterUnavailable: true, + contentFilterRerouted: true, + contentFilterExcludedProviders: ["together.ai"], + }); + expect(logs[0]?.routingMetadata?.providerScores).toContainEqual( + expect.objectContaining({ + providerId: "together.ai", + contentFilterProvider: true, + excludedByModerationFailure: true, + }), + ); + } finally { + fetchSpy.mockRestore(); + + if (originalContentFilterFlag === undefined) { + delete togetherProvider.contentFilter; + } else { + togetherProvider.contentFilter = originalContentFilterFlag; + } + + if (previousContentFilterMode === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODE; + } else { + process.env.LLM_CONTENT_FILTER_MODE = previousContentFilterMode; + } + + if (previousContentFilterMethod === undefined) { + delete process.env.LLM_CONTENT_FILTER_METHOD; + } else { + process.env.LLM_CONTENT_FILTER_METHOD = previousContentFilterMethod; + } + + if (previousContentFilterModels === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODELS; + } else { + process.env.LLM_CONTENT_FILTER_MODELS = previousContentFilterModels; + } + + if (previousOpenAIKey === undefined) { + delete process.env.LLM_OPENAI_API_KEY; + } else { + process.env.LLM_OPENAI_API_KEY = previousOpenAIKey; + } + } + }); + + test("openai moderation failure reroutes auto routing away from content-filter providers", async () => { + await setupProviderKeys(["anthropic", "aws-bedrock"]); + + const anthropicProvider = modelsModule.getProviderDefinition("anthropic"); + expect(anthropicProvider).toBeDefined(); + if (!anthropicProvider) { + throw new Error("Missing anthropic provider fixture"); + } + + const originalContentFilterFlag = anthropicProvider.contentFilter; + const previousContentFilterMode = process.env.LLM_CONTENT_FILTER_MODE; + const previousContentFilterMethod = process.env.LLM_CONTENT_FILTER_METHOD; + const previousContentFilterModels = process.env.LLM_CONTENT_FILTER_MODELS; + const previousOpenAIKey = process.env.LLM_OPENAI_API_KEY; + const originalFetch = globalThis.fetch; + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockImplementation(async (input, init) => { + const url = + typeof input === "string" + ? input + : input instanceof URL + ? input.toString() + : input.url; + + if (url === "https://api.openai.com/v1/moderations") { + throw new Error("moderation fetch failed"); + } + + return await originalFetch(input as RequestInfo | URL, init); + }); + + anthropicProvider.contentFilter = true; + process.env.LLM_CONTENT_FILTER_MODE = "enabled"; + process.env.LLM_CONTENT_FILTER_METHOD = "openai"; + process.env.LLM_CONTENT_FILTER_MODELS = "auto"; + process.env.LLM_OPENAI_API_KEY = "sk-openai-test"; + + try { + const res = await app.request("/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer real-token", + }, + body: JSON.stringify({ + model: "auto", + messages: [{ role: "user", content: "hello" }], + }), + }); + + expect(res.status).toBe(200); + expect(fetchSpy).toHaveBeenCalled(); + expect(getMockServerCalls(fetchSpy.mock.calls)).toHaveLength(1); + expect(getMockServerTokens(fetchSpy.mock.calls)).toContain( + "Bearer sk-aws-bedrock-key", + ); + expect(getMockServerTokens(fetchSpy.mock.calls)).not.toContain( + "Bearer sk-anthropic-key", + ); + + const logs = await waitForLogs(1); + expect(logs).toHaveLength(1); + expect(logs[0]?.usedProvider).toBe("aws-bedrock"); + expect(logs[0]?.routingMetadata).toMatchObject({ + selectedProvider: "aws-bedrock", + contentFilterUnavailable: true, + contentFilterRerouted: true, + }); + expect( + logs[0]?.routingMetadata?.contentFilterExcludedProviders, + ).toContain("anthropic"); + expect(logs[0]?.routingMetadata?.providerScores).toContainEqual( + expect.objectContaining({ + providerId: "anthropic", + contentFilterProvider: true, + excludedByModerationFailure: true, + }), + ); + } finally { + fetchSpy.mockRestore(); + + if (originalContentFilterFlag === undefined) { + delete anthropicProvider.contentFilter; + } else { + anthropicProvider.contentFilter = originalContentFilterFlag; + } + + if (previousContentFilterMode === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODE; + } else { + process.env.LLM_CONTENT_FILTER_MODE = previousContentFilterMode; + } + + if (previousContentFilterMethod === undefined) { + delete process.env.LLM_CONTENT_FILTER_METHOD; + } else { + process.env.LLM_CONTENT_FILTER_METHOD = previousContentFilterMethod; + } + + if (previousContentFilterModels === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODELS; + } else { + process.env.LLM_CONTENT_FILTER_MODELS = previousContentFilterModels; + } + + if (previousOpenAIKey === undefined) { + delete process.env.LLM_OPENAI_API_KEY; + } else { + process.env.LLM_OPENAI_API_KEY = previousOpenAIKey; + } + } + }); + + test("openai moderation failure avoids content-filter providers during low-uptime fallback", async () => { + await setupProviderKeys(["zai", "alibaba", "novita"]); + + const novitaProvider = modelsModule.getProviderDefinition("novita"); + expect(novitaProvider).toBeDefined(); + if (!novitaProvider) { + throw new Error("Missing novita provider fixture"); + } + + const originalContentFilterFlag = novitaProvider.contentFilter; + const previousContentFilterMode = process.env.LLM_CONTENT_FILTER_MODE; + const previousContentFilterMethod = process.env.LLM_CONTENT_FILTER_METHOD; + const previousContentFilterModels = process.env.LLM_CONTENT_FILTER_MODELS; + const previousOpenAIKey = process.env.LLM_OPENAI_API_KEY; + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockImplementation(async (input, init) => { + const url = + typeof input === "string" + ? input + : input instanceof URL + ? input.toString() + : input.url; + + if (url === "https://api.openai.com/v1/moderations") { + throw new Error("moderation fetch failed"); + } + + return new Response( + JSON.stringify({ + id: "chatcmpl-low-uptime-fallback", + object: "chat.completion", + created: Math.floor(Date.now() / 1000), + model: "glm-4.6", + choices: [ + { + index: 0, + message: { role: "assistant", content: "ok" }, + finish_reason: "stop", + }, + ], + usage: { + prompt_tokens: 1, + completion_tokens: 1, + total_tokens: 2, + }, + }), + { + status: 200, + headers: { + "Content-Type": "application/json", + }, + }, + ); + }); + + novitaProvider.contentFilter = true; + process.env.LLM_CONTENT_FILTER_MODE = "enabled"; + process.env.LLM_CONTENT_FILTER_METHOD = "openai"; + process.env.LLM_CONTENT_FILTER_MODELS = "glm-4.6"; + process.env.LLM_OPENAI_API_KEY = "sk-openai-test"; + await setRoutingMetrics("glm-4.6", "zai", 50, { + routingLatency: 50, + routingThroughput: 50, + routingTotalRequests: 100, + }); + await setRoutingMetrics("glm-4.6", "alibaba", 95, { + routingLatency: 40, + routingThroughput: 40, + routingTotalRequests: 100, + }); + await setRoutingMetrics("glm-4.6", "novita", 99, { + routingLatency: 30, + routingThroughput: 30, + routingTotalRequests: 100, + }); + + try { + const res = await app.request("/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer real-token", + }, + body: JSON.stringify({ + model: "zai/glm-4.6", + messages: [{ role: "user", content: "hello" }], + }), + }); + + expect(res.status).toBe(200); + expect(fetchSpy).toHaveBeenCalled(); + expect(getMockServerCalls(fetchSpy.mock.calls)).toHaveLength(1); + expect(getMockServerTokens(fetchSpy.mock.calls)).not.toContain( + "Bearer sk-novita-key", + ); + const [lowUptimeBody] = getMockServerBodies(fetchSpy.mock.calls); + expect(lowUptimeBody).not.toBeNull(); + if (lowUptimeBody === null) { + throw new Error("Expected an upstream request body"); + } + expect(lowUptimeBody).toContain('"model":"glm-4.6"'); + expect(lowUptimeBody).not.toContain("zai-org/glm-4.6"); + + const logs = await waitForLogs(1); + expect(logs).toHaveLength(1); + expect(logs[0]?.usedProvider).toBe("alibaba"); + expect(logs[0]?.routingMetadata).toMatchObject({ + selectedProvider: "alibaba", + selectionReason: "low-uptime-fallback", + originalProvider: "zai", + originalProviderUptime: 50, + contentFilterUnavailable: true, + contentFilterRerouted: true, + }); + expect( + logs[0]?.routingMetadata?.contentFilterExcludedProviders, + ).toContain("novita"); + expect(logs[0]?.routingMetadata?.providerScores).toContainEqual( + expect.objectContaining({ + providerId: "novita", + contentFilterProvider: true, + excludedByModerationFailure: true, + }), + ); + } finally { + fetchSpy.mockRestore(); + + if (originalContentFilterFlag === undefined) { + delete novitaProvider.contentFilter; + } else { + novitaProvider.contentFilter = originalContentFilterFlag; + } + + if (previousContentFilterMode === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODE; + } else { + process.env.LLM_CONTENT_FILTER_MODE = previousContentFilterMode; + } + + if (previousContentFilterMethod === undefined) { + delete process.env.LLM_CONTENT_FILTER_METHOD; + } else { + process.env.LLM_CONTENT_FILTER_METHOD = previousContentFilterMethod; + } + + if (previousContentFilterModels === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODELS; + } else { + process.env.LLM_CONTENT_FILTER_MODELS = previousContentFilterModels; + } + + if (previousOpenAIKey === undefined) { + delete process.env.LLM_OPENAI_API_KEY; + } else { + process.env.LLM_OPENAI_API_KEY = previousOpenAIKey; + } + } + }); + + test("openai moderation failure blocks when only content-filter providers are available", async () => { + await setupKeys("together.ai"); + await db.insert(tables.rateLimit).values({ + id: "moderation-outage-rate-limit", + organizationId: "org-id", + provider: "together.ai", + model: "glm-4.7", + maxRpm: 1, + }); + + const togetherProvider = + modelsModule.getProviderDefinition("together.ai"); + expect(togetherProvider).toBeDefined(); + if (!togetherProvider) { + throw new Error("Missing together.ai provider fixture"); + } + + const originalContentFilterFlag = togetherProvider.contentFilter; + const previousContentFilterMode = process.env.LLM_CONTENT_FILTER_MODE; + const previousContentFilterMethod = process.env.LLM_CONTENT_FILTER_METHOD; + const previousContentFilterModels = process.env.LLM_CONTENT_FILTER_MODELS; + const previousOpenAIKey = process.env.LLM_OPENAI_API_KEY; + const originalFetch = globalThis.fetch; + const warnSpy = vi.spyOn(logger, "warn").mockImplementation(() => {}); + const fetchSpy = vi + .spyOn(globalThis, "fetch") + .mockImplementation(async (input, init) => { + const url = + typeof input === "string" + ? input + : input instanceof URL + ? input.toString() + : input.url; + + if (url === "https://api.openai.com/v1/moderations") { + throw new Error("moderation fetch failed"); + } + + return await originalFetch(input as RequestInfo | URL, init); + }); + + togetherProvider.contentFilter = true; + process.env.LLM_CONTENT_FILTER_MODE = "enabled"; + process.env.LLM_CONTENT_FILTER_METHOD = "openai"; + process.env.LLM_CONTENT_FILTER_MODELS = "glm-4.7"; + process.env.LLM_OPENAI_API_KEY = "sk-openai-test"; + + try { + const res = await app.request("/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: "Bearer real-token", + }, + body: JSON.stringify({ + model: "glm-4.7", + messages: [{ role: "user", content: "hello" }], + }), + }); + + expect(res.status).toBe(503); + await expect(res.json()).resolves.toMatchObject({ + error: true, + message: + "OpenAI moderation is unavailable and no eligible provider without provider-side content filtering is available.", + }); + expect(getMockServerCalls(fetchSpy.mock.calls)).toHaveLength(0); + expect(res.headers.get("X-RateLimit-Limit-Provider")).toBeNull(); + expect(res.headers.get("X-RateLimit-Limit-Provider-RPM")).toBeNull(); + expect(warnSpy).toHaveBeenCalledWith( + "Blocking request because OpenAI moderation is unavailable and selected provider is content-filter-sensitive", + expect.objectContaining({ + organizationId: "org-id", + projectId: "project-id", + apiKeyId: "token-id", + usedProvider: "together.ai", + requestedModel: "glm-4.7", + }), + ); + + const logs = await waitForLogs(1); + expect(logs).toHaveLength(1); + expect(logs[0]).toMatchObject({ + usedProvider: "together.ai", + finishReason: "upstream_error", + unifiedFinishReason: "upstream_error", + hasError: true, + }); + expect(logs[0]?.routingMetadata).toMatchObject({ + selectedProvider: "together.ai", + contentFilterUnavailable: true, + contentFilterRerouted: false, + }); + expect(logs[0]?.errorDetails).toMatchObject({ + statusCode: 503, + }); + } finally { + fetchSpy.mockRestore(); + warnSpy.mockRestore(); + + if (originalContentFilterFlag === undefined) { + delete togetherProvider.contentFilter; + } else { + togetherProvider.contentFilter = originalContentFilterFlag; + } + + if (previousContentFilterMode === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODE; + } else { + process.env.LLM_CONTENT_FILTER_MODE = previousContentFilterMode; + } + + if (previousContentFilterMethod === undefined) { + delete process.env.LLM_CONTENT_FILTER_METHOD; + } else { + process.env.LLM_CONTENT_FILTER_METHOD = previousContentFilterMethod; + } + + if (previousContentFilterModels === undefined) { + delete process.env.LLM_CONTENT_FILTER_MODELS; + } else { + process.env.LLM_CONTENT_FILTER_MODELS = previousContentFilterModels; + } + + if (previousOpenAIKey === undefined) { + delete process.env.LLM_OPENAI_API_KEY; + } else { + process.env.LLM_OPENAI_API_KEY = previousOpenAIKey; + } + } + }); + test("content filter monitor mode does not reroute away from content-filter providers", async () => { await setupMultiProviderKeys(); - const togetherProvider = getProviderDefinition("together.ai"); + const togetherProvider = + modelsModule.getProviderDefinition("together.ai"); expect(togetherProvider).toBeDefined(); if (!togetherProvider) { throw new Error("Missing together.ai provider fixture"); diff --git a/packages/actions/src/get-cheapest-from-available-providers.ts b/packages/actions/src/get-cheapest-from-available-providers.ts index d8232e9458..3289b204da 100644 --- a/packages/actions/src/get-cheapest-from-available-providers.ts +++ b/packages/actions/src/get-cheapest-from-available-providers.ts @@ -109,6 +109,8 @@ export interface RoutingMetadata { contentFilterProvider?: boolean; // Set when the provider was excluded because the gateway content filter matched excludedByContentFilter?: boolean; + // Set when the provider was excluded because gateway moderation was unavailable + excludedByModerationFailure?: boolean; }>; // Optional fields for low-uptime fallback routing originalProvider?: string; @@ -121,6 +123,8 @@ export interface RoutingMetadata { contentFilterMatched?: boolean; // Whether routing excluded content-filter providers in favor of alternatives contentFilterRerouted?: boolean; + // Whether OpenAI moderation was unavailable while selecting a provider + contentFilterUnavailable?: boolean; // Providers excluded because they are marked as content-filter providers contentFilterExcludedProviders?: string[]; // All provider attempts from retry fallback mechanism (including successful) diff --git a/packages/shared/src/components/log-card.tsx b/packages/shared/src/components/log-card.tsx index abfb1bb023..9386095121 100644 --- a/packages/shared/src/components/log-card.tsx +++ b/packages/shared/src/components/log-card.tsx @@ -66,9 +66,11 @@ interface RoutingMetadata { rate_limited?: boolean; contentFilterProvider?: boolean; excludedByContentFilter?: boolean; + excludedByModerationFailure?: boolean; }>; contentFilterMatched?: boolean; contentFilterRerouted?: boolean; + contentFilterUnavailable?: boolean; contentFilterExcludedProviders?: string[]; routing?: Array<{ provider: string; @@ -614,6 +616,12 @@ export function LogCard({ content filter )} + {score.excludedByModerationFailure && ( + + + moderation unavailable + + )} {score.score.toFixed(2)}