Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 170 additions & 48 deletions apps/gateway/src/chat/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -364,9 +364,9 @@ function isContentFilterProvider(providerId: string): boolean {

function getContentFilterRoutingDecision(
availableModelProviders: ProviderModelMapping[],
contentFilterMatched: boolean,
shouldAvoidContentFilterProviders: boolean,
): ContentFilterRoutingDecision {
if (!contentFilterMatched) {
if (!shouldAvoidContentFilterProviders) {
return {
candidates: availableModelProviders,
excludedProviders: [],
Expand Down Expand Up @@ -408,11 +408,12 @@ function getContentFilterRoutingDecision(
function addContentFilterRoutingMetadata(
routingMetadata: RoutingMetadata,
contentFilterMatched: boolean,
contentFilterUnavailable: boolean,
excludedProviders: ProviderModelMapping[],
modelId: string | undefined,
metricsMap: Map<string, ProviderMetrics>,
): RoutingMetadata {
if (!contentFilterMatched) {
if (!contentFilterMatched && !contentFilterUnavailable) {
return routingMetadata;
}

Expand All @@ -438,15 +439,18 @@ function addContentFilterRoutingMetadata(
throughput: metrics?.throughput ?? 0,
price: getProviderSelectionPrice(provider),
contentFilterProvider: true,
excludedByContentFilter: true,
...(contentFilterMatched
? { excludedByContentFilter: true }
: { excludedByModerationFailure: true }),
};
}),
...routingMetadata.providerScores,
];

return {
...routingMetadata,
contentFilterMatched: true,
...(contentFilterMatched ? { contentFilterMatched: true } : {}),
...(contentFilterUnavailable ? { contentFilterUnavailable: true } : {}),
contentFilterRerouted: contentFilterExcludedProviders.length > 0,
contentFilterExcludedProviders:
contentFilterExcludedProviders.length > 0
Expand Down Expand Up @@ -1899,8 +1903,11 @@ chat.openapi(completions, async (c) => {
const contentFilterMatched =
keywordContentFilterMatch !== null ||
openAIContentFilterResult?.flagged === true;
const shouldRerouteContentFilter =
contentFilterMode === "enabled" && contentFilterMatched;
const contentFilterUnavailable =
openAIContentFilterResult?.unavailable === true;
const shouldAvoidContentFilterProviders =
contentFilterMode === "enabled" &&
(contentFilterMatched || contentFilterUnavailable);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
let contentFilterRoutingExcludedProviders: ProviderModelMapping[] = [];
let contentFilterRoutingApplied = false;

Expand Down Expand Up @@ -2311,7 +2318,7 @@ chat.openapi(completions, async (c) => {

const contentFilterRoutingDecision = getContentFilterRoutingDecision(
availableModelProviders,
shouldRerouteContentFilter,
shouldAvoidContentFilterProviders,
);
const contentFilterPreferredProviders =
contentFilterRoutingDecision.candidates;
Expand Down Expand Up @@ -2382,6 +2389,7 @@ chat.openapi(completions, async (c) => {
...(noFallback ? { noFallback: true } : {}),
},
contentFilterMatched,
contentFilterUnavailable,
contentFilterRoutingExcludedProviders,
modelWithPricing.id,
metricsMap,
Expand Down Expand Up @@ -2569,6 +2577,7 @@ chat.openapi(completions, async (c) => {
...(noFallback ? { noFallback: true } : {}),
},
contentFilterMatched,
contentFilterUnavailable,
contentFilterRoutingExcludedProviders,
baseModelId,
metricsMap,
Expand Down Expand Up @@ -2868,6 +2877,158 @@ chat.openapi(completions, async (c) => {
);
}

if (!usedToken) {
throw new HTTPException(500, {
message: `No token`,
});
}

usedApiKeyHash = getApiKeyFingerprint(usedToken);
routingMetadata = withUsedApiKeyHash(routingMetadata, usedApiKeyHash);

const contentFilterBlocked =
contentFilterMode === "enabled" &&
contentFilterMatched &&
!contentFilterRoutingApplied;
const contentFilterSensitiveProviderBlocked =
contentFilterMode === "enabled" &&
contentFilterUnavailable &&
isContentFilterProvider(usedProvider);
Comment on lines +3252 to +3255
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Re-check fallback candidates before blocking on moderation outage

This new contentFilterSensitiveProviderBlocked gate can return 503 even when a non-content-filter provider is still eligible, because the requested-provider fallback paths (rate-limit/low-uptime rerouting in chat.ts) select the cheapest provider without applying getContentFilterRoutingDecision. In a moderation outage, that can pick a content-filter provider first and then hard-block here, causing avoidable outages instead of routing to a safe alternative. Please re-run content-filter-aware candidate selection (or reselect) before throwing this 503.

Useful? React with 👍 / 👎.

Comment on lines +3252 to +3255
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Do not treat flagged moderation results as outage-only failures

contentFilterSensitiveProviderBlocked is triggered whenever moderation is unavailable and the selected provider is content-filter-sensitive, even if moderation also returned flagged: true. checkOpenAIContentFilter can produce both states (e.g., one moderation sub-request fails while another flags), so this branch can return 503 before the normal contentFilterBlocked flow, changing a definite filter decision into an outage error for mixed-success moderation responses.

Useful? React with 👍 / 👎.

Comment on lines +3252 to +3255
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Re-route sensitive direct requests before outage blocking

This outage gate blocks as soon as the selected provider is content-filter-sensitive, but for direct-provider requests we only evaluate alternative providers in the rate-limit/low-uptime branches. In the common case where the requested sensitive provider is healthy, contentFilterSensitiveProviderBlocked becomes true and returns 503 even when noFallback is not set and a non-sensitive provider is available, creating avoidable outages during moderation API incidents.

Useful? React with 👍 / 👎.


// Preserve monitor tagging, and also tag successful reroutes triggered by a
// gateway content-filter match so the decision remains visible in logs.
const shouldTagContentFilter =
(contentFilterMode === "monitor" && contentFilterMatched) ||
contentFilterRoutingApplied;
const gatewayContentFilterResponse = openAIContentFilterResult?.responses
.length
? openAIContentFilterResult.responses
: null;
const insertLog = (
logData: Parameters<typeof _insertLog>[0],
options?: Parameters<typeof _insertLog>[1],
) =>
_insertLog(
{
...logData,
internalContentFilter: shouldTagContentFilter
? true
: logData.internalContentFilter,
gatewayContentFilterResponse:
logData.gatewayContentFilterResponse ?? gatewayContentFilterResponse,
},
options,
);

if (contentFilterSensitiveProviderBlocked) {
const moderationOutageMessage =
"OpenAI moderation is unavailable and no eligible provider without provider-side content filtering is available.";
const baseLogEntry = createLogEntry(
Comment on lines +3282 to +3285
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Evaluate content-filter blocks before provider limit checks

This new early-return path handles only contentFilterSensitiveProviderBlocked, but regular contentFilterBlocked requests are now evaluated later (after provider RPM consumption and retention-credit checks). In the contentFilterMatched && !contentFilterRoutingApplied case, the request can now decrement provider limits or fail with 402/429 before reaching the gateway content-filter response path, which is a behavior regression from the previous ordering and can incorrectly throttle/deny blocked prompts.

Useful? React with 👍 / 👎.

requestId,
project,
apiKey,
providerKey?.id,
usedModelFormatted,
usedModelMapping,
usedProvider,
initialRequestedModel,
requestedProvider,
messages,
temperature,
max_tokens,
top_p,
frequency_penalty,
presence_penalty,
reasoning_effort,
reasoning_max_tokens,
effort,
response_format,
tools,
tool_choice,
source,
customHeaders,
debugMode,
userAgent,
image_config,
routingMetadata,
rawBody,
null,
null,
null,
undefined,
undefined,
);

logger.warn(
"Blocking request because OpenAI moderation is unavailable and selected provider is content-filter-sensitive",
{
requestId,
organizationId: project.organizationId,
projectId: project.id,
apiKeyId: apiKey.id,
usedProvider,
usedModel,
requestedModel: initialRequestedModel,
routingMetadata,
},
);

try {
await insertLogEntry({
...baseLogEntry,
duration: 0,
timeToFirstToken: null,
timeToFirstReasoningToken: null,
responseSize: 0,
content: null,
reasoningContent: null,
finishReason: "upstream_error",
promptTokens: null,
completionTokens: null,
totalTokens: null,
reasoningTokens: null,
cachedTokens: null,
hasError: true,
streamed: !!stream,
canceled: false,
errorDetails: {
statusCode: 503,
statusText: "Service Unavailable",
responseText: moderationOutageMessage,
},
cachedInputCost: null,
requestCost: null,
webSearchCost: null,
imageInputTokens: null,
imageOutputTokens: null,
imageInputCost: null,
imageOutputCost: null,
estimatedCost: false,
discount: null,
dataStorageCost: "0",
cached: false,
toolResults: null,
unifiedFinishReason: "upstream_error",
});
} catch (error) {
logger.error(
"Failed to persist moderation outage block log",
{
requestId,
organizationId: project.organizationId,
projectId: project.id,
apiKeyId: apiKey.id,
usedProvider,
},
error as Error,
);
}

throw new HTTPException(503, {
message: moderationOutageMessage,
});
}

// Consume a rate-limit slot for the chosen provider (routing already filtered rate-limited ones)
{
const providerRateLimitResult = await checkProviderRateLimit(
Expand Down Expand Up @@ -2971,52 +3132,13 @@ chat.openapi(completions, async (c) => {
}
}

if (!usedToken) {
throw new HTTPException(500, {
message: `No token`,
});
}

usedApiKeyHash = getApiKeyFingerprint(usedToken);
routingMetadata = withUsedApiKeyHash(routingMetadata, usedApiKeyHash);

const contentFilterBlocked =
contentFilterMode === "enabled" &&
contentFilterMatched &&
!contentFilterRoutingApplied;

// Preserve monitor tagging, and also tag successful reroutes triggered by a
// gateway content-filter match so the decision remains visible in logs.
const shouldTagContentFilter =
(contentFilterMode === "monitor" && contentFilterMatched) ||
contentFilterRoutingApplied;
const gatewayContentFilterResponse = openAIContentFilterResult?.responses
.length
? openAIContentFilterResult.responses
: null;
const insertLog = (
logData: Parameters<typeof _insertLog>[0],
options?: Parameters<typeof _insertLog>[1],
) =>
_insertLog(
{
...logData,
internalContentFilter: shouldTagContentFilter
? true
: logData.internalContentFilter,
gatewayContentFilterResponse:
logData.gatewayContentFilterResponse ?? gatewayContentFilterResponse,
},
options,
);

if (contentFilterBlocked) {
const contentFilterResponseId = `chatcmpl-${Date.now()}`;
const contentFilterCreated = Math.floor(Date.now() / 1000);

// Log the filtered request
try {
await insertLog({
await insertLogEntry({
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Log content-filter blocks with moderation metadata

Route this branch through the local insertLog(...) wrapper instead of calling insertLogEntry(...) directly; the wrapper is the only place that attaches gatewayContentFilterResponse (and content-filter tagging) to log records. With the current call site, requests blocked by gateway content filtering are persisted without the moderation payload, which regresses observability/debugging for blocked prompts and can hide why the filter fired in downstream log consumers.

Useful? React with 👍 / 👎.

...createLogEntry(
requestId,
project,
Expand Down
3 changes: 3 additions & 0 deletions apps/gateway/src/chat/tools/openai-content-filter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,7 @@ describe("checkOpenAIContentFilter", () => {

expect(result).toEqual({
flagged: false,
unavailable: true,
model: "omni-moderation-latest",
upstreamRequestId: null,
results: [],
Expand Down Expand Up @@ -631,6 +632,7 @@ describe("checkOpenAIContentFilter", () => {

expect(result).toEqual({
flagged: false,
unavailable: true,
model: "omni-moderation-latest",
upstreamRequestId: null,
results: [],
Expand Down Expand Up @@ -715,6 +717,7 @@ describe("checkOpenAIContentFilter", () => {

expect(result).toEqual({
flagged: false,
unavailable: true,
model: "omni-moderation-latest",
upstreamRequestId: null,
results: [],
Expand Down
7 changes: 6 additions & 1 deletion apps/gateway/src/chat/tools/openai-content-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ interface OpenAIModerationResult {

export interface OpenAIContentFilterCheckResult {
flagged: boolean;
unavailable: boolean;
model: string;
upstreamRequestId: string | null;
results: OpenAIModerationResult[];
Expand Down Expand Up @@ -339,9 +340,11 @@ function buildModerationErrorDetails(error: unknown): Record<string, string> {

function createFailedOpenAIContentFilterResult(
upstreamRequestId: string | null = null,
unavailable = true,
): OpenAIContentFilterCheckResult {
return {
flagged: false,
unavailable,
model: OPENAI_MODERATION_MODEL,
upstreamRequestId,
results: [],
Expand Down Expand Up @@ -446,6 +449,7 @@ async function runOpenAIContentFilterRequest(
flagged: (moderationResponse.results ?? []).some((result) =>
isOpenAIModerationResultFlagged(result),
),
unavailable: false,
model: moderationResponse.model ?? OPENAI_MODERATION_MODEL,
upstreamRequestId,
results: moderationResponse.results ?? [],
Expand Down Expand Up @@ -476,7 +480,7 @@ export async function checkOpenAIContentFilter(
results: [],
});

return createFailedOpenAIContentFilterResult();
return createFailedOpenAIContentFilterResult(null, false);
}
Comment on lines 470 to 484
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the moderationRequests.length === 0 path, the code returns createFailedOpenAIContentFilterResult(null, false). Since this isn’t actually a failure case, consider introducing a separate helper (or renaming the existing one) to represent a successful “no moderation needed” result, so that a helper named “Failed” is never used to return unavailable: false and the distinction between “moderation unavailable” and “moderation not required” stays explicit.

Copilot uses AI. Check for mistakes.

const signal = requestSignal
Expand Down Expand Up @@ -530,6 +534,7 @@ export async function checkOpenAIContentFilter(

return {
flagged,
unavailable: moderationResults.some((result) => !result.success),
model,
upstreamRequestId,
results,
Expand Down
3 changes: 2 additions & 1 deletion apps/gateway/src/chat/tools/retry-with-fallback.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ export function selectNextProvider(
score: number;
region?: string;
excludedByContentFilter?: boolean;
excludedByModerationFailure?: boolean;
}>,
failedProviders: Set<string>,
modelProviders: Array<{
Expand All @@ -95,7 +96,7 @@ export function selectNextProvider(
): { providerId: string; modelName: string; region?: string } | null {
const sorted = [...providerScores].sort((a, b) => a.score - b.score);
for (const score of sorted) {
if (score.excludedByContentFilter) {
if (score.excludedByContentFilter || score.excludedByModerationFailure) {
continue;
}
Comment on lines 97 to 101
Copy link

Copilot AI Apr 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

selectNextProvider() now skips scores marked excludedByModerationFailure, but there’s no accompanying unit test verifying this new exclusion behavior (the existing spec only covers excludedByContentFilter). Please add a test case to ensure providers excluded due to moderation unavailability are never selected during fallback retries.

Copilot uses AI. Check for mistakes.

Expand Down
Loading
Loading