diff --git a/apps/gateway/src/chat/chat.ts b/apps/gateway/src/chat/chat.ts index 27646eff4..33835fb9d 100644 --- a/apps/gateway/src/chat/chat.ts +++ b/apps/gateway/src/chat/chat.ts @@ -3802,34 +3802,20 @@ chat.openapi(completions, async (c) => { } } - // For Moonshot provider, enrich assistant messages with cached reasoning_content - // This is needed for multi-turn tool call conversations with thinking models - // Moonshot requires reasoning_content in assistant messages with tool_calls + // Moonshot's thinking models reject assistant tool_call messages that lack + // reasoning_content. If the client echoes `reasoning` (OpenAI-style) we map + // it across; otherwise fall back to an empty string so multi-turn tool + // conversations don't 400. if (usedProvider === "moonshot") { - const { redisClient } = await import("@llmgateway/cache"); for (const message of messages) { if ( message.role === "assistant" && message.tool_calls && Array.isArray(message.tool_calls) && message.tool_calls.length > 0 && - !(message as any).reasoning_content // Only add if not already present + !(message as any).reasoning_content ) { - // Get reasoning_content from the first tool call (all tool calls share the same reasoning) - const firstToolCall = message.tool_calls[0]; - if (firstToolCall?.id) { - try { - const cachedReasoningContent = await redisClient.get( - `reasoning_content:${firstToolCall.id}`, - ); - if (cachedReasoningContent) { - // Add reasoning_content to the message for Moonshot - (message as any).reasoning_content = cachedReasoningContent; - } - } catch { - // Silently fail - reasoning_content caching is optional - } - } + (message as any).reasoning_content = (message as any).reasoning ?? ""; } } } diff --git a/apps/gateway/src/chat/tools/parse-provider-response.ts b/apps/gateway/src/chat/tools/parse-provider-response.ts index f2e6b2fc4..00dee3681 100644 --- a/apps/gateway/src/chat/tools/parse-provider-response.ts +++ b/apps/gateway/src/chat/tools/parse-provider-response.ts @@ -835,31 +835,6 @@ export function parseProviderResponse( } } - // Cache reasoning_content for Moonshot thinking models when tool_calls are present - // This is needed for multi-turn tool call conversations because Moonshot requires - // reasoning_content to be included in assistant messages with tool_calls - if ( - usedProvider === "moonshot" && - reasoningContent && - toolResults && - Array.isArray(toolResults) && - toolResults.length > 0 - ) { - for (const toolCall of toolResults) { - if (toolCall.id) { - redisClient - .setex( - `reasoning_content:${toolCall.id}`, - 86400, // 1 day expiration - reasoningContent, - ) - .catch((err) => { - logger.error("Failed to cache reasoning_content", { err }); - }); - } - } - } - // For non-reasoning models that return their answer in reasoning_content // (e.g. CanopyWave Mimo), move reasoning to content so the response is visible. if (!supportsReasoning && !content && reasoningContent) {