diff --git a/apps/gateway/src/responses.e2e.ts b/apps/gateway/src/responses.e2e.ts
new file mode 100644
index 0000000000..ed6bda727b
--- /dev/null
+++ b/apps/gateway/src/responses.e2e.ts
@@ -0,0 +1,271 @@
+import "dotenv/config";
+import { beforeAll, beforeEach, describe, expect, test } from "vitest";
+
+import { app } from "@/app.js";
+import {
+	beforeAllHook,
+	beforeEachHook,
+	generateTestRequestId,
+	getConcurrentTestOptions,
+	getTestOptions,
+	logMode,
+	testModels,
+	toolCallModels,
+	validateLogByRequestId,
+} from "@/chat-helpers.e2e.js";
+
+/**
+ * Keep only the first entry per provider (the part of `model` before the
+ * first "/"), preserving input order. Limits CI cost while still exercising
+ * the Responses API conversion layer once for every provider.
+ */
+function oneModelPerProvider<T extends { model: string }>(list: T[]): T[] {
+	const claimed = new Set<string>();
+	return list.filter(({ model }) => {
+		const slash = model.indexOf("/");
+		const provider = slash === -1 ? model : model.slice(0, slash);
+		const isFirst = !claimed.has(provider);
+		claimed.add(provider);
+		return isFirst;
+	});
+}
+
+// Tool-call round-trip exclusions: the provider adapter behind these models
+// does not emit stable tool_call ids, so the id from the first turn is not
+// recognized when sent back as tool_call_id and the second turn fails. This
+// is an adapter-level issue, unrelated to the Responses API conversion layer.
+const TOOL_CALL_DENYLIST = new Set<string>(["bytedance/gpt-oss-120b"]);
+
+const responsesTestModels = oneModelPerProvider(testModels);
+const isToolCallAllowed = (entry: { model: string }): boolean =>
+	!TOOL_CALL_DENYLIST.has(entry.model);
+const responsesToolCallModels =
+	oneModelPerProvider(toolCallModels).filter(isToolCallAllowed);
+
+interface ResponsesOutputItem {
+	type: string; // item discriminator, e.g. "message" or "function_call"
+	role?: string;
+	content?: Array<{ type: string; text?: string }>; // message content parts
+	call_id?: string; // call_id, name, arguments: read from function_call items
+	name?: string;
+	arguments?: string; // JSON-encoded argument object
+}
+
+/**
+ * Join the text of all `output_text` parts inside `message` items, in order.
+ */
+function getOutputText(json: { output?: ResponsesOutputItem[] }): string {
+	return (json.output ?? [])
+		.filter((item) => item.type === "message" && Array.isArray(item.content))
+		.flatMap((item) => item.content ?? [])
+		.flatMap((part) =>
+			part.type === "output_text" && typeof part.text === "string"
+				? [part.text]
+				: [],
+		)
+		.join("");
+}
+
+function getFunctionCall(json: {
+	output?: ResponsesOutputItem[];
+}): ResponsesOutputItem | undefined {
+	return json.output?.find((it) => it.type === "function_call"); // first hit
+}
+
+async function postResponses(body: unknown, requestId: string) {
+	// JSON POST to /v1/responses, tagged with requestId and x-no-fallback.
+	const headers = {
+		"Content-Type": "application/json",
+		"x-request-id": requestId,
+		"x-no-fallback": "true",
+		Authorization: `Bearer real-token`,
+	};
+	const payload = JSON.stringify(body);
+	const init = { method: "POST", headers, body: payload };
+	return await app.request("/v1/responses", init);
+}
+
+describe("e2e", getConcurrentTestOptions(), () => { // Responses API e2e: single-turn, multi-turn, tool calls
+	beforeAll(beforeAllHook);
+
+	beforeEach(beforeEachHook);
+
+	test("empty", () => { // placeholder; presumably guards against an empty suite — confirm
+		expect(true).toBe(true);
+	});
+
+	test.each(responsesTestModels)( // one run per selected provider model
+		"responses single-turn $model",
+		getTestOptions(),
+		async ({ model }) => {
+			const requestId = generateTestRequestId();
+			const res = await postResponses(
+				{
+					model,
+					input: "Say hello in one short sentence.",
+				},
+				requestId,
+			);
+
+			const json = await res.json();
+			if (logMode) {
+				console.log(
+					"responses single-turn response:",
+					JSON.stringify(json, null, 2),
+				);
+			}
+
+			expect(res.status).toBe(200);
+			expect(json).toHaveProperty("id");
+			expect(typeof json.id).toBe("string");
+			expect(json.id.startsWith("resp_")).toBe(true); // Responses-style id prefix
+			expect(Array.isArray(json.output)).toBe(true);
+
+			const text = getOutputText(json);
+			expect(text.length).toBeGreaterThan(0); // some output_text must be present
+
+			expect(json).toHaveProperty("usage");
+			expect(typeof json.usage.input_tokens).toBe("number");
+			expect(typeof json.usage.output_tokens).toBe("number");
+			expect(json.usage.input_tokens).toBeGreaterThan(0);
+			expect(json.usage.output_tokens).toBeGreaterThan(0);
+
+			await validateLogByRequestId(requestId); // shared helper; presumably checks the stored log — confirm
+		},
+	);
+
+	test.each(responsesTestModels)( // second turn must recall the first via previous_response_id
+		"responses multi-turn $model",
+		getTestOptions(),
+		async ({ model }) => {
+			const firstRequestId = generateTestRequestId();
+			const firstRes = await postResponses(
+				{
+					model,
+					input:
+						"My name is Ada. Please remember it. Reply with a brief acknowledgement.",
+				},
+				firstRequestId,
+			);
+			const firstJson = await firstRes.json();
+			if (logMode) {
+				console.log(
+					"responses multi-turn first:",
+					JSON.stringify(firstJson, null, 2),
+				);
+			}
+			expect(firstRes.status).toBe(200);
+			expect(typeof firstJson.id).toBe("string");
+
+			const secondRequestId = generateTestRequestId();
+			const secondRes = await postResponses(
+				{
+					model,
+					input: "What is my name? Reply with just the name.",
+					previous_response_id: firstJson.id, // chain onto the first turn
+				},
+				secondRequestId,
+			);
+			const secondJson = await secondRes.json();
+			if (logMode) {
+				console.log(
+					"responses multi-turn second:",
+					JSON.stringify(secondJson, null, 2),
+				);
+			}
+			expect(secondRes.status).toBe(200);
+			const text = getOutputText(secondJson);
+			expect(text.toLowerCase()).toContain("ada"); // the name appears only in the first turn
+		},
+	);
+
+	test.each(responsesToolCallModels)( // function_call, then function_call_output round trip
+		"responses tool calls $model",
+		getTestOptions(),
+		async ({ model }) => {
+			const tools = [
+				{
+					type: "function",
+					name: "get_weather",
+					description: "Get the current weather for a given city",
+					parameters: {
+						type: "object",
+						properties: {
+							city: {
+								type: "string",
+								description: "The city name to get weather for",
+							},
+						},
+						required: ["city"],
+					},
+				},
+			];
+
+			const firstRequestId = generateTestRequestId();
+			const firstRes = await postResponses(
+				{
+					model,
+					input: [
+						{
+							role: "user",
+							content: "What's the weather like in San Francisco?",
+						},
+					],
+					tools,
+					tool_choice: "required", // the first turn must produce a tool call
+				},
+				firstRequestId,
+			);
+			const firstJson = await firstRes.json();
+			if (logMode) {
+				console.log(
+					"responses tool calls first:",
+					JSON.stringify(firstJson, null, 2),
+				);
+			}
+
+			expect(firstRes.status).toBe(200);
+			const fnCall = getFunctionCall(firstJson);
+			expect(fnCall).toBeDefined();
+			expect(fnCall?.name).toBe("get_weather");
+			expect(typeof fnCall?.call_id).toBe("string");
+			expect(typeof fnCall?.arguments).toBe("string");
+			const parsedArgs = JSON.parse(fnCall?.arguments ?? "{}"); // arguments was asserted to be a string above; "{}" only satisfies typing
+			expect(typeof parsedArgs.city).toBe("string");
+			expect(parsedArgs.city.toLowerCase()).toContain("san francisco");
+
+			const secondRequestId = generateTestRequestId();
+			const secondRes = await postResponses(
+				{
+					model,
+					previous_response_id: firstJson.id, // input below carries only the tool result
+					input: [
+						{
+							type: "function_call_output",
+							call_id: fnCall?.call_id,
+							output: "72F and sunny",
+						},
+					],
+					tools,
+				},
+				secondRequestId,
+			);
+			const secondJson = await secondRes.json();
+			if (logMode) {
+				console.log(
+					"responses tool calls second:",
+					JSON.stringify(secondJson, null, 2),
+				);
+			}
+
+			expect(secondRes.status).toBe(200);
+			const finalText = getOutputText(secondJson).toLowerCase();
+			expect(finalText.length).toBeGreaterThan(0);
+			expect( // any reference to the tool result is accepted
+				finalText.includes("sunny") ||
+					finalText.includes("72") ||
+					finalText.includes("weather"),
+			).toBe(true);
+		},
+	);
+});
diff --git a/apps/gateway/src/responses/tools/convert-chat-to-responses.ts b/apps/gateway/src/responses/tools/convert-chat-to-responses.ts
index 965d1434e1..e1f5f24905 100644
--- a/apps/gateway/src/responses/tools/convert-chat-to-responses.ts
+++ b/apps/gateway/src/responses/tools/convert-chat-to-responses.ts
@@ -112,8 +112,17 @@ export function convertChatResponseToResponses(
 		}
 	}
 
-	// Add message output
-	if (message?.content !== null && message?.content !== undefined) {
+	// Add message output. Skip if content is empty/whitespace-only — many
+	// providers return content: "" alongside tool_calls, and emitting an empty
+	// message item pollutes stored conversations: on replay via
+	// previous_response_id it becomes a stray assistant message that separates
+	// the tool_calls assistant from its tool result, causing strict providers
+	// (deepseek, bytedance, aws-bedrock, kimi, etc.) to reject the request.
+	if (
+		message?.content !== null &&
+		message?.content !== undefined &&
+		message.content.trim() !== ""
+	) {
 		const contentParts: Array<Record<string, unknown>> = [
 			{
 				type: "output_text",
diff --git a/apps/gateway/src/responses/tools/convert-responses-to-chat.ts b/apps/gateway/src/responses/tools/convert-responses-to-chat.ts
index 738ead0da7..9cd2be6ba2 100644
--- a/apps/gateway/src/responses/tools/convert-responses-to-chat.ts
+++ b/apps/gateway/src/responses/tools/convert-responses-to-chat.ts
@@ -38,7 +38,15 @@ export function convertResponsesInputToMessages(
 	while (i < input.length) {
 		const item = input[i]!;
 
-		// function_call items -> collect consecutive ones into assistant tool_calls
+		// function_call items -> collect consecutive ones into assistant tool_calls.
+		// Also fold any immediately-following assistant `message` items into the
+		// same assistant message: in the Responses API the tool_calls and the
+		// assistant text are emitted as separate output items, but in chat
+		// completions they belong on a single assistant message. Splitting them
+		// inserts a stray assistant message between the tool_calls and the tool
+		// result, which strict providers (deepseek family, bytedance, etc.)
+		// reject with "assistant message with tool_calls must be followed by
+		// tool messages".
 		if ("type" in item && item.type === "function_call") {
 			const toolCalls: ChatMessage["tool_calls"] = [];
 
@@ -58,9 +66,29 @@ export function convertResponsesInputToMessages(
 				i++;
 			}
 
+			// Fold trailing assistant message content (if any) into this same
+			// assistant message rather than emitting it as a separate message.
+			let foldedContent: string | null = null;
+			while (i < input.length) {
+				const next = input[i] as Record<string, unknown> | undefined;
+				if (
+					next &&
+					next.type === "message" &&
+					(next.role === "assistant" || next.role === undefined)
+				) {
+					const text = extractTextFromContent(next.content);
+					if (text) {
+						foldedContent = (foldedContent ?? "") + text;
+					}
+					i++;
+					continue;
+				}
+				break;
+			}
+
 			messages.push({
 				role: "assistant",
-				content: null,
+				content: foldedContent,
 				tool_calls: toolCalls,
 			});
 			continue;
@@ -130,6 +158,35 @@ export function convertResponsesInputToMessages(
 	return messages;
 }
 
+/**
+ * Flatten a Responses API message `content` value into plain text. Used
+ * when folding a trailing assistant text message into a tool_calls
+ * assistant message.
+ *
+ * @param content - A string, an array of content parts, null, or undefined.
+ * @returns The string itself when `content` is a string; for an array, the
+ *   string `text` of every object part joined in order with no separator;
+ *   "" for any other value.
+ */
+function extractTextFromContent(content: unknown): string {
+	if (typeof content === "string") {
+		return content;
+	}
+	if (!Array.isArray(content)) {
+		// Covers null, undefined, and any other non-array value.
+		return "";
+	}
+	// Parts that are not objects, or lack a string `text`, contribute "".
+	const pieces = content.map((part: unknown): string => {
+		if (typeof part !== "object" || part === null) {
+			return "";
+		}
+		const candidate = (part as { text?: unknown }).text;
+		return typeof candidate === "string" ? candidate : "";
+	});
+	return pieces.join("");
+}
+
 /**
  * Convert Responses API content types to chat completions content types.
  * input_text/output_text -> text, input_image -> image_url