Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 192 additions & 2 deletions dwctl/src/responses/transition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,22 @@ fn translate_input_items(items: &[Value]) -> Result<Vec<Value>, String> {
// `role`/`content` would be a regression.
let mut translated = serde_json::Map::new();
for (k, v) in obj {
if k != "type" {
translated.insert(k.clone(), v.clone());
match k.as_str() {
// Drop the Open Responses item discriminator; a
// chat-completions message carries no top-level `type`.
"type" => {}
// Rewrite typed content parts (`input_text`,
// `input_image`, …) into their chat-completions
// equivalents. Verbatim forwarding left Responses-only
// part types in place, which the loopback
// /v1/chat/completions handler's typed deserialization
// rejected with a 422.
"content" => {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Correct placement of the content translation hook. By handling this in the key-match within the message object iteration, you ensure all messages get their content translated regardless of role.

translated.insert(k.clone(), translate_message_content(v));
}
_ => {
translated.insert(k.clone(), v.clone());
}
}
}
out.push(Value::Object(translated));
Expand Down Expand Up @@ -271,6 +285,78 @@ fn translate_input_items(items: &[Value]) -> Result<Vec<Value>, String> {
Ok(out)
}

/// Translate an Open Responses message `content` value into chat-completions
/// shape.
///
/// String content is valid in both schemas and passes through untouched. An
/// array of typed content parts is rewritten part-by-part: the upstream
/// chat-completions `ContentPart` enum only accepts `text` and `image_url`,
/// so Responses-only part types (`input_text`, `output_text`, `input_image`,
/// `refusal`, …) must be mapped before the body is fired at the loopback
/// `/v1/chat/completions` handler.
///
/// This mirrors onwards' typed `convert_message_content`
/// (`onwards/src/strict/adapter.rs`) at the JSON layer this translator
/// operates on. Without it, array-form content was forwarded verbatim and the
/// loopback handler's `Json<ChatCompletionRequest>` extractor rejected
/// `input_text` parts with a 422 (empty body), failing every array-content
/// request that took the multi-step executor path.
fn translate_message_content(content: &Value) -> Value {
let Value::Array(parts) = content else {
return content.clone();
};
let translated: Vec<Value> = parts.iter().filter_map(translate_content_part).collect();

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Clean use of filter_map to both translate and drop unrepresentable parts in one pass. This is idiomatic Rust and matches the pattern described in the doc comment.

// A message whose parts all dropped collapses to empty-string content
// rather than an empty array, matching the adapter's behavior and keeping
// the upstream message well-formed.
if translated.is_empty() {
Value::String(String::new())
} else {
Value::Array(translated)
}
}

/// Map one Open Responses content part to its chat-completions equivalent.
///
/// Returns `None` for parts with no chat-completions representation
/// (`input_file`, unknown types); these are dropped with a trace rather than
/// forwarded, since the upstream schema would reject them. Already
/// chat-shaped parts (`text`, `image_url`) pass through unchanged so a client
/// that sent chat-completions content directly still works.
Comment on lines +321 to +325
fn translate_content_part(part: &Value) -> Option<Value> {
match part.get("type").and_then(|t| t.as_str()) {
Some("input_text") | Some("output_text") => {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-blocking: Consider annotation preservation for output_text parts.

Why it matters: The test at line 840 shows an output_text part with "annotations":[] being passed. The current implementation extracts only the text field and drops any annotations array. While most upstreams may not use annotations, the OpenAI Responses API spec includes annotations for things like citations, and dropping them could lose metadata that clients expect to be preserved round-trip.

Suggested fix: If annotation preservation is desired, you could add the annotations field to the translated part:

Some("input_text") | Some("output_text") => {
    let text = part.get("text").and_then(|t| t.as_str()).unwrap_or_default();
    let mut result = serde_json::Map::new();
    result.insert("type".to_string(), json!("text"));
    result.insert("text".to_string(), json!(text));
    // Optionally preserve annotations if present
    if let Some(annotations) = part.get("annotations") {
        result.insert("annotations".to_string(), annotations.clone());
    }
    Some(json!({"type": "text", "text": text})) // or Value::Object(result)
}

However, note that chat-completions ContentPart may not accept annotations either - verify what the upstream schema expects before adding this.

let text = part.get("text").and_then(|t| t.as_str()).unwrap_or_default();
Some(json!({"type": "text", "text": text}))
}
Some("input_image") => {
// Responses carries the image as a bare `image_url` string;
// chat-completions wraps it in an object with optional detail.
let url = part.get("image_url").and_then(|u| u.as_str())?;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-blocking: The input_image translation only handles the image_url field, but according to OpenAI's Responses API documentation, input_image can also be expressed with file_id instead:

{"type": "input_image", "file_id": "file-abc123"}

When image_url is missing (e.g., when file_id is used), the ? operator returns None, silently dropping the content part. This may be acceptable for now if file_id-based images are rare in your use cases, but it's worth noting the gap.

Why it matters: Users sending input_image with file_id (uploaded via the Files API for vision) will have those image parts silently dropped, potentially breaking image-based workflows that rely on file uploads rather than URLs.

Suggested fix: Either:

  1. Add support for file_id by translating it to the chat-completions equivalent ({"type": "image_url", "image_url": {"file_id": "..."}}), or
  2. Add explicit logging when dropping input_image with file_id to make the limitation observable, or
  3. Document this limitation in the module-level docs

For reference, the OpenAI docs show both forms are valid:

// URL form
{"type": "input_image", "image_url": "https://..."}

// File ID form  
{"type": "input_image", "file_id": "file-abc123"}

let mut image_url = serde_json::Map::new();
image_url.insert("url".to_string(), json!(url));
if let Some(detail) = part.get("detail").and_then(|d| d.as_str()) {
image_url.insert("detail".to_string(), json!(detail));
}
Some(json!({"type": "image_url", "image_url": Value::Object(image_url)}))
}
Some("refusal") => {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-blocking: Verify refusaltext mapping is acceptable.

Why it matters: The refusal part type carries model refusal messages (typically when the model declines to answer due to safety policies). Mapping it to plain text loses the semantic distinction between normal response content and a refusal. Clients that inspect the response structure might expect to handle refusals differently.

That said, this appears intentional - the comment says parts with "no chat-completions representation" are dropped or mapped. Since chat-completions doesn't have a refusal part type, mapping to text is pragmatic. The alternative would be to drop it entirely (like input_file), but that would lose the refusal message content entirely, which is worse.

Suggested fix: No change needed if this behavior is intentional. Consider adding a comment noting that refusal semantics are lost in the translation if this is a known limitation.

let text = part.get("refusal").and_then(|t| t.as_str()).unwrap_or_default();
Some(json!({"type": "text", "text": text}))
}
// Already chat-completions-shaped — forward unchanged.
Some("text") | Some("image_url") => Some(part.clone()),
Some("input_file") => {
tracing::debug!("dropping 'input_file' content part during /v1/responses translation");
None
}
other => {
tracing::warn!(part_type = ?other, "dropping unknown content part during /v1/responses translation");
None
}
}
}

/// Normalize the `tools` array into chat-completions wrapped shape:
/// `[{type:"function", function:{name, description, parameters, …}}]`.
///
Expand Down Expand Up @@ -724,6 +810,110 @@ mod tests {
assert_eq!(p.initial_messages[4]["tool_call_id"], "call_a");
}

#[test]
fn translates_input_text_content_parts_to_chat_text() {
// The production 422 repro: a Responses client sends array-form
// content with `input_text` parts. Verbatim forwarding left the
// `input_text` type in place, which the loopback
// /v1/chat/completions handler rejected with a 422 (empty body).
// After translation the part must be chat-completions `text`.
let body = r#"{
"model": "m",
"input": [
{"type":"message","role":"user","content":[{"type":"input_text","text":"hello"}]}
]
}"#;
let p = parse_parent_request(body).unwrap();
let msg = &p.initial_messages[0];
assert_eq!(msg["role"], "user");
let parts = msg["content"].as_array().unwrap();
assert_eq!(parts.len(), 1);
assert_eq!(parts[0]["type"], "text");
assert_eq!(parts[0]["text"], "hello");
}

#[test]
fn translates_output_text_content_parts_to_chat_text() {
let body = r#"{
"model": "m",
"input": [
{"type":"message","role":"assistant","content":[{"type":"output_text","text":"hi","annotations":[]}]}
]
}"#;
let p = parse_parent_request(body).unwrap();
let parts = p.initial_messages[0]["content"].as_array().unwrap();
assert_eq!(parts.len(), 1);
assert_eq!(parts[0]["type"], "text");
assert_eq!(parts[0]["text"], "hi");
}

#[test]
fn passes_string_content_through_unchanged() {
// String content is valid in both schemas — it must not be
// rewritten into an array.
let body = r#"{
"model": "m",
"input": [{"type":"message","role":"user","content":"plain string"}]
}"#;
let p = parse_parent_request(body).unwrap();
assert_eq!(p.initial_messages[0]["content"], "plain string");
}

#[test]
fn translates_input_image_content_part_to_image_url() {
let body = r#"{
"model": "m",
"input": [
{"type":"message","role":"user","content":[
{"type":"input_image","image_url":"https://x/y.png","detail":"low"}
]}
]
}"#;
let p = parse_parent_request(body).unwrap();
let parts = p.initial_messages[0]["content"].as_array().unwrap();
assert_eq!(parts[0]["type"], "image_url");
assert_eq!(parts[0]["image_url"]["url"], "https://x/y.png");
assert_eq!(parts[0]["image_url"]["detail"], "low");
}

#[test]
fn mixed_content_parts_keep_representable_drop_rest() {
// input_file has no chat-completions representation; it is dropped
// while the representable input_text part is kept.
let body = r#"{
"model": "m",
"input": [
{"type":"message","role":"user","content":[
{"type":"input_text","text":"keep me"},
{"type":"input_file","file_id":"f1"}
]}
]
}"#;
let p = parse_parent_request(body).unwrap();
let parts = p.initial_messages[0]["content"].as_array().unwrap();
assert_eq!(parts.len(), 1);
assert_eq!(parts[0]["type"], "text");
assert_eq!(parts[0]["text"], "keep me");
}

#[test]
fn all_unrepresentable_content_parts_collapse_to_empty_string() {
// A message whose parts all drop becomes empty-string content
// rather than an empty array, keeping the upstream message
// well-formed instead of triggering a fresh validation error.
let body = r#"{
"model": "m",
"input": [
{"type":"message","role":"user","content":[
{"type":"input_file","file_id":"f1"},
{"type":"some_future_type","blob":"x"}
]}
]
}"#;
let p = parse_parent_request(body).unwrap();
assert_eq!(p.initial_messages[0]["content"], "");
}

#[test]
fn drops_reasoning_input_items() {
// Reasoning items have no chat-completions equivalent — chat
Expand Down
Loading