-
Notifications
You must be signed in to change notification settings - Fork 10
fix(responses): translate content parts in multi-step executor input #1161
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -161,8 +161,22 @@ fn translate_input_items(items: &[Value]) -> Result<Vec<Value>, String> { | |
| // `role`/`content` would be a regression. | ||
| let mut translated = serde_json::Map::new(); | ||
| for (k, v) in obj { | ||
| if k != "type" { | ||
| translated.insert(k.clone(), v.clone()); | ||
| match k.as_str() { | ||
| // Drop the Open Responses item discriminator; a | ||
| // chat-completions message carries no top-level `type`. | ||
| "type" => {} | ||
| // Rewrite typed content parts (`input_text`, | ||
| // `input_image`, …) into their chat-completions | ||
| // equivalents. Verbatim forwarding left Responses-only | ||
| // part types in place, which the loopback | ||
| // /v1/chat/completions handler's typed deserialization | ||
| // rejected with a 422. | ||
| "content" => { | ||
| translated.insert(k.clone(), translate_message_content(v)); | ||
| } | ||
| _ => { | ||
| translated.insert(k.clone(), v.clone()); | ||
| } | ||
| } | ||
| } | ||
| out.push(Value::Object(translated)); | ||
|
|
@@ -271,6 +285,78 @@ fn translate_input_items(items: &[Value]) -> Result<Vec<Value>, String> { | |
| Ok(out) | ||
| } | ||
|
|
||
| /// Translate an Open Responses message `content` value into chat-completions | ||
| /// shape. | ||
| /// | ||
| /// String content is valid in both schemas and passes through untouched. An | ||
| /// array of typed content parts is rewritten part-by-part: the upstream | ||
| /// chat-completions `ContentPart` enum only accepts `text` and `image_url`, | ||
| /// so Responses-only part types (`input_text`, `output_text`, `input_image`, | ||
| /// `refusal`, …) must be mapped before the body is fired at the loopback | ||
| /// `/v1/chat/completions` handler. | ||
| /// | ||
| /// This mirrors onwards' typed `convert_message_content` | ||
| /// (`onwards/src/strict/adapter.rs`) at the JSON layer this translator | ||
| /// operates on. Without it, array-form content was forwarded verbatim and the | ||
| /// loopback handler's `Json<ChatCompletionRequest>` extractor rejected | ||
| /// `input_text` parts with a 422 (empty body), failing every array-content | ||
| /// request that took the multi-step executor path. | ||
| fn translate_message_content(content: &Value) -> Value { | ||
| let Value::Array(parts) = content else { | ||
| return content.clone(); | ||
| }; | ||
| let translated: Vec<Value> = parts.iter().filter_map(translate_content_part).collect(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Clean use of |
||
| // A message whose parts all dropped collapses to empty-string content | ||
| // rather than an empty array, matching the adapter's behavior and keeping | ||
| // the upstream message well-formed. | ||
| if translated.is_empty() { | ||
| Value::String(String::new()) | ||
| } else { | ||
| Value::Array(translated) | ||
| } | ||
| } | ||
|
|
||
| /// Map one Open Responses content part to its chat-completions equivalent. | ||
| /// | ||
| /// Returns `None` for parts with no chat-completions representation | ||
| /// (`input_file`, unknown types); these are dropped with a trace rather than | ||
| /// forwarded, since the upstream schema would reject them. Already | ||
| /// chat-shaped parts (`text`, `image_url`) pass through unchanged so a client | ||
| /// that sent chat-completions content directly still works. | ||
|
Comment on lines
+321
to
+325
|
||
| fn translate_content_part(part: &Value) -> Option<Value> { | ||
| match part.get("type").and_then(|t| t.as_str()) { | ||
| Some("input_text") | Some("output_text") => { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-blocking: Consider annotation preservation for Why it matters: The test at line 840 shows an Suggested fix: If annotation preservation is desired, you could add the annotations field to the translated part: Some("input_text") | Some("output_text") => {
let text = part.get("text").and_then(|t| t.as_str()).unwrap_or_default();
let mut result = serde_json::Map::new();
result.insert("type".to_string(), json!("text"));
result.insert("text".to_string(), json!(text));
// Optionally preserve annotations if present
if let Some(annotations) = part.get("annotations") {
result.insert("annotations".to_string(), annotations.clone());
}
Some(json!({"type": "text", "text": text})) // or Value::Object(result)
}However, note that chat-completions |
||
| let text = part.get("text").and_then(|t| t.as_str()).unwrap_or_default(); | ||
| Some(json!({"type": "text", "text": text})) | ||
| } | ||
| Some("input_image") => { | ||
| // Responses carries the image as a bare `image_url` string; | ||
| // chat-completions wraps it in an object with optional detail. | ||
| let url = part.get("image_url").and_then(|u| u.as_str())?; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-blocking: The {"type": "input_image", "file_id": "file-abc123"}When Why it matters: Users sending Suggested fix: Either:
For reference, the OpenAI docs show both forms are valid: // URL form
{"type": "input_image", "image_url": "https://..."}
// File ID form
{"type": "input_image", "file_id": "file-abc123"} |
||
| let mut image_url = serde_json::Map::new(); | ||
| image_url.insert("url".to_string(), json!(url)); | ||
| if let Some(detail) = part.get("detail").and_then(|d| d.as_str()) { | ||
| image_url.insert("detail".to_string(), json!(detail)); | ||
| } | ||
| Some(json!({"type": "image_url", "image_url": Value::Object(image_url)})) | ||
| } | ||
| Some("refusal") => { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-blocking: Verify Why it matters: The That said, this appears intentional - the comment says parts with "no chat-completions representation" are dropped or mapped. Since chat-completions doesn't have a refusal part type, mapping to text is pragmatic. The alternative would be to drop it entirely (like Suggested fix: No change needed if this behavior is intentional. Consider adding a comment noting that refusal semantics are lost in the translation if this is a known limitation. |
||
| let text = part.get("refusal").and_then(|t| t.as_str()).unwrap_or_default(); | ||
| Some(json!({"type": "text", "text": text})) | ||
| } | ||
| // Already chat-completions-shaped — forward unchanged. | ||
| Some("text") | Some("image_url") => Some(part.clone()), | ||
| Some("input_file") => { | ||
| tracing::debug!("dropping 'input_file' content part during /v1/responses translation"); | ||
| None | ||
| } | ||
| other => { | ||
| tracing::warn!(part_type = ?other, "dropping unknown content part during /v1/responses translation"); | ||
| None | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Normalize the `tools` array into chat-completions wrapped shape: | ||
| /// `[{type:"function", function:{name, description, parameters, …}}]`. | ||
| /// | ||
|
|
@@ -724,6 +810,110 @@ mod tests { | |
| assert_eq!(p.initial_messages[4]["tool_call_id"], "call_a"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn translates_input_text_content_parts_to_chat_text() { | ||
| // The production 422 repro: a Responses client sends array-form | ||
| // content with `input_text` parts. Verbatim forwarding left the | ||
| // `input_text` type in place, which the loopback | ||
| // /v1/chat/completions handler rejected with a 422 (empty body). | ||
| // After translation the part must be chat-completions `text`. | ||
| let body = r#"{ | ||
| "model": "m", | ||
| "input": [ | ||
| {"type":"message","role":"user","content":[{"type":"input_text","text":"hello"}]} | ||
| ] | ||
| }"#; | ||
| let p = parse_parent_request(body).unwrap(); | ||
| let msg = &p.initial_messages[0]; | ||
| assert_eq!(msg["role"], "user"); | ||
| let parts = msg["content"].as_array().unwrap(); | ||
| assert_eq!(parts.len(), 1); | ||
| assert_eq!(parts[0]["type"], "text"); | ||
| assert_eq!(parts[0]["text"], "hello"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn translates_output_text_content_parts_to_chat_text() { | ||
| let body = r#"{ | ||
| "model": "m", | ||
| "input": [ | ||
| {"type":"message","role":"assistant","content":[{"type":"output_text","text":"hi","annotations":[]}]} | ||
| ] | ||
| }"#; | ||
| let p = parse_parent_request(body).unwrap(); | ||
| let parts = p.initial_messages[0]["content"].as_array().unwrap(); | ||
| assert_eq!(parts.len(), 1); | ||
| assert_eq!(parts[0]["type"], "text"); | ||
| assert_eq!(parts[0]["text"], "hi"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn passes_string_content_through_unchanged() { | ||
| // String content is valid in both schemas — it must not be | ||
| // rewritten into an array. | ||
| let body = r#"{ | ||
| "model": "m", | ||
| "input": [{"type":"message","role":"user","content":"plain string"}] | ||
| }"#; | ||
| let p = parse_parent_request(body).unwrap(); | ||
| assert_eq!(p.initial_messages[0]["content"], "plain string"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn translates_input_image_content_part_to_image_url() { | ||
| let body = r#"{ | ||
| "model": "m", | ||
| "input": [ | ||
| {"type":"message","role":"user","content":[ | ||
| {"type":"input_image","image_url":"https://x/y.png","detail":"low"} | ||
| ]} | ||
| ] | ||
| }"#; | ||
| let p = parse_parent_request(body).unwrap(); | ||
| let parts = p.initial_messages[0]["content"].as_array().unwrap(); | ||
| assert_eq!(parts[0]["type"], "image_url"); | ||
| assert_eq!(parts[0]["image_url"]["url"], "https://x/y.png"); | ||
| assert_eq!(parts[0]["image_url"]["detail"], "low"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn mixed_content_parts_keep_representable_drop_rest() { | ||
| // input_file has no chat-completions representation; it is dropped | ||
| // while the representable input_text part is kept. | ||
| let body = r#"{ | ||
| "model": "m", | ||
| "input": [ | ||
| {"type":"message","role":"user","content":[ | ||
| {"type":"input_text","text":"keep me"}, | ||
| {"type":"input_file","file_id":"f1"} | ||
| ]} | ||
| ] | ||
| }"#; | ||
| let p = parse_parent_request(body).unwrap(); | ||
| let parts = p.initial_messages[0]["content"].as_array().unwrap(); | ||
| assert_eq!(parts.len(), 1); | ||
| assert_eq!(parts[0]["type"], "text"); | ||
| assert_eq!(parts[0]["text"], "keep me"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn all_unrepresentable_content_parts_collapse_to_empty_string() { | ||
| // A message whose parts all drop becomes empty-string content | ||
| // rather than an empty array, keeping the upstream message | ||
| // well-formed instead of triggering a fresh validation error. | ||
| let body = r#"{ | ||
| "model": "m", | ||
| "input": [ | ||
| {"type":"message","role":"user","content":[ | ||
| {"type":"input_file","file_id":"f1"}, | ||
| {"type":"some_future_type","blob":"x"} | ||
| ]} | ||
| ] | ||
| }"#; | ||
| let p = parse_parent_request(body).unwrap(); | ||
| assert_eq!(p.initial_messages[0]["content"], ""); | ||
| } | ||
|
|
||
| #[test] | ||
| fn drops_reasoning_input_items() { | ||
| // Reasoning items have no chat-completions equivalent — chat | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Correct placement of the content translation hook. By handling this in the key-match within the message object iteration, you ensure all messages get their content translated regardless of role.