diff --git a/agent-schema.json b/agent-schema.json
index 0ee0217e5..88f54e0b0 100644
--- a/agent-schema.json
+++ b/agent-schema.json
@@ -634,7 +634,7 @@
},
"provider_opts": {
"type": "object",
- "description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true). openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance). Google: google_search (boolean) enables Google Search grounding, google_maps (boolean) enables Google Maps grounding, code_execution (boolean) enables server-side code execution.",
+ "description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true), thinking_display ('summarized', 'omitted', or 'display') controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking by default ('omitted'); set thinking_display: summarized (or thinking_display: display) to receive thinking blocks. openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance). Google: google_search (boolean) enables Google Search grounding, google_maps (boolean) enables Google Maps grounding, code_execution (boolean) enables server-side code execution.",
"additionalProperties": true
},
"track_usage": {
diff --git a/docs/configuration/models/index.md b/docs/configuration/models/index.md
index eb82366fe..f45800c80 100644
--- a/docs/configuration/models/index.md
+++ b/docs/configuration/models/index.md
@@ -178,6 +178,22 @@ models:
interleaved_thinking: false # disable if needed
```
+## Thinking Display (Anthropic)
+
+For Anthropic Claude models, `thinking_display` controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); set this provider option to receive summarized thinking:
+
+```yaml
+models:
+ opus-4-7:
+ provider: anthropic
+ model: claude-opus-4-7
+ thinking_budget: adaptive
+ provider_opts:
+ thinking_display: summarized # "summarized", "display", or "omitted"
+```
+
+See the [Anthropic provider page]({{ '/providers/anthropic/#thinking-display' | relative_url }}) for details.
+
## Examples by Provider
```yaml
diff --git a/docs/providers/anthropic/index.md b/docs/providers/anthropic/index.md
index c5b5038c2..61baf0e16 100644
--- a/docs/providers/anthropic/index.md
+++ b/docs/providers/anthropic/index.md
@@ -106,6 +106,28 @@ Object form (forward-compatible with future budget types):
See the full schema on the [Model Configuration]({{ '/configuration/models/#task-budget' | relative_url }}) page.
+## Thinking Display
+
+Controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); earlier Claude 4 models default to `summarized`. Set `thinking_display` in `provider_opts` to override:
+
+```yaml
+models:
+ claude-opus-4-7:
+ provider: anthropic
+ model: claude-opus-4-7
+ thinking_budget: adaptive
+ provider_opts:
+ thinking_display: summarized # "summarized", "display", or "omitted"
+```
+
+Valid values:
+
+- `summarized`: thinking blocks are returned with summarized thinking text (default for Claude 4 models prior to Opus 4.7).
+- `display`: thinking blocks are returned for display (use this to re-enable thinking output on Opus 4.7).
+- `omitted`: thinking blocks are returned with an empty thinking field; the signature is still returned for multi-turn continuity (default for Opus 4.7). Useful to reduce time-to-first-text-token when streaming.
+
+Note: `thinking_display` applies to both `thinking_budget` with token counts and adaptive/effort-based budgets. Full thinking tokens are billed regardless of the `thinking_display` value.
+
ℹ️ Note
diff --git a/examples/thinking_budget.yaml b/examples/thinking_budget.yaml
index 8c906e7c8..3ef7bb6d9 100644
--- a/examples/thinking_budget.yaml
+++ b/examples/thinking_budget.yaml
@@ -48,6 +48,13 @@ models:
model: claude-opus-4-6
thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max"
+ claude-opus-4-7-summarized:
+ provider: anthropic
+ model: claude-opus-4-6 # <- Opus 4.7 hides thinking by default; use the same flag with any recent Claude model
+ thinking_budget: adaptive
+ provider_opts:
+ thinking_display: summarized # <- "summarized", "display", or "omitted" (Opus 4.7 defaults to omitted)
+
gemini-2-5-flash-dynamic-thinking:
provider: google
model: gemini-2.5-flash
diff --git a/pkg/model/provider/anthropic/beta_client.go b/pkg/model/provider/anthropic/beta_client.go
index 75d5e4b1e..ad825c0ec 100644
--- a/pkg/model/provider/anthropic/beta_client.go
+++ b/pkg/model/provider/anthropic/beta_client.go
@@ -90,17 +90,7 @@ func (c *Client) createBetaStream(
// Configure thinking if a thinking budget is set in the model config.
// The beta client is also used for structured output and file attachments,
// which don't require thinking.
- if budget := c.ModelConfig.ThinkingBudget; budget != nil {
- if effort, ok := anthropicThinkingEffort(budget); ok {
- adaptive := anthropic.BetaThinkingConfigAdaptiveParam{}
- params.Thinking = anthropic.BetaThinkingConfigParamUnion{OfAdaptive: &adaptive}
- params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effort)
- slog.Debug("Anthropic Beta API using adaptive thinking", "effort", effort)
- } else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok {
- params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(tokens)
- slog.Debug("Anthropic Beta API using thinking_budget", "budget_tokens", tokens)
- }
- }
+	c.applyBetaThinkingConfig(&params, maxTokens)
// Forward task_budget via `output_config.task_budget` (Anthropic
// Opus 4.7+) and enable the corresponding beta header. Older Claude
diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go
index b10f24b8d..3d6aea955 100644
--- a/pkg/model/provider/anthropic/client.go
+++ b/pkg/model/provider/anthropic/client.go
@@ -17,7 +17,6 @@ import (
"github.com/docker/docker-agent/pkg/chat"
"github.com/docker/docker-agent/pkg/config/latest"
- "github.com/docker/docker-agent/pkg/effort"
"github.com/docker/docker-agent/pkg/environment"
"github.com/docker/docker-agent/pkg/httpclient"
"github.com/docker/docker-agent/pkg/model/provider/base"
@@ -35,79 +34,6 @@ type Client struct {
fileManager *FileManager
}
-// adjustMaxTokensForThinking checks if max_tokens needs adjustment for thinking_budget.
-// Anthropic's max_tokens represents the combined budget for thinking + output tokens.
-// Returns the adjusted maxTokens value and an error if user-set max_tokens is too low.
-//
-// Only fixed token budgets need adjustment. Adaptive and effort-based budgets
-// don't need it since the model manages its own thinking allocation.
-func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) {
- if c.ModelConfig.ThinkingBudget == nil {
- return maxTokens, nil
- }
- // Adaptive and effort-based budgets: no token adjustment needed.
- if _, ok := anthropicThinkingEffort(c.ModelConfig.ThinkingBudget); ok {
- return maxTokens, nil
- }
-
- thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens)
- if thinkingTokens <= 0 {
- return maxTokens, nil
- }
-
- minRequired := thinkingTokens + 1024 // configured thinking budget + minimum output buffer
-
- if maxTokens <= thinkingTokens {
- userSetMaxTokens := c.ModelConfig.MaxTokens != nil
- if userSetMaxTokens {
- // User explicitly set max_tokens too low - return error
- slog.Error("Anthropic: max_tokens must be greater than thinking_budget",
- "max_tokens", maxTokens,
- "thinking_budget", thinkingTokens)
- return 0, fmt.Errorf("anthropic: max_tokens (%d) must be greater than thinking_budget (%d); increase max_tokens to at least %d",
- maxTokens, thinkingTokens, minRequired)
- }
- // Auto-adjust when user didn't set max_tokens
- slog.Info("Anthropic: auto-adjusting max_tokens to accommodate thinking_budget",
- "original_max_tokens", maxTokens,
- "thinking_budget", thinkingTokens,
- "new_max_tokens", minRequired)
- // return the configured thinking budget + 8192 because that's the default
- // max_tokens value for anthropic models when unspecified by the user
- return thinkingTokens + 8192, nil
- }
-
- return maxTokens, nil
-}
-
-// interleavedThinkingEnabled returns false unless explicitly enabled via
-// models:provider_opts:interleaved_thinking: true
-func (c *Client) interleavedThinkingEnabled() bool {
- // Default to false if not provided
- if c == nil || len(c.ModelConfig.ProviderOpts) == 0 {
- return false
- }
- v, ok := c.ModelConfig.ProviderOpts["interleaved_thinking"]
- if !ok {
- return false
- }
- switch t := v.(type) {
- case bool:
- return t
- case string:
- s := strings.TrimSpace(strings.ToLower(t))
- return s != "false" && s != "0" && s != "no"
- case int:
- return t != 0
- case int64:
- return t != 0
- case float64:
- return t != 0
- default:
- return false
- }
-}
-
// NewClient creates a new Anthropic client from the provided configuration
func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Provider, opts ...options.Opt) (*Client, error) {
if cfg == nil {
@@ -288,20 +214,7 @@ func (c *Client) CreateChatCompletionStream(
}
// Apply thinking budget first, as it affects whether we can set temperature
- thinkingEnabled := false
- if budget := c.ModelConfig.ThinkingBudget; budget != nil {
- if effortStr, ok := anthropicThinkingEffort(budget); ok {
- adaptive := anthropic.ThinkingConfigAdaptiveParam{}
- params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive}
- params.OutputConfig.Effort = anthropic.OutputConfigEffort(effortStr)
- thinkingEnabled = true
- slog.Debug("Anthropic API using adaptive thinking", "effort", effortStr)
- } else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok {
- params.Thinking = anthropic.ThinkingConfigParamOfEnabled(tokens)
- thinkingEnabled = true
- slog.Debug("Anthropic API using thinking_budget", "budget_tokens", tokens)
- }
- }
+	thinkingEnabled := c.applyThinkingConfig(&params, maxTokens)
// Temperature and TopP cannot be set when extended thinking is enabled
// (Anthropic requires temperature=1.0 which is the default when thinking is on)
@@ -753,38 +666,6 @@ func contentArray(m map[string]any) []any {
return nil
}
-// validThinkingTokens validates that the token budget is within the
-// acceptable range for Anthropic (>= 1024 and < maxTokens).
-// Returns (tokens, true) if valid, or (0, false) with a warning log if not.
-func validThinkingTokens(tokens, maxTokens int64) (int64, bool) {
- if tokens < 1024 {
- slog.Warn("Anthropic thinking_budget below minimum (1024), ignoring", "tokens", tokens)
- return 0, false
- }
- if tokens >= maxTokens {
- slog.Warn("Anthropic thinking_budget must be less than max_tokens, ignoring", "tokens", tokens, "max_tokens", maxTokens)
- return 0, false
- }
- return tokens, true
-}
-
-// anthropicThinkingEffort returns the Anthropic API effort level for the given
-// ThinkingBudget. It covers both explicit adaptive mode and string effort
-// levels. Returns ("", false) when the budget uses token counts or is nil.
-func anthropicThinkingEffort(b *latest.ThinkingBudget) (string, bool) {
- if b == nil {
- return "", false
- }
- if e, ok := b.AdaptiveEffort(); ok {
- return e, true
- }
- l, ok := b.EffortLevel()
- if !ok {
- return "", false
- }
- return effort.ForAnthropic(l)
-}
-
// anthropicContextLimit returns a reasonable default context window for Anthropic models.
// We default to 200k tokens, which is what 3.5-4.5 models support; adjust as needed over time.
func anthropicContextLimit(model string) int64 {
diff --git a/pkg/model/provider/anthropic/thinking.go b/pkg/model/provider/anthropic/thinking.go
new file mode 100644
index 000000000..0c65c8694
--- /dev/null
+++ b/pkg/model/provider/anthropic/thinking.go
@@ -0,0 +1,226 @@
+package anthropic
+
+import (
+ "fmt"
+ "log/slog"
+ "strings"
+
+ "github.com/anthropics/anthropic-sdk-go"
+
+ "github.com/docker/docker-agent/pkg/config/latest"
+ "github.com/docker/docker-agent/pkg/effort"
+)
+
+// Valid values for the `thinking_display` provider option.
+const (
+ thinkingDisplaySummarized = "summarized"
+ thinkingDisplayOmitted = "omitted"
+ thinkingDisplayDisplay = "display"
+)
+
+// adjustMaxTokensForThinking checks if max_tokens needs adjustment for thinking_budget.
+// Anthropic's max_tokens represents the combined budget for thinking + output tokens.
+// Returns the adjusted maxTokens value and an error if user-set max_tokens is too low.
+//
+// Only fixed token budgets need adjustment. Adaptive and effort-based budgets
+// don't need it since the model manages its own thinking allocation.
+func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) {
+ if c.ModelConfig.ThinkingBudget == nil {
+ return maxTokens, nil
+ }
+ // Adaptive and effort-based budgets: no token adjustment needed.
+ if _, ok := anthropicThinkingEffort(c.ModelConfig.ThinkingBudget); ok {
+ return maxTokens, nil
+ }
+
+ thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens)
+ if thinkingTokens <= 0 {
+ return maxTokens, nil
+ }
+
+ minRequired := thinkingTokens + 1024 // configured thinking budget + minimum output buffer
+
+ if maxTokens <= thinkingTokens {
+ userSetMaxTokens := c.ModelConfig.MaxTokens != nil
+ if userSetMaxTokens {
+ // User explicitly set max_tokens too low - return error
+ slog.Error("Anthropic: max_tokens must be greater than thinking_budget",
+ "max_tokens", maxTokens,
+ "thinking_budget", thinkingTokens)
+ return 0, fmt.Errorf("anthropic: max_tokens (%d) must be greater than thinking_budget (%d); increase max_tokens to at least %d",
+ maxTokens, thinkingTokens, minRequired)
+ }
+ // Auto-adjust when user didn't set max_tokens
+ slog.Info("Anthropic: auto-adjusting max_tokens to accommodate thinking_budget",
+ "original_max_tokens", maxTokens,
+ "thinking_budget", thinkingTokens,
+ "new_max_tokens", minRequired)
+ // return the configured thinking budget + 8192 because that's the default
+ // max_tokens value for anthropic models when unspecified by the user
+ return thinkingTokens + 8192, nil
+ }
+
+ return maxTokens, nil
+}
+
+// interleavedThinkingEnabled returns false unless explicitly enabled via
+// models:provider_opts:interleaved_thinking: true
+func (c *Client) interleavedThinkingEnabled() bool {
+ if c == nil || len(c.ModelConfig.ProviderOpts) == 0 {
+ return false
+ }
+ v, ok := c.ModelConfig.ProviderOpts["interleaved_thinking"]
+ if !ok {
+ return false
+ }
+ switch t := v.(type) {
+ case bool:
+ return t
+ case string:
+ s := strings.TrimSpace(strings.ToLower(t))
+ return s != "false" && s != "0" && s != "no"
+ case int:
+ return t != 0
+ case int64:
+ return t != 0
+ case float64:
+ return t != 0
+ default:
+ return false
+ }
+}
+
+// validThinkingTokens validates that the token budget is within the
+// acceptable range for Anthropic (>= 1024 and < maxTokens).
+// Returns (tokens, true) if valid, or (0, false) with a warning log if not.
+func validThinkingTokens(tokens, maxTokens int64) (int64, bool) {
+ if tokens < 1024 {
+ slog.Warn("Anthropic thinking_budget below minimum (1024), ignoring", "tokens", tokens)
+ return 0, false
+ }
+ if tokens >= maxTokens {
+ slog.Warn("Anthropic thinking_budget must be less than max_tokens, ignoring", "tokens", tokens, "max_tokens", maxTokens)
+ return 0, false
+ }
+ return tokens, true
+}
+
+// anthropicThinkingEffort returns the Anthropic API effort level for the given
+// ThinkingBudget. It covers both explicit adaptive mode and string effort
+// levels. Returns ("", false) when the budget uses token counts or is nil.
+func anthropicThinkingEffort(b *latest.ThinkingBudget) (string, bool) {
+ if b == nil {
+ return "", false
+ }
+ if e, ok := b.AdaptiveEffort(); ok {
+ return e, true
+ }
+ l, ok := b.EffortLevel()
+ if !ok {
+ return "", false
+ }
+ return effort.ForAnthropic(l)
+}
+
+// anthropicThinkingDisplay returns the validated `thinking_display` value
+// from provider_opts, if set. Valid values are "summarized", "omitted", and
+// "display".
+//
+// Claude Opus 4.7 hides thinking content by default ("omitted"). Set
+// thinking_display: summarized (or thinking_display: display) in
+// provider_opts to receive thinking blocks, or thinking_display: omitted to
+// explicitly hide them.
+//
+// Returns ("", false) when not set or invalid.
+func anthropicThinkingDisplay(opts map[string]any) (string, bool) {
+ v, ok := opts["thinking_display"]
+ if !ok {
+ return "", false
+ }
+ s, ok := v.(string)
+ if !ok {
+ slog.Debug("provider_opts type mismatch, ignoring",
+ "key", "thinking_display",
+ "expected_type", "string",
+ "actual_type", fmt.Sprintf("%T", v),
+ "value", v)
+ return "", false
+ }
+ switch strings.TrimSpace(strings.ToLower(s)) {
+ case thinkingDisplaySummarized:
+ return thinkingDisplaySummarized, true
+ case thinkingDisplayOmitted:
+ return thinkingDisplayOmitted, true
+ case thinkingDisplayDisplay:
+ return thinkingDisplayDisplay, true
+ default:
+ slog.Warn("Anthropic provider_opts: invalid thinking_display value, ignoring",
+ "value", s,
+ "valid_values", []string{thinkingDisplaySummarized, thinkingDisplayOmitted, thinkingDisplayDisplay})
+ return "", false
+ }
+}
+
+// applyThinkingConfig configures extended thinking on a standard MessageNewParams
+// based on the model's ThinkingBudget and provider_opts.thinking_display.
+// Returns true when thinking is enabled (i.e., temperature/top_p must not be set).
+func (c *Client) applyThinkingConfig(params *anthropic.MessageNewParams, maxTokens int64) bool {
+ budget := c.ModelConfig.ThinkingBudget
+ if budget == nil {
+ return false
+ }
+ display, _ := anthropicThinkingDisplay(c.ModelConfig.ProviderOpts)
+
+ if effortStr, ok := anthropicThinkingEffort(budget); ok {
+ adaptive := &anthropic.ThinkingConfigAdaptiveParam{}
+ if display != "" {
+ adaptive.Display = anthropic.ThinkingConfigAdaptiveDisplay(display)
+ }
+ params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: adaptive}
+ params.OutputConfig.Effort = anthropic.OutputConfigEffort(effortStr)
+ slog.Debug("Anthropic API using adaptive thinking", "effort", effortStr, "display", display)
+ return true
+ }
+
+ tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens)
+ if !ok {
+ return false
+ }
+ params.Thinking = anthropic.ThinkingConfigParamOfEnabled(tokens)
+ if display != "" && params.Thinking.OfEnabled != nil {
+ params.Thinking.OfEnabled.Display = anthropic.ThinkingConfigEnabledDisplay(display)
+ }
+ slog.Debug("Anthropic API using thinking_budget", "budget_tokens", tokens, "display", display)
+ return true
+}
+
+// applyBetaThinkingConfig configures extended thinking on a BetaMessageNewParams
+// based on the model's ThinkingBudget and provider_opts.thinking_display.
+func (c *Client) applyBetaThinkingConfig(params *anthropic.BetaMessageNewParams, maxTokens int64) {
+ budget := c.ModelConfig.ThinkingBudget
+ if budget == nil {
+ return
+ }
+ display, _ := anthropicThinkingDisplay(c.ModelConfig.ProviderOpts)
+
+ if effortStr, ok := anthropicThinkingEffort(budget); ok {
+ adaptive := &anthropic.BetaThinkingConfigAdaptiveParam{}
+ if display != "" {
+ adaptive.Display = anthropic.BetaThinkingConfigAdaptiveDisplay(display)
+ }
+ params.Thinking = anthropic.BetaThinkingConfigParamUnion{OfAdaptive: adaptive}
+ params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effortStr)
+ slog.Debug("Anthropic Beta API using adaptive thinking", "effort", effortStr, "display", display)
+ return
+ }
+
+ tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens)
+ if !ok {
+ return
+ }
+ params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(tokens)
+ if display != "" && params.Thinking.OfEnabled != nil {
+ params.Thinking.OfEnabled.Display = anthropic.BetaThinkingConfigEnabledDisplay(display)
+ }
+ slog.Debug("Anthropic Beta API using thinking_budget", "budget_tokens", tokens, "display", display)
+}
diff --git a/pkg/model/provider/anthropic/thinking_test.go b/pkg/model/provider/anthropic/thinking_test.go
new file mode 100644
index 000000000..137555ea7
--- /dev/null
+++ b/pkg/model/provider/anthropic/thinking_test.go
@@ -0,0 +1,364 @@
+package anthropic
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/anthropics/anthropic-sdk-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ "github.com/docker/docker-agent/pkg/config/latest"
+ "github.com/docker/docker-agent/pkg/model/provider/base"
+)
+
+func TestAnthropicThinkingDisplay(t *testing.T) {
+ tests := []struct {
+ name string
+ opts map[string]any
+ want string
+ wantOk bool
+ }{
+ {
+ name: "nil opts",
+ opts: nil,
+ want: "",
+ wantOk: false,
+ },
+ {
+ name: "empty opts",
+ opts: map[string]any{},
+ want: "",
+ wantOk: false,
+ },
+ {
+ name: "key missing",
+ opts: map[string]any{"other": "foo"},
+ want: "",
+ wantOk: false,
+ },
+ {
+ name: "summarized",
+ opts: map[string]any{"thinking_display": "summarized"},
+ want: "summarized",
+ wantOk: true,
+ },
+ {
+ name: "omitted",
+ opts: map[string]any{"thinking_display": "omitted"},
+ want: "omitted",
+ wantOk: true,
+ },
+ {
+ name: "display",
+ opts: map[string]any{"thinking_display": "display"},
+ want: "display",
+ wantOk: true,
+ },
+ {
+ name: "case insensitive",
+ opts: map[string]any{"thinking_display": "SUMMARIZED"},
+ want: "summarized",
+ wantOk: true,
+ },
+ {
+ name: "whitespace trimmed",
+ opts: map[string]any{"thinking_display": " omitted "},
+ want: "omitted",
+ wantOk: true,
+ },
+ {
+ name: "invalid string",
+ opts: map[string]any{"thinking_display": "not-a-valid-value"},
+ want: "",
+ wantOk: false,
+ },
+ {
+ name: "non-string value",
+ opts: map[string]any{"thinking_display": 42},
+ want: "",
+ wantOk: false,
+ },
+ {
+ name: "bool value",
+ opts: map[string]any{"thinking_display": true},
+ want: "",
+ wantOk: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got, ok := anthropicThinkingDisplay(tt.opts)
+ assert.Equal(t, tt.wantOk, ok)
+ assert.Equal(t, tt.want, got)
+ })
+ }
+}
+
+// clientWith builds a minimal Client with the given ThinkingBudget and
+// provider_opts for use in thinking-config tests.
+func clientWith(budget *latest.ThinkingBudget, opts map[string]any) *Client {
+ return &Client{
+ Config: base.Config{
+ ModelConfig: latest.ModelConfig{
+ Provider: "anthropic",
+ Model: "claude-opus-4-7",
+ ThinkingBudget: budget,
+ ProviderOpts: opts,
+ },
+ },
+ }
+}
+
+func TestApplyThinkingConfig(t *testing.T) {
+ tests := []struct {
+ name string
+ budget *latest.ThinkingBudget
+ opts map[string]any
+ maxTokens int64
+ wantEnabled bool
+ wantAdaptive bool
+ wantTokens int64
+ wantEffort string
+ wantDisplayJSON string // "" means the display field must not be present in JSON
+ }{
+ {
+ name: "nil budget disables thinking",
+ budget: nil,
+ maxTokens: 8192,
+ wantEnabled: false,
+ },
+ {
+ name: "token budget below minimum is ignored",
+ budget: &latest.ThinkingBudget{Tokens: 500},
+ maxTokens: 8192,
+ wantEnabled: false,
+ },
+ {
+ name: "token budget above max_tokens is ignored",
+ budget: &latest.ThinkingBudget{Tokens: 9000},
+ maxTokens: 8192,
+ wantEnabled: false,
+ },
+ {
+ name: "adaptive high effort without display",
+ budget: &latest.ThinkingBudget{Effort: "adaptive"},
+ maxTokens: 8192,
+ wantEnabled: true,
+ wantAdaptive: true,
+ wantEffort: "high",
+ },
+ {
+ name: "adaptive with display=summarized",
+ budget: &latest.ThinkingBudget{Effort: "adaptive"},
+ opts: map[string]any{"thinking_display": "summarized"},
+ maxTokens: 8192,
+ wantEnabled: true,
+ wantAdaptive: true,
+ wantEffort: "high",
+ wantDisplayJSON: "summarized",
+ },
+ {
+ name: "adaptive with display=omitted",
+ budget: &latest.ThinkingBudget{Effort: "adaptive/low"},
+ opts: map[string]any{"thinking_display": "omitted"},
+ maxTokens: 8192,
+ wantEnabled: true,
+ wantAdaptive: true,
+ wantEffort: "low",
+ wantDisplayJSON: "omitted",
+ },
+ {
+ name: "token budget without display",
+ budget: &latest.ThinkingBudget{Tokens: 2048},
+ maxTokens: 8192,
+ wantEnabled: true,
+ wantTokens: 2048,
+ },
+ {
+ name: "token budget with display=display",
+ budget: &latest.ThinkingBudget{Tokens: 2048},
+ opts: map[string]any{"thinking_display": "display"},
+ maxTokens: 8192,
+ wantEnabled: true,
+ wantTokens: 2048,
+ wantDisplayJSON: "display",
+ },
+ {
+ name: "invalid display value is ignored",
+ budget: &latest.ThinkingBudget{Tokens: 2048},
+ opts: map[string]any{"thinking_display": "bogus"},
+ maxTokens: 8192,
+ wantEnabled: true,
+ wantTokens: 2048,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ c := clientWith(tt.budget, tt.opts)
+ params := anthropic.MessageNewParams{}
+
+			gotEnabled := c.applyThinkingConfig(&params, tt.maxTokens)
+ assert.Equal(t, tt.wantEnabled, gotEnabled)
+
+ if !tt.wantEnabled {
+ assert.Nil(t, params.Thinking.OfAdaptive)
+ assert.Nil(t, params.Thinking.OfEnabled)
+ return
+ }
+
+ if tt.wantAdaptive {
+ require.NotNil(t, params.Thinking.OfAdaptive)
+ assert.Equal(t, tt.wantEffort, string(params.OutputConfig.Effort))
+ assert.Equal(t, tt.wantDisplayJSON, string(params.Thinking.OfAdaptive.Display))
+ } else {
+ require.NotNil(t, params.Thinking.OfEnabled)
+ assert.Equal(t, tt.wantTokens, params.Thinking.OfEnabled.BudgetTokens)
+ assert.Equal(t, tt.wantDisplayJSON, string(params.Thinking.OfEnabled.Display))
+ }
+
+ // Sanity-check: the marshaled JSON omits display entirely when unset,
+ // thanks to the SDK's `json:"display,omitzero"` tag.
+ b, err := json.Marshal(params.Thinking)
+ require.NoError(t, err)
+ if tt.wantDisplayJSON == "" {
+ assert.NotContains(t, string(b), `"display"`)
+ } else {
+ assert.Contains(t, string(b), `"display":"`+tt.wantDisplayJSON+`"`)
+ }
+ })
+ }
+}
+
+func TestApplyBetaThinkingConfig(t *testing.T) {
+ tests := []struct {
+ name string
+ budget *latest.ThinkingBudget
+ opts map[string]any
+ maxTokens int64
+ wantAdaptive bool
+ wantEnabled bool
+ wantTokens int64
+ wantEffort string
+ wantDisplayJSON string
+ }{
+ {
+ name: "nil budget leaves params untouched",
+ budget: nil,
+ maxTokens: 8192,
+ },
+ {
+ name: "adaptive with display",
+ budget: &latest.ThinkingBudget{Effort: "adaptive/medium"},
+ opts: map[string]any{"thinking_display": "display"},
+ maxTokens: 8192,
+ wantAdaptive: true,
+ wantEffort: "medium",
+ wantDisplayJSON: "display",
+ },
+ {
+ name: "token budget with display=omitted",
+ budget: &latest.ThinkingBudget{Tokens: 4096},
+ opts: map[string]any{"thinking_display": "omitted"},
+ maxTokens: 8192,
+ wantEnabled: true,
+ wantTokens: 4096,
+ wantDisplayJSON: "omitted",
+ },
+ {
+ name: "invalid token budget leaves params untouched",
+ budget: &latest.ThinkingBudget{Tokens: 100},
+ maxTokens: 8192,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ c := clientWith(tt.budget, tt.opts)
+ params := anthropic.BetaMessageNewParams{}
+
+			c.applyBetaThinkingConfig(&params, tt.maxTokens)
+
+ switch {
+ case tt.wantAdaptive:
+ require.NotNil(t, params.Thinking.OfAdaptive)
+ assert.Equal(t, tt.wantEffort, string(params.OutputConfig.Effort))
+ assert.Equal(t, tt.wantDisplayJSON, string(params.Thinking.OfAdaptive.Display))
+ case tt.wantEnabled:
+ require.NotNil(t, params.Thinking.OfEnabled)
+ assert.Equal(t, tt.wantTokens, params.Thinking.OfEnabled.BudgetTokens)
+ assert.Equal(t, tt.wantDisplayJSON, string(params.Thinking.OfEnabled.Display))
+ default:
+ assert.Nil(t, params.Thinking.OfAdaptive)
+ assert.Nil(t, params.Thinking.OfEnabled)
+ }
+ })
+ }
+}
+
+func TestAdjustMaxTokensForThinking(t *testing.T) {
+ t.Run("no budget returns input unchanged", func(t *testing.T) {
+ c := clientWith(nil, nil)
+ got, err := c.adjustMaxTokensForThinking(8192)
+ require.NoError(t, err)
+ assert.Equal(t, int64(8192), got)
+ })
+
+ t.Run("adaptive budget returns input unchanged", func(t *testing.T) {
+ c := clientWith(&latest.ThinkingBudget{Effort: "adaptive"}, nil)
+ got, err := c.adjustMaxTokensForThinking(8192)
+ require.NoError(t, err)
+ assert.Equal(t, int64(8192), got)
+ })
+
+ t.Run("token budget fits in max_tokens", func(t *testing.T) {
+ c := clientWith(&latest.ThinkingBudget{Tokens: 2048}, nil)
+ got, err := c.adjustMaxTokensForThinking(8192)
+ require.NoError(t, err)
+ assert.Equal(t, int64(8192), got)
+ })
+
+ t.Run("auto-adjust when user didn't set max_tokens", func(t *testing.T) {
+ c := clientWith(&latest.ThinkingBudget{Tokens: 16384}, nil)
+ got, err := c.adjustMaxTokensForThinking(8192)
+ require.NoError(t, err)
+ assert.Equal(t, int64(16384+8192), got)
+ })
+
+ t.Run("error when user explicitly set max_tokens too low", func(t *testing.T) {
+ c := clientWith(&latest.ThinkingBudget{Tokens: 16384}, nil)
+ userMax := int64(8192)
+ c.ModelConfig.MaxTokens = &userMax
+ _, err := c.adjustMaxTokensForThinking(8192)
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), "max_tokens")
+ })
+}
+
+func TestInterleavedThinkingEnabled(t *testing.T) {
+ tests := []struct {
+ name string
+ opts map[string]any
+ want bool
+ }{
+ {"nil opts", nil, false},
+ {"missing key", map[string]any{"other": true}, false},
+ {"bool true", map[string]any{"interleaved_thinking": true}, true},
+ {"bool false", map[string]any{"interleaved_thinking": false}, false},
+ {"string true", map[string]any{"interleaved_thinking": "true"}, true},
+ {"string false", map[string]any{"interleaved_thinking": "false"}, false},
+ {"string no", map[string]any{"interleaved_thinking": "no"}, false},
+ {"int 0", map[string]any{"interleaved_thinking": 0}, false},
+ {"int 1", map[string]any{"interleaved_thinking": 1}, true},
+ {"unsupported type", map[string]any{"interleaved_thinking": []string{}}, false},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ c := clientWith(nil, tt.opts)
+ assert.Equal(t, tt.want, c.interleavedThinkingEnabled())
+ })
+ }
+}