diff --git a/README.md b/README.md index b3a67bce6..08c24d14f 100644 --- a/README.md +++ b/README.md @@ -134,12 +134,32 @@ uv run ralph --mcp --transport http --host 127.0.0.1 --port 8000 - `ralph.sh` - Wrapper with preflight checks (`uv sync`, TaskMaster bootstrap, gitignore checks) - `ralph/` - Python implementation and CLI +- `ralph/budget_guardian.py` - Officer Clancy budget guardian module - `OPERATIONS.md` - Operational reference - `.taskmaster/tasks/tasks.json` - TaskMaster task data - `skills/ralph-prd/` - Source for the `/ralph-prd` skill - `skills/ralph/` - Source for the `/ralph` conversion skill +- `skills/officer-clancy/` - Source for the Officer Clancy budget guardian skill - `flowchart/` - Interactive visualization source +## Officer Clancy: Budget Guardian + +Ralph includes Officer Clancy, a budget guardian that prevents runaway agent loops by: + +- **Tracking attempts**: Limits the number of task retry attempts +- **Managing budgets**: Enforces a budget in abstract units (a fixed cost is deducted per attempt) with escalation alerts +- **Preventing infinite loops**: Automatically stops execution when limits are exceeded + +Officer Clancy is **disabled by default** (opt-in). Enable and configure it via environment variables: + +```bash +export RALPH_BUDGET_ENABLED=true +export RALPH_MAX_ATTEMPTS=10 +export RALPH_BUDGET_LIMIT=100.0 +``` + +See [Officer Clancy skill documentation](skills/officer-clancy/SKILL.md) for detailed usage. 
+ ## Flowchart Flowchart source lives in `flowchart/`: diff --git a/justfile b/justfile index 43f7adb69..8e1860a24 100644 --- a/justfile +++ b/justfile @@ -8,6 +8,10 @@ ralph TOOL ITERATIONS='10': ralph-test: uv run pytest tests/test_ralph* +# Run budget guardian tests +ralph-test-budget: + uv run pytest tests/test_budget_guardian.py -v + # Type check and lint ralph module ralph-check: uv run mypy --strict ralph/ && uv run ruff check ralph/ @@ -56,6 +60,14 @@ ralph-tasks-active: ralph-dry-run TOOL='amp' ITERATIONS='10': uv run ralph run --tool {{TOOL}} --max-iterations {{ITERATIONS}} --dry-run +# Run ralph with Officer Clancy enabled (the guardian is opt-in) and the given budget limits +ralph-budget TOOL='amp' ITERATIONS='10' MAX_ATTEMPTS='5' BUDGET='50': + RALPH_BUDGET_ENABLED=true RALPH_MAX_ATTEMPTS={{MAX_ATTEMPTS}} RALPH_BUDGET_LIMIT={{BUDGET}} uv run ralph run --tool {{TOOL}} --max-iterations {{ITERATIONS}} + +# Run ralph with budget guardian explicitly disabled (overrides RALPH_BUDGET_ENABLED from the environment) +ralph-no-budget TOOL='amp' ITERATIONS='10': + RALPH_BUDGET_ENABLED=false uv run ralph run --tool {{TOOL}} --max-iterations {{ITERATIONS}} + # Run ralph as MCP server (stdio transport) ralph-mcp: uv run ralph --mcp diff --git a/ralph/README.md b/ralph/README.md index 6d425ad37..c82f777b8 100644 --- a/ralph/README.md +++ b/ralph/README.md @@ -153,6 +153,20 @@ Ralph supports the following environment variables for configuration: | `OPENCODE_EXTRA_ARGS` | Additional opencode arguments | (empty) | | `TASKMASTER_URL` | TaskMaster server URL (if using MCP) | (empty, uses file-based) | +### Officer Clancy Budget Guardian + +Ralph includes Officer Clancy, a budget guardian that prevents runaway agent loops. 
**Disabled by default** (opt-in): + +| Variable | Description | Default | +|----------|-------------|---------| +| `RALPH_BUDGET_ENABLED` | Enable budget guardian | `false` | +| `RALPH_MAX_ATTEMPTS` | Maximum task attempts | `10` | +| `RALPH_BUDGET_LIMIT` | Total budget limit (abstract units) | `100.0` | +| `RALPH_COST_PER_ATTEMPT` | Cost deducted per attempt | `10.0` | +| `RALPH_BUDGET_ALLOW_OVERFLOW` | Allow exceeding budget with warning | `false` | + +See [Officer Clancy skill documentation](../skills/officer-clancy/SKILL.md) for detailed usage. + ### Example: Custom Codex Configuration ```bash diff --git a/ralph/budget_guardian.py b/ralph/budget_guardian.py new file mode 100644 index 000000000..fa49bb4b7 --- /dev/null +++ b/ralph/budget_guardian.py @@ -0,0 +1,338 @@ +"""Officer Clancy Budget Guardian - Resource management for AI agents. + +This module implements the Officer Clancy protocol for AI agent budget +and attempt management. It prevents runaway agents by: +- Tracking and limiting the number of task attempts +- Managing computational and time budgets +- Providing escalation protocols and intelligent fallback mechanisms +- Preventing infinite loops and resource drain +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum +from typing import TYPE_CHECKING + +from returns.result import Failure, Result, Success + +if TYPE_CHECKING: + from collections.abc import Callable + + +class BudgetExceededError(RuntimeError): + """Raised when budget or attempt limits are exceeded.""" + + def __init__(self, message: str, budget_data: BudgetState | None = None) -> None: + super().__init__(message) + self.budget_data = budget_data + + +class EscalationLevel(Enum): + """Escalation levels for budget warnings.""" + + NORMAL = "normal" # Under 50% budget used + WARNING = "warning" # 50-80% budget used + CRITICAL = "critical" # 80-90% budget used + EXCEEDED = "exceeded" # Over 90% or limit 
hit + + +@dataclass(frozen=True) +class BudgetConfig: + """Configuration for budget guardian limits. + + Attributes: + max_attempts: Maximum number of task attempts before failure (default: 10) + budget_limit: Total budget limit in abstract units (default: 100.0) + cost_per_attempt: Cost deducted per attempt (default: 10.0) + warning_threshold: Usage fraction at which the level becomes WARNING (default: 0.5) + critical_threshold: Usage fraction at which the level becomes CRITICAL (default: 0.8) + exceeded_threshold: Usage fraction at which the level becomes EXCEEDED (default: 0.9) + allow_budget_overflow: If True, attempts are authorized even when the remaining + budget is below their cost (default: False) + + Note: + The thresholds only drive the escalation level; attempts are denied at the hard + ``max_attempts`` / ``budget_limit`` limits. The escalation callback is an argument + of ``BudgetGuardian``, not a field of this config. + """ + + max_attempts: int = 10 + budget_limit: float = 100.0 + cost_per_attempt: float = 10.0 + warning_threshold: float = 0.5 + critical_threshold: float = 0.8 + exceeded_threshold: float = 0.9 + allow_budget_overflow: bool = False + + +@dataclass +class BudgetState: + """Current state of budget tracking. + + Mutable dataclass to track real-time budget consumption. + """ + + total_attempts: int = 0 + total_cost: float = 0.0 + successful_attempts: int = 0 + failed_attempts: int = 0 + start_time: datetime = field(default_factory=datetime.now) + last_attempt_time: datetime | None = None + escalation_level: EscalationLevel = EscalationLevel.NORMAL + notes: list[str] = field(default_factory=list) + + +@dataclass +class AttemptResult: + """Result of a single authorized attempt. + + Attributes: + attempt_number: The sequential attempt number + cost: Cost of this attempt + remaining_budget: Budget remaining after this attempt + remaining_attempts: Attempts remaining after this one + escalation_level: Current escalation level after this attempt + """ + + attempt_number: int + cost: float + remaining_budget: float + remaining_attempts: int + escalation_level: EscalationLevel + + +class BudgetGuardian: + """Officer Clancy - Budget guardian for AI agent resource management. 
+ + Monitors and limits the number of task attempts, manages resource budgets, + and provides escalation protocols. Think of it as a responsible adult + supervising an enthusiastic (but potentially costly) AI agent. + + Example: + >>> config = BudgetConfig(max_attempts=5, budget_limit=50.0, cost_per_attempt=10.0) + >>> guardian = BudgetGuardian(config) + >>> # Authorize an attempt + >>> result = guardian.authorize_attempt() + >>> if isinstance(result, Success): + ... attempt = result.unwrap() + ... print(f"Attempt {attempt.attempt_number} authorized") + ... # Execute the actual work here + ... guardian.record_success() + ... else: + ... print(f"Denied: {result.failure()}") + """ + + def __init__( + self, + config: BudgetConfig, + escalation_callback: Callable[[EscalationLevel, BudgetState], None] | None = None, + ) -> None: + """Initialize the budget guardian. + + Args: + config: Budget configuration with limits and thresholds + escalation_callback: Optional callback invoked on escalation changes + """ + self._config = config + self._state = BudgetState() + self._escalation_callback = escalation_callback + + @property + def config(self) -> BudgetConfig: + """Get the budget configuration.""" + return self._config + + @property + def state(self) -> BudgetState: + """Get the current budget state.""" + return self._state + + @property + def remaining_attempts(self) -> int: + """Get the number of remaining attempts.""" + return max(0, self._config.max_attempts - self._state.total_attempts) + + @property + def remaining_budget(self) -> float: + """Get the remaining budget.""" + return max(0.0, self._config.budget_limit - self._state.total_cost) + + @property + def budget_percentage_used(self) -> float: + """Get the percentage of budget used (0.0 to 1.0+).""" + if self._config.budget_limit <= 0: + return 1.0 + return self._state.total_cost / self._config.budget_limit + + def _check_and_update_escalation(self) -> EscalationLevel: + """Check budget usage and update 
escalation level, triggering callbacks if changed.""" + percentage = self.budget_percentage_used + attempt_percentage = ( + self._state.total_attempts / self._config.max_attempts + if self._config.max_attempts > 0 + else 1.0 + ) + + # Use the higher of budget or attempt percentage + effective_percentage = max(percentage, attempt_percentage) + + if effective_percentage >= self._config.exceeded_threshold: + new_level = EscalationLevel.EXCEEDED + elif effective_percentage >= self._config.critical_threshold: + new_level = EscalationLevel.CRITICAL + elif effective_percentage >= self._config.warning_threshold: + new_level = EscalationLevel.WARNING + else: + new_level = EscalationLevel.NORMAL + + # Check if escalation level changed + if new_level != self._state.escalation_level: + old_level = self._state.escalation_level + self._state.escalation_level = new_level + self._state.notes.append( + f"Escalation: {old_level.value} -> {new_level.value} " + f"({effective_percentage:.1%} used)" + ) + if self._escalation_callback: + self._escalation_callback(new_level, self._state) + + return new_level + + def authorize_attempt( + self, + task_cost: float | None = None, + ) -> Result[AttemptResult, BudgetExceededError]: + """Authorize a task attempt, deducting from budget. + + Args: + task_cost: Optional custom cost for this attempt. + Defaults to config.cost_per_attempt. 
+ + Returns: + Success[AttemptResult] if attempt is authorized + Failure[BudgetExceededError] if limits exceeded + """ + cost = task_cost if task_cost is not None else self._config.cost_per_attempt + + # Check attempt limit + if self._state.total_attempts >= self._config.max_attempts: + error = BudgetExceededError( + f"MAXIMUM ATTEMPTS REACHED ({self._config.max_attempts})", + budget_data=self._state, + ) + self._state.notes.append(f"Denied: max attempts ({self._config.max_attempts})") + return Failure(error) + + # Check budget limit + if not self._config.allow_budget_overflow and self.remaining_budget < cost: + error = BudgetExceededError( + f"INSUFFICIENT BUDGET (remaining: {self.remaining_budget:.2f}, " + f"required: {cost:.2f})", + budget_data=self._state, + ) + self._state.notes.append( + f"Denied: insufficient budget " + f"({self.remaining_budget:.2f} < {cost:.2f})" + ) + return Failure(error) + + # Authorize the attempt + self._state.total_attempts += 1 + self._state.total_cost += cost + self._state.last_attempt_time = datetime.now() + + # Update escalation level + escalation = self._check_and_update_escalation() + + result = AttemptResult( + attempt_number=self._state.total_attempts, + cost=cost, + remaining_budget=self.remaining_budget, + remaining_attempts=self.remaining_attempts, + escalation_level=escalation, + ) + + self._state.notes.append( + f"Attempt {result.attempt_number} authorized " + f"(cost: {cost:.2f}, remaining: {result.remaining_budget:.2f})" + ) + + return Success(result) + + def record_success(self) -> None: + """Record a successful attempt.""" + self._state.successful_attempts += 1 + self._state.notes.append( + f"Attempt {self._state.total_attempts} succeeded" + ) + + def record_failure(self, reason: str = "") -> None: + """Record a failed attempt. 
+ + Args: + reason: Optional reason for failure + """ + self._state.failed_attempts += 1 + msg = f"Attempt {self._state.total_attempts} failed" + if reason: + msg += f": {reason}" + self._state.notes.append(msg) + + def get_elapsed_time(self) -> float: + """Get elapsed time since guardian was created in seconds.""" + return (datetime.now() - self._state.start_time).total_seconds() + + def get_summary(self) -> str: + """Get a human-readable summary of the budget state.""" + elapsed = self.get_elapsed_time() + lines = [ + "=" * 50, + "🚨 Officer Clancy Budget Report", + "=" * 50, + f"📊 Attempts: {self._state.total_attempts}/{self._config.max_attempts} " + f"(✓{self._state.successful_attempts} ✗{self._state.failed_attempts})", + f"💰 Budget: {self._state.total_cost:.2f}/{self._config.budget_limit:.2f} " + f"({self.budget_percentage_used:.1%} used)", + f"⏱️ Elapsed: {elapsed:.1f}s", + f"🚦 Status: {self._state.escalation_level.value.upper()}", + "=" * 50, + ] + return "\n".join(lines) + + def reset(self) -> None: + """Reset the budget guardian state for a new session.""" + self._state = BudgetState() + + +def create_guardian_from_env( + max_attempts: int | None = None, + budget_limit: float | None = None, + cost_per_attempt: float | None = None, +) -> BudgetGuardian: + """Create a budget guardian with optional overrides. 
+ + Explicit arguments take precedence over the environment, which takes precedence over + the built-in defaults. Only the three variables named below are read here; + ``RALPH_BUDGET_ENABLED`` and ``RALPH_BUDGET_ALLOW_OVERFLOW`` are not consulted. + + Args: + max_attempts: Override for ``RALPH_MAX_ATTEMPTS`` (default: 10) + budget_limit: Override for ``RALPH_BUDGET_LIMIT`` (default: 100.0) + cost_per_attempt: Override for ``RALPH_COST_PER_ATTEMPT`` (default: 10.0) + + Returns: + Configured BudgetGuardian instance + + Raises: + ValueError: If an environment variable that is consulted cannot be parsed as a number. + """ + import os + + # Compare against None rather than truthiness so that explicit 0 / 0.0 overrides + # are honoured instead of silently falling back to the environment. + config = BudgetConfig( + max_attempts=( + max_attempts + if max_attempts is not None + else int(os.environ.get("RALPH_MAX_ATTEMPTS", "10")) + ), + budget_limit=( + budget_limit + if budget_limit is not None + else float(os.environ.get("RALPH_BUDGET_LIMIT", "100.0")) + ), + cost_per_attempt=( + cost_per_attempt + if cost_per_attempt is not None + else float(os.environ.get("RALPH_COST_PER_ATTEMPT", "10.0")) + ), + ) + return BudgetGuardian(config) + + +__all__ = [ + "AttemptResult", + "BudgetConfig", + "BudgetExceededError", + "BudgetGuardian", + "BudgetState", + "EscalationLevel", + "create_guardian_from_env", +] diff --git a/ralph/config.py b/ralph/config.py index 5ab983039..f99b492b7 100644 --- a/ralph/config.py +++ b/ralph/config.py @@ -25,6 +25,13 @@ class RalphConfig: use_mcp: bool = False taskmaster_url: str | None = None + # Officer Clancy Budget Guardian configuration + budget_enabled: bool = False + budget_max_attempts: int = 10 + budget_limit: float = 100.0 + budget_cost_per_attempt: float = 10.0 + budget_allow_overflow: bool = False + # Codex-specific configuration codex_prompt_file: Path = field(default_factory=_default_codex_prompt_file) codex_model: str = "gpt-5-codex" @@ -45,6 +52,15 @@ def from_env(cls, tool: str = "amp", use_mcp: bool = False) -> RalphConfig: tool=tool, use_mcp=use_mcp, taskmaster_url=os.environ.get("TASKMASTER_URL"), + # Officer Clancy Budget Guardian + budget_enabled=os.environ.get("RALPH_BUDGET_ENABLED", "false").lower() == "true", + budget_max_attempts=int(os.environ.get("RALPH_MAX_ATTEMPTS", "10")), + budget_limit=float(os.environ.get("RALPH_BUDGET_LIMIT", "100.0")), + budget_cost_per_attempt=float(os.environ.get("RALPH_COST_PER_ATTEMPT", "10.0")), + budget_allow_overflow=os.environ.get( + "RALPH_BUDGET_ALLOW_OVERFLOW", "false" + ).lower() == "true", + # Codex 
codex_prompt_file=Path(os.environ.get("CODEX_PROMPT_FILE", str(root / "CLAUDE.md"))), codex_model=os.environ.get("CODEX_MODEL", "gpt-5-codex"), codex_reasoning_effort=os.environ.get("CODEX_REASONING_EFFORT", "high"), diff --git a/ralph/runner.py b/ralph/runner.py index 3a59c24d1..0cccdea03 100644 --- a/ralph/runner.py +++ b/ralph/runner.py @@ -2,12 +2,18 @@ from __future__ import annotations +import logging import time from pathlib import Path from typing import TypeVar from returns.result import Failure, Result +from ralph.budget_guardian import ( + BudgetConfig, + BudgetGuardian, + EscalationLevel, +) from ralph.config import RalphConfig from ralph.executors import ( AmpExecutor, @@ -67,6 +73,32 @@ def _build_executor(tool: str, config: RalphConfig) -> ToolExecutor: raise ValueError(f"Unsupported tool requested: {tool}") +def _create_budget_guardian( + config: RalphConfig, logger: logging.Logger +) -> BudgetGuardian | None: + """Create a budget guardian from config if enabled. + + Returns ``None`` when ``config.budget_enabled`` is false so callers can skip all + budget checks; otherwise returns a ``BudgetGuardian`` built from the ``budget_*`` + settings whose escalation level changes are reported through ``logger``. + """ + if not config.budget_enabled: + return None + + def escalation_callback(level: EscalationLevel, _state: object) -> None: + """Log escalation level changes; a return to NORMAL is not logged.""" + if level == EscalationLevel.WARNING: + log_warning(logger, "🚨 Officer Clancy: Budget at WARNING level") + elif level == EscalationLevel.CRITICAL: + log_warning(logger, "🚨 Officer Clancy: Budget at CRITICAL level - approaching limit") + elif level == EscalationLevel.EXCEEDED: + # The callback fires while an attempt is being authorized, so nothing is stopped + # yet; the loop only stops when a later authorization is denied. + log_error(logger, "🚨 Officer Clancy: Budget at EXCEEDED level - further attempts are denied once a hard limit is hit") + + budget_config = BudgetConfig( + max_attempts=config.budget_max_attempts, + budget_limit=config.budget_limit, + cost_per_attempt=config.budget_cost_per_attempt, + allow_budget_overflow=config.budget_allow_overflow, + ) + + return BudgetGuardian(budget_config, escalation_callback=escalation_callback) + + def run_ralph(config: RalphConfig, max_iterations: int) -> int: """Run the Ralph tool loop for a maximum number of iterations.""" @@ -77,6 +109,15 @@ def 
run_ralph(config: RalphConfig, max_iterations: int) -> int: f"Configuration loaded: tool={config.tool} iterations={max_iterations} model={config.codex_model}", ) + # Initialize Officer Clancy Budget Guardian + guardian = _create_budget_guardian(config, logger) + if guardian is not None: + log_info( + logger, + f"🚨 Officer Clancy on duty: max_attempts={config.budget_max_attempts}, " + f"budget={config.budget_limit:.2f}", + ) + # Create TaskMaster client for progress tracking # Note: TaskMaster finds tasks in .taskmaster/ (set up by ralph.sh) taskmaster = create_client( @@ -96,6 +137,21 @@ def run_ralph(config: RalphConfig, max_iterations: int) -> int: executor = _build_executor(config.tool, config) for iteration in range(1, max_iterations + 1): + # Check with Officer Clancy before each iteration + if guardian is not None: + attempt_result = guardian.authorize_attempt() + if isinstance(attempt_result, Failure): + error = attempt_result.failure() + log_error(logger, f"🚨 Officer Clancy: {error}") + log_info(logger, guardian.get_summary()) + return 1 + attempt = attempt_result.unwrap() + log_info( + logger, + f"🚨 Officer Clancy: Attempt {attempt.attempt_number} authorized " + f"(budget: {attempt.remaining_budget:.2f}/{config.budget_limit:.2f})", + ) + log_info(logger, "") log_info(logger, "=" * 63) log_info(logger, f"Ralph Iteration {iteration} of {max_iterations} ({config.tool})") @@ -103,8 +159,13 @@ def run_ralph(config: RalphConfig, max_iterations: int) -> int: try: output = _unwrap_result(executor.run(), "Tool execution failed") + if guardian is not None: + guardian.record_success() except RuntimeError as exc: log_error(logger, "Tool execution failed", exc) + if guardian is not None: + guardian.record_failure(str(exc)) + log_info(logger, guardian.get_summary()) return 1 if _check_for_completion(output): @@ -112,6 +173,10 @@ def run_ralph(config: RalphConfig, max_iterations: int) -> int: log_success(logger, "Ralph completed all tasks!") log_success(logger, 
f"Completed at iteration {iteration} of {max_iterations}") + # Display Officer Clancy final report + if guardian is not None: + log_success(logger, guardian.get_summary()) + # Display final task summary with visual progress tasks_result = taskmaster.get_all_tasks() if not isinstance(tasks_result, Failure): @@ -128,6 +193,8 @@ def run_ralph(config: RalphConfig, max_iterations: int) -> int: logger, f"Ralph reached max iterations ({max_iterations}) without completing all tasks.", ) + if guardian is not None: + log_info(logger, guardian.get_summary()) return 1 diff --git a/skills/officer-clancy/SKILL.md b/skills/officer-clancy/SKILL.md new file mode 100644 index 000000000..395d43a4a --- /dev/null +++ b/skills/officer-clancy/SKILL.md @@ -0,0 +1,248 @@ +--- +name: officer-clancy +description: "Officer Clancy Budget Guardian - Real-time budget and attempt limit enforcement for AI agents. Use when you need to prevent runaway AI loops, track resource consumption, or implement cost controls. Triggers on: budget limit, cost tracking, prevent runaway, max attempts, resource management, agent guardrails." +user-invocable: true +--- + +# Officer Clancy: The Budget Guardian + +Officer Clancy is the responsible adult supervising enthusiastic (but potentially costly) AI agents. Unlike Ralph Wiggum's unbounded determination, Officer Clancy embodies strategic resource management. 
+ +--- + +## The Job + +Protect your AI agent workflows from runaway costs by: +- **Tracking attempts**: Monitor how many times a task has been attempted +- **Managing budgets**: Set and enforce a budget in abstract units +- **Escalating alerts**: Provide intelligent warnings at configurable thresholds +- **Preventing infinite loops**: Stop execution when limits are exceeded + +--- + +## Quick Start + +### Basic Usage + +```python +from returns.result import Failure + +from ralph.budget_guardian import BudgetConfig, BudgetGuardian + +# Configure limits +config = BudgetConfig( + max_attempts=10, # Maximum task attempts + budget_limit=100.0, # Abstract budget units + cost_per_attempt=10.0, # Cost deducted per attempt +) + +# Create guardian +guardian = BudgetGuardian(config) + +# Before each iteration +result = guardian.authorize_attempt() +if isinstance(result, Failure): + print(f"Denied: {result.failure()}") +else: + attempt = result.unwrap() + print(f"Attempt {attempt.attempt_number} authorized") + + # ... execute the actual work ... + + guardian.record_success() # or record_failure() +``` + +### With Ralph Runner + +Officer Clancy is integrated into Ralph's iteration loop but **disabled by default** (opt-in). 
Configure via environment variables: + +```bash +# Enable budget guardian (disabled by default for opt-in behavior) +export RALPH_BUDGET_ENABLED=true + +# Set limits +export RALPH_MAX_ATTEMPTS=10 +export RALPH_BUDGET_LIMIT=100.0 +export RALPH_COST_PER_ATTEMPT=10.0 + +# Set to true to allow going over budget (not recommended) +export RALPH_BUDGET_ALLOW_OVERFLOW=false +``` + +Then run Ralph as usual; the guardian picks up the variables above: + +```bash +uv run ralph run --tool amp --max-iterations 10 +``` + +--- + +## Configuration Options + +| Option | Default | Description | +|--------|---------|-------------| +| `max_attempts` | 10 | Maximum number of task attempts | +| `budget_limit` | 100.0 | Total budget limit (abstract units) | +| `cost_per_attempt` | 10.0 | Cost deducted per attempt | +| `warning_threshold` | 0.5 | Usage fraction for warning alerts (50%) | +| `critical_threshold` | 0.8 | Usage fraction for critical alerts (80%) | +| `exceeded_threshold` | 0.9 | Usage fraction at which the level becomes EXCEEDED (90%); attempts are denied only at the hard attempt/budget limits | +| `allow_budget_overflow` | False | Authorize attempts even when the remaining budget is below their cost | + +--- + +## Escalation Levels + +Officer Clancy uses four escalation levels, based on the higher of budget usage and attempt usage: + +| Level | Trigger | Behavior | +|-------|---------|----------| +| 🟢 NORMAL | < 50% used | Proceed normally | +| 🟡 WARNING | 50-80% used | Log warning, continue | +| 🟠 CRITICAL | 80-90% used | Log critical alert, continue | +| 🔴 EXCEEDED | ≥ 90% used | Log error; attempts are still authorized until `max_attempts` is reached or the remaining budget cannot cover the attempt cost | + +### Escalation Callback + +Register a callback to react to escalation changes: + +```python +def my_callback(level, state): + if level == EscalationLevel.CRITICAL: + send_slack_alert(f"Budget critical: {state.total_cost}/{config.budget_limit}") + +guardian = BudgetGuardian(config, escalation_callback=my_callback) +``` + +--- + +## AttemptResult + +When an attempt is authorized, you receive an `AttemptResult`: + +```python +@dataclass +class AttemptResult: + attempt_number: int # Sequential attempt number + cost: float # Cost of this attempt + 
remaining_budget: float # Budget remaining after this attempt + remaining_attempts: int # Attempts remaining after this one + escalation_level: EscalationLevel # Current escalation level +``` + +--- + +## Budget Summary Report + +Get a human-readable summary at any time: + +```python +print(guardian.get_summary()) +``` + +Output: +``` +================================================== +🚨 Officer Clancy Budget Report +================================================== +📊 Attempts: 5/10 (✓4 ✗1) +💰 Budget: 50.00/100.00 (50.0% used) +⏱️ Elapsed: 123.4s +🚦 Status: WARNING +================================================== +``` + +--- + +## Integration with AgentGuard + +Officer Clancy is inspired by [AgentGuard](https://github.com/dipampaul17/AgentGuard), a real-time guardrail that tracks token spend and kills runaway LLM/agent loops. + +Key differences: +- **AgentGuard**: JavaScript/Node.js, tracks actual API costs in USD, intercepts HTTP calls +- **Officer Clancy**: Python, tracks abstract budget units, integrates with Ralph's iteration loop + +You can use both together: +- AgentGuard for real-time USD cost tracking at the API level +- Officer Clancy for attempt-based iteration control at the workflow level + +--- + +## Design Principles + +### Budget, Not Brutality + +Officer Clancy isn't about shutting down creativity. It's about channeling AI persistence into productive, efficient problem-solving. Sometimes, the most intelligent action is knowing when to pause and recalibrate. + +### Governance Challenge + +AI agents need boundaries. While Ralph Wiggum represents unbridled determination, Officer Clancy embodies strategic resource management. It's not just about stopping an agent; it's about guiding it intelligently. 
+ +### Cost-Efficiency Optimization + +By preventing infinite loops and resource drain, Officer Clancy helps you: +- Avoid surprise cloud bills +- Detect stuck or failing tasks early +- Make data-driven decisions about task complexity +- Maintain predictable operational costs + +--- + +## Example: Full Workflow + +```python +from returns.result import Failure + +from ralph.budget_guardian import BudgetConfig, BudgetGuardian, EscalationLevel + +def run_ai_workflow(): + # Configure for 5 attempts with 50 unit budget + config = BudgetConfig( + max_attempts=5, + budget_limit=50.0, + cost_per_attempt=10.0, + ) + + # Create guardian with escalation callback + def on_escalation(level, state): + if level == EscalationLevel.WARNING: + print(f"⚠️ Warning: {state.total_cost / config.budget_limit:.0%} budget used") + elif level == EscalationLevel.CRITICAL: + print(f"🚨 Critical: Consider simplifying the task") + + guardian = BudgetGuardian(config, escalation_callback=on_escalation) + + for i in range(10): # Try up to 10 times + # Request authorization + result = guardian.authorize_attempt() + + if isinstance(result, Failure): + print(f"❌ Stopped: {result.failure()}") + break + + attempt = result.unwrap() + print(f"🔄 Attempt {attempt.attempt_number}...") + + # Simulate work + success = do_ai_task() + + if success: + guardian.record_success() + print("✅ Task completed!") + break + else: + guardian.record_failure("AI model returned error") + + # Print final report + print(guardian.get_summary()) +``` + +--- + +## Melvin's Lucky Hint + +🤓 "Always budget your AI's enthusiasm! A well-bounded agent is a productive agent." 
+ +--- + +## See Also + +- [Ralph Runner](../ralph/README.md) - Autonomous AI agent loop +- [TaskMaster Integration](../README-MCP.md) - Task management +- [AgentGuard](https://github.com/dipampaul17/AgentGuard) - Real-time cost tracking diff --git a/tests/test_budget_guardian.py b/tests/test_budget_guardian.py new file mode 100644 index 000000000..852803c7f --- /dev/null +++ b/tests/test_budget_guardian.py @@ -0,0 +1,383 @@ +"""Tests for the Officer Clancy Budget Guardian module.""" + +from __future__ import annotations + +from datetime import datetime +from unittest.mock import MagicMock + +import pytest +from returns.result import Failure, Success + +from ralph.budget_guardian import ( + AttemptResult, + BudgetConfig, + BudgetExceededError, + BudgetGuardian, + BudgetState, + EscalationLevel, + create_guardian_from_env, +) + + +class TestBudgetConfig: + """Tests for BudgetConfig dataclass.""" + + def test_default_values(self) -> None: + """Test default configuration values.""" + config = BudgetConfig() + assert config.max_attempts == 10 + assert config.budget_limit == 100.0 + assert config.cost_per_attempt == 10.0 + assert config.warning_threshold == 0.5 + assert config.critical_threshold == 0.8 + assert config.exceeded_threshold == 0.9 + assert config.allow_budget_overflow is False + + def test_custom_values(self) -> None: + """Test custom configuration values.""" + config = BudgetConfig( + max_attempts=5, + budget_limit=50.0, + cost_per_attempt=5.0, + warning_threshold=0.4, + critical_threshold=0.7, + exceeded_threshold=0.85, + allow_budget_overflow=True, + ) + assert config.max_attempts == 5 + assert config.budget_limit == 50.0 + assert config.cost_per_attempt == 5.0 + assert config.warning_threshold == 0.4 + assert config.critical_threshold == 0.7 + assert config.exceeded_threshold == 0.85 + assert config.allow_budget_overflow is True + + +class TestBudgetState: + """Tests for BudgetState dataclass.""" + + def test_default_state(self) -> None: + """Test 
default state initialization.""" + state = BudgetState() + assert state.total_attempts == 0 + assert state.total_cost == 0.0 + assert state.successful_attempts == 0 + assert state.failed_attempts == 0 + assert state.escalation_level == EscalationLevel.NORMAL + assert state.notes == [] + assert state.last_attempt_time is None + assert isinstance(state.start_time, datetime) + + +class TestBudgetGuardian: + """Tests for BudgetGuardian class.""" + + def test_initial_state(self) -> None: + """Test guardian initial state.""" + config = BudgetConfig() + guardian = BudgetGuardian(config) + + assert guardian.remaining_attempts == 10 + assert guardian.remaining_budget == 100.0 + assert guardian.budget_percentage_used == 0.0 + assert guardian.state.escalation_level == EscalationLevel.NORMAL + + def test_authorize_attempt_success(self) -> None: + """Test successful attempt authorization.""" + config = BudgetConfig(max_attempts=5, budget_limit=50.0, cost_per_attempt=10.0) + guardian = BudgetGuardian(config) + + result = guardian.authorize_attempt() + + assert isinstance(result, Success) + attempt = result.unwrap() + assert isinstance(attempt, AttemptResult) + assert attempt.attempt_number == 1 + assert attempt.cost == 10.0 + assert attempt.remaining_budget == 40.0 + assert attempt.remaining_attempts == 4 + + def test_authorize_attempt_custom_cost(self) -> None: + """Test attempt authorization with custom cost.""" + config = BudgetConfig(max_attempts=10, budget_limit=100.0) + guardian = BudgetGuardian(config) + + result = guardian.authorize_attempt(task_cost=25.0) + + assert isinstance(result, Success) + attempt = result.unwrap() + assert attempt.cost == 25.0 + assert attempt.remaining_budget == 75.0 + + def test_authorize_attempt_max_attempts_exceeded(self) -> None: + """Test denial when max attempts exceeded.""" + config = BudgetConfig(max_attempts=2, budget_limit=100.0, cost_per_attempt=10.0) + guardian = BudgetGuardian(config) + + # Use up all attempts + 
guardian.authorize_attempt() + guardian.authorize_attempt() + + # Third attempt should fail + result = guardian.authorize_attempt() + + assert isinstance(result, Failure) + error = result.failure() + assert isinstance(error, BudgetExceededError) + assert "MAXIMUM ATTEMPTS REACHED" in str(error) + + def test_authorize_attempt_insufficient_budget(self) -> None: + """Test denial when budget insufficient.""" + config = BudgetConfig(max_attempts=10, budget_limit=25.0, cost_per_attempt=10.0) + guardian = BudgetGuardian(config) + + # Use up most of budget + guardian.authorize_attempt() # 15 remaining + guardian.authorize_attempt() # 5 remaining + + # Third attempt should fail (needs 10, has 5) + result = guardian.authorize_attempt() + + assert isinstance(result, Failure) + error = result.failure() + assert isinstance(error, BudgetExceededError) + assert "INSUFFICIENT BUDGET" in str(error) + + def test_allow_budget_overflow(self) -> None: + """Test that overflow is allowed when configured.""" + config = BudgetConfig( + max_attempts=10, + budget_limit=25.0, + cost_per_attempt=10.0, + allow_budget_overflow=True, + ) + guardian = BudgetGuardian(config) + + # Use up budget + guardian.authorize_attempt() # 15 remaining + guardian.authorize_attempt() # 5 remaining + + # Third attempt should succeed with overflow + result = guardian.authorize_attempt() + + assert isinstance(result, Success) + # Budget is now negative, but allowed + assert guardian.remaining_budget == 0.0 # max(0, -5) = 0 + + def test_record_success(self) -> None: + """Test recording successful attempts.""" + config = BudgetConfig() + guardian = BudgetGuardian(config) + + guardian.authorize_attempt() + guardian.record_success() + + assert guardian.state.successful_attempts == 1 + assert guardian.state.failed_attempts == 0 + + def test_record_failure(self) -> None: + """Test recording failed attempts.""" + config = BudgetConfig() + guardian = BudgetGuardian(config) + + guardian.authorize_attempt() + 
guardian.record_failure("Test failure reason") + + assert guardian.state.successful_attempts == 0 + assert guardian.state.failed_attempts == 1 + assert "Test failure reason" in guardian.state.notes[-1] + + def test_escalation_levels(self) -> None: + """Test escalation level progression.""" + config = BudgetConfig( + max_attempts=10, + budget_limit=100.0, + cost_per_attempt=10.0, + warning_threshold=0.5, + critical_threshold=0.8, + exceeded_threshold=0.9, + ) + guardian = BudgetGuardian(config) + + # Start at NORMAL + assert guardian.state.escalation_level == EscalationLevel.NORMAL + + # After 5 attempts (50%), should be WARNING + for _ in range(5): + guardian.authorize_attempt() + assert guardian.state.escalation_level == EscalationLevel.WARNING + + # After 8 attempts (80%), should be CRITICAL + for _ in range(3): + guardian.authorize_attempt() + assert guardian.state.escalation_level == EscalationLevel.CRITICAL + + # After 9 attempts (90%), should be EXCEEDED + guardian.authorize_attempt() + assert guardian.state.escalation_level == EscalationLevel.EXCEEDED + + def test_escalation_callback(self) -> None: + """Test escalation callback is invoked on level change.""" + callback = MagicMock() + config = BudgetConfig( + max_attempts=10, + budget_limit=100.0, + cost_per_attempt=10.0, + warning_threshold=0.5, + ) + guardian = BudgetGuardian(config, escalation_callback=callback) + + # Should not trigger callback yet + for _ in range(4): + guardian.authorize_attempt() + callback.assert_not_called() + + # 5th attempt should trigger WARNING + guardian.authorize_attempt() + callback.assert_called_once() + call_args = callback.call_args + assert call_args[0][0] == EscalationLevel.WARNING + + def test_get_elapsed_time(self) -> None: + """Test elapsed time calculation.""" + config = BudgetConfig() + guardian = BudgetGuardian(config) + + # Should be very small on creation + elapsed = guardian.get_elapsed_time() + assert elapsed >= 0 + assert elapsed < 1.0 # Less than 1 second + + 
def test_get_summary(self) -> None: + """Test summary generation.""" + config = BudgetConfig(max_attempts=5, budget_limit=50.0, cost_per_attempt=10.0) + guardian = BudgetGuardian(config) + + guardian.authorize_attempt() + guardian.record_success() + guardian.authorize_attempt() + guardian.record_failure() + + summary = guardian.get_summary() + + assert "Officer Clancy Budget Report" in summary + assert "Attempts: 2/5" in summary + assert "✓1 ✗1" in summary + assert "Budget: 20.00/50.00" in summary + assert "40.0% used" in summary + + def test_reset(self) -> None: + """Test guardian reset.""" + config = BudgetConfig(max_attempts=5, budget_limit=50.0, cost_per_attempt=10.0) + guardian = BudgetGuardian(config) + + # Make some attempts + guardian.authorize_attempt() + guardian.authorize_attempt() + guardian.record_success() + + # Verify state changed + assert guardian.state.total_attempts == 2 + assert guardian.state.total_cost == 20.0 + + # Reset + guardian.reset() + + # Verify state is fresh + assert guardian.state.total_attempts == 0 + assert guardian.state.total_cost == 0.0 + assert guardian.remaining_attempts == 5 + assert guardian.remaining_budget == 50.0 + + def test_budget_percentage_with_zero_limit(self) -> None: + """Test budget percentage when limit is zero.""" + config = BudgetConfig(budget_limit=0.0) + guardian = BudgetGuardian(config) + + assert guardian.budget_percentage_used == 1.0 + + +class TestBudgetExceededError: + """Tests for BudgetExceededError exception.""" + + def test_error_message(self) -> None: + """Test error message.""" + error = BudgetExceededError("Test message") + assert str(error) == "Test message" + + def test_error_with_budget_data(self) -> None: + """Test error with budget data attached.""" + state = BudgetState(total_attempts=5, total_cost=50.0) + error = BudgetExceededError("Budget exceeded", budget_data=state) + + assert error.budget_data is not None + assert error.budget_data.total_attempts == 5 + assert 
error.budget_data.total_cost == 50.0 + + +class TestCreateGuardianFromEnv: + """Tests for create_guardian_from_env helper.""" + + def test_with_defaults(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test with default environment.""" + # Clear any existing env vars + monkeypatch.delenv("RALPH_MAX_ATTEMPTS", raising=False) + monkeypatch.delenv("RALPH_BUDGET_LIMIT", raising=False) + monkeypatch.delenv("RALPH_COST_PER_ATTEMPT", raising=False) + + guardian = create_guardian_from_env() + + assert guardian.config.max_attempts == 10 + assert guardian.config.budget_limit == 100.0 + assert guardian.config.cost_per_attempt == 10.0 + + def test_with_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test with custom environment variables.""" + monkeypatch.setenv("RALPH_MAX_ATTEMPTS", "5") + monkeypatch.setenv("RALPH_BUDGET_LIMIT", "50.0") + monkeypatch.setenv("RALPH_COST_PER_ATTEMPT", "5.0") + + guardian = create_guardian_from_env() + + assert guardian.config.max_attempts == 5 + assert guardian.config.budget_limit == 50.0 + assert guardian.config.cost_per_attempt == 5.0 + + def test_with_overrides(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test with explicit overrides (should take precedence).""" + monkeypatch.setenv("RALPH_MAX_ATTEMPTS", "100") + + guardian = create_guardian_from_env(max_attempts=3) + + assert guardian.config.max_attempts == 3 # Override wins + + +class TestEscalationLevel: + """Tests for EscalationLevel enum.""" + + def test_values(self) -> None: + """Test enum values.""" + assert EscalationLevel.NORMAL.value == "normal" + assert EscalationLevel.WARNING.value == "warning" + assert EscalationLevel.CRITICAL.value == "critical" + assert EscalationLevel.EXCEEDED.value == "exceeded" + + +class TestAttemptResult: + """Tests for AttemptResult dataclass.""" + + def test_creation(self) -> None: + """Test AttemptResult creation.""" + result = AttemptResult( + attempt_number=1, + cost=10.0, + remaining_budget=90.0, + remaining_attempts=9, + 
escalation_level=EscalationLevel.NORMAL, + ) + + assert result.attempt_number == 1 + assert result.cost == 10.0 + assert result.remaining_budget == 90.0 + assert result.remaining_attempts == 9 + assert result.escalation_level == EscalationLevel.NORMAL diff --git a/tests/test_config.py b/tests/test_config.py index 8c0b0b37c..c417995a2 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -23,6 +23,12 @@ def test_config_from_env_defaults() -> None: assert config.codex_full_auto is True assert config.codex_extra_args == "" assert config.codex_prompt_file.name == "CLAUDE.md" + # Officer Clancy defaults (disabled by default for opt-in behavior) + assert config.budget_enabled is False + assert config.budget_max_attempts == 10 + assert config.budget_limit == 100.0 + assert config.budget_cost_per_attempt == 10.0 + assert config.budget_allow_overflow is False def test_config_from_env_custom_values() -> None: @@ -78,3 +84,35 @@ def test_config_immutability() -> None: with pytest.raises(AttributeError): config.codex_model = "new-model" # type: ignore[misc] + + +def test_config_budget_from_env() -> None: + """Test RalphConfig.from_env() with Officer Clancy budget configuration.""" + env_vars = { + "RALPH_BUDGET_ENABLED": "true", + "RALPH_MAX_ATTEMPTS": "5", + "RALPH_BUDGET_LIMIT": "50.0", + "RALPH_COST_PER_ATTEMPT": "5.0", + "RALPH_BUDGET_ALLOW_OVERFLOW": "true", + } + + with patch.dict(os.environ, env_vars, clear=False): + config = RalphConfig.from_env(tool="amp") + + assert config.budget_enabled is True + assert config.budget_max_attempts == 5 + assert config.budget_limit == 50.0 + assert config.budget_cost_per_attempt == 5.0 + assert config.budget_allow_overflow is True + + +def test_config_budget_disabled() -> None: + """Test RalphConfig.from_env() with budget guardian disabled.""" + env_vars = { + "RALPH_BUDGET_ENABLED": "false", + } + + with patch.dict(os.environ, env_vars, clear=False): + config = RalphConfig.from_env(tool="amp") + + assert 
config.budget_enabled is False