diff --git a/.env.example b/.env.example index e8430352..17dc9599 100644 --- a/.env.example +++ b/.env.example @@ -73,6 +73,16 @@ ANTHROPIC_API_KEY=your-api-key-here # ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id # GOOGLE_APPLICATION_CREDENTIALS=./credentials/google-sa-key.json +# ============================================================================= +# Spending Guard +# ============================================================================= +# Shannon's text-pattern spending guard can false-positive on normal pentest +# output (e.g. "password reset", "usage limit per user"). Set to 1 to disable +# the text-pattern guard. Structured SDK error detection (billing_error, +# rate_limit) and the behavioral heuristic (turns <= 2, cost === $0) remain +# active regardless of this setting. +# SHANNON_DISABLE_SPENDING_GUARD=1 + # ============================================================================= # Available Models # ============================================================================= diff --git a/apps/cli/src/env.ts b/apps/cli/src/env.ts index 4241ee14..4026f490 100644 --- a/apps/cli/src/env.ts +++ b/apps/cli/src/env.ts @@ -29,6 +29,7 @@ const FORWARD_VARS = [ 'CLAUDE_CODE_MAX_OUTPUT_TOKENS', 'OPENAI_API_KEY', 'OPENROUTER_API_KEY', + 'SHANNON_DISABLE_SPENDING_GUARD', ] as const; /** diff --git a/apps/worker/src/utils/__tests__/billing-detection.test.ts b/apps/worker/src/utils/__tests__/billing-detection.test.ts new file mode 100644 index 00000000..10ba1f3d --- /dev/null +++ b/apps/worker/src/utils/__tests__/billing-detection.test.ts @@ -0,0 +1,124 @@ +/** + * Tests for billing-detection.ts + * + * Validates pattern matching, false-positive avoidance, and the + * SHANNON_DISABLE_SPENDING_GUARD escape hatch. + * + * Run with: npx tsx --test apps/worker/src/utils/__tests__/billing-detection.test.ts + */ + +import assert from 'node:assert/strict'; +import { describe, it } from 'node:test'; + +// Import the functions under test. 
+// NOTE: spendingGuardDisabled is evaluated at module load time from +// process.env, so only the default (guard active) path is tested here. +import { + BILLING_API_PATTERNS, + BILLING_TEXT_PATTERNS, + isSpendingCapBehavior, + matchesBillingApiPattern, + matchesBillingTextPattern, + spendingGuardDisabled, +} from '../billing-detection.js'; + +// --------------------------------------------------------------------------- +// matchesBillingTextPattern +// --------------------------------------------------------------------------- +describe('matchesBillingTextPattern', () => { + it('detects actual billing messages', () => { + assert.ok(matchesBillingTextPattern('Spending cap reached. Resets at 8 AM PT.')); + assert.ok(matchesBillingTextPattern('Your spending limit has been exceeded')); + assert.ok(matchesBillingTextPattern('Budget exceeded, please upgrade')); + assert.ok(matchesBillingTextPattern('Usage limit reached')); + }); + + it('does NOT false-positive on common pentest vocabulary', () => { + // These are real phrases from pentesting output that previously + // triggered false positives (see issue #263). 
+ assert.ok(!matchesBillingTextPattern('password reset')); + assert.ok(!matchesBillingTextPattern('The reset token was expired')); + assert.ok(!matchesBillingTextPattern('Account resets after 3 failed attempts')); + assert.ok(!matchesBillingTextPattern('Usage limit per user is 100 requests')); + assert.ok(!matchesBillingTextPattern('rate limit exceeded')); + }); + + it('returns false for empty / unrelated text', () => { + assert.ok(!matchesBillingTextPattern('')); + assert.ok(!matchesBillingTextPattern('Found SQL injection in login form')); + assert.ok(!matchesBillingTextPattern('XSS payload executed successfully')); + }); +}); + +// --------------------------------------------------------------------------- +// matchesBillingApiPattern +// --------------------------------------------------------------------------- +describe('matchesBillingApiPattern', () => { + it('detects API billing errors', () => { + assert.ok(matchesBillingApiPattern('billing_error')); + assert.ok(matchesBillingApiPattern('credit balance is too low')); + assert.ok(matchesBillingApiPattern('insufficient credits')); + assert.ok(matchesBillingApiPattern('quota exceeded')); + assert.ok(matchesBillingApiPattern('limit will reset')); + }); + + it('is case-insensitive', () => { + assert.ok(matchesBillingApiPattern('BILLING_ERROR')); + assert.ok(matchesBillingApiPattern('Quota Exceeded')); + }); +}); + +// --------------------------------------------------------------------------- +// isSpendingCapBehavior +// --------------------------------------------------------------------------- +describe('isSpendingCapBehavior', () => { + it('triggers on low-turn zero-cost billing message', () => { + assert.ok(isSpendingCapBehavior(1, 0, 'Spending cap reached')); + assert.ok(isSpendingCapBehavior(2, 0, 'Your spending limit hit')); + }); + + it('does NOT trigger when turns > 2', () => { + assert.ok(!isSpendingCapBehavior(3, 0, 'Spending cap reached')); + }); + + it('does NOT trigger when cost > 0', () => { + 
assert.ok(!isSpendingCapBehavior(1, 0.01, 'Spending cap reached')); + }); + + it('does NOT trigger on normal pentest output even with low turns', () => { + assert.ok(!isSpendingCapBehavior(1, 0, 'Found password reset vulnerability')); + assert.ok(!isSpendingCapBehavior(2, 0, 'Testing account resets')); + }); +}); + +// --------------------------------------------------------------------------- +// Pattern lists – sanity checks +// --------------------------------------------------------------------------- +describe('pattern lists', () => { + it('BILLING_TEXT_PATTERNS does not contain bare "resets"', () => { + const patterns: readonly string[] = BILLING_TEXT_PATTERNS; + assert.ok(!patterns.includes('resets'), '"resets" should have been removed (see #263)'); + }); + + it('BILLING_TEXT_PATTERNS uses "usage limit reached" not bare "usage limit"', () => { + const patterns: readonly string[] = BILLING_TEXT_PATTERNS; + assert.ok(!patterns.includes('usage limit'), 'bare "usage limit" is too broad'); + assert.ok(patterns.includes('usage limit reached')); + }); + + it('BILLING_API_PATTERNS is unchanged and contains expected entries', () => { + const patterns: readonly string[] = BILLING_API_PATTERNS; + assert.ok(patterns.includes('billing_error')); + assert.ok(patterns.includes('quota exceeded')); + }); +}); + +// --------------------------------------------------------------------------- +// SHANNON_DISABLE_SPENDING_GUARD flag +// --------------------------------------------------------------------------- +describe('SHANNON_DISABLE_SPENDING_GUARD', () => { + it('is disabled by default in test environment', () => { + // Unless the test runner sets the env var, the guard should be active + assert.equal(spendingGuardDisabled, false); + }); +}); diff --git a/apps/worker/src/utils/billing-detection.ts b/apps/worker/src/utils/billing-detection.ts index 6f25c720..9d292222 100644 --- a/apps/worker/src/utils/billing-detection.ts +++ b/apps/worker/src/utils/billing-detection.ts @@ 
-14,19 +14,35 @@ * * This module provides defense-in-depth detection with shared pattern lists * to prevent drift between detection points. + * + * The text-pattern guard can produce false positives when pentest output + * contains billing-sounding phrases (e.g. "password reset", "usage limit + * per user"). Set SHANNON_DISABLE_SPENDING_GUARD=1 to bypass the + * text-pattern checks entirely while still preserving structured-error + * and behavioral (zero-cost) detection. */ +/** + * When true, all text-pattern spending guard checks are skipped. + * Structured SDK error detection (billing_error, rate_limit, etc.) and + * the behavioral heuristic (turns <= 2 && cost === 0) remain active. + */ +export const spendingGuardDisabled = process.env.SHANNON_DISABLE_SPENDING_GUARD === '1'; + /** * Text patterns for SDK output sniffing (what Claude says). * Used by message-handlers.ts and the behavioral heuristic. + * + * NOTE: Only patterns that are unambiguous in a pentesting context belong + * here. "resets" was removed because it matches innocuous pentest + * vocabulary like "password reset" / "reset token" (see #263). */ export const BILLING_TEXT_PATTERNS = [ 'spending cap', 'spending limit', 'cap reached', 'budget exceeded', - 'usage limit', - 'resets', + 'usage limit reached', ] as const; /** @@ -50,8 +66,15 @@ export const BILLING_API_PATTERNS = [ /** * Checks if text matches any billing text pattern. * Used for sniffing SDK output content for spending cap messages. + * + * Returns false immediately when SHANNON_DISABLE_SPENDING_GUARD=1, + * letting the caller fall through to structured-error or behavioral + * detection instead. */ export function matchesBillingTextPattern(text: string): boolean { + if (spendingGuardDisabled) { + return false; + } const lowerText = text.toLowerCase(); return BILLING_TEXT_PATTERNS.some((pattern) => lowerText.includes(pattern)); } @@ -76,6 +99,10 @@ export function matchesBillingApiPattern(message: string): boolean { * 2. 
Zero cost ($0) + * 3. Text matches billing patterns + * + * NOTE: The text-pattern leg respects SHANNON_DISABLE_SPENDING_GUARD; + * when the guard is disabled the text match always fails, so this + * function returns false regardless of turns and cost. + * + * @param turns - Number of turns the agent took + * @param cost - Total cost in USD + * @param resultText - The result text from the agent