Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
436 changes: 436 additions & 0 deletions docs/design/auto-compaction-threshold-redesign.md

Large diffs are not rendered by default.

1,752 changes: 1,752 additions & 0 deletions docs/plans/2026-05-14-auto-compaction-threshold-redesign.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/users/configuration/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ Settings are organized into categories. Most settings should be placed within th
| `model.name` | string | The Qwen model to use for conversations. | `undefined` |
| `model.maxSessionTurns` | number | Maximum number of user/model/tool turns to keep in a session. -1 means unlimited. | `-1` |
| `model.generationConfig` | object | Advanced overrides passed to the underlying content generator. Supports request controls such as `timeout`, `maxRetries`, `enableCacheControl`, `splitToolMedia` (set `true` for strict OpenAI-compatible servers like LM Studio that reject non-text content on `role: "tool"` messages — splits media into a follow-up user message), `contextWindowSize` (override model's context window size), `modalities` (override auto-detected input modalities), `customHeaders` (custom HTTP headers for API requests), and `extra_body` (additional body parameters for OpenAI-compatible API requests only), along with fine-tuning knobs under `samplingParams` (for example `temperature`, `top_p`, `max_tokens`). Leave unset to rely on provider defaults. | `undefined` |
| `model.chatCompression.contextPercentageThreshold` | number | Sets the threshold for chat history compression as a percentage of the model's total token limit. This is a value between 0 and 1 that applies to both automatic compression and the manual `/compress` command. For example, a value of `0.6` will trigger compression when the chat history exceeds 60% of the token limit. Use `0` to disable compression entirely. | `0.7` |
| `model.chatCompression.contextPercentageThreshold` | number | **REMOVED.** Auto-compaction now uses a three-tier threshold ladder (warn / auto / hard) computed internally from the model's context window via the `computeThresholds()` function, and is no longer user-configurable. Setting this field in `settings.json` has no effect; a one-line deprecation warning is emitted to stderr at startup. There is currently no replacement for "disable compression entirely" — reactive overflow recovery remains the safety net at the API layer if compression itself fails. (See PR #4345 / `docs/design/auto-compaction-threshold-redesign.md` for the redesign rationale.) | `N/A` |
| `model.skipNextSpeakerCheck` | boolean | Skip the next speaker check. | `false` |
| `model.skipLoopDetection` | boolean | Disables loop detection checks. Loop detection prevents infinite loops in AI responses but can generate false positives that interrupt legitimate workflows. Enable this option if you experience frequent false positive loop detection interruptions. | `false` |
| `model.skipStartupContext` | boolean | Skips sending the startup workspace context (environment summary and acknowledgement) at the beginning of each session. Enable this if you prefer to provide context manually or want to save tokens on startup. | `false` |
Expand Down
1 change: 0 additions & 1 deletion packages/cli/src/services/tips/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ export { TipHistory } from './tipHistory.js';
export { selectTip } from './tipScheduler.js';
export {
tipRegistry,
getContextUsagePercent,
type ContextualTip,
type TipContext,
type TipTrigger,
Expand Down
92 changes: 92 additions & 0 deletions packages/cli/src/services/tips/tipRegistry.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

import { describe, it, expect } from 'vitest';
import { tipRegistry, type TipContext } from './tipRegistry.js';

// Shared fixture for the context-* tip tests: a 200K-token context window on a
// session that already has 10 prompts (above the `sessionPromptCount > 5` gate
// exercised below). Individual tests spread this object and override
// `lastPromptTokenCount` to place usage relative to the thresholds.
const baseCtx: TipContext = {
lastPromptTokenCount: 0,
contextWindowSize: 200_000,
sessionPromptCount: 10,
sessionCount: 1,
platform: 'darwin',
// Fixed threshold ladder used by every test: warn < auto < hard < effectiveWindow.
thresholds: {
warn: 147_000,
auto: 167_000,
hard: 177_000,
effectiveWindow: 180_000,
},
};

/**
 * Looks up a tip in `tipRegistry` by its `id`.
 *
 * Throws instead of returning `undefined` so that a renamed or removed tip
 * fails with a clear message rather than a "cannot read properties of
 * undefined" error at the call site.
 *
 * @param id - Identifier of the tip to find (for example `'context-high'`).
 * @returns The matching registry entry.
 * @throws Error when no tip with the given id is registered.
 */
function tipById(id: string) {
  const tip = tipRegistry.find((t) => t.id === id);
  if (tip === undefined) {
    throw new Error(`Tip "${id}" is not registered in tipRegistry`);
  }
  return tip;
}

// Verifies that each context-* tip's `isRelevant` window is bounded by the
// warn / auto / hard thresholds supplied on the TipContext, including the
// exact boundary values (inclusive lower bound, exclusive upper bound).
describe('context-* tip thresholds align with computeThresholds', () => {
  it('compress-intro fires between warn and auto', () => {
    const t = tipById('compress-intro');
    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 100_000 })).toBe(
      false,
    );
    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 150_000 })).toBe(
      true,
    );
    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 168_000 })).toBe(
      false,
    );
  });

  it('context-high fires between auto and hard', () => {
    const t = tipById('context-high');
    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 150_000 })).toBe(
      false,
    );
    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 170_000 })).toBe(
      true,
    );
    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 178_000 })).toBe(
      false,
    );
  });

  it('context-critical fires at or above hard', () => {
    const t = tipById('context-critical');
    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 170_000 })).toBe(
      false,
    );
    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 178_000 })).toBe(
      true,
    );
  });

  it('uses inclusive lower and exclusive upper bounds at each threshold', () => {
    const at = (lastPromptTokenCount: number): TipContext => ({
      ...baseCtx,
      lastPromptTokenCount,
    });
    const intro = tipById('compress-intro');
    const high = tipById('context-high');
    const critical = tipById('context-critical');

    // warn = 147,000: compress-intro starts exactly at the threshold.
    expect(intro.isRelevant(at(146_999))).toBe(false);
    expect(intro.isRelevant(at(147_000))).toBe(true);

    // auto = 167,000: compress-intro hands over to context-high with no gap
    // and no overlap.
    expect(intro.isRelevant(at(166_999))).toBe(true);
    expect(intro.isRelevant(at(167_000))).toBe(false);
    expect(high.isRelevant(at(166_999))).toBe(false);
    expect(high.isRelevant(at(167_000))).toBe(true);

    // hard = 177,000: context-high hands over to context-critical.
    expect(high.isRelevant(at(176_999))).toBe(true);
    expect(high.isRelevant(at(177_000))).toBe(false);
    expect(critical.isRelevant(at(176_999))).toBe(false);
    expect(critical.isRelevant(at(177_000))).toBe(true);
  });

  it('falls back gracefully when thresholds undefined (legacy callers)', () => {
    const ctx = { ...baseCtx, thresholds: undefined };
    // All three context-* tips return false when thresholds are missing
    // (the comparison would be unsafe without them).
    expect(tipById('compress-intro').isRelevant(ctx)).toBe(false);
    expect(tipById('context-high').isRelevant(ctx)).toBe(false);
    expect(tipById('context-critical').isRelevant(ctx)).toBe(false);
  });

  it('compress-intro additionally gates on sessionPromptCount > 5', () => {
    const t = tipById('compress-intro');
    // Above warn, below auto, but session is too new.
    expect(
      t.isRelevant({
        ...baseCtx,
        lastPromptTokenCount: 150_000,
        sessionPromptCount: 3,
      }),
    ).toBe(false);
    // The gate is strict: exactly 5 prompts is still too new.
    expect(
      t.isRelevant({
        ...baseCtx,
        lastPromptTokenCount: 150_000,
        sessionPromptCount: 5,
      }),
    ).toBe(false);
    expect(
      t.isRelevant({
        ...baseCtx,
        lastPromptTokenCount: 150_000,
        sessionPromptCount: 6,
      }),
    ).toBe(true);
  });
});
36 changes: 20 additions & 16 deletions packages/cli/src/services/tips/tipRegistry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
* Contextual tip registry — defines tips, their conditions, and display rules.
*/

import { DEFAULT_TOKEN_LIMIT } from '@qwen-code/qwen-code-core';
import { type CompactionThresholds } from '@qwen-code/qwen-code-core';

export type TipTrigger = 'startup' | 'post-response';

Expand All @@ -18,6 +18,12 @@ export interface TipContext {
sessionPromptCount: number;
sessionCount: number;
platform: string;
/**
* Three-tier auto-compaction thresholds, computed by callers via
* `computeThresholds(contextWindowSize)`. Optional for backward compat;
* context-* tip checks return false when missing.
*/
thresholds?: CompactionThresholds;
}

export interface ContextualTip {
Expand All @@ -29,41 +35,39 @@ export interface ContextualTip {
priority: number;
}

/**
 * Expresses the last prompt's token count as a percentage of the context
 * window.
 *
 * @param ctx - Tip context carrying `lastPromptTokenCount` and
 *   `contextWindowSize`.
 * @returns `lastPromptTokenCount / window * 100`, where `window` is
 *   `DEFAULT_TOKEN_LIMIT` whenever `contextWindowSize` is falsy.
 */
export function getContextUsagePercent(ctx: TipContext): number {
  const { lastPromptTokenCount, contextWindowSize } = ctx;
  // `||` also replaces a window size of 0, so the division below never uses
  // a zero denominator taken from the context.
  const effectiveWindow = contextWindowSize || DEFAULT_TOKEN_LIMIT;
  const usedFraction = lastPromptTokenCount / effectiveWindow;
  return usedFraction * 100;
}

export const tipRegistry: ContextualTip[] = [
// --- Post-response contextual tips (priority: higher = more urgent) ---
{
id: 'context-critical',
content:
'Context is almost full! Run /compress now or start /new to continue.',
'Context near hard limit — auto-compact will force on next send. Consider /clear if you want to start fresh.',
trigger: 'post-response',
isRelevant: (ctx) => getContextUsagePercent(ctx) >= 95,
isRelevant: (ctx) =>
ctx.thresholds !== undefined &&
ctx.lastPromptTokenCount >= ctx.thresholds.hard,
cooldownPrompts: 3,
priority: 100,
},
{
id: 'context-high',
content: 'Context is getting full. Use /compress to free up space.',
trigger: 'post-response',
isRelevant: (ctx) => {
const pct = getContextUsagePercent(ctx);
return pct >= 80 && pct < 95;
},
isRelevant: (ctx) =>
ctx.thresholds !== undefined &&
ctx.lastPromptTokenCount >= ctx.thresholds.auto &&
Comment thread
LaZzyMan marked this conversation as resolved.
ctx.lastPromptTokenCount < ctx.thresholds.hard,
cooldownPrompts: 5,
priority: 90,
},
{
id: 'compress-intro',
content: 'Long conversation? /compress summarizes history to free context.',
trigger: 'post-response',
isRelevant: (ctx) => {
const pct = getContextUsagePercent(ctx);
return pct >= 50 && pct < 80 && ctx.sessionPromptCount > 5;
},
isRelevant: (ctx) =>
ctx.thresholds !== undefined &&
ctx.lastPromptTokenCount >= ctx.thresholds.warn &&
ctx.lastPromptTokenCount < ctx.thresholds.auto &&
ctx.sessionPromptCount > 5,
cooldownPrompts: 10,
priority: 50,
},
Expand Down
112 changes: 108 additions & 4 deletions packages/cli/src/ui/commands/contextCommand.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,59 @@

import { describe, it, expect, vi, beforeEach } from 'vitest';
import type { Config } from '@qwen-code/qwen-code-core';
import { collectContextData } from './contextCommand.js';
import {
collectContextData,
formatContextUsageText,
} from './contextCommand.js';

// uiTelemetryService is consumed inside collectContextData via the
// re-export from core; mock it here so the function returns deterministic
// numbers without needing a real session.
// numbers without needing a real session. The mock fns live inside
// vi.hoisted so they are available when vi.mock's factory runs (vi.mock
// is hoisted above module-level const declarations).
const { mockGetLastPromptTokenCount, mockGetLastCachedContentTokenCount } =
vi.hoisted(() => ({
mockGetLastPromptTokenCount: vi.fn().mockReturnValue(0),
mockGetLastCachedContentTokenCount: vi.fn().mockReturnValue(0),
}));

vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => {
const original =
await importOriginal<typeof import('@qwen-code/qwen-code-core')>();
return {
...original,
uiTelemetryService: {
getLastPromptTokenCount: vi.fn().mockReturnValue(0),
getLastCachedContentTokenCount: vi.fn().mockReturnValue(0),
getLastPromptTokenCount: mockGetLastPromptTokenCount,
getLastCachedContentTokenCount: mockGetLastCachedContentTokenCount,
},
};
});

/**
 * Builds a minimal `Config` stand-in for the /context tests.
 *
 * Only the accessors listed below are stubbed. The tool registry reports no
 * tools or function declarations, user memory is empty, no skills are listed,
 * and `getChatCompression` returns `undefined`.
 *
 * @param contextWindowSize - Value exposed through
 *   `getContentGeneratorConfig().contextWindowSize` (defaults to 32,000).
 * @returns The stub object cast to `Config`.
 */
function makeMockConfig(contextWindowSize = 32_000): Config {
  const toolRegistry = {
    getAllTools: vi.fn().mockReturnValue([]),
    getFunctionDeclarations: vi.fn().mockReturnValue([]),
  };
  const skillManager = {
    listSkills: vi.fn().mockResolvedValue([]),
  };
  const generatorConfig = { contextWindowSize };

  const stub = {
    getModel: vi.fn().mockReturnValue('test-model'),
    getContentGeneratorConfig: vi.fn().mockReturnValue(generatorConfig),
    getToolRegistry: vi.fn().mockReturnValue(toolRegistry),
    getUserMemory: vi.fn().mockReturnValue(''),
    getSkillManager: vi.fn().mockReturnValue(skillManager),
    getChatCompression: vi.fn().mockReturnValue(undefined),
  };

  // The stub implements only part of Config, hence the double cast.
  return stub as unknown as Config;
}

describe('collectContextData (contextCommand)', () => {
let getFunctionDeclarationsSpy: ReturnType<typeof vi.fn>;
let mockConfig: Config;

beforeEach(() => {
mockGetLastPromptTokenCount.mockReturnValue(0);
mockGetLastCachedContentTokenCount.mockReturnValue(0);
getFunctionDeclarationsSpy = vi.fn().mockReturnValue([]);
mockConfig = {
getModel: vi.fn().mockReturnValue('test-model'),
Expand Down Expand Up @@ -62,3 +93,76 @@ describe('collectContextData (contextCommand)', () => {
});
});
});

// Exercises collectContextData + formatContextUsageText against a 200K window.
// Expected ladder for that window, as asserted below:
//   warn 147,000 / auto 167,000 / hard 177,000 / effectiveWindow 180,000.
describe('/context shows three-tier thresholds', () => {
  const WINDOW_SIZE = 200_000;

  // Arrange step shared by every case: report `promptTokens` as the last
  // prompt size, then collect context data for a 200K-window config.
  const collectAt = (promptTokens: number) => {
    mockGetLastPromptTokenCount.mockReturnValue(promptTokens);
    return collectContextData(makeMockConfig(WINDOW_SIZE), false);
  };

  beforeEach(() => {
    mockGetLastPromptTokenCount.mockReturnValue(0);
    mockGetLastCachedContentTokenCount.mockReturnValue(0);
  });

  it('renders warn/auto/hard with the warn-tier marker when usage sits between warn and auto', async () => {
    // 150K lies between warn (147K) and auto (167K), so the tier is `warn`.
    const data = await collectAt(150_000);
    const text = formatContextUsageText(data);

    const expectedLines = [
      /Effective window:\s+180,000/,
      /Warn threshold:\s+147,000/,
      /Auto threshold:\s+167,000/,
      /Hard threshold:\s+177,000/,
      /Current tier:\s+warn/,
    ];
    for (const pattern of expectedLines) {
      expect(text).toMatch(pattern);
    }

    expect(data.breakdown.currentTier).toBe('warn');
    expect(data.breakdown.thresholds).toEqual({
      effectiveWindow: 180_000,
      warn: 147_000,
      auto: 167_000,
      hard: 177_000,
    });
  });

  it('classifies usage below the warn threshold as the safe tier', async () => {
    const data = await collectAt(50_000);
    const text = formatContextUsageText(data);

    expect(text).toMatch(/Current tier:\s+safe/);
    expect(data.breakdown.currentTier).toBe('safe');
  });

  it('classifies usage at or above the hard threshold as the hard tier', async () => {
    const data = await collectAt(180_000);
    expect(data.breakdown.currentTier).toBe('hard');
  });

  it('classifies usage between auto and hard as the auto tier', async () => {
    // 170K sits between auto (167K) and hard (177K).
    const data = await collectAt(170_000);
    expect(data.breakdown.currentTier).toBe('auto');

    const text = formatContextUsageText(data);
    expect(text).toMatch(/Current tier:\s+auto/);
  });

  it('treats no-API-data sessions as safe and omits the threshold section from text', async () => {
    // With lastPromptTokenCount = 0, collectContextData takes its estimated
    // branch and classifies against `rawOverhead` rather than apiTotalTokens.
    // The default fixtures keep rawOverhead far below `warn`, so the tier is
    // `safe`; heavier system-prompt / skill / MCP loads could land in
    // warn/auto/hard, which this case does not cover. The estimated path
    // renders a different layout, so formatContextUsageText must NOT emit
    // the "Compaction thresholds" section.
    const data = await collectAt(0);
    expect(data.breakdown.currentTier).toBe('safe');

    // The thresholds are still computed and exposed on the breakdown for
    // downstream consumers even though the text layout leaves them out.
    expect(data.breakdown.thresholds.auto).toBe(167_000);

    const text = formatContextUsageText(data);
    expect(text).not.toMatch(/Compaction thresholds/);
  });
});
Loading