QwenLM · LaZzyMan · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/docs/design/auto-compaction-threshold-redesign.md b/docs/design/auto-compaction-threshold-redesign.md
diff --git a/docs/plans/2026-05-14-auto-compaction-threshold-redesign.md b/docs/plans/2026-05-14-auto-compaction-threshold-redesign.md
diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md
@@ -144,7 +144,7 @@ Settings are organized into categories. Most settings should be placed within th
 | `model.name`                                       | string  | The Qwen model to use for conversations.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     | `undefined` |
 | `model.maxSessionTurns`                            | number  | Maximum number of user/model/tool turns to keep in a session. -1 means unlimited.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            | `-1`        |
 | `model.generationConfig`                           | object  | Advanced overrides passed to the underlying content generator. Supports request controls such as `timeout`, `maxRetries`, `enableCacheControl`, `splitToolMedia` (set `true` for strict OpenAI-compatible servers like LM Studio that reject non-text content on `role: "tool"` messages — splits media into a follow-up user message), `contextWindowSize` (override model's context window size), `modalities` (override auto-detected input modalities), `customHeaders` (custom HTTP headers for API requests), and `extra_body` (additional body parameters for OpenAI-compatible API requests only), along with fine-tuning knobs under `samplingParams` (for example `temperature`, `top_p`, `max_tokens`). Leave unset to rely on provider defaults. | `undefined` |
-| `model.chatCompression.contextPercentageThreshold` | number  | Sets the threshold for chat history compression as a percentage of the model's total token limit. This is a value between 0 and 1 that applies to both automatic compression and the manual `/compress` command. For example, a value of `0.6` will trigger compression when the chat history exceeds 60% of the token limit. Use `0` to disable compression entirely.                                                                                                                                                                                                                                                                                                                                                                                       | `0.7`       |
+| `model.chatCompression.contextPercentageThreshold` | number  | **REMOVED.** Auto-compaction now uses a three-tier threshold ladder (warn / auto / hard) computed internally from the model's context window via the `computeThresholds()` function — no longer user-configurable. Setting this field in `settings.json` will have no effect, and a one-line deprecation warning is emitted to stderr at startup. There is currently no replacement for "disable compression entirely" — reactive overflow recovery remains the safety net at the API layer if compression itself fails. (See PR #4345 / `docs/design/auto-compaction-threshold-redesign.md` for the redesign rationale.)                                                                                                                                    | `N/A`       |
 | `model.skipNextSpeakerCheck`                       | boolean | Skip the next speaker check.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 | `false`     |
 | `model.skipLoopDetection`                          | boolean | Disables loop detection checks. Loop detection prevents infinite loops in AI responses but can generate false positives that interrupt legitimate workflows. Enable this option if you experience frequent false positive loop detection interruptions.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      | `false`     |
 | `model.skipStartupContext`                         | boolean | Skips sending the startup workspace context (environment summary and acknowledgement) at the beginning of each session. Enable this if you prefer to provide context manually or want to save tokens on startup.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | `false`     |

diff --git a/packages/cli/src/services/tips/index.ts b/packages/cli/src/services/tips/index.ts
@@ -10,7 +10,6 @@ export { TipHistory } from './tipHistory.js';
 export { selectTip } from './tipScheduler.js';
 export {
   tipRegistry,
-  getContextUsagePercent,
   type ContextualTip,
   type TipContext,
   type TipTrigger,

diff --git a/packages/cli/src/services/tips/tipRegistry.test.ts b/packages/cli/src/services/tips/tipRegistry.test.ts
@@ -0,0 +1,92 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { tipRegistry, type TipContext } from './tipRegistry.js';
+
+const baseCtx: TipContext = {
+  lastPromptTokenCount: 0,
+  contextWindowSize: 200_000,
+  sessionPromptCount: 10,
+  sessionCount: 1,
+  platform: 'darwin',
+  thresholds: {
+    warn: 147_000,
+    auto: 167_000,
+    hard: 177_000,
+    effectiveWindow: 180_000,
+  },
+};
+
+function tipById(id: string) {
+  return tipRegistry.find((t) => t.id === id)!;
+}
+
+describe('context-* tip thresholds align with computeThresholds', () => {
+  it('compress-intro fires between warn and auto', () => {
+    const t = tipById('compress-intro');
+    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 100_000 })).toBe(
+      false,
+    );
+    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 150_000 })).toBe(
+      true,
+    );
+    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 168_000 })).toBe(
+      false,
+    );
+  });
+
+  it('context-high fires between auto and hard', () => {
+    const t = tipById('context-high');
+    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 150_000 })).toBe(
+      false,
+    );
+    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 170_000 })).toBe(
+      true,
+    );
+    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 178_000 })).toBe(
+      false,
+    );
+  });
+
+  it('context-critical fires at or above hard', () => {
+    const t = tipById('context-critical');
+    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 170_000 })).toBe(
+      false,
+    );
+    expect(t.isRelevant({ ...baseCtx, lastPromptTokenCount: 178_000 })).toBe(
+      true,
+    );
+  });
+
+  it('falls back gracefully when thresholds undefined (legacy callers)', () => {
+    const ctx = { ...baseCtx, thresholds: undefined };
+    // All three context-* tips return false when thresholds are missing
+    // (the comparison would be unsafe without them).
+    expect(tipById('compress-intro').isRelevant(ctx)).toBe(false);
+    expect(tipById('context-high').isRelevant(ctx)).toBe(false);
+    expect(tipById('context-critical').isRelevant(ctx)).toBe(false);
+  });
+
+  it('compress-intro additionally gates on sessionPromptCount > 5', () => {
+    const t = tipById('compress-intro');
+    // Above warn, below auto, but session is too new.
+    expect(
+      t.isRelevant({
+        ...baseCtx,
+        lastPromptTokenCount: 150_000,
+        sessionPromptCount: 3,
+      }),
+    ).toBe(false);
+    expect(
+      t.isRelevant({
+        ...baseCtx,
+        lastPromptTokenCount: 150_000,
+        sessionPromptCount: 6,
+      }),
+    ).toBe(true);
+  });
+});
diff --git a/packages/cli/src/services/tips/tipRegistry.ts b/packages/cli/src/services/tips/tipRegistry.ts
@@ -8,7 +8,7 @@
  * Contextual tip registry — defines tips, their conditions, and display rules.
  */
 
-import { DEFAULT_TOKEN_LIMIT } from '@qwen-code/qwen-code-core';
+import { type CompactionThresholds } from '@qwen-code/qwen-code-core';
 
 export type TipTrigger = 'startup' | 'post-response';
 
@@ -18,6 +18,12 @@ export interface TipContext {
   sessionPromptCount: number;
   sessionCount: number;
   platform: string;
+  /**
+   * Three-tier auto-compaction thresholds, computed by callers via
+   * `computeThresholds(contextWindowSize)`. Optional for backward compat;
+   * context-* tip checks return false when missing.
+   */
+  thresholds?: CompactionThresholds;
 }
 
 export interface ContextualTip {
@@ -29,41 +35,39 @@ export interface ContextualTip {
   priority: number;
 }
 
-export function getContextUsagePercent(ctx: TipContext): number {
-  const windowSize = ctx.contextWindowSize || DEFAULT_TOKEN_LIMIT;
-  return (ctx.lastPromptTokenCount / windowSize) * 100;
-}
-
 export const tipRegistry: ContextualTip[] = [
   // --- Post-response contextual tips (priority: higher = more urgent) ---
   {
     id: 'context-critical',
     content:
-      'Context is almost full! Run /compress now or start /new to continue.',
+      'Context near hard limit — auto-compact will force on next send. Consider /clear if you want to start fresh.',
     trigger: 'post-response',
-    isRelevant: (ctx) => getContextUsagePercent(ctx) >= 95,
+    isRelevant: (ctx) =>
+      ctx.thresholds !== undefined &&
+      ctx.lastPromptTokenCount >= ctx.thresholds.hard,
     cooldownPrompts: 3,
     priority: 100,
   },
   {
     id: 'context-high',
     content: 'Context is getting full. Use /compress to free up space.',
     trigger: 'post-response',
-    isRelevant: (ctx) => {
-      const pct = getContextUsagePercent(ctx);
-      return pct >= 80 && pct < 95;
-    },
+    isRelevant: (ctx) =>
+      ctx.thresholds !== undefined &&
+      ctx.lastPromptTokenCount >= ctx.thresholds.auto &&
+      ctx.lastPromptTokenCount < ctx.thresholds.hard,
     cooldownPrompts: 5,
     priority: 90,
   },
   {
     id: 'compress-intro',
     content: 'Long conversation? /compress summarizes history to free context.',
     trigger: 'post-response',
-    isRelevant: (ctx) => {
-      const pct = getContextUsagePercent(ctx);
-      return pct >= 50 && pct < 80 && ctx.sessionPromptCount > 5;
-    },
+    isRelevant: (ctx) =>
+      ctx.thresholds !== undefined &&
+      ctx.lastPromptTokenCount >= ctx.thresholds.warn &&
+      ctx.lastPromptTokenCount < ctx.thresholds.auto &&
+      ctx.sessionPromptCount > 5,
     cooldownPrompts: 10,
     priority: 50,
   },

diff --git a/packages/cli/src/ui/commands/contextCommand.test.ts b/packages/cli/src/ui/commands/contextCommand.test.ts
@@ -6,28 +6,59 @@
 
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import type { Config } from '@qwen-code/qwen-code-core';
-import { collectContextData } from './contextCommand.js';
+import {
+  collectContextData,
+  formatContextUsageText,
+} from './contextCommand.js';
 
 // uiTelemetryService is consumed inside collectContextData via the
 // re-export from core; mock it here so the function returns deterministic
-// numbers without needing a real session.
+// numbers without needing a real session. The mock fns live inside
+// vi.hoisted so they are available when vi.mock's factory runs (vi.mock
+// is hoisted above module-level const declarations).
+const { mockGetLastPromptTokenCount, mockGetLastCachedContentTokenCount } =
+  vi.hoisted(() => ({
+    mockGetLastPromptTokenCount: vi.fn().mockReturnValue(0),
+    mockGetLastCachedContentTokenCount: vi.fn().mockReturnValue(0),
+  }));
+
 vi.mock('@qwen-code/qwen-code-core', async (importOriginal) => {
   const original =
     await importOriginal<typeof import('@qwen-code/qwen-code-core')>();
   return {
     ...original,
     uiTelemetryService: {
-      getLastPromptTokenCount: vi.fn().mockReturnValue(0),
-      getLastCachedContentTokenCount: vi.fn().mockReturnValue(0),
+      getLastPromptTokenCount: mockGetLastPromptTokenCount,
+      getLastCachedContentTokenCount: mockGetLastCachedContentTokenCount,
     },
   };
 });
 
+function makeMockConfig(contextWindowSize = 32_000): Config {
+  return {
+    getModel: vi.fn().mockReturnValue('test-model'),
+    getContentGeneratorConfig: vi.fn().mockReturnValue({
+      contextWindowSize,
+    }),
+    getToolRegistry: vi.fn().mockReturnValue({
+      getAllTools: vi.fn().mockReturnValue([]),
+      getFunctionDeclarations: vi.fn().mockReturnValue([]),
+    }),
+    getUserMemory: vi.fn().mockReturnValue(''),
+    getSkillManager: vi.fn().mockReturnValue({
+      listSkills: vi.fn().mockResolvedValue([]),
+    }),
+    getChatCompression: vi.fn().mockReturnValue(undefined),
+  } as unknown as Config;
+}
+
 describe('collectContextData (contextCommand)', () => {
   let getFunctionDeclarationsSpy: ReturnType<typeof vi.fn>;
   let mockConfig: Config;
 
   beforeEach(() => {
+    mockGetLastPromptTokenCount.mockReturnValue(0);
+    mockGetLastCachedContentTokenCount.mockReturnValue(0);
     getFunctionDeclarationsSpy = vi.fn().mockReturnValue([]);
     mockConfig = {
       getModel: vi.fn().mockReturnValue('test-model'),
@@ -62,3 +93,76 @@ describe('collectContextData (contextCommand)', () => {
     });
   });
 });
+
+describe('/context shows three-tier thresholds', () => {
+  beforeEach(() => {
+    mockGetLastPromptTokenCount.mockReturnValue(0);
+    mockGetLastCachedContentTokenCount.mockReturnValue(0);
+  });
+
+  it('renders warn/auto/hard with the warn-tier marker when usage sits between warn and auto', async () => {
+    // 200K window. computeThresholds(200K) = {
+    //   warn: 147,000, auto: 167,000, hard: 177,000, effectiveWindow: 180,000
+    // }
+    // lastPromptTokenCount = 150K → between warn and auto → tier = warn.
+    mockGetLastPromptTokenCount.mockReturnValue(150_000);
+    const data = await collectContextData(makeMockConfig(200_000), false);
+    const text = formatContextUsageText(data);
+
+    expect(text).toMatch(/Effective window:\s+180,000/);
+    expect(text).toMatch(/Warn threshold:\s+147,000/);
+    expect(text).toMatch(/Auto threshold:\s+167,000/);
+    expect(text).toMatch(/Hard threshold:\s+177,000/);
+    expect(text).toMatch(/Current tier:\s+warn/);
+    expect(data.breakdown.currentTier).toBe('warn');
+    expect(data.breakdown.thresholds).toEqual({
+      effectiveWindow: 180_000,
+      warn: 147_000,
+      auto: 167_000,
+      hard: 177_000,
+    });
+  });
+
+  it('classifies usage below the warn threshold as the safe tier', async () => {
+    mockGetLastPromptTokenCount.mockReturnValue(50_000);
+    const data = await collectContextData(makeMockConfig(200_000), false);
+    const text = formatContextUsageText(data);
+
+    expect(text).toMatch(/Current tier:\s+safe/);
+    expect(data.breakdown.currentTier).toBe('safe');
+  });
+
+  it('classifies usage at or above the hard threshold as the hard tier', async () => {
+    mockGetLastPromptTokenCount.mockReturnValue(180_000);
+    const data = await collectContextData(makeMockConfig(200_000), false);
+    expect(data.breakdown.currentTier).toBe('hard');
+  });
+
+  it('classifies usage between auto and hard as the auto tier', async () => {
+    // 200K window — between 167K (auto) and 177K (hard) → tier = auto.
+    mockGetLastPromptTokenCount.mockReturnValue(170_000);
+    const data = await collectContextData(makeMockConfig(200_000), false);
+    expect(data.breakdown.currentTier).toBe('auto');
+    const text = formatContextUsageText(data);
+    expect(text).toMatch(/Current tier:\s+auto/);
+  });
+
+  it('treats no-API-data sessions as safe and omits the threshold section from text', async () => {
+    // lastPromptTokenCount = 0 → collectContextData uses the estimated branch
+    // (classifies against `rawOverhead`, not apiTotalTokens). With these
+    // default fixtures rawOverhead lands well below `warn`, so currentTier
+    // resolves to `safe`. On heavy system-prompt / skill / MCP loads the
+    // estimated branch can return warn/auto/hard — this test only covers
+    // the default-fixture safe case. formatContextUsageText must NOT emit
+    // the "Compaction thresholds" section because the estimated path
+    // renders a different layout.
+    mockGetLastPromptTokenCount.mockReturnValue(0);
+    const data = await collectContextData(makeMockConfig(200_000), false);
+    expect(data.breakdown.currentTier).toBe('safe');
+    // Thresholds are still computed and exposed on the breakdown for downstream
+    // consumers, even though the text layout suppresses them.
+    expect(data.breakdown.thresholds.auto).toBe(167_000);
+    const text = formatContextUsageText(data);
+    expect(text).not.toMatch(/Compaction thresholds/);
+  });
+});