diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index ec9be0ce3d..e618c5b142 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -67,6 +67,17 @@ type ToolSpanRecord = { * tests can assert success/error/cancelled values are forwarded correctly. */ endMetadata?: { success?: boolean; error?: string; cancelled?: boolean }; + /** Metadata passed to endToolBlockedOnUserSpan. */ + blockedMetadata?: { decision?: string; source?: string }; + /** Metadata passed to endHookSpan. */ + hookMetadata?: { + success?: boolean; + shouldProceed?: boolean; + shouldStop?: boolean; + blockType?: string; + hasAdditionalContext?: boolean; + error?: string; + }; }; const toolSpanRecords = vi.hoisted((): ToolSpanRecord[] => []); @@ -162,11 +173,63 @@ vi.mock('../telemetry/session-tracing.js', () => ({ span.ended = true; }, ), + startToolBlockedOnUserSpan: vi.fn( + (_toolSpan: unknown, attrs?: { tool_name?: string; call_id?: string }) => { + const extra: Record = {}; + if (attrs?.tool_name !== undefined) extra['tool.name'] = attrs.tool_name; + if (attrs?.call_id !== undefined) extra['tool.call_id'] = attrs.call_id; + return createMockToolSpan('tool.blocked_on_user', extra); + }, + ), + endToolBlockedOnUserSpan: vi.fn( + ( + span: ToolSpanRecord & ReturnType, + metadata?: { decision?: string; source?: string }, + ) => { + if (metadata) { + span.blockedMetadata = metadata; + } + span.ended = true; + }, + ), + startHookSpan: vi.fn( + (opts: { + hookEvent: string; + toolName: string; + toolUseId?: string; + isInterrupt?: boolean; + }) => { + const attrs: Record = { + hook_event: opts.hookEvent, + 'tool.name': opts.toolName, + }; + if (opts.toolUseId !== undefined) attrs['tool.use_id'] = opts.toolUseId; + if (opts.isInterrupt !== undefined) + attrs['is_interrupt'] = opts.isInterrupt; + return createMockToolSpan('hook', attrs); + }, + ), + endHookSpan: vi.fn( + ( + span: ToolSpanRecord & ReturnType, + metadata?: ToolSpanRecord['hookMetadata'], + ) => { + if (metadata) { + span.hookMetadata = metadata; + } + span.ended = true; + }, + ), startInteractionSpan: vi.fn(), endInteractionSpan: vi.fn(), startLLMRequestSpan: vi.fn(), endLLMRequestSpan: vi.fn(), clearSessionTracingForTesting: vi.fn(), + // truncateSpanError is exported from session-tracing and used in + // setToolSpanFailure to bound status messages. Wrap as a spy so a + // dedicated regression test can substitute a sentinel return value + // and verify setToolSpanFailure forwards it (#4321 review-6). + truncateSpanError: vi.fn((s: string): string => s), })); vi.mock('fs/promises', () => ({ @@ -2322,6 +2385,13 @@ describe('CoreToolScheduler request queueing', () => { const abortController = new AbortController(); + // toolSpanRecords accumulates across tests in this describe block. + // Snapshot before schedule() so the assertions below see only this + // test's records. + const blockedSpansBefore = toolSpanRecords.filter( + (r) => r.name === 'tool.blocked_on_user', + ).length; + // Schedule multiple tools that need confirmation const requests = [ { @@ -2378,6 +2448,26 @@ describe('CoreToolScheduler request queueing', () => { // Verify approval mode was changed expect(approvalMode).toBe(ApprovalMode.AUTO_EDIT); + + // #3731 Phase 2 / #4321 review: the first tool's blocked span ends as + // 'proceed_always' / cli; the two siblings auto-approved by + // autoApproveCompatiblePendingTools must end as + // 'auto_approved' / 'auto'. Slice from blockedSpansBefore so we see + // only the spans this test produced. + const blockedRecords = toolSpanRecords + .filter((r) => r.name === 'tool.blocked_on_user') + .slice(blockedSpansBefore); + expect(blockedRecords).toHaveLength(3); + const decisions = blockedRecords + .map((r) => r.blockedMetadata?.decision) + .sort(); + const sources = blockedRecords.map((r) => r.blockedMetadata?.source).sort(); + expect(decisions).toEqual([ + 'auto_approved', + 'auto_approved', + 'proceed_always', + ]); + expect(sources).toEqual(['auto', 'auto', 'cli']); }); }); @@ -3409,6 +3499,9 @@ describe('CoreToolScheduler telemetry spans', () => { expect(execute).not.toHaveBeenCalled(); expect(completedCalls[0].status).toBe('error'); + // This test exercises the actual PreToolUse hook deny path inside + // _executeToolCallBody — which is the only site that should still emit + // 'pre_hook_blocked' (#4321 review C-Critical). expectSanitizedFailure( spanRecord, 'Tool execution blocked by hook', @@ -3416,6 +3509,47 @@ describe('CoreToolScheduler telemetry spans', () => { ); }); + it('setToolSpanFailure forwards the truncateSpanError result to the span status (#4321)', async () => { + // Lock the integration: if a future change drops the + // truncateSpanError(message) call inside setToolSpanFailure, this + // test catches it. Substitute a sentinel return so the assertion + // doesn't depend on the utility's exact truncation behaviour + // (review-6 wenshao). + const sessionTracing = await import('../telemetry/session-tracing.js'); + const truncateSpy = vi.mocked(sessionTracing.truncateSpanError); + truncateSpy.mockImplementationOnce(() => '<>'); + + const messageBus = { + request: vi.fn().mockResolvedValue({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'pre-hook', + success: true, + output: { + decision: 'deny', + reason: 'truncate-me-pretty-please', + }, + }), + }; + + const { spanRecord } = await runSingleTool({ + messageBus, + disableHooks: false, + }); + + // setToolSpanFailure(span, kind, msg) → safeSetStatus({code: ERROR, + // message: truncateSpanError(msg)}). The mock returns the sentinel + // for that single call, so the span's status message must equal it. + const errorStatusCall = spanRecord.statusCalls.find( + (s) => s.code === SpanStatusCode.ERROR, + ); + expect(errorStatusCall?.message).toBe('<>'); + expect(truncateSpy).toHaveBeenCalled(); + + // Restore default identity behaviour so other tests aren't affected. + truncateSpy.mockReset(); + truncateSpy.mockImplementation((s) => s); + }); + it('marks post-hook stop with a sanitized failure kind', async () => { const messageBus = { request: vi @@ -3822,6 +3956,1335 @@ describe('CoreToolScheduler telemetry spans', () => { // status. cancelled stays falsy. expect(exec!.endMetadata?.cancelled).toBeFalsy(); }); + + // ------------------------------------------------------------------- + // #3731 Phase 2 — tool span lifecycle now spans validating → + // awaiting_approval → executing in one span; blocked_on_user is a child + // span; each hook fire site gets its own hook span. + // ------------------------------------------------------------------- + + function getToolSpans(): ToolSpanRecord[] { + return toolSpanRecords.filter((r) => r.name === 'tool.mockTool'); + } + function getBlockedSpans(): ToolSpanRecord[] { + return toolSpanRecords.filter((r) => r.name === 'tool.blocked_on_user'); + } + function getHookSpans(): ToolSpanRecord[] { + return toolSpanRecords.filter((r) => r.name === 'hook'); + } + + it('tool span is started in _schedule and ended even when pre-hook denies execution (#3731 Phase 2)', async () => { + const messageBus = { + request: vi.fn().mockResolvedValue({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'pre-hook', + success: true, + output: { decision: 'deny', reason: 'denied' }, + }), + }; + await runSingleTool({ messageBus, disableHooks: false }); + + const toolSpans = getToolSpans(); + expect(toolSpans).toHaveLength(1); + expect(toolSpans[0].ended).toBe(true); + // No execution sub-span — request didn't reach _executeToolCallBody. + expect(getExecutionSpan()).toBeUndefined(); + // No blocked span either — the deny path takes the permission_hook + // branch BEFORE awaiting_approval is set. + expect(getBlockedSpans()).toHaveLength(0); + }); + + it('blocked_on_user span ends with cancel when the user rejects (#3731 Phase 2)', async () => { + // Reuses MockEditTool — same setup as the existing edit-cancellation + // test in `CoreToolScheduler edit cancellation`, just instrumented for + // the new Phase 2 spans. + toolSpanRecords.length = 0; + const mockEditTool = new MockEditTool(); + const mockToolRegistry = { + getTool: () => mockEditTool, + ensureTool: async () => mockEditTool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => mockEditTool, + getToolByDisplayName: () => mockEditTool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.DEFAULT, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({ + model: 'test-model', + authType: 'gemini', + }), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + isInteractive: () => true, + getIdeMode: () => false, + getExperimentalZedIntegration: () => false, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getDisableAllHooks: vi.fn().mockReturnValue(true), + } as unknown as Config; + const onAllToolCallsComplete = vi.fn(); + const onToolCallsUpdate = vi.fn(); + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete, + onToolCallsUpdate, + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + await scheduler.schedule( + [ + { + callId: 'block-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-block', + }, + ], + new AbortController().signal, + ); + + // The blocked span is open while waiting for the user. + const blockedSpans = toolSpanRecords.filter( + (r) => r.name === 'tool.blocked_on_user', + ); + expect(blockedSpans).toHaveLength(1); + expect(blockedSpans[0].ended).toBe(false); + + const awaitingCall = (await waitForStatus( + onToolCallsUpdate, + 'awaiting_approval', + )) as WaitingToolCall; + await awaitingCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.Cancel, + ); + + // After cancel: blocked + tool spans both ended; decision/source recorded. + expect(blockedSpans[0].ended).toBe(true); + expect(blockedSpans[0].blockedMetadata?.decision).toBe('cancel'); + expect(blockedSpans[0].blockedMetadata?.source).toBe('cli'); + + const toolSpans = toolSpanRecords.filter( + (r) => r.name === 'tool.mockEditTool', + ); + expect(toolSpans).toHaveLength(1); + expect(toolSpans[0].ended).toBe(true); + + // #4321 review: the awaiting_approval phase produces exactly one + // blocked_on_user span across the lifecycle. ModifyWithEditor's + // intentional invariant is the same — re-entering awaiting_approval + // must NOT spawn a second span. This assertion guards against a + // future refactor that re-starts the blocked span on each transition. + expect(blockedSpans).toHaveLength(1); + }); + + it('hook span records shouldProceed=false / blockType=denied when pre-hook blocks (#3731 Phase 2)', async () => { + const messageBus = { + request: vi.fn().mockResolvedValue({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'pre-hook', + success: true, + output: { decision: 'block', reason: 'denied' }, + }), + }; + await runSingleTool({ messageBus, disableHooks: false }); + + // The PreToolUse hook span is the only one fired in this path. + const hookSpans = getHookSpans(); + expect(hookSpans).toHaveLength(1); + expect(hookSpans[0].attributes['hook_event']).toBe('PreToolUse'); + expect(hookSpans[0].hookMetadata?.success).toBe(true); + expect(hookSpans[0].hookMetadata?.shouldProceed).toBe(false); + expect(hookSpans[0].hookMetadata?.blockType).toBe('denied'); + }); + + it('hook span records error when underlying hook helper surfaces hookError (#4321)', async () => { + // Runner-layer failure (URL validation, fn exception, etc) shows up + // as response.success: false with response.error populated. Our + // helpers now forward response.error into hookError; withHookSpan's + // toEndMeta callbacks must produce { success: false, error } so + // operators see the failure in telemetry instead of a fake "allow". + const messageBus = { + request: vi.fn().mockResolvedValue({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'pre-hook', + success: false, + error: new Error('URL validation failed: hooks-server unreachable'), + }), + }; + await runSingleTool({ messageBus, disableHooks: false }); + + // shouldProceed defaults to true on hookError, so the tool runs and + // a PostToolUse hook span fires too. The PreToolUse one is the one + // we care about — it must report failure + the actual error. + const preHookSpan = getHookSpans().find( + (s) => s.attributes['hook_event'] === 'PreToolUse', + ); + expect(preHookSpan).toBeDefined(); + expect(preHookSpan!.hookMetadata?.success).toBe(false); + expect(preHookSpan!.hookMetadata?.error).toBe( + 'URL validation failed: hooks-server unreachable', + ); + }); + + it('hook span records shouldStop=true when post-hook stops execution (#3731 Phase 2)', async () => { + // Hook protocol: continue:false + stopReason on the post-hook response + // is what the production code maps to shouldStop=true. + const messageBus = { + request: vi + .fn() + .mockResolvedValueOnce({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'pre-hook', + success: true, + output: { decision: 'allow' }, + }) + .mockResolvedValueOnce({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'post-hook', + success: true, + output: { + decision: 'allow', + continue: false, + stopReason: 'stop reason', + }, + }), + }; + await runSingleTool({ messageBus, disableHooks: false }); + + const postHookSpan = getHookSpans().find( + (s) => s.attributes['hook_event'] === 'PostToolUse', + ); + expect(postHookSpan).toBeDefined(); + expect(postHookSpan!.hookMetadata?.shouldStop).toBe(true); + expect(postHookSpan!.hookMetadata?.blockType).toBe('stop'); + }); + + it('PostToolUseFailure hook span records is_interrupt=true on user-abort path (#4321)', async () => { + // _executeToolCallBody catch fires PostToolUseFailure with + // isInterrupt:true when the abort signal is set. Operators rely on + // is_interrupt to separate user-initiated cancellations from real + // exceptions in dashboards — assert the hook span carries the + // correct value. + toolSpanRecords.length = 0; + const abortController = new AbortController(); + const messageBus = { + request: vi.fn(async (req: { eventName: string }) => ({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'fail-hook', + success: true, + output: req.eventName === 'PreToolUse' ? { decision: 'allow' } : {}, + })), + }; + await runSingleTool({ + abortController, + messageBus, + disableHooks: false, + execute: vi.fn().mockImplementation(async () => { + abortController.abort(); + throw new Error('aborted'); + }), + }); + + const failureHookSpan = getHookSpans().find( + (s) => s.attributes['hook_event'] === 'PostToolUseFailure', + ); + expect(failureHookSpan).toBeDefined(); + expect(failureHookSpan!.attributes['is_interrupt']).toBe(true); + expect(failureHookSpan!.hookMetadata?.success).toBe(true); + }); + + it('PostToolUseFailure hook span records is_interrupt=false on real exception path (#4321)', async () => { + // Companion to the abort test — same hook event but the + // executeError-not-from-abort branch tags is_interrupt:false. A + // copy-paste regression flipping the flag would be invisible + // without this assertion. + toolSpanRecords.length = 0; + const messageBus = { + request: vi.fn(async (req: { eventName: string }) => ({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'fail-hook', + success: true, + output: req.eventName === 'PreToolUse' ? { decision: 'allow' } : {}, + })), + }; + await runSingleTool({ + messageBus, + disableHooks: false, + execute: vi.fn().mockRejectedValue(new Error('real boom')), + }); + + const failureHookSpan = getHookSpans().find( + (s) => s.attributes['hook_event'] === 'PostToolUseFailure', + ); + expect(failureHookSpan).toBeDefined(); + expect(failureHookSpan!.attributes['is_interrupt']).toBe(false); + expect(failureHookSpan!.hookMetadata?.success).toBe(true); + }); + + it('every span recorded in a successful tool call is ended (#3731 Phase 2)', async () => { + // Leak guard: every span we record should be ended by the time + // schedule() returns. If a future change forgets to finalize a tool + // span on some terminal path, this assertion catches it. + await runSingleTool(); + + const lifecycleSpans = toolSpanRecords.filter( + (r) => + r.name === 'tool.mockTool' || + r.name === 'tool.execution' || + r.name === 'tool.blocked_on_user' || + r.name === 'hook', + ); + expect(lifecycleSpans.length).toBeGreaterThan(0); + for (const span of lifecycleSpans) { + expect(span.ended).toBe(true); + } + }); + + // ------------------------------------------------------------------- + // #4321 follow-up review tests — three behaviors introduced by the + // 6767469b2 follow-up that were not previously asserted. + // ------------------------------------------------------------------- + + /** + * Build a scheduler around a single MockEditTool that requires + * approval. Used by the awaiting_approval-flow tests below. + */ + function buildApprovalScheduler(overrides: { getIdeMode?: () => boolean }): { + scheduler: CoreToolScheduler; + onToolCallsUpdate: ReturnType; + } { + const mockEditTool = new MockEditTool(); + const mockToolRegistry = { + getTool: () => mockEditTool, + ensureTool: async () => mockEditTool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => mockEditTool, + getToolByDisplayName: () => mockEditTool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.DEFAULT, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({ + model: 'test-model', + authType: 'gemini', + }), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + isInteractive: () => true, + getIdeMode: overrides.getIdeMode ?? (() => false), + getExperimentalZedIntegration: () => false, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getDisableAllHooks: vi.fn().mockReturnValue(true), + } as unknown as Config; + const onToolCallsUpdate = vi.fn(); + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete: vi.fn(), + onToolCallsUpdate, + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + return { scheduler, onToolCallsUpdate }; + } + + it('blocked_on_user span ends with decision=error when getConfirmationDetails throws (#4321)', async () => { + // Trigger _schedule's outer catch (line ~1711) by making + // getConfirmationDetails throw. The blocked span hasn't been started + // yet at the catch point — the span only opens AFTER setStatusInternal + // 'awaiting_approval' which never runs in this path. So the outer + // finalizeBlockedSpan('error', 'system') call is a no-op. Assert the + // tool span still ends correctly. + toolSpanRecords.length = 0; + const declarativeTool = new StructuredErrorOnConfirmationTool( + ToolErrorType.EDIT_REQUIRES_PRIOR_READ, + ); + const mockToolRegistry = { + getTool: () => declarativeTool, + ensureTool: async () => declarativeTool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => declarativeTool, + getToolByDisplayName: () => declarativeTool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.DEFAULT, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({ + model: 'test-model', + authType: 'gemini', + }), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getTruncateToolOutputThreshold: () => + DEFAULT_TRUNCATE_TOOL_OUTPUT_THRESHOLD, + getTruncateToolOutputLines: () => DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + getChatRecordingService: () => undefined, + isInteractive: () => true, + getIdeMode: () => false, + getExperimentalZedIntegration: () => false, + getMessageBus: vi.fn().mockReturnValue(undefined), + getDisableAllHooks: vi.fn().mockReturnValue(true), + } as unknown as Config; + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete: vi.fn(), + onToolCallsUpdate: vi.fn(), + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + + await scheduler.schedule( + [ + { + callId: 'err-1', + name: 'structuredErrorOnConfirmationTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-err', + }, + ], + new AbortController().signal, + ); + + // Tool span exists and ended; no blocked span ever opened (the throw + // happens before setStatusInternal awaiting_approval). + const toolSpans = toolSpanRecords.filter( + (r) => r.name === 'tool.structuredErrorOnConfirmationTool', + ); + expect(toolSpans).toHaveLength(1); + expect(toolSpans[0].ended).toBe(true); + expect( + toolSpanRecords.filter((r) => r.name === 'tool.blocked_on_user'), + ).toHaveLength(0); + }); + + it('blocked_on_user span source=ide when getIdeMode returns true (#4321)', async () => { + toolSpanRecords.length = 0; + const { scheduler, onToolCallsUpdate } = buildApprovalScheduler({ + getIdeMode: () => true, + }); + await scheduler.schedule( + [ + { + callId: 'ide-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-ide', + }, + ], + new AbortController().signal, + ); + + const awaitingCall = (await waitForStatus( + onToolCallsUpdate, + 'awaiting_approval', + )) as WaitingToolCall; + await awaitingCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.Cancel, + ); + + const blockedSpan = toolSpanRecords.find( + (r) => r.name === 'tool.blocked_on_user', + ); + expect(blockedSpan?.blockedMetadata?.decision).toBe('cancel'); + // Key assertion: getBlockedSource() honored getIdeMode -> 'ide'. + expect(blockedSpan?.blockedMetadata?.source).toBe('ide'); + }); + + it('explicit Cancel takes precedence over signal.aborted in decision label (#4321)', async () => { + toolSpanRecords.length = 0; + const abortController = new AbortController(); + const { scheduler, onToolCallsUpdate } = buildApprovalScheduler({}); + await scheduler.schedule( + [ + { + callId: 'cancel-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-cancel', + }, + ], + abortController.signal, + ); + + const awaitingCall = (await waitForStatus( + onToolCallsUpdate, + 'awaiting_approval', + )) as WaitingToolCall; + + // Abort the signal AND pass Cancel as outcome — both conditions true. + abortController.abort(); + await awaitingCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.Cancel, + ); + + const blockedSpan = toolSpanRecords.find( + (r) => r.name === 'tool.blocked_on_user', + ); + // Pre-fix this would have been 'aborted' / 'system'. The fix flips + // precedence so an explicit user Cancel always wins. + expect(blockedSpan?.blockedMetadata?.decision).toBe('cancel'); + expect(blockedSpan?.blockedMetadata?.source).toBe('cli'); + }); + + it('blocked_on_user span ends with decision=proceed_once on single ProceedOnce confirmation (#4321)', async () => { + // ProceedOnce is the most common user interaction; previously only + // 'cancel' and 'proceed_always' (auto-approve) had decision-label + // assertions. Cover the gap so swapping or dropping the decision + // label for one-off approvals is caught. + toolSpanRecords.length = 0; + const { scheduler, onToolCallsUpdate } = buildApprovalScheduler({}); + await scheduler.schedule( + [ + { + callId: 'proceed-once-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-proceed-once', + }, + ], + new AbortController().signal, + ); + + const awaitingCall = (await waitForStatus( + onToolCallsUpdate, + 'awaiting_approval', + )) as WaitingToolCall; + await awaitingCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.ProceedOnce, + ); + + const blockedSpan = toolSpanRecords.find( + (r) => r.name === 'tool.blocked_on_user', + ); + expect(blockedSpan?.blockedMetadata?.decision).toBe('proceed_once'); + expect(blockedSpan?.blockedMetadata?.source).toBe('cli'); + }); + + it('handleConfirmationResponse outer catch finalizes spans + rethrows when originalOnConfirm throws (#4321)', async () => { + // Defensive error-recovery path added by this PR: if anything inside + // _handleConfirmationResponseInner throws (originalOnConfirm, + // modifyWithEditor, _applyInlineModify, attemptExecutionOfScheduledCalls), + // both spans must be finalized and the error rethrown — otherwise + // operators see a leak until the 30-min TTL. + toolSpanRecords.length = 0; + const { scheduler, onToolCallsUpdate } = buildApprovalScheduler({}); + await scheduler.schedule( + [ + { + callId: 'rethrow-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-rethrow', + }, + ], + new AbortController().signal, + ); + + // Wait until the call is awaiting_approval — both blocked + tool spans + // are in the scheduler's Maps at this point. + await waitForStatus(onToolCallsUpdate, 'awaiting_approval'); + + // Call handleConfirmationResponse DIRECTLY with a throwing + // originalOnConfirm. The outer catch in handleConfirmationResponse + // is the only thing protecting both spans from leaking. + const boom = new Error('originalOnConfirm boom'); + const throwingOnConfirm = async () => { + throw boom; + }; + await expect( + scheduler.handleConfirmationResponse( + 'rethrow-1', + throwingOnConfirm, + ToolConfirmationOutcome.ProceedOnce, + new AbortController().signal, + ), + ).rejects.toBe(boom); + + // Blocked span finalized as 'error' / 'system'. + const blockedSpan = toolSpanRecords.find( + (r) => r.name === 'tool.blocked_on_user', + ); + expect(blockedSpan?.ended).toBe(true); + expect(blockedSpan?.blockedMetadata?.decision).toBe('error'); + expect(blockedSpan?.blockedMetadata?.source).toBe('system'); + + // Tool span finalized with TOOL_FAILURE_KIND_TOOL_EXCEPTION. + const toolSpan = toolSpanRecords.find( + (r) => r.name === 'tool.mockEditTool', + ); + expect(toolSpan?.ended).toBe(true); + expect(toolSpan?.spanAttributes['tool.failure_kind']).toBe( + 'tool_exception', + ); + }); + + it('PM hard-deny path emits failure_kind=permission_denied (#4321)', async () => { + // _schedule line ~1444: finalPermission === 'deny' branch sets the + // span failure with the PERMISSION_DENIED kind. Without test + // coverage, dropping setToolSpanFailure on this branch would + // silently lose the failure_kind attribution. + toolSpanRecords.length = 0; + class HardDenyTool extends BaseDeclarativeTool< + Record, + ToolResult + > { + constructor() { + super('hardDenyTool', 'hardDenyTool', 'Always deny', Kind.Other, {}); + } + protected createInvocation(params: Record) { + return new (class extends BaseToolInvocation< + Record, + ToolResult + > { + getDescription() { + return 'deny'; + } + override async getDefaultPermission(): Promise { + return 'deny'; + } + async execute(): Promise { + return { llmContent: '', returnDisplay: '' }; + } + })(params); + } + } + const tool = new HardDenyTool(); + const mockToolRegistry = { + getTool: () => tool, + ensureTool: async () => tool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => tool, + getToolByDisplayName: () => tool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.DEFAULT, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({}), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + isInteractive: () => true, + getIdeMode: () => false, + getExperimentalZedIntegration: () => false, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getDisableAllHooks: vi.fn().mockReturnValue(true), + } as unknown as Config; + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete: vi.fn(), + onToolCallsUpdate: vi.fn(), + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + await scheduler.schedule( + [ + { + callId: 'deny-1', + name: 'hardDenyTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-deny', + }, + ], + new AbortController().signal, + ); + + const toolSpan = toolSpanRecords.find( + (r) => r.name === 'tool.hardDenyTool', + ); + expect(toolSpan?.ended).toBe(true); + expect(toolSpan?.spanAttributes['tool.failure_kind']).toBe( + 'permission_denied', + ); + }); + + it('non-interactive deny path emits failure_kind=non_interactive_denied (#4321)', async () => { + // _schedule line ~1532: when the tool needs confirmation but + // isInteractive() is false (and not zed/streaming-json), the + // scheduler auto-denies and tags failure_kind=non_interactive_denied. + toolSpanRecords.length = 0; + const tool = new MockEditTool(); + const mockToolRegistry = { + getTool: () => tool, + ensureTool: async () => tool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => tool, + getToolByDisplayName: () => tool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.DEFAULT, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({}), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + isInteractive: () => false, // forces non-interactive deny path + getInputFormat: () => undefined, + getIdeMode: () => false, + getExperimentalZedIntegration: () => false, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getDisableAllHooks: vi.fn().mockReturnValue(true), + } as unknown as Config; + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete: vi.fn(), + onToolCallsUpdate: vi.fn(), + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + await scheduler.schedule( + [ + { + callId: 'noninteractive-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-noninteractive', + }, + ], + new AbortController().signal, + ); + + const toolSpan = toolSpanRecords.find( + (r) => r.name === 'tool.mockEditTool', + ); + expect(toolSpan?.ended).toBe(true); + expect(toolSpan?.spanAttributes['tool.failure_kind']).toBe( + 'non_interactive_denied', + ); + }); + + it('PermissionRequest hook deny path emits failure_kind=permission_hook_denied (#4321)', async () => { + // _schedule line ~1683: when firePermissionRequestHook returns + // hasDecision=true with shouldAllow=false, the scheduler tags the + // span with permission_hook_denied. Without this regression test, + // dropping setToolSpanFailure on this branch would silently lose + // hook-denial attribution for operators. + toolSpanRecords.length = 0; + const tool = new MockEditTool(); + const mockToolRegistry = { + getTool: () => tool, + ensureTool: async () => tool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => tool, + getToolByDisplayName: () => tool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + const messageBus = { + request: vi.fn().mockResolvedValue({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'permission-request', + success: true, + output: { + hookSpecificOutput: { + decision: { behavior: 'deny', message: 'policy says no' }, + }, + }, + }), + }; + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.DEFAULT, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({}), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + isInteractive: () => true, + getIdeMode: () => false, + getExperimentalZedIntegration: () => false, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(messageBus), + getDisableAllHooks: vi.fn().mockReturnValue(false), + } as unknown as Config; + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete: vi.fn(), + onToolCallsUpdate: vi.fn(), + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + await scheduler.schedule( + [ + { + callId: 'permhook-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-permhook', + }, + ], + new AbortController().signal, + ); + + const toolSpan = toolSpanRecords.find( + (r) => r.name === 'tool.mockEditTool', + ); + expect(toolSpan?.ended).toBe(true); + expect(toolSpan?.spanAttributes['tool.failure_kind']).toBe( + 'permission_hook_denied', + ); + }); + + it('background-agent auto-deny emits failure_kind=background_agent_denied (#4321)', async () => { + // _schedule line ~1697: getShouldAvoidPermissionPrompts() === true + // forces an auto-deny because background agents have no UI to prompt + // on. This branch is otherwise untested — a regression dropping the + // setToolSpanFailure call would silently lose attribution for a key + // deployment mode. + toolSpanRecords.length = 0; + const tool = new MockEditTool(); + const mockToolRegistry = { + getTool: () => tool, + ensureTool: async () => tool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => tool, + getToolByDisplayName: () => tool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.DEFAULT, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({}), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + isInteractive: () => true, + getIdeMode: () => false, + getExperimentalZedIntegration: () => false, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getDisableAllHooks: vi.fn().mockReturnValue(true), + getShouldAvoidPermissionPrompts: vi.fn().mockReturnValue(true), + } as unknown as Config; + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete: vi.fn(), + onToolCallsUpdate: vi.fn(), + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + await scheduler.schedule( + [ + { + callId: 'bgagent-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-bgagent', + }, + ], + new AbortController().signal, + ); + + const toolSpan = toolSpanRecords.find( + (r) => r.name === 'tool.mockEditTool', + ); + expect(toolSpan?.ended).toBe(true); + expect(toolSpan?.spanAttributes['tool.failure_kind']).toBe( + 'background_agent_denied', + ); + }); + + it('signal.abort drains scheduler-local toolSpans + blockedSpans Maps (#4321)', async () => { + // The 30-min TTL in session-tracing.ts ends underlying spans but + // cannot reach the scheduler-local toolSpans/blockedSpans Maps. If + // the signal aborts while a tool is awaiting_approval (user walked + // away, session abort), the per-batch listener registered in + // _schedule must drain both Maps so they don't grow unbounded. + toolSpanRecords.length = 0; + const { scheduler, onToolCallsUpdate } = buildApprovalScheduler({}); + const abortController = new AbortController(); + await scheduler.schedule( + [ + { + callId: 'abort-drain-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-abort-drain', + }, + ], + abortController.signal, + ); + + // Wait until the call is awaiting_approval — both Maps populated. + await waitForStatus(onToolCallsUpdate, 'awaiting_approval'); + expect( + (scheduler as unknown as { toolSpans: Map }).toolSpans + .size, + ).toBe(1); + expect( + (scheduler as unknown as { blockedSpans: Map }) + .blockedSpans.size, + ).toBe(1); + + // Abort the signal — the listener registered in _schedule schedules + // the drain via setTimeout(0). Flush macrotasks so it runs before + // assertions. + abortController.abort(); + await new Promise((resolve) => setTimeout(resolve, 0)); + + expect( + (scheduler as unknown as { toolSpans: Map }).toolSpans + .size, + ).toBe(0); + expect( + (scheduler as unknown as { blockedSpans: Map }) + .blockedSpans.size, + ).toBe(0); + + const blockedSpan = toolSpanRecords.find( + (r) => r.name === 'tool.blocked_on_user', + ); + expect(blockedSpan?.ended).toBe(true); + expect(blockedSpan?.blockedMetadata?.decision).toBe('aborted'); + expect(blockedSpan?.blockedMetadata?.source).toBe('system'); + + const toolSpan = toolSpanRecords.find( + (r) => r.name === 'tool.mockEditTool', + ); + expect(toolSpan?.ended).toBe(true); + }); + + it('plan-mode block emits failure_kind=plan_mode_blocked (#4321)', async () => { + // _schedule line ~1599: plan mode blocks non-read-only confirmation + // tools. Without a regression test, dropping setToolSpanFailure or + // finalizeToolSpan on this branch would silently leak spans or + // lose attribution. + toolSpanRecords.length = 0; + const tool = new MockEditTool(); + const mockToolRegistry = { + getTool: () => tool, + ensureTool: async () => tool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => tool, + getToolByDisplayName: () => tool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.PLAN, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({}), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + isInteractive: () => true, + getIdeMode: () => false, + getExperimentalZedIntegration: () => false, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getDisableAllHooks: vi.fn().mockReturnValue(true), + } as unknown as Config; + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete: vi.fn(), + onToolCallsUpdate: vi.fn(), + getPreferredEditor: () => 'vscode', + onEditorClose: vi.fn(), + }); + await scheduler.schedule( + [ + { + callId: 'plan-block-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-plan-block', + }, + ], + new AbortController().signal, + ); + + const toolSpan = toolSpanRecords.find( + (r) => r.name === 'tool.mockEditTool', + ); + expect(toolSpan?.ended).toBe(true); + expect(toolSpan?.spanAttributes['tool.failure_kind']).toBe( + 'plan_mode_blocked', + ); + }); + + it('pre-aborted signal: tool span ends without entering execution (#4321)', async () => { + // _schedule line ~1487 early-exit when signal.aborted is true at the + // start of the for-loop. setToolSpanCancelled + finalizeToolSpan + // here are otherwise untested — a regression dropping either would + // leak the span or land it in ERROR rather than UNSET. + toolSpanRecords.length = 0; + const execute = vi + .fn() + .mockResolvedValue({ llmContent: 'ok', returnDisplay: 'ok' }); + const abortController = new AbortController(); + abortController.abort(); + await runSingleTool({ execute, abortController }); + + expect(execute).not.toHaveBeenCalled(); + const toolSpan = toolSpanRecords.findLast( + (r) => r.name === 'tool.mockTool', + ); + expect(toolSpan?.ended).toBe(true); + // setToolSpanCancelled records UNSET status — distinguishes from + // setToolSpanFailure paths which would land ERROR. + expect(toolSpan?.statusCalls).toEqual([{ code: SpanStatusCode.UNSET }]); + }); + + it('signal.abort during awaiting_approval: blocked span ends with aborted/system (#4321)', async () => { + // Companion to "signal.abort drains scheduler-local Maps" — that test + // covers tool span cancellation; this one specifically asserts the + // blocked_on_user decision label/source for the same drain path so + // dashboards filtering on `decision: 'aborted'` are guarded. + toolSpanRecords.length = 0; + const { scheduler, onToolCallsUpdate } = buildApprovalScheduler({}); + const abortController = new AbortController(); + await scheduler.schedule( + [ + { + callId: 'aborted-decision-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-aborted-decision', + }, + ], + abortController.signal, + ); + + await waitForStatus(onToolCallsUpdate, 'awaiting_approval'); + abortController.abort(); + await new Promise((resolve) => setTimeout(resolve, 0)); + + const blockedSpan = toolSpanRecords.find( + (r) => r.name === 'tool.blocked_on_user', + ); + expect(blockedSpan?.ended).toBe(true); + expect(blockedSpan?.blockedMetadata?.decision).toBe('aborted'); + expect(blockedSpan?.blockedMetadata?.source).toBe('system'); + }); + + it('handleConfirmationResponse outer catch routes aborted-signal throw to aborted/system (#4321)', async () => { + // Companion to the existing rethrow test — covers the OTHER branch + // of the catch, where signal.aborted is true at throw time. Without + // this assertion, dropping the abort branch would silently + // misattribute the throw as 'error'/'tool_exception'. + toolSpanRecords.length = 0; + const { scheduler, onToolCallsUpdate } = buildApprovalScheduler({}); + const abortController = new AbortController(); + await scheduler.schedule( + [ + { + callId: 'rethrow-aborted-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-rethrow-aborted', + }, + ], + abortController.signal, + ); + + await waitForStatus(onToolCallsUpdate, 'awaiting_approval'); + + abortController.abort(); + const boom = new Error('originalOnConfirm boom while aborted'); + const throwingOnConfirm = async () => { + throw boom; + }; + await expect( + scheduler.handleConfirmationResponse( + 'rethrow-aborted-1', + throwingOnConfirm, + ToolConfirmationOutcome.ProceedOnce, + abortController.signal, + ), + ).rejects.toBe(boom); + + const blockedSpan = toolSpanRecords.find( + (r) => r.name === 'tool.blocked_on_user', + ); + expect(blockedSpan?.blockedMetadata?.decision).toBe('aborted'); + expect(blockedSpan?.blockedMetadata?.source).toBe('system'); + // Tool span lands UNSET (setToolSpanCancelled), failure_kind is the + // cancelled-marker rather than tool_exception. + const toolSpan = toolSpanRecords.find( + (r) => r.name === 'tool.mockEditTool', + ); + expect(toolSpan?.statusCalls).toContainEqual({ + code: SpanStatusCode.UNSET, + }); + expect(toolSpan?.spanAttributes['tool.failure_kind']).toBe('cancelled'); + }); + + it('ModifyWithEditor !editorType stamps modify_with_editor_unavailable on tool span (#4321)', async () => { + // The bail-out path warns to debug logs; the telemetry attribute + // is the production-visible signal. Assert it's set on the live + // tool span when the editor is unavailable, and that the tool + // remains in awaiting_approval (no premature finalize). + // + // The branch only fires if the tool implements + // ModifiableDeclarativeTool (`getModifyContext` member). Wrap the + // existing MockEditTool with a `getModifyContext` shim so the + // scheduler's `isModifiableDeclarativeTool` check passes. + toolSpanRecords.length = 0; + const mockEditTool = Object.assign(new MockEditTool(), { + getModifyContext: () => ({ + getFilePath: () => '/tmp/test.txt', + getCurrentContent: async () => 'old', + getProposedContent: async () => 'new', + createUpdatedParams: () => ({}), + }), + }); + const mockToolRegistry = { + getTool: () => mockEditTool, + ensureTool: async () => mockEditTool, + getFunctionDeclarations: () => [], + tools: new Map(), + discovery: {}, + registerTool: () => {}, + getToolByName: () => mockEditTool, + getToolByDisplayName: () => mockEditTool, + getTools: () => [], + discoverTools: async () => {}, + getAllTools: () => [], + getToolsByServer: () => [], + } as unknown as ToolRegistry; + const mockConfig = { + getSessionId: () => 'test-session-id', + getUsageStatisticsEnabled: () => true, + getDebugMode: () => false, + getApprovalMode: () => ApprovalMode.DEFAULT, + getPermissionsAllow: () => [], + getContentGeneratorConfig: () => ({}), + getShellExecutionConfig: () => ({ + terminalWidth: 90, + terminalHeight: 30, + }), + storage: { getProjectTempDir: () => '/tmp' }, + getToolRegistry: () => mockToolRegistry, + getUseModelRouter: () => false, + getGeminiClient: () => null, + isInteractive: () => true, + getIdeMode: () => false, + getExperimentalZedIntegration: () => false, + getChatRecordingService: () => undefined, + getMessageBus: vi.fn().mockReturnValue(undefined), + getDisableAllHooks: vi.fn().mockReturnValue(true), + } as unknown as Config; + const onToolCallsUpdate = vi.fn(); + const scheduler = new CoreToolScheduler({ + config: mockConfig, + onAllToolCallsComplete: vi.fn(), + onToolCallsUpdate, + // No editor configured. + getPreferredEditor: () => undefined, + onEditorClose: vi.fn(), + }); + + await scheduler.schedule( + [ + { + callId: 'modify-no-editor-1', + name: 'mockEditTool', + args: {}, + isClientInitiated: false, + prompt_id: 'prompt-modify-no-editor', + }, + ], + new AbortController().signal, + ); + + const awaitingCall = (await waitForStatus( + onToolCallsUpdate, + 'awaiting_approval', + )) as WaitingToolCall; + await awaitingCall.confirmationDetails.onConfirm( + ToolConfirmationOutcome.ModifyWithEditor, + ); + + const toolSpan = toolSpanRecords.find( + (r) => r.name === 'tool.mockEditTool', + ); + expect( + toolSpan?.spanAttributes['qwen-code.tool.modify_with_editor_unavailable'], + ).toBe(true); + // Span stays open — user can recover via Cancel/Proceed. + expect(toolSpan?.ended).toBe(false); + }); + + it('per-batch abort listener removed when batch fully drains synchronously (#4321)', async () => { + // Long-running sessions reuse the same AbortSignal across many + // _schedule calls. The release-on-finalize hook in + // releaseBatchListenerIfDrained must drop the listener once the + // last live batch entry drains, otherwise listeners accumulate + // and Node.js trips MaxListenersExceededWarning. Use Node's + // EventEmitter API surface on AbortSignal to count listeners. + toolSpanRecords.length = 0; + const { scheduler } = buildScheduler({}); + const abortController = new AbortController(); + const listenersBefore = ( + abortController.signal as unknown as { + listenerCount?: (e: string) => number; + } + ).listenerCount?.('abort'); + await scheduler.schedule( + [ + { + callId: 'listener-drain-1', + name: 'mockTool', + args: { input: 'ok' }, + isClientInitiated: false, + prompt_id: 'prompt-listener-drain', + }, + ], + abortController.signal, + ); + + // Tool ran fully synchronously (auto-approved), so its tool span + // finalized inside _schedule → releaseBatchListenerIfDrained ran. + const listenersAfter = ( + abortController.signal as unknown as { + listenerCount?: (e: string) => number; + } + ).listenerCount?.('abort'); + if (listenersBefore !== undefined && listenersAfter !== undefined) { + expect(listenersAfter).toBe(listenersBefore); + } + // Map drain side-assertion: callIdToBatch must be empty too. + expect( + ( + scheduler as unknown as { + callIdToBatch: Map; + } + ).callIdToBatch.size, + ).toBe(0); + }); }); // Integration tests for the fire* functions diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index 2138a84c91..dc6ecce2b9 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -83,8 +83,17 @@ import { runInToolSpanContext, startToolExecutionSpan, endToolExecutionSpan, + startToolBlockedOnUserSpan, + endToolBlockedOnUserSpan, + startHookSpan, + endHookSpan, addToolInputAttributes, addToolResultAttributes, + truncateSpanError, + type ToolBlockedDecision, + type ToolBlockedSource, + type StartHookSpanOptions, + type HookSpanMetadata, } from '../telemetry/index.js'; import { safeJsonStringify } from '../utils/safeJsonStringify.js'; @@ -94,9 +103,26 @@ const TOOL_FAILURE_KIND_POST_HOOK_STOPPED = 'post_hook_stopped'; const TOOL_FAILURE_KIND_TOOL_ERROR = 'tool_error'; const TOOL_FAILURE_KIND_TOOL_EXCEPTION = 'tool_exception'; const TOOL_FAILURE_KIND_CANCELLED = 'cancelled'; +// Approval-flow failure kinds — distinct from `pre_hook_blocked` (which +// only applies to actual PreToolUse hook denials in `_executeToolCallBody`) +// so dashboards can attribute denies to their real cause (#4321 review). +const TOOL_FAILURE_KIND_PERMISSION_DENIED = 'permission_denied'; +const TOOL_FAILURE_KIND_PERMISSION_HOOK_DENIED = 'permission_hook_denied'; +const TOOL_FAILURE_KIND_PLAN_MODE_BLOCKED = 'plan_mode_blocked'; +const TOOL_FAILURE_KIND_NON_INTERACTIVE_DENIED = 'non_interactive_denied'; +const TOOL_FAILURE_KIND_BACKGROUND_AGENT_DENIED = 'background_agent_denied'; const TOOL_SPAN_STATUS_PRE_HOOK_BLOCKED = 'Tool execution blocked by hook'; const TOOL_SPAN_STATUS_POST_HOOK_STOPPED = 'Tool execution stopped by hook'; +const TOOL_SPAN_STATUS_PERMISSION_DENIED = 'Permission denied for tool'; +const TOOL_SPAN_STATUS_PERMISSION_HOOK_DENIED = + 'Permission denied by permission_request hook'; +const TOOL_SPAN_STATUS_PLAN_MODE_BLOCKED = + 'Plan mode blocked a non-read-only tool call'; +const TOOL_SPAN_STATUS_NON_INTERACTIVE_DENIED = + 'Non-interactive mode declined permission'; +const TOOL_SPAN_STATUS_BACKGROUND_AGENT_DENIED = + 'Background agent cannot prompt for confirmation'; const TOOL_SPAN_STATUS_TOOL_ERROR = 'Tool execution failed'; const TOOL_SPAN_STATUS_TOOL_EXCEPTION = 'Tool execution failed with exception'; const TOOL_SPAN_STATUS_TOOL_CANCELLED = 'Tool execution cancelled by user'; @@ -134,9 +160,14 @@ function setToolSpanFailure( } catch { // OTel errors must not block the failure status update. } + // Bound the status message size at this single ingress point so every + // setToolSpanFailure caller is protected — multiple call sites pass + // raw error.message which can be unbounded (#4321 review-5 wenshao + // Suggestion). Static-constant callers see no change since their + // messages are well under 1024 chars. safeSetStatus(span, { code: SpanStatusCode.ERROR, - message, + message: truncateSpanError(message), }); } @@ -172,10 +203,11 @@ async function safelyFirePostToolUseFailureHook( permissionMode, ); } catch (error) { + const message = error instanceof Error ? error.message : String(error); debugLogger.warn( - `PostToolUseFailure hook failed for ${toolName}: ${error instanceof Error ? error.message : String(error)}`, + `PostToolUseFailure hook failed for ${toolName}: ${message}`, ); - return {}; + return { hookError: message }; } } @@ -674,6 +706,19 @@ interface ToolBatch { calls: ScheduledToolCall[]; } +/** + * State for the per-batch signal.abort listener registered in + * `_schedule`. Shared by every callId in the batch so finalize hooks + * can remove the listener once the last live entry drains, regardless + * of whether finalization happens synchronously inside `_schedule`, + * later via `handleConfirmationResponse`, or via `executeSingleToolCall`. + */ +interface BatchAbortState { + signal: AbortSignal; + onAbort: () => void; + callIds: Set; +} + /** * Returns true if a scheduled tool call can safely execute concurrently * with other safe tools (no side effects, no shared mutable state). @@ -732,6 +777,28 @@ export class CoreToolScheduler { private isFinalizingToolCalls = false; private isScheduling = false; private validationRetryCounts = new Map(); + // Tool span lifecycle now spans validating → awaiting_approval → executing + // → terminal, so we hold the span across method boundaries by callId. + // Decoupling from ToolCall identity is intentional — setStatusInternal + // rebuilds the ToolCall on every status change, so a field on the + // discriminated union would require threading on every transition. + private toolSpans = new Map(); + // blocked_on_user span — child of the corresponding tool span — covers the + // awaiting_approval phase. ModifyWithEditor stays inside one span until + // the user makes a final decision (#3731 Phase 2). + // + // Map drain on signal.abort: see drainSpansForBatch — without it, + // entries leaked across awaiting-approval-then-abort would persist for + // the scheduler's lifetime (the 30-min TTL ends the underlying spans + // but cannot reach these scheduler-local Maps; #4321 review). + private blockedSpans = new Map(); + // Per-batch abort-listener state. callIdToBatch maps each callId added + // during a `_schedule` invocation to its shared BatchAbortState; when + // `finalize{Tool,Blocked}Span` removes the last live callId of a + // batch, we strip the abort listener off the signal so long-lived + // sessions reusing the same AbortSignal don't accumulate listeners + // and trip Node's MaxListenersExceededWarning (#4321 review-3). + private callIdToBatch = new Map(); private requestQueue: Array<{ request: ToolCallRequestInfo | ToolCallRequestInfo[]; signal: AbortSignal; @@ -982,6 +1049,185 @@ export class CoreToolScheduler { ); } + /** + * End the tool span for `callId` (if any) and remove it from the map. + * Centralizes terminal-state cleanup so every cancel/error/success path + * goes through one place — easier to audit for leaks. Idempotent: + * second call for the same callId is a no-op. + * + * No `metadata` parameter: every caller pre-sets span status via + * `setToolSpan{Failure,Cancelled,Ok}` before this call (#4321 review). + */ + private finalizeToolSpan(callId: string): void { + const span = this.toolSpans.get(callId); + if (!span) return; + this.toolSpans.delete(callId); + endToolSpan(span); + this.releaseBatchListenerIfDrained(callId); + } + + /** + * End the blocked_on_user span for `callId` (if any) and remove it from + * the map. Idempotent. ModifyWithEditor must NOT call this — the same + * blocked span covers the entire awaiting period including editor side + * trips. + */ + private finalizeBlockedSpan( + callId: string, + decision: ToolBlockedDecision, + source: ToolBlockedSource, + ): void { + const span = this.blockedSpans.get(callId); + if (!span) return; + this.blockedSpans.delete(callId); + endToolBlockedOnUserSpan(span, { decision, source }); + // Don't release the batch listener here — the tool span often + // outlives the blocked span (proceed → execute), so finalizeToolSpan + // is the canonical drain point. The blocked span's release runs + // through the same path on terminal states (cancel/error finalize + // both spans together). + } + + /** + * Hook called by finalizeToolSpan when a callId drains from the + * scheduler-local maps. If this was the last live callId of its batch, + * remove the abort listener so the AbortSignal doesn't accumulate + * listeners across many `_schedule` calls in a long-lived session + * (#4321 review-3 wenshao Critical). + */ + private releaseBatchListenerIfDrained(callId: string): void { + const batch = this.callIdToBatch.get(callId); + if (!batch) return; + this.callIdToBatch.delete(callId); + batch.callIds.delete(callId); + + // Any other callId in the batch still in toolSpans/blockedSpans? + // If yes, the listener still has work to do. If no, drop it. + for (const id of batch.callIds) { + if (this.toolSpans.has(id) || this.blockedSpans.has(id)) return; + } + batch.signal.removeEventListener('abort', batch.onAbort); + } + + /** + * Best-effort attribution of the surface that resolved the blocked + * decision. When IDE mode is on, confirmations are most often resolved + * via the IDE diff flow (`openIdeDiffIfEnabled`) — but a CLI-fallback + * confirmation in IDE mode is also reported as 'ide' here. Operators + * can drill into the trace if they need finer-grained attribution. + */ + private getBlockedSource(): ToolBlockedSource { + return this.config.getIdeMode?.() ? 'ide' : 'cli'; + } + + /** + * Drain any tool/blocked spans associated with `callIds` that are still + * live in the scheduler-local maps. Called on signal.abort for spans + * that no other code path will finalize (e.g. user walks away from + * awaiting_approval and the session aborts). + * + * Deferred to a macrotask so existing finalize paths that await on the + * SAME aborted signal — explicit user Cancel via + * `handleConfirmationResponse`, mid-execution `setToolSpanCancelled` + * inside `_executeToolCallBody` — win the race and set the canonical + * decision/status before this safety-net drain runs. By the time the + * timer fires, those paths have removed the entries from the Maps and + * the drain is a no-op for the common cases. Only the genuine + * walk-away-then-abort case survives to be drained here. + * + * Idempotent for callIds whose spans were already finalized by a normal + * path — `finalizeBlockedSpan` / `finalizeToolSpan` are no-ops on + * missing entries. + */ + private drainSpansForBatch(callIds: Iterable): void { + const ids = Array.from(callIds); + setTimeout(() => { + for (const callId of ids) { + // Per-callId try/catch so one bad finalize doesn't silently skip + // remaining entries — the timer callback would otherwise surface + // an unhandled exception (#4321 review-3 wenshao Suggestion). + try { + if (this.blockedSpans.has(callId)) { + this.finalizeBlockedSpan(callId, 'aborted', 'system'); + } + const span = this.toolSpans.get(callId); + if (span) { + setToolSpanCancelled(span); + this.finalizeToolSpan(callId); + } + } catch (e) { + debugLogger.warn( + `drainSpansForBatch: failed to drain ${callId}: ${e instanceof Error ? e.message : String(e)}`, + ); + } + } + }, 0); + } + + /** + * Shared toEndMeta callback for the 4 PostToolUseFailure hook fire + * sites. Each was previously inlined as a byte-identical lambda; the + * helper avoids drift between cancel-vs-error and abort-vs-non-abort + * branches and keeps protocol changes (e.g. new metadata fields) in + * one place (#4321 review-3 wenshao Suggestion). + */ + private postToolUseFailureEndMeta = ( + r: Awaited>, + ): HookSpanMetadata => + r.hookError + ? { success: false, error: r.hookError } + : { + success: true, + hasAdditionalContext: !!r.additionalContext, + }; + + /** + * Wrap a hook fire site with span lifecycle management. Centralizes the + * try/finally pattern across the 6 hook fire sites (PreToolUse, + * PostToolUse, 4× PostToolUseFailure) so future protocol changes + * (e.g. new metadata fields) can be made in one place instead of in + * lockstep across each site (#4321 review wenshao Suggestion). + * + * On the happy path `toEndMeta(result)` builds the metadata recorded on + * the span. On a throw, the default `endMeta = { success: false }` + * survives — today's hook helpers in `toolHookTriggers.ts` swallow + * throws internally so this branch is unreachable, but the pattern + * future-proofs the lifecycle if that contract changes. + */ + private async withHookSpan( + opts: StartHookSpanOptions, + fn: () => Promise, + toEndMeta: (result: T) => HookSpanMetadata, + ): Promise { + const hookSpan = startHookSpan(opts); + // Default endMeta carries an `error` so OTel maps the span to ERROR + // status if `fn()` ever throws (today unreachable — hook helpers + // catch internally — but kept as a defensive contract). Without + // an `error` field, the span would record `success: false` as an + // attribute but `code: UNSET` as status, which trace backends + // filtering on ERROR would miss (#4321 review code-reviewer). + let endMeta: HookSpanMetadata = { success: false }; + try { + const result = await fn(); + endMeta = toEndMeta(result); + return result; + } catch (err) { + // Capture the actual thrown message instead of a hardcoded + // sentinel so the hook span surfaces the real failure for + // operators (#4321 review DeepSeek Suggestion). This branch is + // unreachable on the current hook-helper contract (each fire* + // helper catches internally) but kept defensively in case the + // contract evolves. + endMeta = { + success: false, + error: err instanceof Error ? err.message : String(err), + }; + throw err; + } finally { + endHookSpan(hookSpan, endMeta); + } + } + private buildInvocation( tool: AnyDeclarativeTool, args: object, @@ -1285,6 +1531,21 @@ export class CoreToolScheduler { this.toolCalls = this.toolCalls.concat(newToolCalls); this.notifyToolCallsUpdate(); + // Per-batch abort-listener state. Shared by every callId added in + // this `_schedule` invocation. The listener drains scheduler-local + // Maps on a real abort (walk-away-during-awaiting_approval), and is + // automatically released by `releaseBatchListenerIfDrained` from + // inside `finalizeToolSpan` when the batch's last live callId + // drains — keeping listener growth bounded across long sessions + // even when batches mix synchronous and awaiting_approval flows + // (#4321 review-3 wenshao Critical). + const batchState: BatchAbortState = { + signal, + onAbort: () => this.drainSpansForBatch(batchState.callIds), + callIds: new Set(), + }; + signal.addEventListener('abort', batchState.onAbort, { once: true }); + for (const toolCall of newToolCalls) { if (toolCall.status !== 'validating') { continue; @@ -1293,6 +1554,28 @@ export class CoreToolScheduler { const { request: reqInfo, invocation } = toolCall; const canonicalName = canonicalToolName(reqInfo.name); + // Open the tool span as soon as the call is validated. This covers + // validating → awaiting_approval → executing in one span (#3731 + // Phase 2). Every cancel/error path below — and the existing + // success path in executeSingleToolCall — must call + // finalizeToolSpan(callId, ...) to avoid leaking spans. + // `tool.name` is set automatically by startToolSpan from the first + // arg; only namespaced extras go in attrs. `call_id` (non-namespaced) + // is dual-emitted for one release as a backwards-compat shim for + // pre-Phase-2 dashboards/alerts that grep the old key — drop after + // operators migrate (#4321 review). `tool_name` is dual-emitted on + // the same migration window (review-2 DeepSeek Suggestion) so + // pre-Phase-2 dashboards filtering on it don't silently stop + // matching during the rollout. + const toolSpan = startToolSpan(canonicalName, { + 'tool.call_id': reqInfo.callId, + call_id: reqInfo.callId, + tool_name: canonicalName, + }); + this.toolSpans.set(reqInfo.callId, toolSpan); + batchState.callIds.add(reqInfo.callId); + this.callIdToBatch.set(reqInfo.callId, batchState); + try { if (signal.aborted) { this.setStatusInternal( @@ -1300,6 +1583,8 @@ export class CoreToolScheduler { 'cancelled', 'Tool call cancelled by user.', ); + setToolSpanCancelled(toolSpan); + this.finalizeToolSpan(reqInfo.callId); continue; } @@ -1344,6 +1629,12 @@ export class CoreToolScheduler { ToolErrorType.EXECUTION_DENIED, ), ); + setToolSpanFailure( + toolSpan, + TOOL_FAILURE_KIND_PERMISSION_DENIED, + TOOL_SPAN_STATUS_PERMISSION_DENIED, + ); + this.finalizeToolSpan(reqInfo.callId); continue; } @@ -1389,6 +1680,12 @@ export class CoreToolScheduler { error: undefined, errorType: undefined, }); + setToolSpanFailure( + toolSpan, + TOOL_FAILURE_KIND_PLAN_MODE_BLOCKED, + TOOL_SPAN_STATUS_PLAN_MODE_BLOCKED, + ); + this.finalizeToolSpan(reqInfo.callId); continue; } @@ -1421,6 +1718,12 @@ export class CoreToolScheduler { ToolErrorType.EXECUTION_DENIED, ), ); + setToolSpanFailure( + toolSpan, + TOOL_FAILURE_KIND_NON_INTERACTIVE_DENIED, + TOOL_SPAN_STATUS_NON_INTERACTIVE_DENIED, + ); + this.finalizeToolSpan(reqInfo.callId); continue; } @@ -1486,6 +1789,12 @@ export class CoreToolScheduler { ToolErrorType.EXECUTION_DENIED, ), ); + setToolSpanFailure( + toolSpan, + TOOL_FAILURE_KIND_PERMISSION_HOOK_DENIED, + TOOL_SPAN_STATUS_PERMISSION_HOOK_DENIED, + ); + this.finalizeToolSpan(reqInfo.callId); } continue; } @@ -1504,6 +1813,32 @@ export class CoreToolScheduler { ToolErrorType.EXECUTION_DENIED, ), ); + setToolSpanFailure( + toolSpan, + TOOL_FAILURE_KIND_BACKGROUND_AGENT_DENIED, + TOOL_SPAN_STATUS_BACKGROUND_AGENT_DENIED, + ); + this.finalizeToolSpan(reqInfo.callId); + continue; + } + + // Re-check signal.aborted between the for-loop entry guard and + // here: `evaluatePermissionFlow`, `getConfirmationDetails`, and + // `firePermissionRequestHook` are all `await` points that can + // resolve normally even after the signal aborted. Without this + // re-check we'd open `awaiting_approval` + a blocked span on + // an already-aborted signal — drainSpansForBatch (deferred via + // setTimeout(0)) may have already fired by then, so the new + // entries would never be drained (#4321 review-3 wenshao + // Critical). + if (signal.aborted) { + this.setStatusInternal( + reqInfo.callId, + 'cancelled', + 'Tool call cancelled by user.', + ); + setToolSpanCancelled(toolSpan); + this.finalizeToolSpan(reqInfo.callId); continue; } @@ -1539,6 +1874,17 @@ export class CoreToolScheduler { wrappedConfirmationDetails, ); + // Open blocked_on_user span as a child of the tool span — covers + // the entire awaiting_approval phase, including any + // ModifyWithEditor side trip (#3731 Phase 2). Finalized in + // handleConfirmationResponse / autoApproveCompatiblePendingTools + // / the global-abort catch block above. + const blockedSpan = startToolBlockedOnUserSpan(toolSpan, { + tool_name: canonicalName, + call_id: reqInfo.callId, + }); + this.blockedSpans.set(reqInfo.callId, blockedSpan); + // Fire permission_prompt notification hook if (hooksEnabled && messageBus) { fireNotificationHook( @@ -1560,6 +1906,11 @@ export class CoreToolScheduler { 'cancelled', 'Tool call cancelled by user.', ); + // If this tool was waiting on the user, end the blocked span + // as aborted before the tool span itself. + this.finalizeBlockedSpan(reqInfo.callId, 'aborted', 'system'); + setToolSpanCancelled(toolSpan); + this.finalizeToolSpan(reqInfo.callId); continue; } @@ -1581,10 +1932,37 @@ export class CoreToolScheduler { explicitErrorType ?? ToolErrorType.UNHANDLED_EXCEPTION, ), ); + // Non-aborted catch is a system error (e.g. getConfirmationDetails + // threw). 'error' decision keeps it distinct from user 'cancel' + // counts in dashboards. + this.finalizeBlockedSpan(reqInfo.callId, 'error', 'system'); + setToolSpanFailure( + toolSpan, + TOOL_FAILURE_KIND_TOOL_EXCEPTION, + error instanceof Error ? error.message : String(error), + ); + this.finalizeToolSpan(reqInfo.callId); } } await this.attemptExecutionOfScheduledCalls(signal); void this.checkAndNotifyCompletion(); + // Listener removal happens inside `finalizeToolSpan` → + // `releaseBatchListenerIfDrained` for every callId, so we don't + // need a duplicate cleanup here. That path also covers the + // exception case (this method's outer try/catch finalizes spans + // before re-throwing), satisfying the + // "stillLive cleanup not in finally" concern from review-3. + // + // Edge case: if every newToolCall was non-validating (all failed + // pre-validation — invalid params, tool not registered, etc.), + // batchState.callIds stays empty and no finalizeToolSpan call + // ever fires for this batch. Drop the listener here so the + // signal doesn't accumulate dead listeners across many such + // batches in a daemon session (#4321 review-5 wenshao + // Suggestion). + if (batchState.callIds.size === 0) { + signal.removeEventListener('abort', batchState.onAbort); + } } finally { this.isScheduling = false; } @@ -1609,6 +1987,66 @@ export class CoreToolScheduler { // processing and potential re-execution. if (!toolCall) return; + try { + await this._handleConfirmationResponseInner( + callId, + toolCall, + originalOnConfirm, + outcome, + signal, + payload, + ); + } catch (error) { + // Defensive: any throw from originalOnConfirm / modifyWithEditor / + // _applyInlineModify / attemptExecutionOfScheduledCalls would + // otherwise leave the blocked + tool spans open until the 30-min + // TTL fires. Finalize both so the trace shows a deterministic + // close. finalizeXSpan are idempotent — if the success/cancel path + // already closed them, these are no-ops. + // + // Branch on signal.aborted so a throw caused by the abort signal + // (e.g. ModifyWithEditor child interrupted by Ctrl+C) lands as + // 'aborted'/'system' + UNSET status — matching the sister catch + // in `_schedule:1797` and the dashboard intent of separating + // user/system aborts from real exceptions (#4321 review-2 wenshao). + const aborted = signal.aborted; + this.finalizeBlockedSpan(callId, aborted ? 'aborted' : 'error', 'system'); + const toolSpan = this.toolSpans.get(callId); + if (toolSpan) { + if (aborted) { + setToolSpanCancelled(toolSpan); + } else { + setToolSpanFailure( + toolSpan, + TOOL_FAILURE_KIND_TOOL_EXCEPTION, + error instanceof Error ? error.message : String(error), + ); + } + } + this.finalizeToolSpan(callId); + // Surface the failure in application logs even though we re-throw. + // The trace backend captures it via the span, but operators + // grepping logs by callId would otherwise see nothing if the + // caller doesn't log the rejection itself (#4321 review-5 + // wenshao Suggestion). + debugLogger.warn( + `handleConfirmationResponse failed for ${callId}: ${error instanceof Error ? error.message : String(error)}`, + ); + throw error; + } + } + + private async _handleConfirmationResponseInner( + callId: string, + toolCall: ToolCall, + originalOnConfirm: ( + outcome: ToolConfirmationOutcome, + payload?: ToolConfirmationPayload, + ) => Promise, + outcome: ToolConfirmationOutcome, + signal: AbortSignal, + payload?: ToolConfirmationPayload, + ): Promise { await originalOnConfirm(outcome, payload); if ( @@ -1634,12 +2072,54 @@ export class CoreToolScheduler { const cancelMessage = payload?.cancelMessage || 'User did not allow tool call'; this.setStatusInternal(callId, 'cancelled', cancelMessage); + // Tool span is cancelled too — finalize it via setToolSpanCancelled + // before pulling it out of the map so the status survives end(). + const toolSpan = this.toolSpans.get(callId); + if (toolSpan) { + setToolSpanCancelled(toolSpan); + } + // Explicit user Cancel takes precedence over a concurrent global + // abort: when both are true, treat it as an explicit cancel so + // dashboards counting `decision: 'aborted'` aren't polluted by + // benign user actions that race with shutdown. + const explicitCancel = outcome === ToolConfirmationOutcome.Cancel; + this.finalizeBlockedSpan( + callId, + explicitCancel ? 'cancel' : 'aborted', + explicitCancel ? this.getBlockedSource() : 'system', + ); + this.finalizeToolSpan(callId); } else if (outcome === ToolConfirmationOutcome.ModifyWithEditor) { const waitingToolCall = toolCall as WaitingToolCall; if (isModifiableDeclarativeTool(waitingToolCall.tool)) { const modifyContext = waitingToolCall.tool.getModifyContext(signal); const editorType = this.getPreferredEditor(); if (!editorType) { + // No editor configured: ModifyWithEditor cannot proceed. Log so + // the silent failure is at least visible in debug telemetry. + // Do NOT finalize spans here — the tool stays in awaiting_approval + // and the user can still recover with Cancel or Proceed; their + // eventual decision closes the spans correctly. Closing them + // here would make the user's eventual finalize a no-op (Map + // already cleared) and lose the actual decision/source — same + // pattern as the autoApprove catch (#4321 review codex P3). + // The 30-min TTL is the safety net if the user walks away. + debugLogger.warn( + `ModifyWithEditor requested for ${callId} but no editor available — tool stays in awaiting_approval; user can recover via Cancel/Proceed`, + ); + // Tag the tool span so operators can detect this state in + // production traces without enabling debug logging + // (#4321 review-2 DeepSeek Critical). + const toolSpan = this.toolSpans.get(callId); + if (toolSpan) { + try { + toolSpan.setAttributes({ + 'qwen-code.tool.modify_with_editor_unavailable': true, + }); + } catch { + // OTel errors must not block API behavior. + } + } return; } @@ -1687,6 +2167,15 @@ export class CoreToolScheduler { ); } this.setStatusInternal(callId, 'scheduled'); + // Proceed: end the blocked span before execution begins. ProceedOnce + // and the three ProceedAlways* variants all close the awaiting phase. + // The tool span itself stays open and is finalized in + // executeSingleToolCall. + const decision: ToolBlockedDecision = + outcome === ToolConfirmationOutcome.ProceedOnce + ? 'proceed_once' + : 'proceed_always'; + this.finalizeBlockedSpan(callId, decision, this.getBlockedSource()); } await this.attemptExecutionOfScheduledCalls(signal); } @@ -1873,16 +2362,31 @@ export class CoreToolScheduler { const scheduledCall = toolCall; const { callId, name: toolName } = scheduledCall.request; - const toolSpan = startToolSpan(toolName, { - tool_name: toolName, - call_id: callId, - }); + // The tool span is opened in `_schedule` so it covers validating → + // awaiting_approval → executing in one span. Reuse it here. If it's + // missing (defensive — shouldn't happen on the happy path), create one + // so the success path still produces telemetry. + let toolSpan = this.toolSpans.get(callId); + if (!toolSpan) { + // canonicalToolName matches the _schedule path so dashboards + // grouping by span name don't see two entries for migrated/MCP tools + // when this defensive fallback fires (#4321 review). + const canonical = canonicalToolName(toolName); + toolSpan = startToolSpan(canonical, { + 'tool.call_id': callId, + call_id: callId, // legacy alias — see _schedule for context + tool_name: canonical, // legacy alias — see _schedule for context + }); + this.toolSpans.set(callId, toolSpan); + } try { await runInToolSpanContext(toolSpan, () => this._executeToolCallBody(scheduledCall, signal, toolSpan), ); } finally { - endToolSpan(toolSpan); + // _executeToolCallBody pre-sets status (OK / FAILURE / CANCELLED) via + // setToolSpan*; finalize without metadata to preserve that. + this.finalizeToolSpan(callId); } } @@ -1927,14 +2431,37 @@ export class CoreToolScheduler { if (hooksEnabled && messageBus) { // Convert ApprovalMode to permission_mode string for hooks const permissionMode = this.config.getApprovalMode(); - const preHookResult = await firePreToolUseHook( - messageBus, - canonicalName, - toolInput, - toolUseId, - permissionMode, + const preHookResult = await this.withHookSpan( + { hookEvent: 'PreToolUse', toolName: canonicalName, toolUseId }, + () => + firePreToolUseHook( + messageBus, + canonicalName, + toolInput, + toolUseId, + permissionMode, + ), + (r) => + r.hookError + ? { + success: false, + error: r.hookError, + // Hook transport failures do NOT block tool execution + // (firePreToolUseHook returns shouldProceed:true with a + // hookError). Surface that on the span too so operators + // see the same allow-on-failure semantics the runtime + // applies (#4321 review-2 DeepSeek Suggestion). + shouldProceed: true, + } + : { + success: true, + shouldProceed: r.shouldProceed, + // Propagate the actual blockType ('denied' / 'ask' / 'stop') + // instead of collapsing every block to 'denied'. + blockType: r.shouldProceed ? undefined : r.blockType, + hasAdditionalContext: !!r.additionalContext, + }, ); - if (!preHookResult.shouldProceed) { // Hook blocked the execution const blockMessage = @@ -2053,14 +2580,24 @@ export class CoreToolScheduler { // PostToolUseFailure Hook let cancelMessage = 'User cancelled tool execution.'; if (hooksEnabled && messageBus) { - const failureHookResult = await safelyFirePostToolUseFailureHook( - messageBus, - toolUseId, - canonicalName, - toolInput, - cancelMessage, - true, - this.config.getApprovalMode(), + const failureHookResult = await this.withHookSpan( + { + hookEvent: 'PostToolUseFailure', + toolName: canonicalName, + toolUseId, + isInterrupt: true, + }, + () => + safelyFirePostToolUseFailureHook( + messageBus, + toolUseId, + canonicalName, + toolInput, + cancelMessage, + true, + this.config.getApprovalMode(), + ), + this.postToolUseFailureEndMeta, ); // Append additional context from hook if provided @@ -2091,13 +2628,35 @@ export class CoreToolScheduler { returnDisplay: toolResult.returnDisplay, }; const permissionMode = this.config.getApprovalMode(); - const postHookResult = await firePostToolUseHook( - messageBus, - canonicalName, - toolInput, - toolResponse, - toolUseId, - permissionMode, + const postHookResult = await this.withHookSpan( + { hookEvent: 'PostToolUse', toolName: canonicalName, toolUseId }, + () => + firePostToolUseHook( + messageBus, + canonicalName, + toolInput, + toolResponse, + toolUseId, + permissionMode, + ), + (r) => + r.hookError + ? { + success: false, + error: r.hookError, + // Hook transport failures do NOT halt the post-execution + // flow (firePostToolUseHook returns shouldStop:false with + // a hookError). Mirror the PreToolUse fix so the span + // matches runtime semantics (#4321 review-2 DeepSeek + // Suggestion). + shouldStop: false, + } + : { + success: true, + shouldStop: r.shouldStop, + hasAdditionalContext: !!r.additionalContext, + blockType: r.shouldStop ? 'stop' : undefined, + }, ); // Append additional context from hook if provided @@ -2257,14 +2816,24 @@ export class CoreToolScheduler { // PostToolUseFailure Hook let errorMessage = toolResult.error.message; if (hooksEnabled && messageBus) { - const failureHookResult = await safelyFirePostToolUseFailureHook( - messageBus, - toolUseId, - canonicalName, - toolInput, - toolResult.error.message, - false, - this.config.getApprovalMode(), + const failureHookResult = await this.withHookSpan( + { + hookEvent: 'PostToolUseFailure', + toolName: canonicalName, + toolUseId, + isInterrupt: false, + }, + () => + safelyFirePostToolUseFailureHook( + messageBus, + toolUseId, + canonicalName, + toolInput, + toolResult.error!.message, + false, + this.config.getApprovalMode(), + ), + this.postToolUseFailureEndMeta, ); // Append additional context from hook if provided @@ -2316,14 +2885,24 @@ export class CoreToolScheduler { // PostToolUseFailure Hook (user interrupt) let cancelMessage = 'User cancelled tool execution.'; if (hooksEnabled && messageBus) { - const failureHookResult = await safelyFirePostToolUseFailureHook( - messageBus, - toolUseId, - canonicalName, - toolInput, - cancelMessage, - true, - this.config.getApprovalMode(), + const failureHookResult = await this.withHookSpan( + { + hookEvent: 'PostToolUseFailure', + toolName: canonicalName, + toolUseId, + isInterrupt: true, + }, + () => + safelyFirePostToolUseFailureHook( + messageBus, + toolUseId, + canonicalName, + toolInput, + cancelMessage, + true, + this.config.getApprovalMode(), + ), + this.postToolUseFailureEndMeta, ); // Append additional context from hook if provided @@ -2344,14 +2923,24 @@ export class CoreToolScheduler { // PostToolUseFailure Hook let exceptionErrorMessage = errorMessage; if (hooksEnabled && messageBus) { - const failureHookResult = await safelyFirePostToolUseFailureHook( - messageBus, - toolUseId, - canonicalName, - toolInput, - errorMessage, - false, - this.config.getApprovalMode(), + const failureHookResult = await this.withHookSpan( + { + hookEvent: 'PostToolUseFailure', + toolName: canonicalName, + toolUseId, + isInterrupt: false, + }, + () => + safelyFirePostToolUseFailureHook( + messageBus, + toolUseId, + canonicalName, + toolInput, + errorMessage, + false, + this.config.getApprovalMode(), + ), + this.postToolUseFailureEndMeta, ); // Append additional context from hook if provided @@ -2494,12 +3083,28 @@ export class CoreToolScheduler { ToolConfirmationOutcome.ProceedAlways, ); this.setStatusInternal(pendingTool.request.callId, 'scheduled'); + // Sister tool was waiting on the user but a sibling's + // ProceedAlways* outcome auto-approved it. Close the blocked span + // with auto_approved so the trace explains why this branch + // skipped a manual decision (#3731 Phase 2). + this.finalizeBlockedSpan( + pendingTool.request.callId, + 'auto_approved', + 'auto', + ); } } catch (error) { debugLogger.error( `Error checking confirmation for tool ${pendingTool.request.callId}:`, error, ); + // Intentionally do NOT finalize the blocked span here: the tool + // remains in `awaiting_approval` and the user can still respond. + // Closing the span on a transient permission-flow error would + // make the user's eventual decision a no-op (Map already cleared) + // and the actual decision/source would be lost. If the user + // never responds, the 30-min TTL in session-tracing.ts cleans + // up the span (#4321 codex P3 review). } } } diff --git a/packages/core/src/core/toolHookTriggers.test.ts b/packages/core/src/core/toolHookTriggers.test.ts index 57d6539cd3..f1f15f9e5f 100644 --- a/packages/core/src/core/toolHookTriggers.test.ts +++ b/packages/core/src/core/toolHookTriggers.test.ts @@ -215,7 +215,13 @@ describe('toolHookTriggers', () => { 'auto', ); - expect(result).toEqual({ shouldProceed: true }); + // #4321 review: hookError surfaces the swallowed transport error so + // observers (telemetry spans, debug logs) can distinguish a failed + // hook from a successful "allow" decision. + expect(result).toEqual({ + shouldProceed: true, + hookError: 'Network error', + }); }); }); @@ -338,7 +344,9 @@ describe('toolHookTriggers', () => { 'auto', ); - expect(result).toEqual({ shouldStop: false }); + // #4321 review: hookError now surfaced to caller (see PreToolUse parallel test). + expect(result.shouldStop).toBe(false); + expect(result.hookError).toBeDefined(); }); }); @@ -429,7 +437,9 @@ describe('toolHookTriggers', () => { 'error message', ); - expect(result).toEqual({}); + // #4321 review: hookError now surfaced to caller. + expect(result.hookError).toBeDefined(); + expect(result.additionalContext).toBeUndefined(); }); }); diff --git a/packages/core/src/core/toolHookTriggers.ts b/packages/core/src/core/toolHookTriggers.ts index 65c9eb38ab..e305df0890 100644 --- a/packages/core/src/core/toolHookTriggers.ts +++ b/packages/core/src/core/toolHookTriggers.ts @@ -43,6 +43,14 @@ export interface PreToolUseHookResult { blockType?: 'denied' | 'ask' | 'stop'; /** Additional context to add */ additionalContext?: string; + /** + * Set when the hook helper caught and absorbed a transport / dispatch + * error. The tool execution still proceeds (existing non-blocking + * contract), but observers (telemetry spans, debug logs) can detect + * that the hook itself failed instead of treating the safe-default + * response as a successful "allow" decision (#4321 review). + */ + hookError?: string; } /** @@ -55,6 +63,8 @@ export interface PostToolUseHookResult { stopReason?: string; /** Additional context to append to tool response */ additionalContext?: string; + /** See PreToolUseHookResult.hookError. */ + hookError?: string; } /** @@ -63,6 +73,8 @@ export interface PostToolUseHookResult { export interface PostToolUseFailureHookResult { /** Additional context about the failure */ additionalContext?: string; + /** See PreToolUseHookResult.hookError. */ + hookError?: string; } /** @@ -107,7 +119,15 @@ export async function firePreToolUseHook( ); if (!response.success || !response.output) { - return { shouldProceed: true }; + // Hook runner reported failure (URL validation, fn exception, + // prompt-runner crash, ...). The `response.error` from the runner + // is the canonical cause — forward it so telemetry and operators + // see the actual failure instead of a fake "allow" success + // (#4321 review silent-failure-hunter HIGH). + const message = response.error?.message; + return message + ? { shouldProceed: true, hookError: message } + : { shouldProceed: true }; } const preToolOutput = createHookOutput( @@ -155,10 +175,9 @@ export async function firePreToolUseHook( }; } catch (error) { // Hook errors should not block tool execution - debugLogger.warn( - `PreToolUse hook error for ${toolName}: ${error instanceof Error ? error.message : String(error)}`, - ); - return { shouldProceed: true }; + const message = error instanceof Error ? error.message : String(error); + debugLogger.warn(`PreToolUse hook error for ${toolName}: ${message}`); + return { shouldProceed: true, hookError: message }; } } @@ -207,7 +226,11 @@ export async function firePostToolUseHook( ); if (!response.success || !response.output) { - return { shouldStop: false }; + // See firePreToolUseHook for the rationale. + const message = response.error?.message; + return message + ? { shouldStop: false, hookError: message } + : { shouldStop: false }; } const postToolOutput = createHookOutput( @@ -232,10 +255,9 @@ export async function firePostToolUseHook( }; } catch (error) { // Hook errors should not affect tool result - debugLogger.warn( - `PostToolUse hook error for ${toolName}: ${error instanceof Error ? error.message : String(error)}`, - ); - return { shouldStop: false }; + const message = error instanceof Error ? error.message : String(error); + debugLogger.warn(`PostToolUse hook error for ${toolName}: ${message}`); + return { shouldStop: false, hookError: message }; } } @@ -287,7 +309,9 @@ export async function firePostToolUseFailureHook( ); if (!response.success || !response.output) { - return {}; + // See firePreToolUseHook for the rationale. + const message = response.error?.message; + return message ? { hookError: message } : {}; } const failureOutput = createHookOutput( @@ -301,10 +325,11 @@ export async function firePostToolUseFailureHook( }; } catch (error) { // Hook errors should not affect error handling + const message = error instanceof Error ? error.message : String(error); debugLogger.warn( - `PostToolUseFailure hook error for ${toolName}: ${error instanceof Error ? error.message : String(error)}`, + `PostToolUseFailure hook error for ${toolName}: ${message}`, ); - return {}; + return { hookError: message }; } } diff --git a/packages/core/src/telemetry/constants.ts b/packages/core/src/telemetry/constants.ts index aac9249130..8ef41eaef8 100644 --- a/packages/core/src/telemetry/constants.ts +++ b/packages/core/src/telemetry/constants.ts @@ -64,3 +64,7 @@ export const SPAN_INTERACTION = 'qwen-code.interaction'; export const SPAN_LLM_REQUEST = 'qwen-code.llm_request'; export const SPAN_TOOL = 'qwen-code.tool'; export const SPAN_TOOL_EXECUTION = 'qwen-code.tool.execution'; +/** Brackets the time a tool spends in `awaiting_approval` waiting on the user. */ +export const SPAN_TOOL_BLOCKED_ON_USER = 'qwen-code.tool.blocked_on_user'; +/** Wraps each pre/post-tool-use hook fire site for per-hook latency / decision tracking. */ +export const SPAN_HOOK = 'qwen-code.hook'; diff --git a/packages/core/src/telemetry/index.ts b/packages/core/src/telemetry/index.ts index f81192ad0d..6ad5cb13c3 100644 --- a/packages/core/src/telemetry/index.ts +++ b/packages/core/src/telemetry/index.ts @@ -146,13 +146,23 @@ export { runInToolSpanContext, startToolExecutionSpan, endToolExecutionSpan, + startToolBlockedOnUserSpan, + endToolBlockedOnUserSpan, + startHookSpan, + endHookSpan, getActiveInteractionSpan, + truncateSpanError, } from './session-tracing.js'; export type { StartInteractionOptions, EndInteractionOptions, LLMRequestMetadata, ToolSpanMetadata, + ToolBlockedDecision, + ToolBlockedSource, + HookEvent, + StartHookSpanOptions, + HookSpanMetadata, } from './session-tracing.js'; export { addUserPromptAttributes, diff --git a/packages/core/src/telemetry/session-tracing.test.ts b/packages/core/src/telemetry/session-tracing.test.ts index 608aa85916..f36d38a0af 100644 --- a/packages/core/src/telemetry/session-tracing.test.ts +++ b/packages/core/src/telemetry/session-tracing.test.ts @@ -132,8 +132,14 @@ import { runInToolSpanContext, startToolExecutionSpan, endToolExecutionSpan, + startToolBlockedOnUserSpan, + endToolBlockedOnUserSpan, + startHookSpan, + endHookSpan, getActiveInteractionSpan, clearSessionTracingForTesting, + runTTLSweepForTesting, + truncateSpanError, } from './session-tracing.js'; function createMockConfig( @@ -537,6 +543,224 @@ describe('session-tracing', () => { }); }); + describe('blocked_on_user spans (#3731 Phase 2)', () => { + it('parents the blocked span under the explicitly-passed tool span', () => { + const toolSpan = startToolSpan('Bash', { 'tool.call_id': 'c1' }); + const blockedSpan = startToolBlockedOnUserSpan(toolSpan, { + tool_name: 'Bash', + call_id: 'c1', + }); + + const blockedRecord = mockSpans.find( + (s) => s.name === 'qwen-code.tool.blocked_on_user', + ); + expect(blockedRecord).toBeDefined(); + // Parent context carries the tool span via setSpan()'s __parentSpan tag. + expect(blockedRecord?.parentContext).toMatchObject({ + __parentSpan: toolSpan, + }); + expect(blockedRecord?.attributes['tool.name']).toBe('Bash'); + expect(blockedRecord?.attributes['tool.call_id']).toBe('c1'); + + endToolBlockedOnUserSpan(blockedSpan, { + decision: 'proceed_once', + source: 'cli', + }); + endToolSpan(toolSpan, { success: true }); + }); + + it('records decision/source attributes on end and leaves status UNSET', () => { + const toolSpan = startToolSpan('Bash'); + const blockedSpan = startToolBlockedOnUserSpan(toolSpan); + endToolBlockedOnUserSpan(blockedSpan, { + decision: 'cancel', + source: 'cli', + }); + + const blockedRecord = mockSpans.find( + (s) => s.name === 'qwen-code.tool.blocked_on_user', + ); + expect(blockedRecord?.ended).toBe(true); + expect(blockedRecord?.attributes['decision']).toBe('cancel'); + expect(blockedRecord?.attributes['source']).toBe('cli'); + // Waiting on the user is neither OK nor ERROR — status stays UNSET. + expect(blockedRecord?.statuses).toHaveLength(0); + }); + + it('is idempotent — second end is a no-op', () => { + const toolSpan = startToolSpan('Bash'); + const blockedSpan = startToolBlockedOnUserSpan(toolSpan); + endToolBlockedOnUserSpan(blockedSpan, { decision: 'proceed_once' }); + endToolBlockedOnUserSpan(blockedSpan, { decision: 'cancel' }); + + const blockedRecord = mockSpans.find( + (s) => s.name === 'qwen-code.tool.blocked_on_user', + ); + // The second end must NOT overwrite decision recorded by the first. + expect(blockedRecord?.attributes['decision']).toBe('proceed_once'); + }); + + it('returns NOOP span when SDK is not initialized', () => { + mockState.sdkInitialized = false; + const toolSpan = startToolSpan('Bash'); + const blockedSpan = startToolBlockedOnUserSpan(toolSpan); + expect(blockedSpan.spanContext().traceId).toBe('0'.repeat(32)); + + // End on NOOP span must not throw. + endToolBlockedOnUserSpan(blockedSpan, { decision: 'cancel' }); + }); + + it('handles concurrent blocked spans without findLast confusion', () => { + // Regression test for the claude-code findLast-by-type bug. + // Two concurrent tools each have their own blocked span; ending the + // second one first must NOT close the first. + const toolA = startToolSpan('Bash', { 'tool.call_id': 'a' }); + const toolB = startToolSpan('Read', { 'tool.call_id': 'b' }); + const blockedA = startToolBlockedOnUserSpan(toolA, { call_id: 'a' }); + const blockedB = startToolBlockedOnUserSpan(toolB, { call_id: 'b' }); + + endToolBlockedOnUserSpan(blockedB, { decision: 'cancel' }); + + const recordA = mockSpans.find( + (s) => + s.name === 'qwen-code.tool.blocked_on_user' && + s.attributes['tool.call_id'] === 'a', + ); + const recordB = mockSpans.find( + (s) => + s.name === 'qwen-code.tool.blocked_on_user' && + s.attributes['tool.call_id'] === 'b', + ); + // Only B is ended; A still active. + expect(recordB?.ended).toBe(true); + expect(recordA?.ended).toBeFalsy(); + + endToolBlockedOnUserSpan(blockedA, { decision: 'proceed_once' }); + expect(recordA?.attributes['decision']).toBe('proceed_once'); + expect(recordB?.attributes['decision']).toBe('cancel'); + + endToolSpan(toolA, { success: true }); + endToolSpan(toolB, { success: false, error: 'cancelled' }); + }); + + it('falls back to resolveParentContext when the tool span was already ended', () => { + const toolSpan = startToolSpan('Bash'); + // Simulate someone passing an already-ended tool span — the helper + // should still produce a span (correlated via the standard fallback + // chain) instead of crashing. + endToolSpan(toolSpan, { success: true }); + + const blockedSpan = startToolBlockedOnUserSpan(toolSpan); + expect( + mockSpans.find((s) => s.name === 'qwen-code.tool.blocked_on_user'), + ).toBeDefined(); + + endToolBlockedOnUserSpan(blockedSpan, { decision: 'proceed_once' }); + }); + }); + + describe('hook spans (#3731 Phase 2)', () => { + it('parents under the active tool span when called inside runInToolSpanContext', () => { + const toolSpan = startToolSpan('Bash'); + + let hookSpan!: ReturnType; + runInToolSpanContext(toolSpan, () => { + hookSpan = startHookSpan({ + hookEvent: 'PreToolUse', + toolName: 'Bash', + toolUseId: 'use-1', + }); + }); + + const hookRecord = mockSpans.find((s) => s.name === 'qwen-code.hook'); + expect(hookRecord).toBeDefined(); + expect(hookRecord?.parentContext).toBeDefined(); + expect(hookRecord?.attributes['hook_event']).toBe('PreToolUse'); + expect(hookRecord?.attributes['tool.name']).toBe('Bash'); + expect(hookRecord?.attributes['tool.use_id']).toBe('use-1'); + + endHookSpan(hookSpan, { success: true, shouldProceed: true }); + endToolSpan(toolSpan, { success: true }); + }); + + it('records shouldProceed/blockType when PreToolUse blocks', () => { + const toolSpan = startToolSpan('Bash'); + let hookSpan!: ReturnType; + runInToolSpanContext(toolSpan, () => { + hookSpan = startHookSpan({ + hookEvent: 'PreToolUse', + toolName: 'Bash', + }); + }); + endHookSpan(hookSpan, { + success: true, + shouldProceed: false, + blockType: 'denied', + }); + + const hookRecord = mockSpans.find((s) => s.name === 'qwen-code.hook'); + expect(hookRecord?.attributes['should_proceed']).toBe(false); + expect(hookRecord?.attributes['block_type']).toBe('denied'); + // Blocking is intentional, not an error — status must stay UNSET. + expect(hookRecord?.statuses).toHaveLength(0); + + endToolSpan(toolSpan, { success: false, error: 'denied' }); + }); + + it('records shouldStop/hasAdditionalContext on PostToolUse', () => { + const toolSpan = startToolSpan('Bash'); + let hookSpan!: ReturnType; + runInToolSpanContext(toolSpan, () => { + hookSpan = startHookSpan({ + hookEvent: 'PostToolUse', + toolName: 'Bash', + }); + }); + endHookSpan(hookSpan, { + success: true, + shouldStop: true, + hasAdditionalContext: true, + }); + + const hookRecord = mockSpans.find((s) => s.name === 'qwen-code.hook'); + expect(hookRecord?.attributes['should_stop']).toBe(true); + expect(hookRecord?.attributes['has_additional_context']).toBe(true); + expect(hookRecord?.statuses).toHaveLength(0); + + endToolSpan(toolSpan, { success: true }); + }); + + it('marks status ERROR only when the hook itself threw', () => { + const toolSpan = startToolSpan('Bash'); + let hookSpan!: ReturnType; + runInToolSpanContext(toolSpan, () => { + hookSpan = startHookSpan({ + hookEvent: 'PostToolUseFailure', + toolName: 'Bash', + isInterrupt: true, + }); + }); + endHookSpan(hookSpan, { success: false, error: 'hook crashed' }); + + const hookRecord = mockSpans.find((s) => s.name === 'qwen-code.hook'); + expect(hookRecord?.statuses[0]?.code).toBe(SpanStatusCode.ERROR); + expect(hookRecord?.statuses[0]?.message).toBe('hook crashed'); + expect(hookRecord?.attributes['is_interrupt']).toBe(true); + + endToolSpan(toolSpan, { success: false, error: 'cancelled' }); + }); + + it('returns NOOP span when SDK is not initialized', () => { + mockState.sdkInitialized = false; + const hookSpan = startHookSpan({ + hookEvent: 'PreToolUse', + toolName: 'Bash', + }); + expect(hookSpan.spanContext().traceId).toBe('0'.repeat(32)); + endHookSpan(hookSpan, { success: true }); + }); + }); + describe('toolContext ALS lifecycle', () => { it('runInToolSpanContext scopes toolContext via run(), not enterWith', () => { const toolSpan = startToolSpan('Bash'); @@ -714,4 +938,87 @@ describe('session-tracing', () => { endToolSpan(toolSpan, { success: true }); }); }); + + describe('TTL safety net (#4321 review)', () => { + it('marks stale spans with ttl_expired + duration_ms before ending them', () => { + const toolSpan = startToolSpan('staleTool'); + const record = mockSpans.find((s) => s.name === 'qwen-code.tool')!; + + // 31 minutes after the span started — past the 30-min TTL. + const staleNow = Date.now() + 31 * 60 * 1000; + runTTLSweepForTesting(staleNow); + + expect(record.ended).toBe(true); + // Without the sentinel attrs, operators couldn't tell a TTL-aborted + // span from a deliberately-ended span that lost attribution. + expect(record.attributes['qwen-code.span.ttl_expired']).toBe(true); + expect( + record.attributes['qwen-code.span.duration_ms'] as number, + ).toBeGreaterThanOrEqual(31 * 60 * 1000 - 1000); + + // Calling endToolSpan after the TTL fires must still be safe — span + // already ended, attempt is a no-op. + endToolSpan(toolSpan, { success: false }); + }); + + it('does not mark spans that were ended before TTL expiry', () => { + const toolSpan = startToolSpan('liveTool'); + const record = mockSpans.find((s) => s.name === 'qwen-code.tool')!; + + // End normally, then run a sweep. The span is already ended → the + // sweep must not retroactively stamp ttl_expired on it. + endToolSpan(toolSpan, { success: true }); + runTTLSweepForTesting(Date.now() + 31 * 60 * 1000); + + expect(record.attributes['qwen-code.span.ttl_expired']).toBeUndefined(); + }); + + it('stamps decision=aborted/source=system on TTL-expired blocked_on_user spans', () => { + // The blocked-span branch in sweepStaleSpans tags the canonical + // taxonomy so dashboards filtering by `decision: 'aborted'` count + // walk-aways alongside explicit user aborts. + const toolSpan = startToolSpan('blockedStaleParent'); + const blockedSpan = startToolBlockedOnUserSpan(toolSpan, { + tool_name: 'blockedStaleParent', + }); + const blockedRecord = mockSpans.find( + (s) => s.name === 'qwen-code.tool.blocked_on_user', + )!; + + runTTLSweepForTesting(Date.now() + 31 * 60 * 1000); + + expect(blockedRecord.ended).toBe(true); + expect(blockedRecord.attributes['qwen-code.span.ttl_expired']).toBe(true); + expect(blockedRecord.attributes['decision']).toBe('aborted'); + expect(blockedRecord.attributes['source']).toBe('system'); + + // Cleanup the still-active tool span. + endToolBlockedOnUserSpan(blockedSpan); + endToolSpan(toolSpan, { success: false }); + }); + }); + + describe('truncateSpanError (#4321 review)', () => { + it('returns short strings unchanged', () => { + expect(truncateSpanError('short message')).toBe('short message'); + expect(truncateSpanError('')).toBe(''); + }); + + it('truncates strings over 1024 chars and appends a sentinel suffix', () => { + const oversized = 'a'.repeat(2000); + const truncated = truncateSpanError(oversized); + expect(truncated.length).toBeLessThan(oversized.length); + expect(truncated.endsWith('…[truncated]')).toBe(true); + expect(truncated.startsWith('a'.repeat(1024))).toBe(true); + }); + + it('does not double-suffix already-truncated input', () => { + // Hard guarantee: the sentinel is only appended when the input + // exceeds the cap. A short string with the suffix already present + // would NOT pass back through truncate at production sites — but + // sanity-check the boundary anyway. + const exactlyAtCap = 'b'.repeat(1024); + expect(truncateSpanError(exactlyAtCap)).toBe(exactlyAtCap); + }); + }); }); diff --git a/packages/core/src/telemetry/session-tracing.ts b/packages/core/src/telemetry/session-tracing.ts index d750a8cf34..981edf1b06 100644 --- a/packages/core/src/telemetry/session-tracing.ts +++ b/packages/core/src/telemetry/session-tracing.ts @@ -17,9 +17,11 @@ import { import type { Config } from '../config/config.js'; import { SERVICE_NAME, + SPAN_HOOK, SPAN_INTERACTION, SPAN_LLM_REQUEST, SPAN_TOOL, + SPAN_TOOL_BLOCKED_ON_USER, SPAN_TOOL_EXECUTION, } from './constants.js'; import { clearDetailedSpanState } from './detailed-span-attributes.js'; @@ -121,26 +123,69 @@ let lastInteractionCtx: SpanContext | undefined; let cleanupIntervalStarted = false; const SPAN_TTL_MS = 30 * 60 * 1000; -function ensureCleanupInterval(): void { - if (cleanupIntervalStarted) return; - cleanupIntervalStarted = true; - const interval = setInterval(() => { - const cutoff = Date.now() - SPAN_TTL_MS; - for (const [spanId, weakRef] of activeSpans) { - const ctx = weakRef.deref(); - if (ctx === undefined) { - activeSpans.delete(spanId); - strongSpans.delete(spanId); - } else if (ctx.startTime < cutoff) { - if (!ctx.ended) { - ctx.ended = true; +function sweepStaleSpans(now: number): void { + const cutoff = now - SPAN_TTL_MS; + for (const [spanId, weakRef] of activeSpans) { + const ctx = weakRef.deref(); + if (ctx === undefined) { + activeSpans.delete(spanId); + strongSpans.delete(spanId); + } else if (ctx.startTime < cutoff) { + if (!ctx.ended) { + ctx.ended = true; + // Mark the span so backends can distinguish "abandoned and + // garbage-collected by the TTL safety net" from "deliberately + // ended without setting status / attrs" (#4321 review). + const ageMs = now - ctx.startTime; + const toolName = ctx.attributes['tool.name']; + const callId = ctx.attributes['tool.call_id']; + // setAttributes and span.end() are wrapped separately so a + // setAttributes throw can't prevent the span from being ended + // (#4321 review-3 wenshao Suggestion). For blocked_on_user + // spans, also stamp the canonical decision/source taxonomy so + // dashboards filtering by `decision: 'aborted'` count + // walk-aways consistently with explicit user aborts. + try { + ctx.span.setAttributes({ + 'qwen-code.span.ttl_expired': true, + 'qwen-code.span.duration_ms': ageMs, + ...(ctx.type === 'tool.blocked_on_user' + ? { + decision: 'aborted', + source: 'system', + } + : {}), + }); + } catch { + // OTel errors must not prevent span.end() from running. + } + // Include tool name + call_id so the log is actionable in + // production without a trace-backend lookup (review-3). + const ctxLabel = + toolName && callId + ? `${ctx.type} (tool.name=${toolName}, tool.call_id=${callId})` + : ctx.type; + debugLogger.warn( + `Stale ${ctxLabel} span ended by TTL safety net (age=${ageMs}ms, spanId=${spanId})`, + ); + try { ctx.span.end(); + } catch (error) { + debugLogger.warn( + `Failed to end stale span ${spanId}: ${error instanceof Error ? error.message : String(error)}`, + ); } - activeSpans.delete(spanId); - strongSpans.delete(spanId); } + activeSpans.delete(spanId); + strongSpans.delete(spanId); } - }, 60_000); + } +} + +function ensureCleanupInterval(): void { + if (cleanupIntervalStarted) return; + cleanupIntervalStarted = true; + const interval = setInterval(() => sweepStaleSpans(Date.now()), 60_000); if (typeof interval.unref === 'function') { interval.unref(); } @@ -150,6 +195,28 @@ function getSpanId(span: Span): string { return span.spanContext().spanId || ''; } +const SPAN_ERROR_MAX_CHARS = 1024; + +/** + * Bound the size of error strings written to span attributes / status + * messages. Hook server responses, raw exception stacks, or malicious + * inputs can be unbounded; some OTel backends drop the entire span when + * any field exceeds their limit (#4321 review-3 wenshao Critical). + * + * Truncates by UTF-16 code units (`String.length`/`String.slice`), not + * bytes — for ASCII-heavy text this approximates a 1KB byte limit, but + * CJK/emoji-heavy errors can land in the ~2-3KB range after UTF-8 + * encoding. That's still well under all major OTel backends' + * per-attribute limits (Jaeger ~64KB, Honeycomb ~64KB, OTLP default + * ~32KB), so we keep the simpler char-count bound rather than paying + * the encoder cost on every endXSpan (review-4 follow-up). + */ +export function truncateSpanError(s: string): string { + return s.length > SPAN_ERROR_MAX_CHARS + ? s.slice(0, SPAN_ERROR_MAX_CHARS) + '…[truncated]' + : s; +} + function getTracer() { return trace.getTracer(SERVICE_NAME, '1.0.0'); } @@ -287,7 +354,8 @@ export function endLLMRequestSpan( if (metadata.outputTokens !== undefined) endAttributes['output_tokens'] = metadata.outputTokens; endAttributes['success'] = metadata.success; - if (metadata.error !== undefined) endAttributes['error'] = metadata.error; + if (metadata.error !== undefined) + endAttributes['error'] = truncateSpanError(metadata.error); } spanCtx.span.setAttributes(endAttributes); @@ -297,7 +365,9 @@ export function endLLMRequestSpan( } else { spanCtx.span.setStatus({ code: SpanStatusCode.ERROR, - message: metadata.error ?? 'unknown error', + message: metadata.error + ? truncateSpanError(metadata.error) + : 'unknown error', }); } } catch (error) { @@ -394,7 +464,8 @@ export function endToolSpan(span: Span, metadata?: ToolSpanMetadata): void { if (metadata) { if (metadata.success !== undefined) endAttributes['success'] = metadata.success; - if (metadata.error !== undefined) endAttributes['error'] = metadata.error; + if (metadata.error !== undefined) + endAttributes['error'] = truncateSpanError(metadata.error); } spanCtx.span.setAttributes(endAttributes); @@ -405,7 +476,9 @@ export function endToolSpan(span: Span, metadata?: ToolSpanMetadata): void { } else { spanCtx.span.setStatus({ code: SpanStatusCode.ERROR, - message: metadata.error ?? 'tool error', + message: metadata.error + ? truncateSpanError(metadata.error) + : 'tool error', }); } } @@ -491,7 +564,8 @@ export function endToolExecutionSpan( if (metadata) { if (metadata.success !== undefined) endAttributes['success'] = metadata.success; - if (metadata.error !== undefined) endAttributes['error'] = metadata.error; + if (metadata.error !== undefined) + endAttributes['error'] = truncateSpanError(metadata.error); } spanCtx.span.setAttributes(endAttributes); @@ -506,7 +580,9 @@ export function endToolExecutionSpan( } else { spanCtx.span.setStatus({ code: SpanStatusCode.ERROR, - message: metadata.error ?? 'tool execution error', + message: metadata.error + ? truncateSpanError(metadata.error) + : 'tool execution error', }); } } @@ -527,6 +603,243 @@ export function endToolExecutionSpan( strongSpans.delete(spanId); } +// --- Tool Blocked-on-User Spans --- + +export type ToolBlockedDecision = + | 'proceed_once' + | 'proceed_always' + | 'cancel' + | 'aborted' + | 'auto_approved' + // System-error close — distinct from user 'cancel' so dashboards counting + // user cancels don't double-count thrown exceptions in the approval path. + | 'error'; + +export type ToolBlockedSource = 'cli' | 'ide' | 'hook' | 'auto' | 'system'; + +/** + * Brackets the time a tool spends in `awaiting_approval` waiting on the user. + * + * The parent is passed explicitly because this span starts BEFORE the tool + * body's `runInToolSpanContext` block — so `toolContext.getStore()` is empty. + * Passing the span object also avoids the `findLast`-by-type concurrency bug + * (claude-code's sessionTracing has it; we deliberately don't). + */ +export function startToolBlockedOnUserSpan( + toolSpan: Span, + attrs?: { tool_name?: string; call_id?: string }, +): Span { + if (!isTelemetrySdkInitialized()) { + return NOOP_SPAN; + } + // Idempotent — kick off the 30-min TTL cleanup in case this span is + // started in a code path where no interaction span has been created + // yet (sub-agent tool calls, side queries, future patterns). + ensureCleanupInterval(); + + const parentSpanId = getSpanId(toolSpan); + const parentSpanCtx = activeSpans.get(parentSpanId)?.deref(); + // If the tool span was already ended (defensive — shouldn't happen on the + // happy path), fall back to the standard parent-resolution chain so we + // still produce a span correlated with the session. + if (!parentSpanCtx) { + debugLogger.debug( + 'startToolBlockedOnUserSpan: tool span not in activeSpans (already ended?) — using resolveParentContext fallback', + ); + } + const ctx = parentSpanCtx + ? trace.setSpan(otelContext.active(), parentSpanCtx.span) + : resolveParentContext(undefined); + + const attributes: Attributes = {}; + if (attrs?.tool_name !== undefined) attributes['tool.name'] = attrs.tool_name; + if (attrs?.call_id !== undefined) attributes['tool.call_id'] = attrs.call_id; + + const span = getTracer().startSpan( + SPAN_TOOL_BLOCKED_ON_USER, + { kind: SpanKind.INTERNAL, attributes }, + ctx, + ); + + const spanId = getSpanId(span); + const spanContextObj: SpanContext = { + span, + startTime: Date.now(), + attributes: attributes as Record, + type: 'tool.blocked_on_user', + }; + activeSpans.set(spanId, new WeakRef(spanContextObj)); + strongSpans.set(spanId, spanContextObj); + + return span; +} + +/** + * Status stays UNSET — waiting on the user is neither OK nor ERROR. + * The decision/source attributes are the canonical signal. + */ +export function endToolBlockedOnUserSpan( + span: Span, + metadata?: { + decision?: ToolBlockedDecision; + source?: ToolBlockedSource; + }, +): void { + const spanId = getSpanId(span); + const spanCtx = activeSpans.get(spanId)?.deref(); + if (!spanCtx || spanCtx.ended) return; + + spanCtx.ended = true; + + try { + const duration = Date.now() - spanCtx.startTime; + const endAttributes: Attributes = { duration_ms: duration }; + if (metadata?.decision !== undefined) + endAttributes['decision'] = metadata.decision; + if (metadata?.source !== undefined) + endAttributes['source'] = metadata.source; + spanCtx.span.setAttributes(endAttributes); + } catch (error) { + debugLogger.warn( + `Failed to update blocked_on_user span attributes: ${error instanceof Error ? error.message : String(error)}`, + ); + } + try { + spanCtx.span.end(); + } catch (error) { + debugLogger.warn( + `Failed to end blocked_on_user span: ${error instanceof Error ? error.message : String(error)}`, + ); + } + activeSpans.delete(spanId); + strongSpans.delete(spanId); +} + +// --- Hook Spans --- + +export type HookEvent = 'PreToolUse' | 'PostToolUse' | 'PostToolUseFailure'; + +export interface StartHookSpanOptions { + hookEvent: HookEvent; + toolName: string; + toolUseId?: string; + /** PostToolUseFailure only: true when the failure is a user interrupt. */ + isInterrupt?: boolean; +} + +export interface HookSpanMetadata { + /** Whether the hook fire site completed without throwing. */ + success?: boolean; + /** PreToolUse: false means the hook blocked tool execution. */ + shouldProceed?: boolean; + /** PostToolUse: true means the hook stopped further processing. */ + shouldStop?: boolean; + /** Discriminator for blocking decision when applicable. */ + blockType?: 'denied' | 'ask' | 'stop'; + hasAdditionalContext?: boolean; + /** Hook threw — span ends as ERROR with this message. */ + error?: string; +} + +export function startHookSpan(opts: StartHookSpanOptions): Span { + if (!isTelemetrySdkInitialized()) { + return NOOP_SPAN; + } + // Same defensive cleanup-interval kick as startToolBlockedOnUserSpan — + // hook spans may run before any interaction span has been created. + ensureCleanupInterval(); + + // Hooks fire from inside `runInToolSpanContext` so toolContext is the + // natural parent. resolveParentContext also covers the rare case where a + // hook span is started outside any tool (defensive — keeps the trace tree + // correlated with the session). + const parentCtx = + toolContext.getStore() ?? interactionContext.getStore() ?? undefined; + const ctx = resolveParentContext(parentCtx); + + const attributes: Attributes = { + hook_event: opts.hookEvent, + 'tool.name': opts.toolName, + }; + if (opts.toolUseId !== undefined) attributes['tool.use_id'] = opts.toolUseId; + if (opts.isInterrupt !== undefined) + attributes['is_interrupt'] = opts.isInterrupt; + + const span = getTracer().startSpan( + SPAN_HOOK, + { kind: SpanKind.INTERNAL, attributes }, + ctx, + ); + + const spanId = getSpanId(span); + const spanContextObj: SpanContext = { + span, + startTime: Date.now(), + attributes: attributes as Record, + type: 'hook', + }; + activeSpans.set(spanId, new WeakRef(spanContextObj)); + strongSpans.set(spanId, spanContextObj); + + return span; +} + +/** + * Status: UNSET on normal flow (including blocking decisions like + * shouldProceed: false or shouldStop: true — those are intentional, not + * errors). Only an actual hook-side throw (caught by the safelyFire wrapper + * or rethrown) maps to ERROR via the `error` metadata field. + */ +export function endHookSpan(span: Span, metadata?: HookSpanMetadata): void { + const spanId = getSpanId(span); + const spanCtx = activeSpans.get(spanId)?.deref(); + if (!spanCtx || spanCtx.ended) return; + + spanCtx.ended = true; + + try { + const duration = Date.now() - spanCtx.startTime; + const endAttributes: Attributes = { duration_ms: duration }; + + if (metadata) { + if (metadata.success !== undefined) + endAttributes['success'] = metadata.success; + if (metadata.shouldProceed !== undefined) + endAttributes['should_proceed'] = metadata.shouldProceed; + if (metadata.shouldStop !== undefined) + endAttributes['should_stop'] = metadata.shouldStop; + if (metadata.blockType !== undefined) + endAttributes['block_type'] = metadata.blockType; + if (metadata.hasAdditionalContext !== undefined) + endAttributes['has_additional_context'] = metadata.hasAdditionalContext; + if (metadata.error !== undefined) + endAttributes['error'] = truncateSpanError(metadata.error); + } + + spanCtx.span.setAttributes(endAttributes); + + if (metadata?.error !== undefined) { + spanCtx.span.setStatus({ + code: SpanStatusCode.ERROR, + message: truncateSpanError(metadata.error), + }); + } + } catch (error) { + debugLogger.warn( + `Failed to update hook span attributes/status: ${error instanceof Error ? error.message : String(error)}`, + ); + } + try { + spanCtx.span.end(); + } catch (error) { + debugLogger.warn( + `Failed to end hook span: ${error instanceof Error ? error.message : String(error)}`, + ); + } + activeSpans.delete(spanId); + strongSpans.delete(spanId); +} + // --- Interaction Span Attribute Access --- export function getActiveInteractionSpan(): Span | undefined { @@ -546,3 +859,12 @@ export function clearSessionTracingForTesting(): void { lastInteractionCtx = undefined; clearDetailedSpanState(); } + +/** + * Test-only: invoke the TTL sweep with a synthetic `now`. Lets tests + * exercise the stale-span path without waiting 30 minutes or stubbing + * setInterval globally. + */ +export function runTTLSweepForTesting(now: number): void { + sweepStaleSpans(now); +}