diff --git a/docs/design/prompt-suggestion/prompt-suggestion-design.md b/docs/design/prompt-suggestion/prompt-suggestion-design.md
index 1636db6cf..a8d7ef381 100644
--- a/docs/design/prompt-suggestion/prompt-suggestion-design.md
+++ b/docs/design/prompt-suggestion/prompt-suggestion-design.md
@@ -64,10 +64,19 @@ A **prompt suggestion** (Next-step Suggestion / NES) is a short prediction (2-12
 ```
 [SUGGESTION MODE: Suggest what the user might naturally type next.]
+FIRST: Read the LAST FEW LINES of the assistant's most recent message — that's where
+next-step hints, tips, and actionable suggestions usually appear. Then check the user's
+recent messages and original request.
+
 Your job is to predict what THEY would type - not what you think they should do.
 
 THE TEST: Would they think "I was just about to type that"?
 
+PRIORITY: If the assistant's last message contains a tip or hint like "Tip: type X to ..."
+or "type X to ...", extract X as the suggestion. These are explicit next-step hints.
+
 EXAMPLES:
+Assistant says "Tip: type post comments to publish findings" → "post comments"
+Assistant says "type /review to start" → "/review"
 User asked "fix the bug and run tests", bug is fixed → "run the tests"
 After code written → "try it out"
 Task complete, obvious follow-up → "commit this" or "push it"
@@ -197,6 +206,23 @@ The Tab handler uses `key.name === 'tab'` explicitly (not `ACCEPT_SUGGESTION` ma
 | `enableSpeculation` | boolean | false | Predictive execution engine |
 | `fastModel` (top-level) | string | "" | Model for all background tasks (empty = use main model). Set via `/model --fast` |
 
+### Internal Prompt ID Filtering
+
+Background operations use dedicated prompt IDs (`INTERNAL_PROMPT_IDS` in `utils/internalPromptIds.ts`) to prevent their API traffic and tool calls from appearing in the user-visible UI:
+
+| Prompt ID           | Used by                    |
+| ------------------- | -------------------------- |
+| `prompt_suggestion` | Suggestion generation      |
+| `forked_query`      | Cache-aware forked queries |
+| `speculation`       | Speculation engine         |
+
+**Filtering applied:**
+
+- `loggingContentGenerator` — skips `logApiRequest` and OpenAI interaction logging for internal IDs
+- `logApiResponse` / `logApiError` — skips `chatRecordingService.recordUiTelemetryEvent`
+- `logToolCall` — skips `chatRecordingService.recordUiTelemetryEvent`
+- `uiTelemetryService.addEvent` — **not filtered** (ensures `/stats` token tracking works)
+
 ### Thinking Mode
 
 Thinking/reasoning is explicitly disabled (`thinkingConfig: { includeThoughts: false }`) for all background task paths:
diff --git a/docs/users/features/followup-suggestions.md b/docs/users/features/followup-suggestions.md
index 3dbf11df5..6b72a1d70 100644
--- a/docs/users/features/followup-suggestions.md
+++ b/docs/users/features/followup-suggestions.md
@@ -12,7 +12,7 @@ After Qwen Code finishes responding, a suggestion appears as dimmed text in the
 > run the tests
 ```
 
-The suggestion is generated by sending the conversation history to the model, which predicts what you would naturally type next.
+The suggestion is generated by sending the conversation history to the model, which predicts what you would naturally type next. If the response contains an explicit tip (e.g., `Tip: type post comments to publish findings`), the suggested action is extracted automatically.
 
 ## Accepting Suggestions
 
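Reviewer note: the filtering contract the design doc above describes, as a minimal TypeScript sketch. Sink names are illustrative, not the literal call sites; only `isInternalPromptId` comes from this PR.

```ts
import { isInternalPromptId } from '../utils/internalPromptIds.js';

interface Sinks {
  addUiEvent(e: unknown): void; // uiTelemetryService.addEvent: never filtered (/stats)
  recordToChat(e: unknown): void; // chatRecordingService: filtered for internal IDs
}

function dispatchTelemetry(promptId: string, event: unknown, sinks: Sinks): void {
  sinks.addUiEvent(event); // token totals still include background traffic
  if (!isInternalPromptId(promptId)) {
    sinks.recordToChat(event); // user-visible recording skips internal prompts
  }
}
```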
diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx
index b4bfae4e1..641bb20cd 100644
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@@ -235,6 +235,10 @@ describe('InputPrompt', () => {
     await wait();
 
     expect(props.onSubmit).toHaveBeenCalledWith('commit this');
+    // Enter path must NOT call buffer.insert — it passes text directly to
+    // handleSubmitAndClear. Calling insert would re-fill the buffer after
+    // it was already cleared (the microtask race bug).
+    expect(mockBuffer.insert).not.toHaveBeenCalled();
 
     unmount();
   });
diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx
index aa2004c72..9b0e75e5f 100644
--- a/packages/cli/src/ui/components/InputPrompt.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.tsx
@@ -882,7 +882,11 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
         followup.state.suggestion
       ) {
         const text = followup.state.suggestion;
-        followup.accept('enter');
+        // Skip onAccept (buffer.insert) — we pass the text directly to
+        // handleSubmitAndClear which clears the buffer synchronously.
+        // Without skipOnAccept the microtask in accept() would re-insert
+        // the suggestion into the buffer after it was already cleared.
+        followup.accept('enter', { skipOnAccept: true });
         handleSubmitAndClear(text);
         return true;
       }
diff --git a/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx b/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
index 81773f0fb..4ff8e4d68 100644
--- a/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
+++ b/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
@@ -43,7 +43,10 @@ export interface UseFollowupSuggestionsReturn {
   /** Set suggestion text (called by parent component) */
   setSuggestion: (text: string | null) => void;
   /** Accept the current suggestion */
-  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  accept: (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ) => void;
   /** Dismiss the current suggestion */
   dismiss: () => void;
   /** Clear all state */
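Reviewer note: the race these comments refer to, reduced to a standalone sketch. Names are illustrative; the real flow is `accept()` in `followupState.ts` plus `handleSubmitAndClear`.

```ts
// accept() defers onAccept via queueMicrotask, so a synchronous buffer clear
// in the same tick gets overwritten one microtask later.
let buffer = '';
const onAccept = (text: string) => { buffer = text; }; // Tab path: fill the input

function accept(text: string, skipOnAccept = false): void {
  if (!skipOnAccept) queueMicrotask(() => onAccept(text));
}

accept('run the tests'); // old Enter path
buffer = ''; // handleSubmitAndClear clears synchronously
queueMicrotask(() => console.log(buffer)); // "run the tests": re-inserted!

accept('run the tests', true); // fixed Enter path: nothing queued, buffer stays empty
```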
diff --git a/packages/cli/src/ui/utils/MarkdownDisplay.tsx b/packages/cli/src/ui/utils/MarkdownDisplay.tsx
index 642b04d6b..25e2110d9 100644
--- a/packages/cli/src/ui/utils/MarkdownDisplay.tsx
+++ b/packages/cli/src/ui/utils/MarkdownDisplay.tsx
@@ -111,7 +111,9 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
       lines[index + 1].match(tableSeparatorRegex)
     ) {
       inTable = true;
-      tableHeaders = tableRowMatch[1].split(/(?<!\\)\|/).map((cell) => cell.trim().replaceAll('\\|', '|'));
+      tableHeaders = tableRowMatch[1]
+        .split(/(?<!\\)\|/)
+        .map((cell) => cell.trim().replaceAll('\\|', '|'));
       tableRows = [];
     } else {
       // Not a table, treat as regular text
@@ -127,7 +129,9 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
       // Skip separator line - already handled
     } else if (inTable && tableRowMatch) {
       // Add table row
-      const cells = tableRowMatch[1].split(/(?<!\\)\|/).map((cell) => cell.trim().replaceAll('\\|', '|'));
+      const cells = tableRowMatch[1]
+        .split(/(?<!\\)\|/)
+        .map((cell) => cell.trim().replaceAll('\\|', '|'));
       // Ensure row has same column count as headers
       while (cells.length < tableHeaders.length) {
         cells.push('');
diff --git a/packages/cli/src/ui/utils/TableRenderer.tsx b/packages/cli/src/ui/utils/TableRenderer.tsx
index db3ff8a95..7684a2b71 100644
--- a/packages/cli/src/ui/utils/TableRenderer.tsx
+++ b/packages/cli/src/ui/utils/TableRenderer.tsx
@@ -36,7 +36,10 @@ export const TableRenderer: React.FC<TableRendererProps> = ({
   // Ensure table fits within terminal width
   const totalWidth = columnWidths.reduce((sum, width) => sum + width + 1, 1);
   const fixedWidth = columnWidths.length + 1;
-  const scaleFactor = totalWidth > contentWidth ? (contentWidth - fixedWidth) / (totalWidth - fixedWidth) : 1;
+  const scaleFactor =
+    totalWidth > contentWidth
+      ? (contentWidth - fixedWidth) / (totalWidth - fixedWidth)
+      : 1;
   const adjustedWidths = columnWidths.map((width) =>
     Math.floor(width * scaleFactor),
   );
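Reviewer note: both behaviors above, checked in isolation. The widths are hypothetical; the snippet is runnable as-is.

```ts
// Split on unescaped '|' only, then unescape '\|' inside cells, mirroring
// the MarkdownDisplay change above.
const row = String.raw`a \| b | c`;
const cells = row.split(/(?<!\\)\|/).map((c) => c.trim().replaceAll('\\|', '|'));
console.log(cells); // [ 'a | b', 'c' ]

// Width scaling from TableRenderer: shrink only when the table overflows.
const columnWidths = [40, 40];
const contentWidth = 60; // hypothetical terminal content width
const totalWidth = columnWidths.reduce((sum, w) => sum + w + 1, 1); // + borders
const fixedWidth = columnWidths.length + 1;
const scaleFactor =
  totalWidth > contentWidth
    ? (contentWidth - fixedWidth) / (totalWidth - fixedWidth)
    : 1;
console.log(columnWidths.map((w) => Math.floor(w * scaleFactor))); // [ 28, 28 ]
```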
diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
index b39e65357..624f04f3b 100644
--- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
+++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
@@ -23,11 +23,16 @@ import {
 import { OpenAILogger } from '../../utils/openaiLogger.js';
 import type OpenAI from 'openai';
 
-vi.mock('../../telemetry/loggers.js', () => ({
-  logApiRequest: vi.fn(),
-  logApiResponse: vi.fn(),
-  logApiError: vi.fn(),
-}));
+vi.mock('../../telemetry/loggers.js', async (importOriginal) => {
+  const actual =
+    await importOriginal<typeof import('../../telemetry/loggers.js')>();
+  return {
+    ...actual,
+    logApiRequest: vi.fn(),
+    logApiResponse: vi.fn(),
+    logApiError: vi.fn(),
+  };
+});
 
 vi.mock('../../utils/openaiLogger.js', () => ({
   OpenAILogger: vi.fn().mockImplementation(() => ({
@@ -474,4 +479,91 @@ describe('LoggingContentGenerator', () => {
       },
     ]);
   });
+
+  it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+    'skips logApiRequest and OpenAI logging for internal promptId %s (generateContent)',
+    async (promptId) => {
+      const mockResponse = {
+        responseId: 'internal-resp',
+        modelVersion: 'test-model',
+        candidates: [{ content: { parts: [{ text: 'suggestion' }] } }],
+        usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
+      } as unknown as GenerateContentResponse;
+
+      const mockWrapped = {
+        generateContent: vi.fn().mockResolvedValue(mockResponse),
+        generateContentStream: vi.fn(),
+      } as unknown as ContentGenerator;
+
+      const gen = new LoggingContentGenerator(mockWrapped, createConfig(), {
+        model: 'test-model',
+        enableOpenAILogging: true,
+        openAILoggingDir: '/tmp/test-logs',
+      });
+
+      const request = {
+        model: 'test-model',
+        contents: [{ role: 'user', parts: [{ text: 'test' }] }],
+      } as unknown as GenerateContentParameters;
+
+      await gen.generateContent(request, promptId);
+
+      // logApiRequest should NOT be called for internal prompts
+      expect(logApiRequest).not.toHaveBeenCalled();
+      // logApiResponse SHOULD be called (for /stats token tracking)
+      expect(logApiResponse).toHaveBeenCalled();
+      // OpenAI logger should be constructed, but no interaction should be logged
+      expect(OpenAILogger).toHaveBeenCalled();
+      const loggerInstance = (
+        OpenAILogger as unknown as ReturnType<typeof vi.fn>
+      ).mock.results[0]?.value;
+      expect(loggerInstance.logInteraction).not.toHaveBeenCalled();
+    },
+  );
+
+  it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+    'skips logApiRequest and OpenAI logging for internal promptId %s (generateContentStream)',
+    async (promptId) => {
+      const mockChunk = {
+        responseId: 'stream-resp',
+        modelVersion: 'test-model',
+        candidates: [{ content: { parts: [{ text: 'suggestion' }] } }],
+        usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
+      } as unknown as GenerateContentResponse;
+
+      async function* fakeStream() {
+        yield mockChunk;
+      }
+
+      const mockWrapped = {
+        generateContent: vi.fn(),
+        generateContentStream: vi.fn().mockResolvedValue(fakeStream()),
+      } as unknown as ContentGenerator;
+
+      const gen = new LoggingContentGenerator(mockWrapped, createConfig(), {
+        model: 'test-model',
+        enableOpenAILogging: true,
+        openAILoggingDir: '/tmp/test-logs',
+      });
+
+      const request = {
+        model: 'test-model',
+        contents: [{ role: 'user', parts: [{ text: 'test' }] }],
+      } as unknown as GenerateContentParameters;
+
+      const stream = await gen.generateContentStream(request, promptId);
+      // Consume the stream
+      for await (const _chunk of stream) {
+        // drain
+      }
+
+      expect(logApiRequest).not.toHaveBeenCalled();
+      expect(logApiResponse).toHaveBeenCalled();
+      expect(OpenAILogger).toHaveBeenCalled();
+      const loggerInstance = (
+        OpenAILogger as unknown as ReturnType<typeof vi.fn>
+      ).mock.results[0]?.value;
+      expect(loggerInstance.logInteraction).not.toHaveBeenCalled();
+    },
+  );
 });
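Reviewer note: the mock rewrite above is vitest's standard partial-mock pattern. Generic shape for reference; the module name is a placeholder.

```ts
import { vi } from 'vitest';

// Keep every real export (e.g., logToolCall) and stub only the noisy ones;
// importOriginal loads the unmocked module so the spread preserves the rest.
vi.mock('./loggers.js', async (importOriginal) => {
  const actual = await importOriginal<typeof import('./loggers.js')>();
  return { ...actual, logApiRequest: vi.fn() };
});
```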
diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
index 4f4b00138..4464f5a7e 100644
--- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
+++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
@@ -31,6 +31,7 @@ import {
   logApiRequest,
   logApiResponse,
 } from '../../telemetry/loggers.js';
+import { isInternalPromptId } from '../../utils/internalPromptIds.js';
 import type {
   ContentGenerator,
   ContentGeneratorConfig,
@@ -143,8 +144,17 @@ export class LoggingContentGenerator implements ContentGenerator {
     userPromptId: string,
   ): Promise<GenerateContentResponse> {
     const startTime = Date.now();
-    this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
-    const openaiRequest = await this.buildOpenAIRequestForLogging(req);
+    const isInternal = isInternalPromptId(userPromptId);
+    if (!isInternal) {
+      this.logApiRequest(
+        this.toContents(req.contents),
+        req.model,
+        userPromptId,
+      );
+    }
+    const openaiRequest = isInternal
+      ? undefined
+      : await this.buildOpenAIRequestForLogging(req);
     try {
       const response = await this.wrapped.generateContent(req, userPromptId);
       const durationMs = Date.now() - startTime;
@@ -155,12 +165,16 @@ export class LoggingContentGenerator implements ContentGenerator {
         userPromptId,
         response.usageMetadata,
       );
-      await this.logOpenAIInteraction(openaiRequest, response);
+      if (!isInternal) {
+        await this.logOpenAIInteraction(openaiRequest, response);
+      }
       return response;
     } catch (error) {
       const durationMs = Date.now() - startTime;
       this._logApiError('', durationMs, error, req.model, userPromptId);
-      await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      if (!isInternal) {
+        await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      }
       throw error;
     }
   }
@@ -170,8 +184,17 @@ export class LoggingContentGenerator implements ContentGenerator {
     userPromptId: string,
   ): Promise<AsyncGenerator<GenerateContentResponse>> {
     const startTime = Date.now();
-    this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
-    const openaiRequest = await this.buildOpenAIRequestForLogging(req);
+    const isInternal = isInternalPromptId(userPromptId);
+    if (!isInternal) {
+      this.logApiRequest(
+        this.toContents(req.contents),
+        req.model,
+        userPromptId,
+      );
+    }
+    const openaiRequest = isInternal
+      ? undefined
+      : await this.buildOpenAIRequestForLogging(req);
 
     let stream: AsyncGenerator<GenerateContentResponse>;
     try {
@@ -179,7 +202,9 @@ export class LoggingContentGenerator implements ContentGenerator {
       stream = await this.wrapped.generateContentStream(req, userPromptId);
     } catch (error) {
       const durationMs = Date.now() - startTime;
       this._logApiError('', durationMs, error, req.model, userPromptId);
-      await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      if (!isInternal) {
+        await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      }
       throw error;
     }
@@ -199,12 +224,27 @@ export class LoggingContentGenerator implements ContentGenerator {
     model: string,
     openaiRequest?: OpenAI.Chat.ChatCompletionCreateParams,
   ): AsyncGenerator<GenerateContentResponse> {
+    const isInternal = isInternalPromptId(userPromptId);
+    // For internal prompts we only need the last usage metadata (for /stats);
+    // skip collecting full responses to avoid unnecessary memory overhead.
     const responses: GenerateContentResponse[] = [];
+    // Track first-seen IDs so _logApiResponse/_logApiError have accurate
+    // values even when we skip collecting full responses for internal prompts.
+    let firstResponseId = '';
+    let firstModelVersion = '';
     let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined;
     try {
       for await (const response of stream) {
-        responses.push(response);
+        if (!firstResponseId && response.responseId) {
+          firstResponseId = response.responseId;
+        }
+        if (!firstModelVersion && response.modelVersion) {
+          firstModelVersion = response.modelVersion;
+        }
+        if (!isInternal) {
+          responses.push(response);
+        }
         if (response.usageMetadata) {
           lastUsageMetadata = response.usageMetadata;
         }
@@ -213,25 +253,29 @@ export class LoggingContentGenerator implements ContentGenerator {
       // Only log successful API response if no error occurred
       const durationMs = Date.now() - startTime;
       this._logApiResponse(
-        responses[0]?.responseId ?? '',
+        firstResponseId,
         durationMs,
-        responses[0]?.modelVersion || model,
+        firstModelVersion || model,
         userPromptId,
         lastUsageMetadata,
       );
-      const consolidatedResponse =
-        this.consolidateGeminiResponsesForLogging(responses);
-      await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
+      if (!isInternal) {
+        const consolidatedResponse =
+          this.consolidateGeminiResponsesForLogging(responses);
+        await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
+      }
     } catch (error) {
       const durationMs = Date.now() - startTime;
       this._logApiError(
-        responses[0]?.responseId ?? '',
+        firstResponseId,
         durationMs,
         error,
-        responses[0]?.modelVersion || model,
+        firstModelVersion || model,
         userPromptId,
       );
-      await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      if (!isInternal) {
+        await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      }
       throw error;
     }
   }
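Reviewer note: the wrapper change in miniature: a pass-through generator that keeps first-seen IDs and last usage without buffering internal-prompt chunks. Types are simplified for the sketch.

```ts
type Chunk = { responseId?: string; usage?: number };

async function* observe(stream: AsyncIterable<Chunk>, isInternal: boolean) {
  const buffered: Chunk[] = []; // full capture only for user-visible turns
  let firstId = '';
  let lastUsage: number | undefined;
  for await (const chunk of stream) {
    if (!firstId && chunk.responseId) firstId = chunk.responseId;
    if (!isInternal) buffered.push(chunk);
    if (chunk.usage !== undefined) lastUsage = chunk.usage;
    yield chunk; // the caller still sees every chunk
  }
  // firstId/lastUsage feed response logging; buffered feeds OpenAI logging only
  console.log({ firstId, lastUsage, bufferedCount: buffered.length });
}
```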
diff --git a/packages/core/src/followup/followupState.test.ts b/packages/core/src/followup/followupState.test.ts
index 325c967b0..331a28db5 100644
--- a/packages/core/src/followup/followupState.test.ts
+++ b/packages/core/src/followup/followupState.test.ts
@@ -294,6 +294,37 @@ describe('createFollowupController', () => {
     ctrl.cleanup();
   });
 
+  it('accept with skipOnAccept skips onAccept callback but still clears state and fires telemetry', async () => {
+    const onStateChange = vi.fn();
+    const onAccept = vi.fn();
+    const onOutcome = vi.fn();
+    const ctrl = createFollowupController({
+      onStateChange,
+      getOnAccept: () => onAccept,
+      onOutcome,
+    });
+
+    ctrl.setSuggestion('run tests');
+    vi.advanceTimersByTime(300);
+    onStateChange.mockClear();
+
+    ctrl.accept('enter', { skipOnAccept: true });
+
+    // State should be cleared
+    expect(onStateChange).toHaveBeenCalledWith(INITIAL_FOLLOWUP_STATE);
+
+    // Telemetry should still fire
+    expect(onOutcome).toHaveBeenCalledWith(
+      expect.objectContaining({ outcome: 'accepted', accept_method: 'enter' }),
+    );
+
+    // Flush microtask — onAccept should NOT be called
+    await Promise.resolve();
+    expect(onAccept).not.toHaveBeenCalled();
+
+    ctrl.cleanup();
+  });
+
   it('setSuggestion replaces a pending suggestion', () => {
     const onStateChange = vi.fn();
     const ctrl = createFollowupController({ onStateChange });
diff --git a/packages/core/src/followup/followupState.ts b/packages/core/src/followup/followupState.ts
index 8430e206f..60e8f717a 100644
--- a/packages/core/src/followup/followupState.ts
+++ b/packages/core/src/followup/followupState.ts
@@ -72,7 +72,10 @@ export interface FollowupControllerActions {
   /** Set suggestion text (with delayed show). Null clears immediately. */
   setSuggestion: (text: string | null) => void;
   /** Accept the current suggestion and invoke onAccept callback */
-  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  accept: (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ) => void;
   /** Dismiss/clear suggestion */
   dismiss: () => void;
   /** Hard-clear all state and timers */
@@ -135,7 +138,10 @@ export function createFollowupController(
     }, SUGGESTION_DELAY_MS);
   };
 
-  const accept = (method?: 'tab' | 'enter' | 'right'): void => {
+  const accept = (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ): void => {
     if (accepting) {
       return;
     }
@@ -170,7 +176,9 @@ export function createFollowupController(
 
     queueMicrotask(() => {
       try {
-        getOnAccept?.()?.(text);
+        if (!options?.skipOnAccept) {
+          getOnAccept?.()?.(text);
+        }
       } catch (error: unknown) {
         // eslint-disable-next-line no-console
         console.error('[followup] onAccept callback threw:', error);
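Reviewer note: a hedged usage sketch of the extended controller API. The callback bodies are illustrative; the option names come straight from the interface above.

```ts
import { createFollowupController } from './followupState.js';

// Tab/right-arrow path: onAccept fills the input buffer via a microtask.
// Enter path: the caller already has the text, so it opts out of onAccept.
const ctrl = createFollowupController({
  onStateChange: (state) => console.log('state:', state),
  getOnAccept: () => (text: string) => console.log('insert into buffer:', text),
  onOutcome: (event) => console.log('telemetry:', event),
});

ctrl.setSuggestion('run the tests');
ctrl.accept('tab'); // inserts text into the buffer one microtask later
// ...or, when submitting directly on Enter:
ctrl.accept('enter', { skipOnAccept: true }); // clears state, no re-insert
```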
diff --git a/packages/core/src/followup/forkedQuery.test.ts b/packages/core/src/followup/forkedQuery.test.ts
index 862d9b9e6..a223a308e 100644
--- a/packages/core/src/followup/forkedQuery.test.ts
+++ b/packages/core/src/followup/forkedQuery.test.ts
@@ -4,13 +4,24 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { describe, it, expect, beforeEach } from 'vitest';
+import { describe, it, expect, beforeEach, vi } from 'vitest';
 import {
   saveCacheSafeParams,
   getCacheSafeParams,
   clearCacheSafeParams,
+  runForkedQuery,
 } from './forkedQuery.js';
 import type { GenerateContentConfig } from '@google/genai';
+import type { Config } from '../config/config.js';
+import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
+
+vi.mock('../core/geminiChat.js', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('../core/geminiChat.js')>();
+  return {
+    ...actual,
+    GeminiChat: vi.fn(),
+  };
+});
 
 describe('CacheSafeParams', () => {
   beforeEach(() => {
@@ -113,3 +124,190 @@ describe('CacheSafeParams', () => {
     });
   });
 });
+
+describe('runForkedQuery', () => {
+  beforeEach(() => {
+    clearCacheSafeParams();
+    vi.mocked(GeminiChat).mockReset();
+  });
+
+  it('passes tools: [] in per-request config so the model cannot produce function calls', async () => {
+    // Save cache params with real tools to simulate a normal conversation
+    saveCacheSafeParams(
+      {
+        systemInstruction: 'You are helpful',
+        tools: [
+          {
+            functionDeclarations: [
+              { name: 'edit', description: 'Edit a file' },
+              { name: 'shell', description: 'Run a command' },
+            ],
+          },
+        ],
+      },
+      [{ role: 'user', parts: [{ text: 'hello' }] }],
+      'test-model',
+    );
+
+    // Track what sendMessageStream receives
+    let capturedParams: unknown = null;
+
+    const mockSendMessageStream = vi.fn(
+      (_model: string, params: unknown, _promptId: string) => {
+        capturedParams = params;
+        async function* generate() {
+          yield {
+            type: StreamEventType.CHUNK,
+            value: {
+              candidates: [
+                {
+                  content: {
+                    role: 'model',
+                    parts: [{ text: 'commit this' }],
+                  },
+                },
+              ],
+              usageMetadata: {
+                promptTokenCount: 10,
+                candidatesTokenCount: 5,
+                totalTokenCount: 15,
+              },
+            },
+          };
+        }
+        return Promise.resolve(generate());
+      },
+    );
+
+    vi.mocked(GeminiChat).mockImplementation(
+      () =>
+        ({
+          sendMessageStream: mockSendMessageStream,
+        }) as unknown as GeminiChat,
+    );
+
+    const mockConfig = {} as unknown as Config;
+
+    const result = await runForkedQuery(mockConfig, 'suggest something');
+
+    // Verify GeminiChat was constructed with the full generationConfig
+    // (including tools) — createForkedChat retains tools for speculation callers
+    expect(GeminiChat).toHaveBeenCalledOnce();
+    const ctorArgs = vi.mocked(GeminiChat).mock.calls[0];
+    const chatGenerationConfig = ctorArgs[1] as GenerateContentConfig;
+    expect(chatGenerationConfig.tools).toEqual([
+      {
+        functionDeclarations: [
+          { name: 'edit', description: 'Edit a file' },
+          { name: 'shell', description: 'Run a command' },
+        ],
+      },
+    ]);
+    // chatRecordingService and telemetryService must be undefined
+    // to avoid polluting the main session's recordings
+    expect(ctorArgs[3]).toBeUndefined(); // chatRecordingService
+    expect(ctorArgs[4]).toBeUndefined(); // telemetryService
+
+    // Verify sendMessageStream was called
+    expect(mockSendMessageStream).toHaveBeenCalledOnce();
+    expect(capturedParams).not.toBeNull();
+
+    // KEY ASSERTION: per-request config must have tools: [] to prevent
+    // the model from producing function calls (Root Cause 1 fix)
+    const sendParams = capturedParams as { config?: { tools?: unknown } };
+    expect(sendParams.config).toBeDefined();
+    expect(sendParams.config!.tools).toEqual([]);
+
+    // Verify prompt_id is 'forked_query' and message is passed correctly
+    expect(mockSendMessageStream).toHaveBeenCalledWith(
+      'test-model',
+      expect.objectContaining({
+        message: [{ text: 'suggest something' }],
+        config: expect.objectContaining({ tools: [] }),
+      }),
+      'forked_query',
+    );
+
+    // Verify result is correct
+    expect(result.text).toBe('commit this');
+    expect(result.usage.inputTokens).toBe(10);
+    expect(result.usage.outputTokens).toBe(5);
+  });
+
+  it('preserves tools: [] even when jsonSchema is provided', async () => {
+    saveCacheSafeParams(
+      {
+        tools: [{ functionDeclarations: [{ name: 'edit' }] }],
+      },
+      [],
+      'test-model',
+    );
+
+    let capturedParams: unknown = null;
+
+    const mockSendMessageStream = vi.fn(
+      (_model: string, params: unknown, _promptId: string) => {
+        capturedParams = params;
+        async function* generate() {
+          yield {
+            type: StreamEventType.CHUNK,
+            value: {
+              candidates: [
+                {
+                  content: {
+                    role: 'model',
+                    parts: [{ text: '{"suggestion":"run tests"}' }],
+                  },
+                },
+              ],
+              usageMetadata: {
+                promptTokenCount: 5,
+                candidatesTokenCount: 3,
+              },
+            },
+          };
+        }
+        return Promise.resolve(generate());
+      },
+    );
+
+    vi.mocked(GeminiChat).mockImplementation(
+      () =>
+        ({
+          sendMessageStream: mockSendMessageStream,
+        }) as unknown as GeminiChat,
+    );
+
+    const schema = {
+      type: 'object',
+      properties: { suggestion: { type: 'string' } },
+    };
+
+    const result = await runForkedQuery({} as Config, 'suggest', {
+      jsonSchema: schema,
+    });
+
+    const sendParams = capturedParams as {
+      config?: {
+        tools?: unknown;
+        responseMimeType?: string;
+        responseJsonSchema?: unknown;
+      };
+    };
+    // tools: [] must still be present alongside JSON schema options
+    expect(sendParams.config!.tools).toEqual([]);
+    expect(sendParams.config!.responseMimeType).toBe('application/json');
+    expect(sendParams.config!.responseJsonSchema).toBe(schema);
+
+    // Verify JSON was parsed correctly
+    expect(result.jsonResult).toEqual({ suggestion: 'run tests' });
+  });
+
+  it('throws when CacheSafeParams are not available', async () => {
+    const mockConfig = {} as unknown as Config;
+
+    await expect(runForkedQuery(mockConfig, 'test')).rejects.toThrow(
+      'CacheSafeParams not available',
+    );
+  });
+});
diff --git a/packages/core/src/followup/forkedQuery.ts b/packages/core/src/followup/forkedQuery.ts
index 798374c73..0d4dc2186 100644
--- a/packages/core/src/followup/forkedQuery.ts
+++ b/packages/core/src/followup/forkedQuery.ts
@@ -6,11 +6,15 @@
  * Forked Query Infrastructure
 *
 * Enables cache-aware secondary LLM calls that share the main conversation's
- * prompt prefix (systemInstruction + tools + history) for cache hits.
+ * prompt prefix (systemInstruction + history) for cache hits.
 *
 * DashScope already enables cache_control via X-DashScope-CacheControl header.
 * By constructing the forked GeminiChat with identical generationConfig and
 * history prefix, the fork automatically benefits from prefix caching.
+ *
+ * Note: `runForkedQuery` overrides `tools: []` at the per-request level so the
+ * model cannot produce function calls. `createForkedChat` retains the full
+ * generationConfig (including tools) for callers like speculation that need them.
 */
 
 import type {
@@ -21,6 +25,12 @@ import type {
 import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
 import type { Config } from '../config/config.js';
 
+/** Per-request config that strips tools so the model never produces function calls. */
+const NO_TOOLS = Object.freeze({ tools: [] as const }) as Pick<
+  GenerateContentConfig,
+  'tools'
+>;
+
 /**
 * Snapshot of the main conversation's cache-critical parameters.
 * Captured after each successful main turn so forked queries share the same prefix.
@@ -111,9 +121,13 @@ export function clearCacheSafeParams(): void {
 // ---------------------------------------------------------------------------
 
 /**
- * Create an isolated GeminiChat that shares the same cache prefix as the main
- * conversation. The fork uses identical generationConfig (systemInstruction +
- * tools) and history, so DashScope's cache_control mechanism produces cache hits.
+ * Create an isolated GeminiChat that shares the main conversation's
+ * generationConfig (including systemInstruction, tools, and history).
+ *
+ * The full config is retained so that callers like `runSpeculativeLoop`
+ * can execute tool calls during speculation. For pure-text callers like
+ * `runForkedQuery`, tools are stripped at the per-request level via
+ * `NO_TOOLS` — see {@link runForkedQuery}.
 *
 * The fork does NOT have chatRecordingService or telemetryService to avoid
 * polluting the main session's recordings and token counts.
@@ -165,7 +179,7 @@ function extractUsage(
 
 /**
 * Run a forked query using a GeminiChat that shares the main conversation's
- * cache prefix. This is a single-turn request (no tool execution loop).
+ * cache prefix. This is a single-turn, tool-free request (no function calls).
 *
 * @param config - App config
 * @param userMessage - The user message to send (e.g., SUGGESTION_PROMPT)
@@ -191,8 +205,10 @@ export async function runForkedQuery(
   const model = options?.model ?? params.model;
   const chat = createForkedChat(config, params);
 
-  // Build per-request config overrides for JSON schema if needed
-  const requestConfig: GenerateContentConfig = {};
+  // Build per-request config overrides.
+  // NO_TOOLS prevents the model from producing function calls — forked
+  // queries are pure text completion and must not appear in tool-call UI.
+  const requestConfig: GenerateContentConfig = { ...NO_TOOLS };
   if (options?.abortSignal) {
     requestConfig.abortSignal = options.abortSignal;
   }
@@ -205,7 +221,7 @@ export async function runForkedQuery(
     model,
     {
       message: [{ text: userMessage }],
-      config: Object.keys(requestConfig).length > 0 ? requestConfig : undefined,
+      config: requestConfig,
     },
     'forked_query',
   );
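Reviewer note: a hedged usage sketch of the API above. Option and result field names come from this file and its tests; the surrounding setup (import path for SUGGESTION_PROMPT, the abort wiring) is assumed.

```ts
import { runForkedQuery } from './forkedQuery.js';
import { SUGGESTION_PROMPT } from './suggestionGenerator.js';
import type { Config } from '../config/config.js';

// Single-turn, tool-free fork: shares the main conversation's cached prefix,
// sends one message under the 'forked_query' prompt ID, returns text + usage.
async function suggestNext(config: Config, signal: AbortSignal) {
  const { text, usage } = await runForkedQuery(config, SUGGESTION_PROMPT, {
    abortSignal: signal,
  });
  console.log(`suggestion: ${text} (${usage.inputTokens} in / ${usage.outputTokens} out)`);
  return text;
}
```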
diff --git a/packages/core/src/followup/suggestionGenerator.ts b/packages/core/src/followup/suggestionGenerator.ts
index e5b25b768..50f872be7 100644
--- a/packages/core/src/followup/suggestionGenerator.ts
+++ b/packages/core/src/followup/suggestionGenerator.ts
@@ -24,13 +24,20 @@ import { ApiResponseEvent } from '../telemetry/types.js';
 */
 export const SUGGESTION_PROMPT = `[SUGGESTION MODE: Suggest what the user might naturally type next.]
 
-FIRST: Look at the user's recent messages and original request.
+FIRST: Read the LAST FEW LINES of the assistant's most recent message -- that's where
+next-step hints, tips, and actionable suggestions usually appear. Then check the user's
+recent messages and original request.
 
 Your job is to predict what THEY would type - not what you think they should do.
 
 THE TEST: Would they think "I was just about to type that"?
 
+PRIORITY: If the assistant's last message contains a tip or hint like "Tip: type X to ..."
+or "type X to ...", extract X as the suggestion. These are explicit next-step hints.
+
 EXAMPLES:
+Assistant says "Tip: type post comments to publish findings" → "post comments"
+Assistant says "type /review to start" → "/review"
 User asked "fix the bug and run tests", bug is fixed → "run the tests"
 After code written → "try it out"
 Model offers options → suggest the one the user would likely pick, based on conversation
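Reviewer note: the structured variant of the previous sketch, per the jsonSchema path exercised in the forkedQuery tests above. The schema shape mirrors the test, not shipped code.

```ts
import { runForkedQuery } from './forkedQuery.js';
import type { Config } from '../config/config.js';

// jsonSchema switches the fork to application/json and parses jsonResult;
// tools stay stripped ([]) either way.
async function suggestStructured(config: Config) {
  const { jsonResult } = await runForkedQuery(config, 'suggest something', {
    jsonSchema: {
      type: 'object',
      properties: { suggestion: { type: 'string' } },
    },
  });
  return (jsonResult as { suggestion?: string })?.suggestion ?? null;
}
```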
diff --git a/packages/core/src/telemetry/loggers.test.ts b/packages/core/src/telemetry/loggers.test.ts
index 288e02f03..19a3997dc 100644
--- a/packages/core/src/telemetry/loggers.test.ts
+++ b/packages/core/src/telemetry/loggers.test.ts
@@ -55,6 +55,7 @@ import {
   logExtensionInstallEvent,
   logExtensionUninstall,
   logHookCall,
+  logApiError,
 } from './loggers.js';
 import * as metrics from './metrics.js';
 import { QwenLogger } from './qwen-logger/qwen-logger.js';
@@ -77,6 +78,7 @@ import {
   ExtensionInstallEvent,
   ExtensionUninstallEvent,
   HookCallEvent,
+  ApiErrorEvent,
 } from './types.js';
 import { FileOperation } from './metrics.js';
 import type {
@@ -359,6 +361,101 @@ describe('loggers', () => {
     });
   });
 
+  describe('logApiResponse skips chatRecordingService for internal prompt IDs', () => {
+    it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+      'should not record to chatRecordingService when prompt_id is %s',
+      (promptId) => {
+        const mockRecordUiTelemetryEvent = vi.fn();
+        const configWithRecording = {
+          getSessionId: () => 'test-session-id',
+          getUsageStatisticsEnabled: () => false,
+          getChatRecordingService: () => ({
+            recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+          }),
+        } as unknown as Config;
+
+        const event = new ApiResponseEvent(
+          'resp-id',
+          'test-model',
+          50,
+          promptId,
+        );
+        logApiResponse(configWithRecording, event);
+
+        expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
+        expect(mockUiEvent.addEvent).toHaveBeenCalled();
+      },
+    );
+
+    it('should record to chatRecordingService for normal prompt IDs', () => {
+      const mockRecordUiTelemetryEvent = vi.fn();
+      const configWithRecording = {
+        getSessionId: () => 'test-session-id',
+        getUsageStatisticsEnabled: () => false,
+        getChatRecordingService: () => ({
+          recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+        }),
+      } as unknown as Config;
+
+      const event = new ApiResponseEvent(
+        'resp-id',
+        'test-model',
+        50,
+        'user_query',
+      );
+      logApiResponse(configWithRecording, event);
+
+      expect(mockRecordUiTelemetryEvent).toHaveBeenCalled();
+    });
+  });
+
+  describe('logApiError skips chatRecordingService for internal prompt IDs', () => {
+    it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+      'should not record to chatRecordingService when prompt_id is %s',
+      (promptId) => {
+        const mockRecordUiTelemetryEvent = vi.fn();
+        const configWithRecording = {
+          getSessionId: () => 'test-session-id',
+          getUsageStatisticsEnabled: () => false,
+          getChatRecordingService: () => ({
+            recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+          }),
+        } as unknown as Config;
+
+        const event = new ApiErrorEvent({
+          model: 'test-model',
+          durationMs: 100,
+          promptId,
+          errorMessage: 'test error',
+        });
+        logApiError(configWithRecording, event);
+
+        expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
+      },
+    );
+
+    it('should record to chatRecordingService for normal prompt IDs', () => {
+      const mockRecordUiTelemetryEvent = vi.fn();
+      const configWithRecording = {
+        getSessionId: () => 'test-session-id',
+        getUsageStatisticsEnabled: () => false,
+        getChatRecordingService: () => ({
+          recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+        }),
+      } as unknown as Config;
+
+      const event = new ApiErrorEvent({
+        model: 'test-model',
+        durationMs: 100,
+        promptId: 'user_query',
+        errorMessage: 'test error',
+      });
+      logApiError(configWithRecording, event);
+
+      expect(mockRecordUiTelemetryEvent).toHaveBeenCalled();
+    });
+  });
+
   describe('logApiRequest', () => {
     const mockConfig = {
       getSessionId: () => 'test-session-id',
@@ -1010,6 +1107,46 @@ describe('loggers', () => {
         },
       });
     });
+
+    it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+      'should not record to chatRecordingService when prompt_id is %s',
+      (promptId) => {
+        const mockRecordUiTelemetryEvent = vi.fn();
+        const configWithRecording = {
+          ...mockConfig,
+          getChatRecordingService: () => ({
+            recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+          }),
+        } as unknown as Config;
+
+        const call: CompletedToolCall = {
+          status: 'success',
+          request: {
+            name: 'test-function',
+            args: {},
+            callId: 'test-call-id',
+            isClientInitiated: true,
+            prompt_id: promptId,
+          },
+          response: {
+            callId: 'test-call-id',
+            responseParts: [{ text: 'ok' }],
+            resultDisplay: undefined,
+            error: undefined,
+            errorType: undefined,
+          },
+          tool: new EditTool(mockConfig),
+          invocation: {} as AnyToolInvocation,
+          durationMs: 50,
+          outcome: ToolConfirmationOutcome.ProceedOnce,
+        };
+        const event = new ToolCallEvent(call);
+        logToolCall(configWithRecording, event);
+
+        expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
+        expect(mockUiEvent.addEvent).toHaveBeenCalled();
+      },
+    );
   });
 
   describe('logMalformedJsonResponse', () => {
diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts
index b7c18c9d3..6cd706799 100644
--- a/packages/core/src/telemetry/loggers.ts
+++ b/packages/core/src/telemetry/loggers.ts
@@ -8,6 +8,7 @@ import type { LogAttributes, LogRecord } from '@opentelemetry/api-logs';
 import { logs } from '@opentelemetry/api-logs';
 import { SemanticAttributes } from '@opentelemetry/semantic-conventions';
 import type { Config } from '../config/config.js';
+import { isInternalPromptId } from '../utils/internalPromptIds.js';
 import { safeJsonStringify } from '../utils/safeJsonStringify.js';
 import {
   EVENT_API_ERROR,
@@ -214,7 +215,9 @@ export function logToolCall(config: Config, event: ToolCallEvent): void {
     'event.timestamp': new Date().toISOString(),
   } as UiEvent;
   uiTelemetryService.addEvent(uiEvent);
-  config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  if (!isInternalPromptId(event.prompt_id)) {
+    config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  }
   QwenLogger.getInstance(config)?.logToolCallEvent(event);
 
   if (!isTelemetrySdkInitialized()) return;
@@ -382,7 +385,9 @@ export function logApiError(config: Config, event: ApiErrorEvent): void {
     'event.timestamp': new Date().toISOString(),
   } as UiEvent;
   uiTelemetryService.addEvent(uiEvent);
-  config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  if (!isInternalPromptId(event.prompt_id)) {
+    config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  }
   QwenLogger.getInstance(config)?.logApiErrorEvent(event);
 
   if (!isTelemetrySdkInitialized()) return;
@@ -449,7 +454,9 @@ export function logApiResponse(config: Config, event: ApiResponseEvent): void {
     'event.timestamp': new Date().toISOString(),
   } as UiEvent;
   uiTelemetryService.addEvent(uiEvent);
-  config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  if (!isInternalPromptId(event.prompt_id)) {
+    config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  }
   QwenLogger.getInstance(config)?.logApiResponseEvent(event);
   if (!isTelemetrySdkInitialized()) return;
   const attributes: LogAttributes = {
diff --git a/packages/core/src/utils/internalPromptIds.test.ts b/packages/core/src/utils/internalPromptIds.test.ts
new file mode 100644
index 000000000..fbdfcd6ae
--- /dev/null
+++ b/packages/core/src/utils/internalPromptIds.test.ts
@@ -0,0 +1,35 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { isInternalPromptId } from './internalPromptIds.js';
+
+describe('isInternalPromptId', () => {
+  it('returns true for prompt_suggestion', () => {
+    expect(isInternalPromptId('prompt_suggestion')).toBe(true);
+  });
+
+  it('returns true for forked_query', () => {
+    expect(isInternalPromptId('forked_query')).toBe(true);
+  });
+
+  it('returns true for speculation', () => {
+    expect(isInternalPromptId('speculation')).toBe(true);
+  });
+
+  it('returns false for user_query', () => {
+    expect(isInternalPromptId('user_query')).toBe(false);
+  });
+
+  it('returns false for empty string', () => {
+    expect(isInternalPromptId('')).toBe(false);
+  });
+
+  it('returns false for arbitrary prompt ids', () => {
+    expect(isInternalPromptId('btw-prompt-id')).toBe(false);
+    expect(isInternalPromptId('context-prompt-id')).toBe(false);
+  });
+});
diff --git a/packages/core/src/utils/internalPromptIds.ts b/packages/core/src/utils/internalPromptIds.ts
new file mode 100644
index 000000000..8a178d783
--- /dev/null
+++ b/packages/core/src/utils/internalPromptIds.ts
@@ -0,0 +1,29 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Internal Prompt ID utilities
+ *
+ * Centralises the set of prompt IDs used by background operations
+ * (suggestion generation, forked queries) so that logging, recording,
+ * and UI layers can consistently recognise and filter them.
+ */
+
+/** Prompt IDs that belong to internal background operations. */
+const INTERNAL_PROMPT_IDS: ReadonlySet<string> = new Set([
+  'prompt_suggestion',
+  'forked_query',
+  'speculation',
+]);
+
+/**
+ * Returns true if the prompt_id belongs to an internal background operation
+ * whose events should not be recorded to the chatRecordingService,
+ * OpenAI logs, or other persistent stores visible in the UI.
+ *
+ * Known internal IDs: `'prompt_suggestion'`, `'forked_query'`, `'speculation'`.
+ */
+export function isInternalPromptId(promptId: string): boolean {
+  return INTERNAL_PROMPT_IDS.has(promptId);
+}
diff --git a/packages/webui/src/components/layout/InputForm.tsx b/packages/webui/src/components/layout/InputForm.tsx
index 86ffd4ef8..c1baccb6f 100644
--- a/packages/webui/src/components/layout/InputForm.tsx
+++ b/packages/webui/src/components/layout/InputForm.tsx
@@ -132,7 +132,10 @@ export interface InputFormProps {
   /** Prompt suggestion state */
   followupState?: FollowupState;
   /** Callback to accept prompt suggestion */
-  onAcceptFollowup?: (method?: 'tab' | 'enter' | 'right') => void;
+  onAcceptFollowup?: (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ) => void;
   /** Callback to dismiss prompt suggestion */
   onDismissFollowup?: () => void;
 }
@@ -267,9 +270,10 @@ export const InputForm: FC<InputFormProps> = ({
       // Accept and submit prompt suggestion on Enter when input is empty
      if (hasFollowup && !inputText && followupSuggestion) {
         e.preventDefault();
-        onAcceptFollowup?.('enter');
-        // Pass suggestion text explicitly — onInputChange is async (React setState)
-        // so onSubmit cannot rely on reading inputText from the closure.
+        // Skip onAccept callback — we pass the text directly to onSubmit.
+        // Without skipOnAccept the microtask in accept() would re-insert
+        // the suggestion into the input after it was already cleared.
+        onAcceptFollowup?.('enter', { skipOnAccept: true });
         onSubmit(e, followupSuggestion);
         return;
       }
diff --git a/packages/webui/src/hooks/useFollowupSuggestions.ts b/packages/webui/src/hooks/useFollowupSuggestions.ts
index 607990484..d069b45a1 100644
--- a/packages/webui/src/hooks/useFollowupSuggestions.ts
+++ b/packages/webui/src/hooks/useFollowupSuggestions.ts
@@ -192,7 +192,12 @@ export interface UseFollowupSuggestionsReturn {
   state: FollowupState;
   getPlaceholder: (defaultPlaceholder: string) => string;
   setSuggestion: (text: string | null) => void;
-  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  /** Accept the current suggestion */
+  accept: (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ) => void;
+  /** Dismiss the current suggestion */
   dismiss: () => void;
   clear: () => void;
 }
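Reviewer note: the same accept signature now appears in the core controller, the CLI hook, and the webui hook. A shared alias (hypothetical, not introduced by this PR) would keep the three declarations from drifting:

```ts
// Hypothetical consolidation; each package currently re-declares this inline.
export type AcceptSuggestion = (
  method?: 'tab' | 'enter' | 'right',
  options?: { skipOnAccept?: boolean },
) => void;
```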