Merge remote-tracking branch 'origin/main' into feat/review-skill-improvements

2026-04-28 11:41:04 +00:00 · 2026-04-09 08:52:01 +08:00 · 2026-04-09 08:52:01 +08:00 · 6de5c9e530
commit 6de5c9e530
parent 6014a61d39 f208801b0e
20 changed files with 705 additions and 48 deletions
--- a/docs/design/prompt-suggestion/prompt-suggestion-design.md
+++ b/docs/design/prompt-suggestion/prompt-suggestion-design.md
@ -64,10 +64,19 @@ A **prompt suggestion** (Next-step Suggestion / NES) is a short prediction (2-12
 ```
 [SUGGESTION MODE: Suggest what the user might naturally type next.]
 FIRST: Read the LAST FEW LINES of the assistant's most recent message — that's where
 next-step hints, tips, and actionable suggestions usually appear. Then check the user's
 recent messages and original request.
 Your job is to predict what THEY would type - not what you think they should do.
 THE TEST: Would they think "I was just about to type that"?
 PRIORITY: If the assistant's last message contains a tip or hint like "Tip: type X to ..."
 or "type X to ...", extract X as the suggestion. These are explicit next-step hints.
 EXAMPLES:
 Assistant says "Tip: type post comments to publish findings" → "post comments"
 Assistant says "type /review to start" → "/review"
 User asked "fix the bug and run tests", bug is fixed → "run the tests"
 After code written → "try it out"
 Task complete, obvious follow-up → "commit this" or "push it"
@ -197,6 +206,23 @@ The Tab handler uses `key.name === 'tab'` explicitly (not `ACCEPT_SUGGESTION` ma
 | `enableSpeculation`         | boolean | false   | Predictive execution engine                                                      |
 | `fastModel` (top-level)     | string  | ""      | Model for all background tasks (empty = use main model). Set via `/model --fast` |
 ### Internal Prompt ID Filtering
 Background operations use dedicated prompt IDs (`INTERNAL_PROMPT_IDS` in `utils/internalPromptIds.ts`) to prevent their API traffic and tool calls from appearing in the user-visible UI:
 | Prompt ID           | Used by                    |
 | ------------------- | -------------------------- |
 | `prompt_suggestion` | Suggestion generation      |
 | `forked_query`      | Cache-aware forked queries |
 | `speculation`       | Speculation engine         |
 **Filtering applied:**
 - `loggingContentGenerator` — skips `logApiRequest` and OpenAI interaction logging for internal IDs
 - `logApiResponse` / `logApiError` — skips `chatRecordingService.recordUiTelemetryEvent`
 - `logToolCall` — skips `chatRecordingService.recordUiTelemetryEvent`
 - `uiTelemetryService.addEvent` — **not filtered** (ensures `/stats` token tracking works)
 ### Thinking Mode
 Thinking/reasoning is explicitly disabled (`thinkingConfig: { includeThoughts: false }`) for all background task paths:
--- a/docs/users/features/followup-suggestions.md
+++ b/docs/users/features/followup-suggestions.md
@ -12,7 +12,7 @@ After Qwen Code finishes responding, a suggestion appears as dimmed text in the
 > run the tests
 ```
-The suggestion is generated by sending the conversation history to the model, which predicts what you would naturally type next.
+The suggestion is generated by sending the conversation history to the model, which predicts what you would naturally type next. If the response contains an explicit tip (e.g., `Tip: type post comments to publish findings`), the suggested action is extracted automatically.
 ## Accepting Suggestions
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@ -235,6 +235,10 @@ describe('InputPrompt', () => {
      await wait();
      expect(props.onSubmit).toHaveBeenCalledWith('commit this');
      // Enter path must NOT call buffer.insert — it passes text directly to
      // handleSubmitAndClear. Calling insert would re-fill the buffer after
      // it was already cleared (the microtask race bug).
      expect(mockBuffer.insert).not.toHaveBeenCalled();
      unmount();
    });
--- a/packages/cli/src/ui/components/InputPrompt.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.tsx
@ -882,7 +882,11 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
          followup.state.suggestion
        ) {
          const text = followup.state.suggestion;
-          followup.accept('enter');
+          // Skip onAccept (buffer.insert) — we pass the text directly to
          // handleSubmitAndClear which clears the buffer synchronously.
          // Without skipOnAccept the microtask in accept() would re-insert
          // the suggestion into the buffer after it was already cleared.
          followup.accept('enter', { skipOnAccept: true });
          handleSubmitAndClear(text);
          return true;
        }
--- a/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
+++ b/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
@ -43,7 +43,10 @@ export interface UseFollowupSuggestionsReturn {
  /** Set suggestion text (called by parent component) */
  setSuggestion: (text: string | null) => void;
  /** Accept the current suggestion */
-  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  accept: (
    method?: 'tab' | 'enter' | 'right',
    options?: { skipOnAccept?: boolean },
  ) => void;
  /** Dismiss the current suggestion */
  dismiss: () => void;
  /** Clear all state */
--- a/packages/cli/src/ui/utils/MarkdownDisplay.tsx
+++ b/packages/cli/src/ui/utils/MarkdownDisplay.tsx
@ -111,7 +111,9 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
        lines[index + 1].match(tableSeparatorRegex)
      ) {
        inTable = true;
-        tableHeaders = tableRowMatch[1].split(/(?<!\\)\|/).map((cell) => cell.trim().replaceAll('\\|', '|'));
+        tableHeaders = tableRowMatch[1]
          .split(/(?<!\\)\|/)
          .map((cell) => cell.trim().replaceAll('\\|', '|'));
        tableRows = [];
      } else {
        // Not a table, treat as regular text
@ -127,7 +129,9 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
      // Skip separator line - already handled
    } else if (inTable && tableRowMatch) {
      // Add table row
-      const cells = tableRowMatch[1].split(/(?<!\\)\|/).map((cell) => cell.trim().replaceAll('\\|', '|'));
+      const cells = tableRowMatch[1]
        .split(/(?<!\\)\|/)
        .map((cell) => cell.trim().replaceAll('\\|', '|'));
      // Ensure row has same column count as headers
      while (cells.length < tableHeaders.length) {
        cells.push('');
--- a/packages/cli/src/ui/utils/TableRenderer.tsx
+++ b/packages/cli/src/ui/utils/TableRenderer.tsx
@ -36,7 +36,10 @@ export const TableRenderer: React.FC<TableRendererProps> = ({
  // Ensure table fits within terminal width
  const totalWidth = columnWidths.reduce((sum, width) => sum + width + 1, 1);
  const fixedWidth = columnWidths.length + 1;
-  const scaleFactor = totalWidth > contentWidth ? (contentWidth - fixedWidth) / (totalWidth - fixedWidth) : 1;
+  const scaleFactor =
    totalWidth > contentWidth
      ? (contentWidth - fixedWidth) / (totalWidth - fixedWidth)
      : 1;
  const adjustedWidths = columnWidths.map((width) =>
    Math.floor(width * scaleFactor),
  );
--- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
+++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
@ -23,11 +23,16 @@ import {
 import { OpenAILogger } from '../../utils/openaiLogger.js';
 import type OpenAI from 'openai';
-vi.mock('../../telemetry/loggers.js', () => ({
+vi.mock('../../telemetry/loggers.js', async (importOriginal) => {
-  logApiRequest: vi.fn(),
+  const actual =
-  logApiResponse: vi.fn(),
+    await importOriginal<typeof import('../../telemetry/loggers.js')>();
-  logApiError: vi.fn(),
+  return {
-}));
+    ...actual,
    logApiRequest: vi.fn(),
    logApiResponse: vi.fn(),
    logApiError: vi.fn(),
  };
 });
 vi.mock('../../utils/openaiLogger.js', () => ({
  OpenAILogger: vi.fn().mockImplementation(() => ({
@ -474,4 +479,91 @@ describe('LoggingContentGenerator', () => {
      },
    ]);
  });
  it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
    'skips logApiRequest and OpenAI logging for internal promptId %s (generateContent)',
    async (promptId) => {
      const mockResponse = {
        responseId: 'internal-resp',
        modelVersion: 'test-model',
        candidates: [{ content: { parts: [{ text: 'suggestion' }] } }],
        usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
      } as unknown as GenerateContentResponse;
      const mockWrapped = {
        generateContent: vi.fn().mockResolvedValue(mockResponse),
        generateContentStream: vi.fn(),
      } as unknown as ContentGenerator;
      const gen = new LoggingContentGenerator(mockWrapped, createConfig(), {
        model: 'test-model',
        enableOpenAILogging: true,
        openAILoggingDir: '/tmp/test-logs',
      });
      const request = {
        model: 'test-model',
        contents: [{ role: 'user', parts: [{ text: 'test' }] }],
      } as unknown as GenerateContentParameters;
      await gen.generateContent(request, promptId);
      // logApiRequest should NOT be called for internal prompts
      expect(logApiRequest).not.toHaveBeenCalled();
      // logApiResponse SHOULD be called (for /stats token tracking)
      expect(logApiResponse).toHaveBeenCalled();
      // OpenAI logger should be constructed, but no interaction should be logged
      expect(OpenAILogger).toHaveBeenCalled();
      const loggerInstance = (
        OpenAILogger as unknown as ReturnType<typeof vi.fn>
      ).mock.results[0]?.value;
      expect(loggerInstance.logInteraction).not.toHaveBeenCalled();
    },
  );
  it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
    'skips logApiRequest and OpenAI logging for internal promptId %s (generateContentStream)',
    async (promptId) => {
      const mockChunk = {
        responseId: 'stream-resp',
        modelVersion: 'test-model',
        candidates: [{ content: { parts: [{ text: 'suggestion' }] } }],
        usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
      } as unknown as GenerateContentResponse;
      async function* fakeStream() {
        yield mockChunk;
      }
      const mockWrapped = {
        generateContent: vi.fn(),
        generateContentStream: vi.fn().mockResolvedValue(fakeStream()),
      } as unknown as ContentGenerator;
      const gen = new LoggingContentGenerator(mockWrapped, createConfig(), {
        model: 'test-model',
        enableOpenAILogging: true,
        openAILoggingDir: '/tmp/test-logs',
      });
      const request = {
        model: 'test-model',
        contents: [{ role: 'user', parts: [{ text: 'test' }] }],
      } as unknown as GenerateContentParameters;
      const stream = await gen.generateContentStream(request, promptId);
      // Consume the stream
      for await (const _chunk of stream) {
        // drain
      }
      expect(logApiRequest).not.toHaveBeenCalled();
      expect(logApiResponse).toHaveBeenCalled();
      expect(OpenAILogger).toHaveBeenCalled();
      const loggerInstance = (
        OpenAILogger as unknown as ReturnType<typeof vi.fn>
      ).mock.results[0]?.value;
      expect(loggerInstance.logInteraction).not.toHaveBeenCalled();
    },
  );
 });
--- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
+++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
@ -31,6 +31,7 @@ import {
  logApiRequest,
  logApiResponse,
 } from '../../telemetry/loggers.js';
 import { isInternalPromptId } from '../../utils/internalPromptIds.js';
 import type {
  ContentGenerator,
  ContentGeneratorConfig,
@ -143,8 +144,17 @@ export class LoggingContentGenerator implements ContentGenerator {
    userPromptId: string,
  ): Promise<GenerateContentResponse> {
    const startTime = Date.now();
-    this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
+    const isInternal = isInternalPromptId(userPromptId);
-    const openaiRequest = await this.buildOpenAIRequestForLogging(req);
+    if (!isInternal) {
      this.logApiRequest(
        this.toContents(req.contents),
        req.model,
        userPromptId,
      );
    }
    const openaiRequest = isInternal
      ? undefined
      : await this.buildOpenAIRequestForLogging(req);
    try {
      const response = await this.wrapped.generateContent(req, userPromptId);
      const durationMs = Date.now() - startTime;
@ -155,12 +165,16 @@ export class LoggingContentGenerator implements ContentGenerator {
        userPromptId,
        response.usageMetadata,
      );
-      await this.logOpenAIInteraction(openaiRequest, response);
+      if (!isInternal) {
        await this.logOpenAIInteraction(openaiRequest, response);
      }
      return response;
    } catch (error) {
      const durationMs = Date.now() - startTime;
      this._logApiError('', durationMs, error, req.model, userPromptId);
-      await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      if (!isInternal) {
        await this.logOpenAIInteraction(openaiRequest, undefined, error);
      }
      throw error;
    }
  }
@ -170,8 +184,17 @@ export class LoggingContentGenerator implements ContentGenerator {
    userPromptId: string,
  ): Promise<AsyncGenerator<GenerateContentResponse>> {
    const startTime = Date.now();
-    this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
+    const isInternal = isInternalPromptId(userPromptId);
-    const openaiRequest = await this.buildOpenAIRequestForLogging(req);
+    if (!isInternal) {
      this.logApiRequest(
        this.toContents(req.contents),
        req.model,
        userPromptId,
      );
    }
    const openaiRequest = isInternal
      ? undefined
      : await this.buildOpenAIRequestForLogging(req);
    let stream: AsyncGenerator<GenerateContentResponse>;
    try {
@ -179,7 +202,9 @@ export class LoggingContentGenerator implements ContentGenerator {
    } catch (error) {
      const durationMs = Date.now() - startTime;
      this._logApiError('', durationMs, error, req.model, userPromptId);
-      await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      if (!isInternal) {
        await this.logOpenAIInteraction(openaiRequest, undefined, error);
      }
      throw error;
    }
@ -199,12 +224,27 @@ export class LoggingContentGenerator implements ContentGenerator {
    model: string,
    openaiRequest?: OpenAI.Chat.ChatCompletionCreateParams,
  ): AsyncGenerator<GenerateContentResponse> {
    const isInternal = isInternalPromptId(userPromptId);
    // For internal prompts we only need the last usage metadata (for /stats);
    // skip collecting full responses to avoid unnecessary memory overhead.
    const responses: GenerateContentResponse[] = [];
    // Track first-seen IDs so _logApiResponse/_logApiError have accurate
    // values even when we skip collecting full responses for internal prompts.
    let firstResponseId = '';
    let firstModelVersion = '';
    let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined;
    try {
      for await (const response of stream) {
-        responses.push(response);
+        if (!firstResponseId && response.responseId) {
          firstResponseId = response.responseId;
        }
        if (!firstModelVersion && response.modelVersion) {
          firstModelVersion = response.modelVersion;
        }
        if (!isInternal) {
          responses.push(response);
        }
        if (response.usageMetadata) {
          lastUsageMetadata = response.usageMetadata;
        }
@ -213,25 +253,29 @@ export class LoggingContentGenerator implements ContentGenerator {
      // Only log successful API response if no error occurred
      const durationMs = Date.now() - startTime;
      this._logApiResponse(
-        responses[0]?.responseId ?? '',
+        firstResponseId,
        durationMs,
-        responses[0]?.modelVersion || model,
+        firstModelVersion || model,
        userPromptId,
        lastUsageMetadata,
      );
-      const consolidatedResponse =
+      if (!isInternal) {
-        this.consolidateGeminiResponsesForLogging(responses);
+        const consolidatedResponse =
-      await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
+          this.consolidateGeminiResponsesForLogging(responses);
        await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
      }
    } catch (error) {
      const durationMs = Date.now() - startTime;
      this._logApiError(
-        responses[0]?.responseId ?? '',
+        firstResponseId,
        durationMs,
        error,
-        responses[0]?.modelVersion || model,
+        firstModelVersion || model,
        userPromptId,
      );
-      await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      if (!isInternal) {
        await this.logOpenAIInteraction(openaiRequest, undefined, error);
      }
      throw error;
    }
  }
--- a/packages/core/src/followup/followupState.test.ts
+++ b/packages/core/src/followup/followupState.test.ts
@ -294,6 +294,37 @@ describe('createFollowupController', () => {
    ctrl.cleanup();
  });
  it('accept with skipOnAccept skips onAccept callback but still clears state and fires telemetry', async () => {
    const onStateChange = vi.fn();
    const onAccept = vi.fn();
    const onOutcome = vi.fn();
    const ctrl = createFollowupController({
      onStateChange,
      getOnAccept: () => onAccept,
      onOutcome,
    });
    ctrl.setSuggestion('run tests');
    vi.advanceTimersByTime(300);
    onStateChange.mockClear();
    ctrl.accept('enter', { skipOnAccept: true });
    // State should be cleared
    expect(onStateChange).toHaveBeenCalledWith(INITIAL_FOLLOWUP_STATE);
    // Telemetry should still fire
    expect(onOutcome).toHaveBeenCalledWith(
      expect.objectContaining({ outcome: 'accepted', accept_method: 'enter' }),
    );
    // Flush microtask — onAccept should NOT be called
    await Promise.resolve();
    expect(onAccept).not.toHaveBeenCalled();
    ctrl.cleanup();
  });
  it('setSuggestion replaces a pending suggestion', () => {
    const onStateChange = vi.fn();
    const ctrl = createFollowupController({ onStateChange });
--- a/packages/core/src/followup/followupState.ts
+++ b/packages/core/src/followup/followupState.ts
@ -72,7 +72,10 @@ export interface FollowupControllerActions {
  /** Set suggestion text (with delayed show). Null clears immediately. */
  setSuggestion: (text: string | null) => void;
  /** Accept the current suggestion and invoke onAccept callback */
-  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  accept: (
    method?: 'tab' | 'enter' | 'right',
    options?: { skipOnAccept?: boolean },
  ) => void;
  /** Dismiss/clear suggestion */
  dismiss: () => void;
  /** Hard-clear all state and timers */
@ -135,7 +138,10 @@ export function createFollowupController(
    }, SUGGESTION_DELAY_MS);
  };
-  const accept = (method?: 'tab' | 'enter' | 'right'): void => {
+  const accept = (
    method?: 'tab' | 'enter' | 'right',
    options?: { skipOnAccept?: boolean },
  ): void => {
    if (accepting) {
      return;
    }
@ -170,7 +176,9 @@ export function createFollowupController(
    queueMicrotask(() => {
      try {
-        getOnAccept?.()?.(text);
+        if (!options?.skipOnAccept) {
          getOnAccept?.()?.(text);
        }
      } catch (error: unknown) {
        // eslint-disable-next-line no-console
        console.error('[followup] onAccept callback threw:', error);
--- a/packages/core/src/followup/forkedQuery.test.ts
+++ b/packages/core/src/followup/forkedQuery.test.ts
@ -4,13 +4,24 @@
 * SPDX-License-Identifier: Apache-2.0
 */
-import { describe, it, expect, beforeEach } from 'vitest';
+import { describe, it, expect, beforeEach, vi } from 'vitest';
 import {
  saveCacheSafeParams,
  getCacheSafeParams,
  clearCacheSafeParams,
  runForkedQuery,
 } from './forkedQuery.js';
 import type { GenerateContentConfig } from '@google/genai';
 import type { Config } from '../config/config.js';
 import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
 vi.mock('../core/geminiChat.js', async (importOriginal) => {
  const actual = await importOriginal<typeof import('../core/geminiChat.js')>();
  return {
    ...actual,
    GeminiChat: vi.fn(),
  };
 });
 describe('CacheSafeParams', () => {
  beforeEach(() => {
@ -113,3 +124,190 @@ describe('CacheSafeParams', () => {
    });
  });
 });
 describe('runForkedQuery', () => {
  beforeEach(() => {
    clearCacheSafeParams();
    vi.mocked(GeminiChat).mockReset();
  });
  it('passes tools: [] in per-request config so the model cannot produce function calls', async () => {
    // Save cache params with real tools to simulate a normal conversation
    saveCacheSafeParams(
      {
        systemInstruction: 'You are helpful',
        tools: [
          {
            functionDeclarations: [
              { name: 'edit', description: 'Edit a file' },
              { name: 'shell', description: 'Run a command' },
            ],
          },
        ],
      },
      [{ role: 'user', parts: [{ text: 'hello' }] }],
      'test-model',
    );
    // Track what sendMessageStream receives
    let capturedParams: unknown = null;
    const mockSendMessageStream = vi.fn(
      (_model: string, params: unknown, _promptId: string) => {
        capturedParams = params;
        async function* generate() {
          yield {
            type: StreamEventType.CHUNK,
            value: {
              candidates: [
                {
                  content: {
                    role: 'model',
                    parts: [{ text: 'commit this' }],
                  },
                },
              ],
              usageMetadata: {
                promptTokenCount: 10,
                candidatesTokenCount: 5,
                totalTokenCount: 15,
              },
            },
          };
        }
        return Promise.resolve(generate());
      },
    );
    vi.mocked(GeminiChat).mockImplementation(
      () =>
        ({
          sendMessageStream: mockSendMessageStream,
        }) as unknown as GeminiChat,
    );
    const mockConfig = {} as unknown as Config;
    const result = await runForkedQuery(mockConfig, 'suggest something');
    // Verify GeminiChat was constructed with the full generationConfig
    // (including tools) — createForkedChat retains tools for speculation callers
    expect(GeminiChat).toHaveBeenCalledOnce();
    const ctorArgs = vi.mocked(GeminiChat).mock.calls[0];
    const chatGenerationConfig = ctorArgs[1] as GenerateContentConfig;
    expect(chatGenerationConfig.tools).toEqual([
      {
        functionDeclarations: [
          { name: 'edit', description: 'Edit a file' },
          { name: 'shell', description: 'Run a command' },
        ],
      },
    ]);
    // chatRecordingService and telemetryService must be undefined
    // to avoid polluting the main session's recordings
    expect(ctorArgs[3]).toBeUndefined(); // chatRecordingService
    expect(ctorArgs[4]).toBeUndefined(); // telemetryService
    // Verify sendMessageStream was called
    expect(mockSendMessageStream).toHaveBeenCalledOnce();
    expect(capturedParams).not.toBeNull();
    // KEY ASSERTION: per-request config must have tools: [] to prevent
    // the model from producing function calls (Root Cause 1 fix)
    const sendParams = capturedParams as { config?: { tools?: unknown } };
    expect(sendParams.config).toBeDefined();
    expect(sendParams.config!.tools).toEqual([]);
    // Verify prompt_id is 'forked_query' and message is passed correctly
    expect(mockSendMessageStream).toHaveBeenCalledWith(
      'test-model',
      expect.objectContaining({
        message: [{ text: 'suggest something' }],
        config: expect.objectContaining({ tools: [] }),
      }),
      'forked_query',
    );
    // Verify result is correct
    expect(result.text).toBe('commit this');
    expect(result.usage.inputTokens).toBe(10);
    expect(result.usage.outputTokens).toBe(5);
  });
  it('preserves tools: [] even when jsonSchema is provided', async () => {
    saveCacheSafeParams(
      {
        tools: [{ functionDeclarations: [{ name: 'edit' }] }],
      },
      [],
      'test-model',
    );
    let capturedParams: unknown = null;
    const mockSendMessageStream = vi.fn(
      (_model: string, params: unknown, _promptId: string) => {
        capturedParams = params;
        async function* generate() {
          yield {
            type: StreamEventType.CHUNK,
            value: {
              candidates: [
                {
                  content: {
                    role: 'model',
                    parts: [{ text: '{"suggestion":"run tests"}' }],
                  },
                },
              ],
              usageMetadata: {
                promptTokenCount: 5,
                candidatesTokenCount: 3,
              },
            },
          };
        }
        return Promise.resolve(generate());
      },
    );
    vi.mocked(GeminiChat).mockImplementation(
      () =>
        ({
          sendMessageStream: mockSendMessageStream,
        }) as unknown as GeminiChat,
    );
    const schema = {
      type: 'object',
      properties: { suggestion: { type: 'string' } },
    };
    const result = await runForkedQuery({} as Config, 'suggest', {
      jsonSchema: schema,
    });
    const sendParams = capturedParams as {
      config?: {
        tools?: unknown;
        responseMimeType?: string;
        responseJsonSchema?: unknown;
      };
    };
    // tools: [] must still be present alongside JSON schema options
    expect(sendParams.config!.tools).toEqual([]);
    expect(sendParams.config!.responseMimeType).toBe('application/json');
    expect(sendParams.config!.responseJsonSchema).toBe(schema);
    // Verify JSON was parsed correctly
    expect(result.jsonResult).toEqual({ suggestion: 'run tests' });
  });
  it('throws when CacheSafeParams are not available', async () => {
    const mockConfig = {} as unknown as Config;
    await expect(runForkedQuery(mockConfig, 'test')).rejects.toThrow(
      'CacheSafeParams not available',
    );
  });
 });
--- a/packages/core/src/followup/forkedQuery.ts
+++ b/packages/core/src/followup/forkedQuery.ts
@ -6,11 +6,15 @@
 * Forked Query Infrastructure
 *
 * Enables cache-aware secondary LLM calls that share the main conversation's
- * prompt prefix (systemInstruction + tools + history) for cache hits.
+ * prompt prefix (systemInstruction + history) for cache hits.
 *
 * DashScope already enables cache_control via X-DashScope-CacheControl header.
 * By constructing the forked GeminiChat with identical generationConfig and
 * history prefix, the fork automatically benefits from prefix caching.
 *
 * Note: `runForkedQuery` overrides `tools: []` at the per-request level so the
 * model cannot produce function calls. `createForkedChat` retains the full
 * generationConfig (including tools) for callers like speculation that need them.
 */
 import type {
@ -21,6 +25,12 @@ import type {
 import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
 import type { Config } from '../config/config.js';
 /** Per-request config that strips tools so the model never produces function calls. */
 const NO_TOOLS = Object.freeze({ tools: [] as const }) as Pick<
  GenerateContentConfig,
  'tools'
 >;
 /**
 * Snapshot of the main conversation's cache-critical parameters.
 * Captured after each successful main turn so forked queries share the same prefix.
@ -111,9 +121,13 @@ export function clearCacheSafeParams(): void {
 // ---------------------------------------------------------------------------
 /**
- * Create an isolated GeminiChat that shares the same cache prefix as the main
+ * Create an isolated GeminiChat that shares the main conversation's
- * conversation. The fork uses identical generationConfig (systemInstruction +
+ * generationConfig (including systemInstruction, tools, and history).
- * tools) and history, so DashScope's cache_control mechanism produces cache hits.
+ *
 * The full config is retained so that callers like `runSpeculativeLoop`
 * can execute tool calls during speculation. For pure-text callers like
 * `runForkedQuery`, tools are stripped at the per-request level via
 * `NO_TOOLS` — see {@link runForkedQuery}.
 *
 * The fork does NOT have chatRecordingService or telemetryService to avoid
 * polluting the main session's recordings and token counts.
@ -165,7 +179,7 @@ function extractUsage(
 /**
 * Run a forked query using a GeminiChat that shares the main conversation's
- * cache prefix. This is a single-turn request (no tool execution loop).
+ * cache prefix. This is a single-turn, tool-free request (no function calls).
 *
 * @param config - App config
 * @param userMessage - The user message to send (e.g., SUGGESTION_PROMPT)
@ -191,8 +205,10 @@ export async function runForkedQuery(
  const model = options?.model ?? params.model;
  const chat = createForkedChat(config, params);
-  // Build per-request config overrides for JSON schema if needed
+  // Build per-request config overrides.
-  const requestConfig: GenerateContentConfig = {};
+  // NO_TOOLS prevents the model from producing function calls — forked
  // queries are pure text completion and must not appear in tool-call UI.
  const requestConfig: GenerateContentConfig = { ...NO_TOOLS };
  if (options?.abortSignal) {
    requestConfig.abortSignal = options.abortSignal;
  }
@ -205,7 +221,7 @@ export async function runForkedQuery(
    model,
    {
      message: [{ text: userMessage }],
-      config: Object.keys(requestConfig).length > 0 ? requestConfig : undefined,
+      config: requestConfig,
    },
    'forked_query',
  );
--- a/packages/core/src/followup/suggestionGenerator.ts
+++ b/packages/core/src/followup/suggestionGenerator.ts
@ -24,13 +24,20 @@ import { ApiResponseEvent } from '../telemetry/types.js';
 */
 export const SUGGESTION_PROMPT = `[SUGGESTION MODE: Suggest what the user might naturally type next.]
-FIRST: Look at the user's recent messages and original request.
+FIRST: Read the LAST FEW LINES of the assistant's most recent message -- that's where
 next-step hints, tips, and actionable suggestions usually appear. Then check the user's
 recent messages and original request.
 Your job is to predict what THEY would type - not what you think they should do.
 THE TEST: Would they think "I was just about to type that"?
 PRIORITY: If the assistant's last message contains a tip or hint like "Tip: type X to ..."
 or "type X to ...", extract X as the suggestion. These are explicit next-step hints.
 EXAMPLES:
 Assistant says "Tip: type post comments to publish findings" → "post comments"
 Assistant says "type /review to start" → "/review"
 User asked "fix the bug and run tests", bug is fixed → "run the tests"
 After code written → "try it out"
 Model offers options → suggest the one the user would likely pick, based on conversation
--- a/packages/core/src/telemetry/loggers.test.ts
+++ b/packages/core/src/telemetry/loggers.test.ts
@ -55,6 +55,7 @@ import {
  logExtensionInstallEvent,
  logExtensionUninstall,
  logHookCall,
  logApiError,
 } from './loggers.js';
 import * as metrics from './metrics.js';
 import { QwenLogger } from './qwen-logger/qwen-logger.js';
@ -77,6 +78,7 @@ import {
  ExtensionInstallEvent,
  ExtensionUninstallEvent,
  HookCallEvent,
  ApiErrorEvent,
 } from './types.js';
 import { FileOperation } from './metrics.js';
 import type {
@ -359,6 +361,101 @@ describe('loggers', () => {
    });
  });
  describe('logApiResponse skips chatRecordingService for internal prompt IDs', () => {
    it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
      'should not record to chatRecordingService when prompt_id is %s',
      (promptId) => {
        const mockRecordUiTelemetryEvent = vi.fn();
        const configWithRecording = {
          getSessionId: () => 'test-session-id',
          getUsageStatisticsEnabled: () => false,
          getChatRecordingService: () => ({
            recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
          }),
        } as unknown as Config;
        const event = new ApiResponseEvent(
          'resp-id',
          'test-model',
          50,
          promptId,
        );
        logApiResponse(configWithRecording, event);
        expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
        expect(mockUiEvent.addEvent).toHaveBeenCalled();
      },
    );
    it('should record to chatRecordingService for normal prompt IDs', () => {
      const mockRecordUiTelemetryEvent = vi.fn();
      const configWithRecording = {
        getSessionId: () => 'test-session-id',
        getUsageStatisticsEnabled: () => false,
        getChatRecordingService: () => ({
          recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
        }),
      } as unknown as Config;
      const event = new ApiResponseEvent(
        'resp-id',
        'test-model',
        50,
        'user_query',
      );
      logApiResponse(configWithRecording, event);
      expect(mockRecordUiTelemetryEvent).toHaveBeenCalled();
    });
  });
  describe('logApiError skips chatRecordingService for internal prompt IDs', () => {
    it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
      'should not record to chatRecordingService when prompt_id is %s',
      (promptId) => {
        const mockRecordUiTelemetryEvent = vi.fn();
        const configWithRecording = {
          getSessionId: () => 'test-session-id',
          getUsageStatisticsEnabled: () => false,
          getChatRecordingService: () => ({
            recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
          }),
        } as unknown as Config;
        const event = new ApiErrorEvent({
          model: 'test-model',
          durationMs: 100,
          promptId,
          errorMessage: 'test error',
        });
        logApiError(configWithRecording, event);
        expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
      },
    );
    it('should record to chatRecordingService for normal prompt IDs', () => {
      const mockRecordUiTelemetryEvent = vi.fn();
      const configWithRecording = {
        getSessionId: () => 'test-session-id',
        getUsageStatisticsEnabled: () => false,
        getChatRecordingService: () => ({
          recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
        }),
      } as unknown as Config;
      const event = new ApiErrorEvent({
        model: 'test-model',
        durationMs: 100,
        promptId: 'user_query',
        errorMessage: 'test error',
      });
      logApiError(configWithRecording, event);
      expect(mockRecordUiTelemetryEvent).toHaveBeenCalled();
    });
  });
  describe('logApiRequest', () => {
    const mockConfig = {
      getSessionId: () => 'test-session-id',
@ -1010,6 +1107,46 @@ describe('loggers', () => {
        },
      });
    });
    it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
      'should not record to chatRecordingService when prompt_id is %s',
      (promptId) => {
        const mockRecordUiTelemetryEvent = vi.fn();
        const configWithRecording = {
          ...mockConfig,
          getChatRecordingService: () => ({
            recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
          }),
        } as unknown as Config;
        const call: CompletedToolCall = {
          status: 'success',
          request: {
            name: 'test-function',
            args: {},
            callId: 'test-call-id',
            isClientInitiated: true,
            prompt_id: promptId,
          },
          response: {
            callId: 'test-call-id',
            responseParts: [{ text: 'ok' }],
            resultDisplay: undefined,
            error: undefined,
            errorType: undefined,
          },
          tool: new EditTool(mockConfig),
          invocation: {} as AnyToolInvocation,
          durationMs: 50,
          outcome: ToolConfirmationOutcome.ProceedOnce,
        };
        const event = new ToolCallEvent(call);
        logToolCall(configWithRecording, event);
        expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
        expect(mockUiEvent.addEvent).toHaveBeenCalled();
      },
    );
  });
  describe('logMalformedJsonResponse', () => {
--- a/packages/core/src/telemetry/loggers.ts
+++ b/packages/core/src/telemetry/loggers.ts
@ -8,6 +8,7 @@ import type { LogAttributes, LogRecord } from '@opentelemetry/api-logs';
 import { logs } from '@opentelemetry/api-logs';
 import { SemanticAttributes } from '@opentelemetry/semantic-conventions';
 import type { Config } from '../config/config.js';
 import { isInternalPromptId } from '../utils/internalPromptIds.js';
 import { safeJsonStringify } from '../utils/safeJsonStringify.js';
 import {
  EVENT_API_ERROR,
@ -214,7 +215,9 @@ export function logToolCall(config: Config, event: ToolCallEvent): void {
    'event.timestamp': new Date().toISOString(),
  } as UiEvent;
  uiTelemetryService.addEvent(uiEvent);
-  config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  if (!isInternalPromptId(event.prompt_id)) {
    config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
  }
  QwenLogger.getInstance(config)?.logToolCallEvent(event);
  if (!isTelemetrySdkInitialized()) return;
@ -382,7 +385,9 @@ export function logApiError(config: Config, event: ApiErrorEvent): void {
    'event.timestamp': new Date().toISOString(),
  } as UiEvent;
  uiTelemetryService.addEvent(uiEvent);
-  config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  if (!isInternalPromptId(event.prompt_id)) {
    config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
  }
  QwenLogger.getInstance(config)?.logApiErrorEvent(event);
  if (!isTelemetrySdkInitialized()) return;
@ -449,7 +454,9 @@ export function logApiResponse(config: Config, event: ApiResponseEvent): void {
    'event.timestamp': new Date().toISOString(),
  } as UiEvent;
  uiTelemetryService.addEvent(uiEvent);
-  config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  if (!isInternalPromptId(event.prompt_id)) {
    config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
  }
  QwenLogger.getInstance(config)?.logApiResponseEvent(event);
  if (!isTelemetrySdkInitialized()) return;
  const attributes: LogAttributes = {
--- a/packages/core/src/utils/internalPromptIds.test.ts
+++ b/packages/core/src/utils/internalPromptIds.test.ts
@ -0,0 +1,35 @@
 /**
 * @license
 * Copyright 2025 Qwen Team
 * SPDX-License-Identifier: Apache-2.0
 */
 import { describe, it, expect } from 'vitest';
 import { isInternalPromptId } from './internalPromptIds.js';
 describe('isInternalPromptId', () => {
  it('returns true for prompt_suggestion', () => {
    expect(isInternalPromptId('prompt_suggestion')).toBe(true);
  });
  it('returns true for forked_query', () => {
    expect(isInternalPromptId('forked_query')).toBe(true);
  });
  it('returns true for speculation', () => {
    expect(isInternalPromptId('speculation')).toBe(true);
  });
  it('returns false for user_query', () => {
    expect(isInternalPromptId('user_query')).toBe(false);
  });
  it('returns false for empty string', () => {
    expect(isInternalPromptId('')).toBe(false);
  });
  it('returns false for arbitrary prompt ids', () => {
    expect(isInternalPromptId('btw-prompt-id')).toBe(false);
    expect(isInternalPromptId('context-prompt-id')).toBe(false);
  });
 });
--- a/packages/core/src/utils/internalPromptIds.ts
+++ b/packages/core/src/utils/internalPromptIds.ts
@ -0,0 +1,29 @@
 /**
 * @license
 * Copyright 2025 Qwen Team
 * SPDX-License-Identifier: Apache-2.0
 *
 * Internal Prompt ID utilities
 *
 * Centralises the set of prompt IDs used by background operations
 * (suggestion generation, forked queries) so that logging, recording,
 * and UI layers can consistently recognise and filter them.
 */
 /** Prompt IDs that belong to internal background operations. */
 const INTERNAL_PROMPT_IDS: ReadonlySet<string> = new Set([
  'prompt_suggestion',
  'forked_query',
  'speculation',
 ]);
 /**
 * Returns true if the prompt_id belongs to an internal background operation
 * whose events should not be recorded to the chatRecordingService,
 * OpenAI logs, or other persistent stores visible in the UI.
 *
 * Known internal IDs: `'prompt_suggestion'`, `'forked_query'`, `'speculation'`.
 */
 export function isInternalPromptId(promptId: string): boolean {
  return INTERNAL_PROMPT_IDS.has(promptId);
 }
--- a/packages/webui/src/components/layout/InputForm.tsx
+++ b/packages/webui/src/components/layout/InputForm.tsx
@ -132,7 +132,10 @@ export interface InputFormProps {
  /** Prompt suggestion state */
  followupState?: FollowupState;
  /** Callback to accept prompt suggestion */
-  onAcceptFollowup?: (method?: 'tab' | 'enter' | 'right') => void;
+  onAcceptFollowup?: (
    method?: 'tab' | 'enter' | 'right',
    options?: { skipOnAccept?: boolean },
  ) => void;
  /** Callback to dismiss prompt suggestion */
  onDismissFollowup?: () => void;
 }
@ -267,9 +270,10 @@ export const InputForm: FC<InputFormProps> = ({
      // Accept and submit prompt suggestion on Enter when input is empty
      if (hasFollowup && !inputText && followupSuggestion) {
        e.preventDefault();
-        onAcceptFollowup?.('enter');
+        // Skip onAccept callback — we pass the text directly to onSubmit.
-        // Pass suggestion text explicitly — onInputChange is async (React setState)
+        // Without skipOnAccept the microtask in accept() would re-insert
-        // so onSubmit cannot rely on reading inputText from the closure.
+        // the suggestion into the input after it was already cleared.
        onAcceptFollowup?.('enter', { skipOnAccept: true });
        onSubmit(e, followupSuggestion);
        return;
      }
--- a/packages/webui/src/hooks/useFollowupSuggestions.ts
+++ b/packages/webui/src/hooks/useFollowupSuggestions.ts
@ -192,7 +192,12 @@ export interface UseFollowupSuggestionsReturn {
  state: FollowupState;
  getPlaceholder: (defaultPlaceholder: string) => string;
  setSuggestion: (text: string | null) => void;
-  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  /** Accept the current suggestion */
  accept: (
    method?: 'tab' | 'enter' | 'right',
    options?: { skipOnAccept?: boolean },
  ) => void;
  /** Dismiss the current suggestion */
  dismiss: () => void;
  clear: () => void;
 }