diff --git a/docs/design/prompt-suggestion/prompt-suggestion-design.md b/docs/design/prompt-suggestion/prompt-suggestion-design.md
index 1636db6cf..a8d7ef381 100644
--- a/docs/design/prompt-suggestion/prompt-suggestion-design.md
+++ b/docs/design/prompt-suggestion/prompt-suggestion-design.md
@@ -64,10 +64,19 @@ A **prompt suggestion** (Next-step Suggestion / NES) is a short prediction (2-12
 ```
 [SUGGESTION MODE: Suggest what the user might naturally type next.]
+FIRST: Read the LAST FEW LINES of the assistant's most recent message — that's where
+next-step hints, tips, and actionable suggestions usually appear. Then check the user's
+recent messages and original request.
+
 Your job is to predict what THEY would type - not what you think they should do.
 
 THE TEST: Would they think "I was just about to type that"?
 
+PRIORITY: If the assistant's last message contains a tip or hint like "Tip: type X to ..."
+or "type X to ...", extract X as the suggestion. These are explicit next-step hints.
+
 EXAMPLES:
+Assistant says "Tip: type post comments to publish findings" → "post comments"
+Assistant says "type /review to start" → "/review"
 User asked "fix the bug and run tests", bug is fixed → "run the tests"
 After code written → "try it out"
 Task complete, obvious follow-up → "commit this" or "push it"
@@ -197,6 +206,23 @@ The Tab handler uses `key.name === 'tab'` explicitly (not `ACCEPT_SUGGESTION` ma
 | `enableSpeculation` | boolean | false | Predictive execution engine |
 | `fastModel` (top-level) | string | "" | Model for all background tasks (empty = use main model). Set via `/model --fast` |
 
+### Internal Prompt ID Filtering
+
+Background operations use dedicated prompt IDs (`INTERNAL_PROMPT_IDS` in `utils/internalPromptIds.ts`) to prevent their API traffic and tool calls from appearing in the user-visible UI:
+
+| Prompt ID           | Used by                    |
+| ------------------- | -------------------------- |
+| `prompt_suggestion` | Suggestion generation      |
+| `forked_query`      | Cache-aware forked queries |
+| `speculation`       | Speculation engine         |
+
+**Filtering applied:**
+
+- `loggingContentGenerator` — skips `logApiRequest` and OpenAI interaction logging for internal IDs
+- `logApiResponse` / `logApiError` — skips `chatRecordingService.recordUiTelemetryEvent`
+- `logToolCall` — skips `chatRecordingService.recordUiTelemetryEvent`
+- `uiTelemetryService.addEvent` — **not filtered** (ensures `/stats` token tracking works)
+
 ### Thinking Mode
 
 Thinking/reasoning is explicitly disabled (`thinkingConfig: { includeThoughts: false }`) for all background task paths:
diff --git a/docs/users/features/followup-suggestions.md b/docs/users/features/followup-suggestions.md
index 3dbf11df5..6b72a1d70 100644
--- a/docs/users/features/followup-suggestions.md
+++ b/docs/users/features/followup-suggestions.md
@@ -12,7 +12,7 @@ After Qwen Code finishes responding, a suggestion appears as dimmed text in the
 > run the tests
 ```
 
-The suggestion is generated by sending the conversation history to the model, which predicts what you would naturally type next.
+The suggestion is generated by sending the conversation history to the model, which predicts what you would naturally type next. If the response contains an explicit tip (e.g., `Tip: type post comments to publish findings`), the suggested action is extracted automatically.
 
 ## Accepting Suggestions
 
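Reviewer note: the filtering contract the design doc above describes, as a minimal TypeScript sketch. Sink names are illustrative, not the literal call sites; only `isInternalPromptId` comes from this PR.

```ts
import { isInternalPromptId } from '../utils/internalPromptIds.js';

interface Sinks {
  addUiEvent(e: unknown): void; // uiTelemetryService.addEvent: never filtered (/stats)
  recordToChat(e: unknown): void; // chatRecordingService: filtered for internal IDs
}

function dispatchTelemetry(promptId: string, event: unknown, sinks: Sinks): void {
  sinks.addUiEvent(event); // token totals still include background traffic
  if (!isInternalPromptId(promptId)) {
    sinks.recordToChat(event); // user-visible recording skips internal prompts
  }
}
```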
diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx
index b4bfae4e1..641bb20cd 100644
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@@ -235,6 +235,10 @@ describe('InputPrompt', () => {
     await wait();
 
     expect(props.onSubmit).toHaveBeenCalledWith('commit this');
+    // Enter path must NOT call buffer.insert — it passes text directly to
+    // handleSubmitAndClear. Calling insert would re-fill the buffer after
+    // it was already cleared (the microtask race bug).
+    expect(mockBuffer.insert).not.toHaveBeenCalled();
 
     unmount();
   });
diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx
index aa2004c72..9b0e75e5f 100644
--- a/packages/cli/src/ui/components/InputPrompt.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.tsx
@@ -882,7 +882,11 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
         followup.state.suggestion
       ) {
         const text = followup.state.suggestion;
-        followup.accept('enter');
+        // Skip onAccept (buffer.insert) — we pass the text directly to
+        // handleSubmitAndClear which clears the buffer synchronously.
+        // Without skipOnAccept the microtask in accept() would re-insert
+        // the suggestion into the buffer after it was already cleared.
+        followup.accept('enter', { skipOnAccept: true });
         handleSubmitAndClear(text);
         return true;
       }
diff --git a/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx b/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
index 81773f0fb..4ff8e4d68 100644
--- a/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
+++ b/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
@@ -43,7 +43,10 @@ export interface UseFollowupSuggestionsReturn {
   /** Set suggestion text (called by parent component) */
   setSuggestion: (text: string | null) => void;
   /** Accept the current suggestion */
-  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  accept: (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ) => void;
   /** Dismiss the current suggestion */
   dismiss: () => void;
   /** Clear all state */
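Reviewer note: the race these comments refer to, reduced to a standalone sketch. Names are illustrative; the real flow is `accept()` in `followupState.ts` plus `handleSubmitAndClear`.

```ts
// accept() defers onAccept via queueMicrotask, so a synchronous buffer clear
// in the same tick gets overwritten one microtask later.
let buffer = '';
const onAccept = (text: string) => { buffer = text; }; // Tab path: fill the input

function accept(text: string, skipOnAccept = false): void {
  if (!skipOnAccept) queueMicrotask(() => onAccept(text));
}

accept('run the tests'); // old Enter path
buffer = ''; // handleSubmitAndClear clears synchronously
queueMicrotask(() => console.log(buffer)); // "run the tests": re-inserted!

accept('run the tests', true); // fixed Enter path: nothing queued, buffer stays empty
```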
diff --git a/packages/cli/src/ui/utils/MarkdownDisplay.tsx b/packages/cli/src/ui/utils/MarkdownDisplay.tsx
index 642b04d6b..25e2110d9 100644
--- a/packages/cli/src/ui/utils/MarkdownDisplay.tsx
+++ b/packages/cli/src/ui/utils/MarkdownDisplay.tsx
@@ -111,7 +111,9 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
       lines[index + 1].match(tableSeparatorRegex)
     ) {
       inTable = true;
-      tableHeaders = tableRowMatch[1].split(/(?<!\\)\|/).map((cell) => cell.trim().replaceAll('\\|', '|'));
+      tableHeaders = tableRowMatch[1]
+        .split(/(?<!\\)\|/)
+        .map((cell) => cell.trim().replaceAll('\\|', '|'));
       tableRows = [];
     } else {
       // Not a table, treat as regular text
@@ -127,7 +129,9 @@ const MarkdownDisplayInternal: React.FC<MarkdownDisplayProps> = ({
       // Skip separator line - already handled
     } else if (inTable && tableRowMatch) {
       // Add table row
-      const cells = tableRowMatch[1].split(/(?<!\\)\|/).map((cell) => cell.trim().replaceAll('\\|', '|'));
+      const cells = tableRowMatch[1]
+        .split(/(?<!\\)\|/)
+        .map((cell) => cell.trim().replaceAll('\\|', '|'));
       // Ensure row has same column count as headers
       while (cells.length < tableHeaders.length) {
         cells.push('');
diff --git a/packages/cli/src/ui/utils/TableRenderer.tsx b/packages/cli/src/ui/utils/TableRenderer.tsx
index db3ff8a95..7684a2b71 100644
--- a/packages/cli/src/ui/utils/TableRenderer.tsx
+++ b/packages/cli/src/ui/utils/TableRenderer.tsx
@@ -36,7 +36,10 @@ export const TableRenderer: React.FC<TableRendererProps> = ({
   // Ensure table fits within terminal width
   const totalWidth = columnWidths.reduce((sum, width) => sum + width + 1, 1);
   const fixedWidth = columnWidths.length + 1;
-  const scaleFactor = totalWidth > contentWidth ? (contentWidth - fixedWidth) / (totalWidth - fixedWidth) : 1;
+  const scaleFactor =
+    totalWidth > contentWidth
+      ? (contentWidth - fixedWidth) / (totalWidth - fixedWidth)
+      : 1;
   const adjustedWidths = columnWidths.map((width) =>
     Math.floor(width * scaleFactor),
   );
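Reviewer note: both behaviors above, checked in isolation. The widths are hypothetical; the snippet is runnable as-is.

```ts
// Split on unescaped '|' only, then unescape '\|' inside cells, mirroring
// the MarkdownDisplay change above.
const row = String.raw`a \| b | c`;
const cells = row.split(/(?<!\\)\|/).map((c) => c.trim().replaceAll('\\|', '|'));
console.log(cells); // [ 'a | b', 'c' ]

// Width scaling from TableRenderer: shrink only when the table overflows.
const columnWidths = [40, 40];
const contentWidth = 60; // hypothetical terminal content width
const totalWidth = columnWidths.reduce((sum, w) => sum + w + 1, 1); // + borders
const fixedWidth = columnWidths.length + 1;
const scaleFactor =
  totalWidth > contentWidth
    ? (contentWidth - fixedWidth) / (totalWidth - fixedWidth)
    : 1;
console.log(columnWidths.map((w) => Math.floor(w * scaleFactor))); // [ 28, 28 ]
```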
diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
index b39e65357..624f04f3b 100644
--- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
+++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts
@@ -23,11 +23,16 @@ import {
 import { OpenAILogger } from '../../utils/openaiLogger.js';
 import type OpenAI from 'openai';
 
-vi.mock('../../telemetry/loggers.js', () => ({
-  logApiRequest: vi.fn(),
-  logApiResponse: vi.fn(),
-  logApiError: vi.fn(),
-}));
+vi.mock('../../telemetry/loggers.js', async (importOriginal) => {
+  const actual =
+    await importOriginal<typeof import('../../telemetry/loggers.js')>();
+  return {
+    ...actual,
+    logApiRequest: vi.fn(),
+    logApiResponse: vi.fn(),
+    logApiError: vi.fn(),
+  };
+});
 
 vi.mock('../../utils/openaiLogger.js', () => ({
   OpenAILogger: vi.fn().mockImplementation(() => ({
@@ -474,4 +479,91 @@ describe('LoggingContentGenerator', () => {
       },
     ]);
   });
+
+  it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+    'skips logApiRequest and OpenAI logging for internal promptId %s (generateContent)',
+    async (promptId) => {
+      const mockResponse = {
+        responseId: 'internal-resp',
+        modelVersion: 'test-model',
+        candidates: [{ content: { parts: [{ text: 'suggestion' }] } }],
+        usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
+      } as unknown as GenerateContentResponse;
+
+      const mockWrapped = {
+        generateContent: vi.fn().mockResolvedValue(mockResponse),
+        generateContentStream: vi.fn(),
+      } as unknown as ContentGenerator;
+
+      const gen = new LoggingContentGenerator(mockWrapped, createConfig(), {
+        model: 'test-model',
+        enableOpenAILogging: true,
+        openAILoggingDir: '/tmp/test-logs',
+      });
+
+      const request = {
+        model: 'test-model',
+        contents: [{ role: 'user', parts: [{ text: 'test' }] }],
+      } as unknown as GenerateContentParameters;
+
+      await gen.generateContent(request, promptId);
+
+      // logApiRequest should NOT be called for internal prompts
+      expect(logApiRequest).not.toHaveBeenCalled();
+      // logApiResponse SHOULD be called (for /stats token tracking)
+      expect(logApiResponse).toHaveBeenCalled();
+      // OpenAI logger should be constructed, but no interaction should be logged
+      expect(OpenAILogger).toHaveBeenCalled();
+      const loggerInstance = (
+        OpenAILogger as unknown as ReturnType<typeof vi.fn>
+      ).mock.results[0]?.value;
+      expect(loggerInstance.logInteraction).not.toHaveBeenCalled();
+    },
+  );
+
+  it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+    'skips logApiRequest and OpenAI logging for internal promptId %s (generateContentStream)',
+    async (promptId) => {
+      const mockChunk = {
+        responseId: 'stream-resp',
+        modelVersion: 'test-model',
+        candidates: [{ content: { parts: [{ text: 'suggestion' }] } }],
+        usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 },
+      } as unknown as GenerateContentResponse;
+
+      async function* fakeStream() {
+        yield mockChunk;
+      }
+
+      const mockWrapped = {
+        generateContent: vi.fn(),
+        generateContentStream: vi.fn().mockResolvedValue(fakeStream()),
+      } as unknown as ContentGenerator;
+
+      const gen = new LoggingContentGenerator(mockWrapped, createConfig(), {
+        model: 'test-model',
+        enableOpenAILogging: true,
+        openAILoggingDir: '/tmp/test-logs',
+      });
+
+      const request = {
+        model: 'test-model',
+        contents: [{ role: 'user', parts: [{ text: 'test' }] }],
+      } as unknown as GenerateContentParameters;
+
+      const stream = await gen.generateContentStream(request, promptId);
+      // Consume the stream
+      for await (const _chunk of stream) {
+        // drain
+      }
+
+      expect(logApiRequest).not.toHaveBeenCalled();
+      expect(logApiResponse).toHaveBeenCalled();
+      expect(OpenAILogger).toHaveBeenCalled();
+      const loggerInstance = (
+        OpenAILogger as unknown as ReturnType<typeof vi.fn>
+      ).mock.results[0]?.value;
+      expect(loggerInstance.logInteraction).not.toHaveBeenCalled();
+    },
+  );
 });
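Reviewer note: the mock rewrite above is vitest's standard partial-mock pattern. Generic shape for reference; the module name is a placeholder.

```ts
import { vi } from 'vitest';

// Keep every real export (e.g., logToolCall) and stub only the noisy ones;
// importOriginal loads the unmocked module so the spread preserves the rest.
vi.mock('./loggers.js', async (importOriginal) => {
  const actual = await importOriginal<typeof import('./loggers.js')>();
  return { ...actual, logApiRequest: vi.fn() };
});
```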
diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
index 4f4b00138..4464f5a7e 100644
--- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
+++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts
@@ -31,6 +31,7 @@ import {
   logApiRequest,
   logApiResponse,
 } from '../../telemetry/loggers.js';
+import { isInternalPromptId } from '../../utils/internalPromptIds.js';
 import type {
   ContentGenerator,
   ContentGeneratorConfig,
@@ -143,8 +144,17 @@ export class LoggingContentGenerator implements ContentGenerator {
     userPromptId: string,
   ): Promise<GenerateContentResponse> {
     const startTime = Date.now();
-    this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
-    const openaiRequest = await this.buildOpenAIRequestForLogging(req);
+    const isInternal = isInternalPromptId(userPromptId);
+    if (!isInternal) {
+      this.logApiRequest(
+        this.toContents(req.contents),
+        req.model,
+        userPromptId,
+      );
+    }
+    const openaiRequest = isInternal
+      ? undefined
+      : await this.buildOpenAIRequestForLogging(req);
     try {
       const response = await this.wrapped.generateContent(req, userPromptId);
       const durationMs = Date.now() - startTime;
@@ -155,12 +165,16 @@ export class LoggingContentGenerator implements ContentGenerator {
         userPromptId,
         response.usageMetadata,
       );
-      await this.logOpenAIInteraction(openaiRequest, response);
+      if (!isInternal) {
+        await this.logOpenAIInteraction(openaiRequest, response);
+      }
       return response;
     } catch (error) {
       const durationMs = Date.now() - startTime;
       this._logApiError('', durationMs, error, req.model, userPromptId);
-      await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      if (!isInternal) {
+        await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      }
       throw error;
     }
   }
@@ -170,8 +184,17 @@ export class LoggingContentGenerator implements ContentGenerator {
     userPromptId: string,
   ): Promise<AsyncGenerator<GenerateContentResponse>> {
     const startTime = Date.now();
-    this.logApiRequest(this.toContents(req.contents), req.model, userPromptId);
-    const openaiRequest = await this.buildOpenAIRequestForLogging(req);
+    const isInternal = isInternalPromptId(userPromptId);
+    if (!isInternal) {
+      this.logApiRequest(
+        this.toContents(req.contents),
+        req.model,
+        userPromptId,
+      );
+    }
+    const openaiRequest = isInternal
+      ? undefined
+      : await this.buildOpenAIRequestForLogging(req);
 
     let stream: AsyncGenerator<GenerateContentResponse>;
     try {
@@ -179,7 +202,9 @@ export class LoggingContentGenerator implements ContentGenerator {
       stream = await this.wrapped.generateContentStream(req, userPromptId);
     } catch (error) {
       const durationMs = Date.now() - startTime;
       this._logApiError('', durationMs, error, req.model, userPromptId);
-      await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      if (!isInternal) {
+        await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      }
       throw error;
     }
@@ -199,12 +224,27 @@ export class LoggingContentGenerator implements ContentGenerator {
     model: string,
     openaiRequest?: OpenAI.Chat.ChatCompletionCreateParams,
   ): AsyncGenerator<GenerateContentResponse> {
+    const isInternal = isInternalPromptId(userPromptId);
+    // For internal prompts we only need the last usage metadata (for /stats);
+    // skip collecting full responses to avoid unnecessary memory overhead.
     const responses: GenerateContentResponse[] = [];
+    // Track first-seen IDs so _logApiResponse/_logApiError have accurate
+    // values even when we skip collecting full responses for internal prompts.
+    let firstResponseId = '';
+    let firstModelVersion = '';
     let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined;
     try {
       for await (const response of stream) {
-        responses.push(response);
+        if (!firstResponseId && response.responseId) {
+          firstResponseId = response.responseId;
+        }
+        if (!firstModelVersion && response.modelVersion) {
+          firstModelVersion = response.modelVersion;
+        }
+        if (!isInternal) {
+          responses.push(response);
+        }
         if (response.usageMetadata) {
           lastUsageMetadata = response.usageMetadata;
         }
@@ -213,25 +253,29 @@ export class LoggingContentGenerator implements ContentGenerator {
       // Only log successful API response if no error occurred
       const durationMs = Date.now() - startTime;
       this._logApiResponse(
-        responses[0]?.responseId ?? '',
+        firstResponseId,
         durationMs,
-        responses[0]?.modelVersion || model,
+        firstModelVersion || model,
         userPromptId,
         lastUsageMetadata,
       );
-      const consolidatedResponse =
-        this.consolidateGeminiResponsesForLogging(responses);
-      await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
+      if (!isInternal) {
+        const consolidatedResponse =
+          this.consolidateGeminiResponsesForLogging(responses);
+        await this.logOpenAIInteraction(openaiRequest, consolidatedResponse);
+      }
     } catch (error) {
       const durationMs = Date.now() - startTime;
       this._logApiError(
-        responses[0]?.responseId ?? '',
+        firstResponseId,
         durationMs,
         error,
-        responses[0]?.modelVersion || model,
+        firstModelVersion || model,
         userPromptId,
       );
-      await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      if (!isInternal) {
+        await this.logOpenAIInteraction(openaiRequest, undefined, error);
+      }
       throw error;
     }
   }
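Reviewer note: the wrapper change in miniature: a pass-through generator that keeps first-seen IDs and last usage without buffering internal-prompt chunks. Types are simplified for the sketch.

```ts
type Chunk = { responseId?: string; usage?: number };

async function* observe(stream: AsyncIterable<Chunk>, isInternal: boolean) {
  const buffered: Chunk[] = []; // full capture only for user-visible turns
  let firstId = '';
  let lastUsage: number | undefined;
  for await (const chunk of stream) {
    if (!firstId && chunk.responseId) firstId = chunk.responseId;
    if (!isInternal) buffered.push(chunk);
    if (chunk.usage !== undefined) lastUsage = chunk.usage;
    yield chunk; // the caller still sees every chunk
  }
  // firstId/lastUsage feed response logging; buffered feeds OpenAI logging only
  console.log({ firstId, lastUsage, bufferedCount: buffered.length });
}
```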
diff --git a/packages/core/src/followup/followupState.test.ts b/packages/core/src/followup/followupState.test.ts
index 325c967b0..331a28db5 100644
--- a/packages/core/src/followup/followupState.test.ts
+++ b/packages/core/src/followup/followupState.test.ts
@@ -294,6 +294,37 @@ describe('createFollowupController', () => {
     ctrl.cleanup();
   });
 
+  it('accept with skipOnAccept skips onAccept callback but still clears state and fires telemetry', async () => {
+    const onStateChange = vi.fn();
+    const onAccept = vi.fn();
+    const onOutcome = vi.fn();
+    const ctrl = createFollowupController({
+      onStateChange,
+      getOnAccept: () => onAccept,
+      onOutcome,
+    });
+
+    ctrl.setSuggestion('run tests');
+    vi.advanceTimersByTime(300);
+    onStateChange.mockClear();
+
+    ctrl.accept('enter', { skipOnAccept: true });
+
+    // State should be cleared
+    expect(onStateChange).toHaveBeenCalledWith(INITIAL_FOLLOWUP_STATE);
+
+    // Telemetry should still fire
+    expect(onOutcome).toHaveBeenCalledWith(
+      expect.objectContaining({ outcome: 'accepted', accept_method: 'enter' }),
+    );
+
+    // Flush microtask — onAccept should NOT be called
+    await Promise.resolve();
+    expect(onAccept).not.toHaveBeenCalled();
+
+    ctrl.cleanup();
+  });
+
   it('setSuggestion replaces a pending suggestion', () => {
     const onStateChange = vi.fn();
     const ctrl = createFollowupController({ onStateChange });
diff --git a/packages/core/src/followup/followupState.ts b/packages/core/src/followup/followupState.ts
index 8430e206f..60e8f717a 100644
--- a/packages/core/src/followup/followupState.ts
+++ b/packages/core/src/followup/followupState.ts
@@ -72,7 +72,10 @@ export interface FollowupControllerActions {
   /** Set suggestion text (with delayed show). Null clears immediately. */
   setSuggestion: (text: string | null) => void;
   /** Accept the current suggestion and invoke onAccept callback */
-  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  accept: (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ) => void;
   /** Dismiss/clear suggestion */
   dismiss: () => void;
   /** Hard-clear all state and timers */
@@ -135,7 +138,10 @@ export function createFollowupController(
     }, SUGGESTION_DELAY_MS);
   };
 
-  const accept = (method?: 'tab' | 'enter' | 'right'): void => {
+  const accept = (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ): void => {
     if (accepting) {
       return;
     }
@@ -170,7 +176,9 @@ export function createFollowupController(
 
     queueMicrotask(() => {
       try {
-        getOnAccept?.()?.(text);
+        if (!options?.skipOnAccept) {
+          getOnAccept?.()?.(text);
+        }
       } catch (error: unknown) {
         // eslint-disable-next-line no-console
         console.error('[followup] onAccept callback threw:', error);
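Reviewer note: a hedged usage sketch of the extended controller API. The callback bodies are illustrative; the option names come straight from the interface above.

```ts
import { createFollowupController } from './followupState.js';

// Tab/right-arrow path: onAccept fills the input buffer via a microtask.
// Enter path: the caller already has the text, so it opts out of onAccept.
const ctrl = createFollowupController({
  onStateChange: (state) => console.log('state:', state),
  getOnAccept: () => (text: string) => console.log('insert into buffer:', text),
  onOutcome: (event) => console.log('telemetry:', event),
});

ctrl.setSuggestion('run the tests');
ctrl.accept('tab'); // inserts text into the buffer one microtask later
// ...or, when submitting directly on Enter:
ctrl.accept('enter', { skipOnAccept: true }); // clears state, no re-insert
```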
diff --git a/packages/core/src/followup/forkedQuery.test.ts b/packages/core/src/followup/forkedQuery.test.ts
index 862d9b9e6..a223a308e 100644
--- a/packages/core/src/followup/forkedQuery.test.ts
+++ b/packages/core/src/followup/forkedQuery.test.ts
@@ -4,13 +4,24 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import { describe, it, expect, beforeEach } from 'vitest';
+import { describe, it, expect, beforeEach, vi } from 'vitest';
 import {
   saveCacheSafeParams,
   getCacheSafeParams,
   clearCacheSafeParams,
+  runForkedQuery,
 } from './forkedQuery.js';
 import type { GenerateContentConfig } from '@google/genai';
+import type { Config } from '../config/config.js';
+import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
+
+vi.mock('../core/geminiChat.js', async (importOriginal) => {
+  const actual = await importOriginal<typeof import('../core/geminiChat.js')>();
+  return {
+    ...actual,
+    GeminiChat: vi.fn(),
+  };
+});
 
 describe('CacheSafeParams', () => {
   beforeEach(() => {
@@ -113,3 +124,190 @@ describe('CacheSafeParams', () => {
     });
   });
 });
+
+describe('runForkedQuery', () => {
+  beforeEach(() => {
+    clearCacheSafeParams();
+    vi.mocked(GeminiChat).mockReset();
+  });
+
+  it('passes tools: [] in per-request config so the model cannot produce function calls', async () => {
+    // Save cache params with real tools to simulate a normal conversation
+    saveCacheSafeParams(
+      {
+        systemInstruction: 'You are helpful',
+        tools: [
+          {
+            functionDeclarations: [
+              { name: 'edit', description: 'Edit a file' },
+              { name: 'shell', description: 'Run a command' },
+            ],
+          },
+        ],
+      },
+      [{ role: 'user', parts: [{ text: 'hello' }] }],
+      'test-model',
+    );
+
+    // Track what sendMessageStream receives
+    let capturedParams: unknown = null;
+
+    const mockSendMessageStream = vi.fn(
+      (_model: string, params: unknown, _promptId: string) => {
+        capturedParams = params;
+        async function* generate() {
+          yield {
+            type: StreamEventType.CHUNK,
+            value: {
+              candidates: [
+                {
+                  content: {
+                    role: 'model',
+                    parts: [{ text: 'commit this' }],
+                  },
+                },
+              ],
+              usageMetadata: {
+                promptTokenCount: 10,
+                candidatesTokenCount: 5,
+                totalTokenCount: 15,
+              },
+            },
+          };
+        }
+        return Promise.resolve(generate());
+      },
+    );
+
+    vi.mocked(GeminiChat).mockImplementation(
+      () =>
+        ({
+          sendMessageStream: mockSendMessageStream,
+        }) as unknown as GeminiChat,
+    );
+
+    const mockConfig = {} as unknown as Config;
+
+    const result = await runForkedQuery(mockConfig, 'suggest something');
+
+    // Verify GeminiChat was constructed with the full generationConfig
+    // (including tools) — createForkedChat retains tools for speculation callers
+    expect(GeminiChat).toHaveBeenCalledOnce();
+    const ctorArgs = vi.mocked(GeminiChat).mock.calls[0];
+    const chatGenerationConfig = ctorArgs[1] as GenerateContentConfig;
+    expect(chatGenerationConfig.tools).toEqual([
+      {
+        functionDeclarations: [
+          { name: 'edit', description: 'Edit a file' },
+          { name: 'shell', description: 'Run a command' },
+        ],
+      },
+    ]);
+    // chatRecordingService and telemetryService must be undefined
+    // to avoid polluting the main session's recordings
+    expect(ctorArgs[3]).toBeUndefined(); // chatRecordingService
+    expect(ctorArgs[4]).toBeUndefined(); // telemetryService
+
+    // Verify sendMessageStream was called
+    expect(mockSendMessageStream).toHaveBeenCalledOnce();
+    expect(capturedParams).not.toBeNull();
+
+    // KEY ASSERTION: per-request config must have tools: [] to prevent
+    // the model from producing function calls (Root Cause 1 fix)
+    const sendParams = capturedParams as { config?: { tools?: unknown } };
+    expect(sendParams.config).toBeDefined();
+    expect(sendParams.config!.tools).toEqual([]);
+
+    // Verify prompt_id is 'forked_query' and message is passed correctly
+    expect(mockSendMessageStream).toHaveBeenCalledWith(
+      'test-model',
+      expect.objectContaining({
+        message: [{ text: 'suggest something' }],
+        config: expect.objectContaining({ tools: [] }),
+      }),
+      'forked_query',
+    );
+
+    // Verify result is correct
+    expect(result.text).toBe('commit this');
+    expect(result.usage.inputTokens).toBe(10);
+    expect(result.usage.outputTokens).toBe(5);
+  });
+
+  it('preserves tools: [] even when jsonSchema is provided', async () => {
+    saveCacheSafeParams(
+      {
+        tools: [{ functionDeclarations: [{ name: 'edit' }] }],
+      },
+      [],
+      'test-model',
+    );
+
+    let capturedParams: unknown = null;
+
+    const mockSendMessageStream = vi.fn(
+      (_model: string, params: unknown, _promptId: string) => {
+        capturedParams = params;
+        async function* generate() {
+          yield {
+            type: StreamEventType.CHUNK,
+            value: {
+              candidates: [
+                {
+                  content: {
+                    role: 'model',
+                    parts: [{ text: '{"suggestion":"run tests"}' }],
+                  },
+                },
+              ],
+              usageMetadata: {
+                promptTokenCount: 5,
+                candidatesTokenCount: 3,
+              },
+            },
+          };
+        }
+        return Promise.resolve(generate());
+      },
+    );
+
+    vi.mocked(GeminiChat).mockImplementation(
+      () =>
+        ({
+          sendMessageStream: mockSendMessageStream,
+        }) as unknown as GeminiChat,
+    );
+
+    const schema = {
+      type: 'object',
+      properties: { suggestion: { type: 'string' } },
+    };
+
+    const result = await runForkedQuery({} as Config, 'suggest', {
+      jsonSchema: schema,
+    });
+
+    const sendParams = capturedParams as {
+      config?: {
+        tools?: unknown;
+        responseMimeType?: string;
+        responseJsonSchema?: unknown;
+      };
+    };
+    // tools: [] must still be present alongside JSON schema options
+    expect(sendParams.config!.tools).toEqual([]);
+    expect(sendParams.config!.responseMimeType).toBe('application/json');
+    expect(sendParams.config!.responseJsonSchema).toBe(schema);
+
+    // Verify JSON was parsed correctly
+    expect(result.jsonResult).toEqual({ suggestion: 'run tests' });
+  });
+
+  it('throws when CacheSafeParams are not available', async () => {
+    const mockConfig = {} as unknown as Config;
+
+    await expect(runForkedQuery(mockConfig, 'test')).rejects.toThrow(
+      'CacheSafeParams not available',
+    );
+  });
+});
diff --git a/packages/core/src/followup/forkedQuery.ts b/packages/core/src/followup/forkedQuery.ts
index 798374c73..0d4dc2186 100644
--- a/packages/core/src/followup/forkedQuery.ts
+++ b/packages/core/src/followup/forkedQuery.ts
@@ -6,11 +6,15 @@
  * Forked Query Infrastructure
 *
 * Enables cache-aware secondary LLM calls that share the main conversation's
- * prompt prefix (systemInstruction + tools + history) for cache hits.
+ * prompt prefix (systemInstruction + history) for cache hits.
 *
 * DashScope already enables cache_control via X-DashScope-CacheControl header.
 * By constructing the forked GeminiChat with identical generationConfig and
 * history prefix, the fork automatically benefits from prefix caching.
+ *
+ * Note: `runForkedQuery` overrides `tools: []` at the per-request level so the
+ * model cannot produce function calls. `createForkedChat` retains the full
+ * generationConfig (including tools) for callers like speculation that need them.
 */
 
 import type {
@@ -21,6 +25,12 @@ import type {
 import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
 import type { Config } from '../config/config.js';
 
+/** Per-request config that strips tools so the model never produces function calls. */
+const NO_TOOLS = Object.freeze({ tools: [] as const }) as Pick<
+  GenerateContentConfig,
+  'tools'
+>;
+
 /**
 * Snapshot of the main conversation's cache-critical parameters.
 * Captured after each successful main turn so forked queries share the same prefix.
@@ -111,9 +121,13 @@ export function clearCacheSafeParams(): void {
 // ---------------------------------------------------------------------------
 
 /**
- * Create an isolated GeminiChat that shares the same cache prefix as the main
- * conversation. The fork uses identical generationConfig (systemInstruction +
- * tools) and history, so DashScope's cache_control mechanism produces cache hits.
+ * Create an isolated GeminiChat that shares the main conversation's
+ * generationConfig (including systemInstruction, tools, and history).
+ *
+ * The full config is retained so that callers like `runSpeculativeLoop`
+ * can execute tool calls during speculation. For pure-text callers like
+ * `runForkedQuery`, tools are stripped at the per-request level via
+ * `NO_TOOLS` — see {@link runForkedQuery}.
 *
 * The fork does NOT have chatRecordingService or telemetryService to avoid
 * polluting the main session's recordings and token counts.
@@ -165,7 +179,7 @@ function extractUsage(
 
 /**
 * Run a forked query using a GeminiChat that shares the main conversation's
- * cache prefix. This is a single-turn request (no tool execution loop).
+ * cache prefix. This is a single-turn, tool-free request (no function calls).
 *
 * @param config - App config
 * @param userMessage - The user message to send (e.g., SUGGESTION_PROMPT)
@@ -191,8 +205,10 @@ export async function runForkedQuery(
   const model = options?.model ?? params.model;
   const chat = createForkedChat(config, params);
 
-  // Build per-request config overrides for JSON schema if needed
-  const requestConfig: GenerateContentConfig = {};
+  // Build per-request config overrides.
+  // NO_TOOLS prevents the model from producing function calls — forked
+  // queries are pure text completion and must not appear in tool-call UI.
+  const requestConfig: GenerateContentConfig = { ...NO_TOOLS };
   if (options?.abortSignal) {
     requestConfig.abortSignal = options.abortSignal;
   }
@@ -205,7 +221,7 @@ export async function runForkedQuery(
     model,
     {
       message: [{ text: userMessage }],
-      config: Object.keys(requestConfig).length > 0 ? requestConfig : undefined,
+      config: requestConfig,
     },
     'forked_query',
   );
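Reviewer note: a hedged usage sketch of the API above. Option and result field names come from this file and its tests; the surrounding setup (import path for SUGGESTION_PROMPT, the abort wiring) is assumed.

```ts
import { runForkedQuery } from './forkedQuery.js';
import { SUGGESTION_PROMPT } from './suggestionGenerator.js';
import type { Config } from '../config/config.js';

// Single-turn, tool-free fork: shares the main conversation's cached prefix,
// sends one message under the 'forked_query' prompt ID, returns text + usage.
async function suggestNext(config: Config, signal: AbortSignal) {
  const { text, usage } = await runForkedQuery(config, SUGGESTION_PROMPT, {
    abortSignal: signal,
  });
  console.log(`suggestion: ${text} (${usage.inputTokens} in / ${usage.outputTokens} out)`);
  return text;
}
```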
diff --git a/packages/core/src/followup/suggestionGenerator.ts b/packages/core/src/followup/suggestionGenerator.ts
index e5b25b768..50f872be7 100644
--- a/packages/core/src/followup/suggestionGenerator.ts
+++ b/packages/core/src/followup/suggestionGenerator.ts
@@ -24,13 +24,20 @@ import { ApiResponseEvent } from '../telemetry/types.js';
 */
 export const SUGGESTION_PROMPT = `[SUGGESTION MODE: Suggest what the user might naturally type next.]
 
-FIRST: Look at the user's recent messages and original request.
+FIRST: Read the LAST FEW LINES of the assistant's most recent message -- that's where
+next-step hints, tips, and actionable suggestions usually appear. Then check the user's
+recent messages and original request.
 
 Your job is to predict what THEY would type - not what you think they should do.
 
 THE TEST: Would they think "I was just about to type that"?
 
+PRIORITY: If the assistant's last message contains a tip or hint like "Tip: type X to ..."
+or "type X to ...", extract X as the suggestion. These are explicit next-step hints.
+
 EXAMPLES:
+Assistant says "Tip: type post comments to publish findings" → "post comments"
+Assistant says "type /review to start" → "/review"
 User asked "fix the bug and run tests", bug is fixed → "run the tests"
 After code written → "try it out"
 Model offers options → suggest the one the user would likely pick, based on conversation
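Reviewer note: the structured variant of the previous sketch, per the jsonSchema path exercised in the forkedQuery tests above. The schema shape mirrors the test, not shipped code.

```ts
import { runForkedQuery } from './forkedQuery.js';
import type { Config } from '../config/config.js';

// jsonSchema switches the fork to application/json and parses jsonResult;
// tools stay stripped ([]) either way.
async function suggestStructured(config: Config) {
  const { jsonResult } = await runForkedQuery(config, 'suggest something', {
    jsonSchema: {
      type: 'object',
      properties: { suggestion: { type: 'string' } },
    },
  });
  return (jsonResult as { suggestion?: string })?.suggestion ?? null;
}
```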
diff --git a/packages/core/src/telemetry/loggers.test.ts b/packages/core/src/telemetry/loggers.test.ts
index 288e02f03..19a3997dc 100644
--- a/packages/core/src/telemetry/loggers.test.ts
+++ b/packages/core/src/telemetry/loggers.test.ts
@@ -55,6 +55,7 @@ import {
   logExtensionInstallEvent,
   logExtensionUninstall,
   logHookCall,
+  logApiError,
 } from './loggers.js';
 import * as metrics from './metrics.js';
 import { QwenLogger } from './qwen-logger/qwen-logger.js';
@@ -77,6 +78,7 @@ import {
   ExtensionInstallEvent,
   ExtensionUninstallEvent,
   HookCallEvent,
+  ApiErrorEvent,
 } from './types.js';
 import { FileOperation } from './metrics.js';
 import type {
@@ -359,6 +361,101 @@ describe('loggers', () => {
     });
   });
 
+  describe('logApiResponse skips chatRecordingService for internal prompt IDs', () => {
+    it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+      'should not record to chatRecordingService when prompt_id is %s',
+      (promptId) => {
+        const mockRecordUiTelemetryEvent = vi.fn();
+        const configWithRecording = {
+          getSessionId: () => 'test-session-id',
+          getUsageStatisticsEnabled: () => false,
+          getChatRecordingService: () => ({
+            recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+          }),
+        } as unknown as Config;
+
+        const event = new ApiResponseEvent(
+          'resp-id',
+          'test-model',
+          50,
+          promptId,
+        );
+        logApiResponse(configWithRecording, event);
+
+        expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
+        expect(mockUiEvent.addEvent).toHaveBeenCalled();
+      },
+    );
+
+    it('should record to chatRecordingService for normal prompt IDs', () => {
+      const mockRecordUiTelemetryEvent = vi.fn();
+      const configWithRecording = {
+        getSessionId: () => 'test-session-id',
+        getUsageStatisticsEnabled: () => false,
+        getChatRecordingService: () => ({
+          recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+        }),
+      } as unknown as Config;
+
+      const event = new ApiResponseEvent(
+        'resp-id',
+        'test-model',
+        50,
+        'user_query',
+      );
+      logApiResponse(configWithRecording, event);
+
+      expect(mockRecordUiTelemetryEvent).toHaveBeenCalled();
+    });
+  });
+
+  describe('logApiError skips chatRecordingService for internal prompt IDs', () => {
+    it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+      'should not record to chatRecordingService when prompt_id is %s',
+      (promptId) => {
+        const mockRecordUiTelemetryEvent = vi.fn();
+        const configWithRecording = {
+          getSessionId: () => 'test-session-id',
+          getUsageStatisticsEnabled: () => false,
+          getChatRecordingService: () => ({
+            recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+          }),
+        } as unknown as Config;
+
+        const event = new ApiErrorEvent({
+          model: 'test-model',
+          durationMs: 100,
+          promptId,
+          errorMessage: 'test error',
+        });
+        logApiError(configWithRecording, event);
+
+        expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
+      },
+    );
+
+    it('should record to chatRecordingService for normal prompt IDs', () => {
+      const mockRecordUiTelemetryEvent = vi.fn();
+      const configWithRecording = {
+        getSessionId: () => 'test-session-id',
+        getUsageStatisticsEnabled: () => false,
+        getChatRecordingService: () => ({
+          recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+        }),
+      } as unknown as Config;
+
+      const event = new ApiErrorEvent({
+        model: 'test-model',
+        durationMs: 100,
+        promptId: 'user_query',
+        errorMessage: 'test error',
+      });
+      logApiError(configWithRecording, event);
+
+      expect(mockRecordUiTelemetryEvent).toHaveBeenCalled();
+    });
+  });
+
   describe('logApiRequest', () => {
     const mockConfig = {
       getSessionId: () => 'test-session-id',
@@ -1010,6 +1107,46 @@ describe('loggers', () => {
         },
       });
     });
+
+    it.each(['prompt_suggestion', 'forked_query', 'speculation'])(
+      'should not record to chatRecordingService when prompt_id is %s',
+      (promptId) => {
+        const mockRecordUiTelemetryEvent = vi.fn();
+        const configWithRecording = {
+          ...mockConfig,
+          getChatRecordingService: () => ({
+            recordUiTelemetryEvent: mockRecordUiTelemetryEvent,
+          }),
+        } as unknown as Config;
+
+        const call: CompletedToolCall = {
+          status: 'success',
+          request: {
+            name: 'test-function',
+            args: {},
+            callId: 'test-call-id',
+            isClientInitiated: true,
+            prompt_id: promptId,
+          },
+          response: {
+            callId: 'test-call-id',
+            responseParts: [{ text: 'ok' }],
+            resultDisplay: undefined,
+            error: undefined,
+            errorType: undefined,
+          },
+          tool: new EditTool(mockConfig),
+          invocation: {} as AnyToolInvocation,
+          durationMs: 50,
+          outcome: ToolConfirmationOutcome.ProceedOnce,
+        };
+        const event = new ToolCallEvent(call);
+        logToolCall(configWithRecording, event);
+
+        expect(mockRecordUiTelemetryEvent).not.toHaveBeenCalled();
+        expect(mockUiEvent.addEvent).toHaveBeenCalled();
+      },
+    );
   });
 
   describe('logMalformedJsonResponse', () => {
diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts
index b7c18c9d3..6cd706799 100644
--- a/packages/core/src/telemetry/loggers.ts
+++ b/packages/core/src/telemetry/loggers.ts
@@ -8,6 +8,7 @@ import type { LogAttributes, LogRecord } from '@opentelemetry/api-logs';
 import { logs } from '@opentelemetry/api-logs';
 import { SemanticAttributes } from '@opentelemetry/semantic-conventions';
 import type { Config } from '../config/config.js';
+import { isInternalPromptId } from '../utils/internalPromptIds.js';
 import { safeJsonStringify } from '../utils/safeJsonStringify.js';
 import {
   EVENT_API_ERROR,
@@ -214,7 +215,9 @@ export function logToolCall(config: Config, event: ToolCallEvent): void {
     'event.timestamp': new Date().toISOString(),
   } as UiEvent;
   uiTelemetryService.addEvent(uiEvent);
-  config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  if (!isInternalPromptId(event.prompt_id)) {
+    config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  }
   QwenLogger.getInstance(config)?.logToolCallEvent(event);
 
   if (!isTelemetrySdkInitialized()) return;
@@ -382,7 +385,9 @@ export function logApiError(config: Config, event: ApiErrorEvent): void {
     'event.timestamp': new Date().toISOString(),
   } as UiEvent;
   uiTelemetryService.addEvent(uiEvent);
-  config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  if (!isInternalPromptId(event.prompt_id)) {
+    config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  }
   QwenLogger.getInstance(config)?.logApiErrorEvent(event);
 
   if (!isTelemetrySdkInitialized()) return;
@@ -449,7 +454,9 @@ export function logApiResponse(config: Config, event: ApiResponseEvent): void {
     'event.timestamp': new Date().toISOString(),
   } as UiEvent;
   uiTelemetryService.addEvent(uiEvent);
-  config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  if (!isInternalPromptId(event.prompt_id)) {
+    config.getChatRecordingService()?.recordUiTelemetryEvent(uiEvent);
+  }
   QwenLogger.getInstance(config)?.logApiResponseEvent(event);
   if (!isTelemetrySdkInitialized()) return;
   const attributes: LogAttributes = {
diff --git a/packages/core/src/utils/internalPromptIds.test.ts b/packages/core/src/utils/internalPromptIds.test.ts
new file mode 100644
index 000000000..fbdfcd6ae
--- /dev/null
+++ b/packages/core/src/utils/internalPromptIds.test.ts
@@ -0,0 +1,35 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { isInternalPromptId } from './internalPromptIds.js';
+
+describe('isInternalPromptId', () => {
+  it('returns true for prompt_suggestion', () => {
+    expect(isInternalPromptId('prompt_suggestion')).toBe(true);
+  });
+
+  it('returns true for forked_query', () => {
+    expect(isInternalPromptId('forked_query')).toBe(true);
+  });
+
+  it('returns true for speculation', () => {
+    expect(isInternalPromptId('speculation')).toBe(true);
+  });
+
+  it('returns false for user_query', () => {
+    expect(isInternalPromptId('user_query')).toBe(false);
+  });
+
+  it('returns false for empty string', () => {
+    expect(isInternalPromptId('')).toBe(false);
+  });
+
+  it('returns false for arbitrary prompt ids', () => {
+    expect(isInternalPromptId('btw-prompt-id')).toBe(false);
+    expect(isInternalPromptId('context-prompt-id')).toBe(false);
+  });
+});
diff --git a/packages/core/src/utils/internalPromptIds.ts b/packages/core/src/utils/internalPromptIds.ts
new file mode 100644
index 000000000..8a178d783
--- /dev/null
+++ b/packages/core/src/utils/internalPromptIds.ts
@@ -0,0 +1,29 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Internal Prompt ID utilities
+ *
+ * Centralises the set of prompt IDs used by background operations
+ * (suggestion generation, forked queries) so that logging, recording,
+ * and UI layers can consistently recognise and filter them.
+ */
+
+/** Prompt IDs that belong to internal background operations. */
+const INTERNAL_PROMPT_IDS: ReadonlySet<string> = new Set([
+  'prompt_suggestion',
+  'forked_query',
+  'speculation',
+]);
+
+/**
+ * Returns true if the prompt_id belongs to an internal background operation
+ * whose events should not be recorded to the chatRecordingService,
+ * OpenAI logs, or other persistent stores visible in the UI.
+ *
+ * Known internal IDs: `'prompt_suggestion'`, `'forked_query'`, `'speculation'`.
+ */
+export function isInternalPromptId(promptId: string): boolean {
+  return INTERNAL_PROMPT_IDS.has(promptId);
+}
diff --git a/packages/webui/src/components/layout/InputForm.tsx b/packages/webui/src/components/layout/InputForm.tsx
index 86ffd4ef8..c1baccb6f 100644
--- a/packages/webui/src/components/layout/InputForm.tsx
+++ b/packages/webui/src/components/layout/InputForm.tsx
@@ -132,7 +132,10 @@ export interface InputFormProps {
   /** Prompt suggestion state */
   followupState?: FollowupState;
   /** Callback to accept prompt suggestion */
-  onAcceptFollowup?: (method?: 'tab' | 'enter' | 'right') => void;
+  onAcceptFollowup?: (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ) => void;
   /** Callback to dismiss prompt suggestion */
   onDismissFollowup?: () => void;
 }
@@ -267,9 +270,10 @@ export const InputForm: FC<InputFormProps> = ({
       // Accept and submit prompt suggestion on Enter when input is empty
      if (hasFollowup && !inputText && followupSuggestion) {
         e.preventDefault();
-        onAcceptFollowup?.('enter');
-        // Pass suggestion text explicitly — onInputChange is async (React setState)
-        // so onSubmit cannot rely on reading inputText from the closure.
+        // Skip onAccept callback — we pass the text directly to onSubmit.
+        // Without skipOnAccept the microtask in accept() would re-insert
+        // the suggestion into the input after it was already cleared.
+        onAcceptFollowup?.('enter', { skipOnAccept: true });
         onSubmit(e, followupSuggestion);
         return;
       }
diff --git a/packages/webui/src/hooks/useFollowupSuggestions.ts b/packages/webui/src/hooks/useFollowupSuggestions.ts
index 607990484..d069b45a1 100644
--- a/packages/webui/src/hooks/useFollowupSuggestions.ts
+++ b/packages/webui/src/hooks/useFollowupSuggestions.ts
@@ -192,7 +192,12 @@ export interface UseFollowupSuggestionsReturn {
   state: FollowupState;
   getPlaceholder: (defaultPlaceholder: string) => string;
   setSuggestion: (text: string | null) => void;
-  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  /** Accept the current suggestion */
+  accept: (
+    method?: 'tab' | 'enter' | 'right',
+    options?: { skipOnAccept?: boolean },
+  ) => void;
+  /** Dismiss the current suggestion */
   dismiss: () => void;
   clear: () => void;
 }
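Reviewer note: the same accept signature now appears in the core controller, the CLI hook, and the webui hook. A shared alias (hypothetical, not introduced by this PR) would keep the three declarations from drifting:

```ts
// Hypothetical consolidation; each package currently re-declares this inline.
export type AcceptSuggestion = (
  method?: 'tab' | 'enter' | 'right',
  options?: { skipOnAccept?: boolean },
) => void;
```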