From 3bce84d5da97f5ae7ffd1e6be80211b7d2ac579b Mon Sep 17 00:00:00 2001
From: Shaojin Wen <shaojin.wensj@alibaba-inc.com>
Date: Fri, 3 Apr 2026 20:07:23 +0800
Subject: [PATCH] feat(cli, webui): add follow-up suggestions feature (#2525)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(cli, webui): add follow-up suggestions feature

Implement context-aware follow-up suggestions that appear after task
completion, suggesting relevant next actions like "commit this", "run
tests", etc.

Core:
- Add `followup/` module with types, generator, and rule-based provider
- Export follow-up types and functions from core index
- 8 default suggestion rules covering common workflows

CLI:
- Add `useFollowupSuggestionsCLI` hook for Ink/React
- Integrate suggestion generation in AppContainer when streaming completes
- Add Tab key to accept, arrow keys to cycle through suggestions
- Display suggestions as ghost text in input prompt

WebUI:
- Add `useFollowupSuggestions` hook for React
- Update InputForm to display suggestions as placeholder
- Add CSS styling for suggestion appearance with counter
- Add keyboard handlers (Tab, arrow keys)

Behavior:
- After streaming completes with tool calls, suggestions appear
- Tab accepts the current suggestion
- Left/Right arrows cycle through multiple suggestions
- Typing or pasting dismisses the suggestion

Known limitations:
- Shell command rules (tests, git, npm install) don't work yet due to
  history not storing tool arguments
- VSCode extension integration pending
- Web UI needs parent app integration for suggestion generation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: resolve merge conflicts and build errors

- Rebased on upstream main (5d02260c8)
- Fixed JSX structure in InputPrompt.tsx
- Changed `return;` to `return true;` in follow-up handlers
- Added @agentclientprotocol/sdk to core package dependencies
- Restored correct BaseTextInput usage (self-closing, no children)
- Follow-up suggestions now shown via placeholder prop only

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: remove @agentclientprotocol/sdk from core package.json

The types are imported in fileSystemService.ts but the package
should not be a runtime dependency of core. It's provided by
the CLI package which depends on core. This was causing
package-lock.json sync issues on Node.js 24.x CI.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: restore alphabetical order of dependencies in core/package.json

* fix: restore package-lock.json from upstream to fix Node 24.x CI

* fix: resolve acpConnection test failure and ESLint warning

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* style: apply prettier formatting after merge

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* fix(followup): address review issues in follow-up suggestions

- Export followupState.ts from core index (was dead code)
- Refactor CLI and WebUI hooks to use shared followupReducers (eliminate duplication)
- Move side effects out of setState updaters via queueMicrotask
- Fix AppContainer useEffect dependency on unstable historyManager.history reference
- Reorder matchesRule to check pattern before condition (cheaper first)
- Make RuleBasedProvider collect from all matching rules with dedup and limit
- Add missing resetGenerator export for testing
- Add explicit implements SuggestionProvider to RuleBasedProvider
- Fix unstable followup object in useEffect dependency arrays
- Merge duplicate imports to fix eslint import/no-duplicates warnings
- Standardize copyright year to 2025
- Add test files for followupState, ruleBasedProvider, suggestionGenerator

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): address review feedback from PR #2525

- Fix acceptingRef race: set lock synchronously before queueMicrotask
- Derive hasError/wasCancelled from actual tool call statuses
- Incorporate rule priority into suggestion priority calculation
- Clear suggestions immediately when setSuggestions([]) is called
- Add !completion.showSuggestions guard to Tab handler
- Fix onAcceptFollowup type from (string) => void to () => void
- Fix ToolCallInfo.name doc examples to match display names
- Scope CSS counter ::after to data-has-suggestion + empty conditions
- Reset regex lastIndex before test() for g/y flag safety
- Stabilize hook return with useMemo + onAcceptRef pattern
- Add @qwen-code/qwen-code-core as webui external + peerDependency

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): address second round of review feedback

- Scope CSS max-width to match counter condition (not count=1)
- Only dismiss followup on printable character input, not navigation keys
- Restrict tool_group scan to most recent contiguous block (current turn)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): clear suggestions on new turn, add search guards

- Clear followupSuggestions when streaming starts (Idle → Responding)
  to prevent stale suggestions from previous turns
- Add !reverseSearchActive && !commandSearchActive guards to Tab handler
  to avoid keybinding conflicts with search modes

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): address third round of review feedback

- Fix string pattern asymmetry: only match tool names when matchMessage=false
- Collect tool_groups from last user message boundary, not contiguous tail
- Flatten to individual tool calls before slicing to cap at 10 actual calls

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): fix arrow cycling guard and align rule conditions with patterns

- Remove unreliable textContent check for arrow cycling in WebUI InputForm;
  rely on inputText state which already accounts for zero-width spaces
- Add 'error' to fix/bug rule condition to match its regex pattern
- Add 'clean up' to refactor rule condition to match its regex pattern

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): reset acceptingRef in clear() to prevent deadlock

If clear() is called during accept debounce window, acceptingRef
could remain stuck true permanently. Now reset in clear().

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): cancel pending timeout in dismiss() and accept()

Prevents stale suggestion timeout from re-showing suggestions
after user dismisses or accepts during the 300ms delay window.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): reset lastIndex in removeRules() for g/y flag safety

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(vscode-ide-companion): mark @qwen-code/qwen-code-core as external in webview esbuild

The webui package now declares @qwen-code/qwen-code-core as external in its
vite build config. Without this change, the vscode-ide-companion webview
esbuild (platform: 'browser') would try to bundle core's Node.js-only
dependencies (undici, @grpc/grpc-js, fs, stream, etc.), causing 562 build
errors during `npm ci`.

* fix: restore node_modules/@google/gemini-cli-test-utils workspace link in lockfile

The top-level workspace symlink entry was accidentally removed by a local
npm install in commit 004baaeb, which replaced it with a nested
packages/cli/node_modules/ entry. npm ci requires the top-level link entry
to be present in the lockfile, otherwise it fails with:
  "Missing: @google/gemini-cli-test-utils@0.13.0 from lock file"

Also syncs @qwen-code/qwen-code-core peerDependency into the lockfile
to match the updated packages/webui/package.json.

* refactor(followup): extract controller and improve rule matching

- Extract createFollowupController for unified state management across CLI and WebUI
- Refactor rule-based provider to match via assistant message keywords instead of tool arguments
- Add enableFollowupSuggestions user setting in UI category
- Decouple WebUI from @qwen-code/qwen-code-core by copying browser-safe state logic
- Add followupHistory.ts for extracting suggestion context from CLI history
- Add comprehensive tests for controller and rule matching scenarios
- Use --app-primary CSS variable for consistency

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* refactor(webui): import followup state from core package

- Remove followupState.ts from webui (moved to core)
- Import FollowupSuggestion, FollowupState types from core
- Add @qwen-code/qwen-code-core as peerDependency
- Add core to vite external list
- Update test to include id field in HistoryItem

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

* refactor(followup): simplify generator, revert unrelated changes

- Collapse FollowupSuggestionsGenerator class into a single
  generateFollowupSuggestions() function (152 → 26 lines)
- Inline extractSuggestionContext into followupHistory.ts
- Remove unused RuleBasedProvider.addRule/removeRules methods
- Revert unrelated acpConnection.test.ts refactor
- Fix followupHistory.test.ts HistoryItem missing id field
- Reduce test verbosity (162 → 36 lines for generator tests)

* fix(followup): fix accept() deadlock and restore UMD globals mapping

- Wrap queueMicrotask callback in try/catch/finally to prevent accepting
  lock from being permanently held when onAccept throws
- Restore '@qwen-code/qwen-code-core': 'QwenCodeCore' in webui
  vite.config.ts globals (regression from d0f38a5f)
- Add test case verifying accept() recovers after callback exception

* fix(followup): log accept callback errors instead of swallowing them

Replace empty catch {} with console.error to ensure onAccept errors
remain visible for debugging while still preventing deadlock via finally.
Update test to verify error is logged.

* refactor(webui): move followup hook to separate subpath entry

Move useFollowupSuggestions from the root entry to a dedicated
'@qwen-code/webui/followup' subpath so that consumers who only need
UI components are not forced to install @qwen-code/qwen-code-core.

- Add src/followup.ts as separate Vite lib entry
- Remove followup exports from src/index.ts
- Add ./followup exports map in package.json
- Mark @qwen-code/qwen-code-core as optional peerDependency
- Switch build from single-entry UMD to multi-entry ESM/CJS

* fix(webui): restore UMD build and isolate core from root type boundary

- Restore UMD output for root entry (used by CDN demos, export-html, etc.)
- Build followup subpath via separate vite.config.followup.ts to avoid
  Vite's multi-entry + UMD limitation
- Replace FollowupState import in InputForm.tsx with a local structural
  type (InputFormFollowupState) so root .d.ts no longer references
  @qwen-code/qwen-code-core
- Root entry (JS + UMD + .d.ts) is now fully free of core dependency;
  core is only required by '@qwen-code/webui/followup' subpath

* refactor(followup): replace rule-based suggestions with LLM-based prompt suggestion

Replace the hardcoded rule-based follow-up suggestion engine with an LLM-based
prompt suggestion system, aligned with Claude Code's NES (Next-step Suggestion)
architecture.

Core changes:
- Replace ruleBasedProvider with generatePromptSuggestion using BaseLlmClient.generateJson()
- Port Claude Code's SUGGESTION_PROMPT and 14 filter rules (shouldFilterSuggestion)
- Simplify state from multi-suggestion array to single string (FollowupState)
- Add framework-agnostic controller with Object.freeze'd initial state

Guard conditions (9 checks):
- Settings toggle, non-interactive/SDK mode, plan mode
- Permission/confirmation/loop-detection dialogs, elicitation requests
- API error response detection, conversation history limit (slice -40)

UI interaction (CLI + WebUI):
- Tab: fill suggestion into input
- Enter: accept and submit
- Right Arrow: fill without submitting
- Typing/paste: dismiss suggestion
- Autocomplete conflict prevention

Telemetry (PromptSuggestionEvent):
- outcome (accepted/ignored/suppressed), accept_method (tab/enter/right)
- time_to_accept_ms, time_to_ignore_ms, time_to_first_keystroke_ms
- suggestion_length, similarity, was_focused_when_shown, prompt_id
- Per-rule suppression logging with reason strings

Deleted files:
- ruleBasedProvider.ts/test, followupHistory.ts/test, types.ts (dead FollowupSuggestion type)

13 rounds of adversarial audit, 17 issues found and fixed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): address qwen3.6-plus-preview review findings

P0: Fix API error detection — check pendingGeminiHistoryItems for error
items (API errors go to pending items, not historyManager.history).

P1: Don't log abort as 'error' in telemetry — aborts are normal user
behavior (user started typing), not errors.

P3: Early return in dismiss() when state already cleared, avoiding
redundant applyState call after accept().

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(settings): update suggestion feature description to match current behavior

Remove outdated "arrow keys to cycle" text — the feature now uses
Tab/Right Arrow to accept and Enter to accept+submit (no cycling).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): fix WebUI Enter submitting empty text + defend onOutcome

P0/P1: WebUI Enter handler now passes suggestion text explicitly via
onSubmit(e, followupSuggestion) instead of relying on React setState
(which is async and would leave inputText as "" in the closure).

P3: Wrap onOutcome callbacks in try/catch in both accept() and dismiss()
so telemetry errors cannot block state transitions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): allow setSuggestion(null) when disabled + fix dts clobber

- setSuggestion(null) now always clears state/timers even when disabled,
  preventing stale suggestions from lingering after feature toggle.
- Set insertTypesEntry: false in followup vite config to prevent
  overwriting the main build's index.d.ts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(webui): thread explicitText through submit chain for Enter accept

handleSubmit and handleSubmitWithScroll now accept an optional
explicitText parameter. When provided (e.g., from prompt suggestion
Enter accept), it is used instead of the closure-captured inputText,
fixing the React setState race where onSubmit reads stale empty text.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): address Copilot review — 4 fixes

- Enter accept: use buffer.text.length === 0 instead of !trim() to
  prevent whitespace-only input from triggering suggestion accept
- Move ref tracking from render body to useEffect to avoid
  render-time side effects in StrictMode/concurrent rendering
- Align PromptSuggestionEvent event.name to 'qwen-code.prompt_suggestion'
  matching the EVENT_PROMPT_SUGGESTION constant used by the logger
- Fix onOutcome JSDoc: remove mention of 'suppressed' (handled separately)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): address Copilot review — curated history, type compat, peer version

- Use curated history (getChat().getHistory(true)) to avoid invalid
  entries causing API 400 errors in suggestion generation
- Use method signature for onSubmit in InputFormProps to maintain
  bivariant compatibility with existing consumers under strictFunctionTypes
- Tighten @qwen-code/qwen-code-core peer dependency to >=0.13.1

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(followup): add prompt cache sharing + speculation engine

Phase 1 — Forked Query (cache sharing):
- CacheSafeParams: snapshot of generationConfig (systemInstruction + tools)
  + curated history + model + version, saved after each successful main turn
- createForkedChat: isolated GeminiChat sharing the same cache prefix for
  DashScope cache_control hit
- runForkedQuery: single-turn request via forked chat with JSON schema support
- suggestionGenerator: uses forked query when CacheSafeParams available,
  falls back to BaseLlmClient.generateJson otherwise
- GeminiChat.getGenerationConfig(): new getter for cache param snapshots
- Feature flag: enableCacheSharing (default: false)

Phase 2 — Speculation (predictive execution):
- OverlayFs: copy-on-write filesystem for speculation file isolation
  (/tmp/qwen-speculation/{pid}/{id}/), handles new files + existing files
- speculationToolGate: tool boundary enforcement using AST-based shell
  checker (not deprecated regex), write tools gated by ApprovalMode
  (only auto-edit/yolo allow overlay writes)
- speculation.ts: startSpeculation (on suggestion display), acceptSpeculation
  (on Tab/Enter — copies overlay to real FS, injects history via addHistory),
  abortSpeculation (on user input/new turn — cleanup overlay)
- Custom execution loop: toolRegistry.getTool → tool.build → invocation.execute
  (bypasses CoreToolScheduler — permission handled by toolGate)
- ensureToolResultPairing: strips unpaired functionCalls at boundary
- Boundary-aware tool result preservation: keeps executed tool results
  even when boundary truncates remaining calls
- Feature flag: enableSpeculation (default: false)

Telemetry:
- SpeculationEvent: outcome, turns_used, files_written, tool_use_count,
  duration_ms, boundary_type, had_pipelined_suggestion
- logSpeculation logger function

Security:
- Write tools only allowed in auto-edit/yolo mode during speculation
- Shell commands gated by isShellCommandReadOnlyAST (AST parser)
- Unknown/MCP tools always hit boundary (safe default)
- structuredClone is used throughout for cache param isolation

4 rounds of adversarial audit, 20+ issues found and fixed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): address Copilot review — boundary tools, overlay reads, cache sharing wiring

- Move web_fetch/web_search from SAFE_READ_ONLY to BOUNDARY tools
  (they require user confirmation for network requests)
- Add overlay read path resolution for read tools (resolveReadPaths)
  so speculative reads see overlay-written files
- Wire enableCacheSharing setting into generatePromptSuggestion
- Fix esbuild comment to not hardcode webui version

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(speculation): use index-based tracking for boundary tool pairing

Track executed function calls by position (the first N calls, where N is
functionResponses.length) instead of by name. Fixes incorrect
pairing when the model emits multiple calls with the same tool name.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(speculation): handle undefined functionCall.name + wrap rewritePathArgs

- Skip functionCall parts with missing name instead of non-null assertion
- Wrap rewritePathArgs in try/catch — treat path rewrite failure as boundary

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(followup): pipelined suggestion, UI rendering, dismiss abort

- Pipelined suggestion: after speculation completes, generate next
  suggestion using augmented context. Promoted on accept.
- UI rendering: completed speculation results rendered via historyManager.
- Dismiss abort: typing/pasting calls dismissPromptSuggestion → clears
  promptSuggestion → useEffect aborts running speculation immediately.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): clear cache on reset, truncate history, fix test + comment

- Clear CacheSafeParams on startChat/resetChat to prevent cross-session leakage
- Truncate history to 40 entries before deep clone in saveCacheSafeParams
  to reduce CPU/memory overhead on long sessions
- Update stale comment about speculation dismiss lifecycle
- Add onAccept assertion to accept test with proper microtask flush

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs(design): add prompt suggestion design documentation

- prompt-suggestion-design.md: architecture, generation, filtering, state
  management, keyboard interaction, telemetry, feature flags
- speculation-design.md: copy-on-write overlay, tool gate security, boundary
  handling, pipelined suggestion, forked query cache sharing
- prompt-suggestion-implementation.md: implementation status, test coverage,
  audit history, Claude Code alignment tracking

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(overlay): align catch comment with silent behavior

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): wire augmented context into pipelined suggestion + guard Tab/Right

- Pipelined suggestion now includes the accepted suggestion text and
  speculated model response as context for the next prediction
- Tab/ArrowRight handlers only preventDefault when onAcceptFollowup
  is provided, preventing key interception without a wired callback

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(speculation): filter thought parts + add filePath to path keys

- Skip thought/reasoning parts from model responses to prevent leaking
  internal reasoning into speculated history
- Add 'filePath' to path rewrite key list for LSP and other tools that
  use camelCase argument names

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(overlay): resolve relative paths against realCwd not process.cwd

Relative tool paths are now resolved against the overlay's realCwd
before computing the relative path, preventing incorrect outside-cwd
detection when process.cwd() differs from config.getCwd().

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs(design): fix 4 doc-code inconsistencies

- Guard conditions: clarify 13 code checks vs 11 table categories,
  separate feature flags from guard block, add streaming transition
- Filter rules: 14 → 12 (actual count in code and table)
- BOUNDARY_TOOLS: add todo_write + exit_plan_mode to doc table
- SpeculationEvent: 8 → 7 fields (matching code)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): turns_used metric + reuse SUGGESTION_PROMPT + reduce clones

- turns_used: count only model messages (not all Content entries)
  to accurately reflect LLM round-trips instead of an inflated 3x count
- Pipelined suggestion: reuse exported SUGGESTION_PROMPT from
  suggestionGenerator instead of a degraded local copy, ensuring
  consistent quality (EXAMPLES, NEVER SUGGEST rules included)
- createForkedChat: replace redundant structuredClone with shallow
  copies since params are already deep-cloned snapshots

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(followup): speculation UI tool rendering + speculationModel setting

- Speculation UI: render tool calls as tool_group HistoryItems with
  structured name/description/result instead of plain text only
- speculationModel setting: allows using a cheaper/faster model for
  speculation and pipelined suggestion. Leave empty to use main model.
  Passed through startSpeculation → runSpeculativeLoop → pipelined.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs(design): sync docs with latest code changes

- Add speculationModel setting to feature flags table
- Document tool_group UI rendering in speculation accept flow
- Fix createForkedChat: deep clone → shallow copy (already cloned snapshots)
- Document pipelined suggestion SUGGESTION_PROMPT reuse
- Add Model Override and UI Rendering sections to speculation-design
- Update line counts to match actual file sizes

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* test(followup): add unit tests for overlayFs, toolGate, forkedQuery

overlayFs (15 tests): COW write, read resolution, apply, cleanup, path traversal
speculationToolGate (24 tests): tool categories, approval mode gating, shell AST, path rewrite
forkedQuery (6 tests): cache params save/get/clear, deep clone, version detection

Total: 27 → 173 tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* test(followup): P0-P2 test coverage for speculation + controller + toolGate

speculation.test.ts (7 tests):
- ensureToolResultPairing: empty, no calls, paired, unpaired text+call,
  unpaired call-only, user-ending, empty parts

followupState.test.ts (+8 tests = 15 total):
- onOutcome: accepted/tab, ignored/dismiss, error caught, no-op when cleared
- clear(): resets accepting lock allowing re-accept
- double accept blocked by debounce
- setSuggestion replaces pending timer

speculationToolGate.test.ts (+3 tests = 27 total):
- resolveReadPaths: overlay path after write, unchanged when not written
- rewritePathArgs: path key coverage

Total: 173 → 190 tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* test(followup): smoke tests + P0-P2 coverage gaps

smoke.test.ts (21 tests): E2E verification across modules
- Filter against realistic LLM outputs (9 good + 7 bad + reason check)
- OverlayFs full round-trip (write → read → apply → verify)
- ToolGate → OverlayFs integration (write redirect → read resolve)
- CacheSafeParams lifecycle (save → mutate → isolation → clear)
- ensureToolResultPairing orphaned functionCalls

followupState.test.ts (+8 tests):
- onOutcome: accepted/tab, ignored/dismiss, error caught, no-op cleared
- clear(): resets accepting lock
- double accept debounce
- setSuggestion replaces pending timer

speculationToolGate.test.ts (+3 tests):
- resolveReadPaths through overlay after write
- path key coverage for rewritePathArgs

Export ensureToolResultPairing for testing.

Total: 190 → 211 tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): dismiss aborts suggestion, boundary skip inject, parentSignal check

- dismissPromptSuggestion now also aborts suggestionAbortRef to prevent
  race between dismiss and in-flight startSpeculation
- Boundary speculation: skip acceptSpeculation (which injects history),
  fall through to normal addMessage to avoid duplicate user turns
- startSpeculation: check parentSignal.aborted upfront before starting
- Speculation rendering: use an index-based loop instead of O(n²) indexOf lookups

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs(design): fix speculation accept diagram — boundary skips inject

The architecture diagram now shows the branching logic: completed
speculations go through acceptSpeculation (inject + render), while
boundary speculations are discarded and the query is submitted fresh
via addMessage.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(followup): enable cache sharing by default

enableCacheSharing now defaults to true. This is a pure cost
optimization with no behavioral change — suggestion generation
uses the forked query path (sharing the main conversation's
prompt cache prefix) when CacheSafeParams are available.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): aborted parent skips loop, acceptSpeculation try/finally, doc sync

- startSpeculation: return aborted state immediately when parentSignal
  is already aborted, without creating overlay or starting loop
- acceptSpeculation: wrap in try/finally to guarantee overlay cleanup
  even if applyToReal or addHistory throws
- Doc: enableCacheSharing default false → true (matches code)
- Doc: update test count table (7 → 15 followupState, add 6 new files)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): remove debug logs, add function calling fallback for non-FC models

- Remove all followup-debug process.stderr.write logs
- Add direct text fallback in generateViaBaseLlm when generateJson
  returns {} (model doesn't support function calling, e.g., glm-5.1)
- Add CJK text support in filter: skip whitespace-based word count
  for Chinese/Japanese/Korean text, use character count instead

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(followup): add suggestionModel setting for faster suggestion generation

New setting `suggestionModel` allows using a smaller/faster model
(e.g., qwen-turbo) for prompt suggestion generation instead of the
main conversation model. Reduces suggestion latency significantly.

Passed through: settings → AppContainer → generatePromptSuggestion
→ generateViaForkedQuery / generateViaBaseLlm (both paths).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(followup): suggestionModel setting, /stats tracking, /about display

- suggestionModel: new setting to use a faster model for suggestion
  generation (e.g., qwen3.5-flash instead of main model glm-5.1)
- /stats: suggestion API calls now report usage to UiTelemetryService
  so token consumption appears in /stats model breakdown
- /about: shows Suggestion Model field (configured or main model)

Also:
- Function calling fallback for non-FC models (direct text generation)
- CJK text support in word count filter (character-based for Chinese)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* i18n: add Suggestion Model translations for /about display

en: Suggestion Model | zh: 建议模型 | ja: 提案モデル
de: Vorschlagsmodell | pt: Modelo de Sugestão | ru: Модель предложений

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): always use generateContent for suggestion (not generateJson)

generateJson doesn't expose usageMetadata, so /stats can't track
suggestion model tokens. Switch to direct generateContent which
always returns usage data. Also simplifies the code by removing
the function-calling + fallback dual path.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): fix /stats tracking — use ApiResponseEvent constructor

Use ApiResponseEvent class constructor with proper response_id and
override event.name to match UiEvent type for UiTelemetryService
switch statement. This ensures suggestion model token usage appears
in /stats model output.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* i18n: fix Chinese translation for Suggestion Model

"建议模型" → "提示建议模型" to avoid ambiguity.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor(followup): merge suggestionModel + speculationModel into fastModel

Single unified setting for all background tasks: suggestion generation,
speculation, pipelined suggestions, and future background tasks.

Users only need to understand one concept: main model for conversation,
fast model for background tasks.

- Remove: suggestionModel, speculationModel
- Add: fastModel (ui.fastModel in settings.json)
- Update /about display: "Fast Model" with i18n translations
- Update all 6 locale files (en/zh/ja/de/pt/ru)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor(settings): move fastModel to top-level (parallel to model)

fastModel is an independent model concept, not a property of the
main model. Move from model.fastModel to top-level settings.fastModel.

Config: { "fastModel": "qwen3.5-flash", "model": { "name": "glm-5.1" } }

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): report usage in both forkedQuery and baseLlm paths

The forkedQuery path (used when enableCacheSharing=true) was not
reporting token usage to UiTelemetryService, so /stats model didn't
show the fast model. Now both paths report usage.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(cli): add /model --fast command to set fast model

Usage:
  /model --fast qwen3.5-flash  — set fast model
  /model --fast                — show current fast model
  /model                      — open model selection dialog (unchanged)

Saves to user settings (SettingScope.User).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs(design): update to fastModel (replace suggestionModel/speculationModel)

- prompt-suggestion-design.md: speculationModel → fastModel (top-level)
- speculation-design.md: Model Override → Fast Model, update description
- prompt-suggestion-implementation.md: update settings description

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(cli): /model --fast opens model selection dialog for fast model

When called without a model name, /model --fast now opens the same
model selection dialog used by /model, but selecting a model saves
it as fastModel instead of switching the main model.

- useModelCommand: add isFastModelMode state
- ModelDialog: intercept selection in fast model mode, save to fastModel
- DialogManager: pass isFastModelMode prop to ModelDialog
- types.ts: add 'fast-model' dialog type

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): pass resolved model (not undefined) to runForkedQuery

model: modelOverride → model: model (which has the fallback applied)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(cli): /model --fast defaults to current fast model in dialog

When opening the model selection dialog via /model --fast, the
currently configured fastModel is pre-selected instead of the
main model.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(cli): add --fast tab completion for /model command

/model <Tab> now shows --fast as a completion option with description.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(schema): regenerate settings.schema.json with new followup settings

Adds enableCacheSharing, enableSpeculation, and fastModel to the
generated JSON schema so CI validation passes.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(test): update tests for new Fast Model field in system info

Add "Fast Model" to expected labels in systemInfoFields and bugCommand
tests to match the new field added to /about and bug report output.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* ci: trigger PR synchronize event

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: address Copilot review comments (batch 4)

- modelCommand: use getPersistScopeForModelSelection for fastModel,
  return meaningful info message instead of empty content
- ModelDialog: handle $runtime|authType|modelId format in fast-model mode
- forkedQuery: return structuredClone from getCacheSafeParams
- client: fix stale comment about history truncation order
- speculation: detect abort in .then() handler, set 'aborted' status
  and cleanup overlay to prevent leaks
- docs: update test count table

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs(users): add followup suggestions user manual

- New feature page: followup-suggestions.md covering usage, keybindings,
  fast model configuration, settings, and quality filters
- commands.md: add /model --fast command reference
- settings.md: add enableFollowupSuggestions, enableCacheSharing,
  enableSpeculation, and fastModel settings documentation
- _meta.ts: register new page in navigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs(users): audit fixes for followup suggestions documentation

- followup-suggestions.md: add 300ms delay, WebUI support, plan mode
  guard, non-interactive guard, slash commands as single-word, meta/error
  filters, character limit
- settings.md: move fastModel next to model section, add /model --fast
  cross-reference and link to feature page
- overview.md: add followup suggestions to feature list
- i18n: add missing translations for 'Set fast model for background
  tasks' and 'Fast model updated.' in all 6 locales

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: address Copilot review comments (batch 5)

- modelCommand: remove duplicate info message (keep addItem only)
- followup-suggestions.md: clarify WebUI requires host app wiring
- speculation-design.md: fix abort telemetry description
- i18n: add missing translations for fast model strings

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(cli): remove duplicate message in /model --fast command

Use return message instead of addItem + empty return to avoid
blank INFO line in history. Also handle missing settings service.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(i18n): remove unused 'Fast model updated.' translations

The /model --fast command now returns the model name directly
instead of using this string. Remove dead translations.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(followup): disable thinking mode for suggestion and speculation

Forked queries inherit the main conversation's generationConfig which
may have thinkingConfig enabled. This wastes tokens and adds latency
for background tasks that don't need reasoning. Explicitly set
thinkingConfig.includeThoughts=false in both paths:
- createForkedChat (covers forked query + speculation)
- generateViaBaseLlm (non-cache-sharing fallback)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* docs: document thinking mode auto-disable for background tasks

- User docs: note that thinking is auto-disabled for suggestions/speculation
- Design docs: detail thinkingConfig override in both forked query and
  BaseLlm paths, explain why cache hits are unaffected

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
Co-authored-by: jinjing.zzj <jinjing.zzj@alibaba-inc.com>
Co-authored-by: yiliang114 <1204183885@qq.com>
---
 .../prompt-suggestion-design.md               | 211 +++++++
 .../prompt-suggestion-implementation.md       |  85 +++
 .../prompt-suggestion/speculation-design.md   | 215 +++++++
 docs/users/configuration/settings.md          |   9 +
 docs/users/features/_meta.ts                  |   1 +
 docs/users/features/commands.md               |  27 +-
 docs/users/features/followup-suggestions.md   | 109 ++++
 docs/users/overview.md                        |   1 +
 package-lock.json                             |   1 +
 packages/cli/src/config/settingsSchema.ts     |  41 ++
 packages/cli/src/i18n/locales/de.js           |   3 +
 packages/cli/src/i18n/locales/en.js           |   2 +
 packages/cli/src/i18n/locales/ja.js           |   3 +
 packages/cli/src/i18n/locales/pt.js           |   3 +
 packages/cli/src/i18n/locales/ru.js           |   3 +
 packages/cli/src/i18n/locales/zh.js           |   2 +
 packages/cli/src/ui/AppContainer.tsx          | 298 ++++++++-
 .../cli/src/ui/commands/bugCommand.test.ts    |   3 +
 packages/cli/src/ui/commands/modelCommand.ts  |  45 +-
 packages/cli/src/ui/commands/types.ts         |   1 +
 packages/cli/src/ui/components/Composer.tsx   |   2 +
 .../cli/src/ui/components/DialogManager.tsx   |   7 +-
 .../src/ui/components/InputPrompt.test.tsx    |  65 ++
 .../cli/src/ui/components/InputPrompt.tsx     |  95 ++-
 .../cli/src/ui/components/ModelDialog.tsx     |  52 +-
 .../cli/src/ui/contexts/UIStateContext.tsx    |   5 +
 .../cli/src/ui/hooks/slashCommandProcessor.ts |   5 +-
 .../src/ui/hooks/useFollowupSuggestions.tsx   | 162 +++++
 packages/cli/src/ui/hooks/useModelCommand.ts  |  16 +-
 packages/cli/src/utils/systemInfo.ts          |   5 +
 .../cli/src/utils/systemInfoFields.test.ts    |   1 +
 packages/cli/src/utils/systemInfoFields.ts    |   1 +
 packages/core/src/core/client.ts              |  29 +
 packages/core/src/core/geminiChat.ts          |   5 +
 .../core/src/followup/followupState.test.ts   | 312 ++++++++++
 packages/core/src/followup/followupState.ts   | 229 +++++++
 .../core/src/followup/forkedQuery.test.ts     | 115 ++++
 packages/core/src/followup/forkedQuery.ts     | 249 ++++++++
 packages/core/src/followup/index.ts           |  16 +
 packages/core/src/followup/overlayFs.test.ts  | 193 ++++++
 packages/core/src/followup/overlayFs.ts       | 140 +++++
 packages/core/src/followup/smoke.test.ts      | 181 ++++++
 .../core/src/followup/speculation.test.ts     | 113 ++++
 packages/core/src/followup/speculation.ts     | 563 ++++++++++++++++++
 .../src/followup/speculationToolGate.test.ts  | 240 ++++++++
 .../core/src/followup/speculationToolGate.ts  | 146 +++++
 .../src/followup/suggestionGenerator.test.ts  |  98 +++
 .../core/src/followup/suggestionGenerator.ts  | 367 ++++++++++++
 packages/core/src/index.ts                    |  10 +
 packages/core/src/telemetry/constants.ts      |   4 +
 packages/core/src/telemetry/loggers.ts        |  80 +++
 packages/core/src/telemetry/types.ts          |  73 +++
 packages/vscode-ide-companion/esbuild.js      |   5 +
 .../schemas/settings.schema.json              |  20 +
 .../vscode-ide-companion/src/webview/App.tsx  |   4 +-
 .../src/webview/hooks/useMessageSubmit.ts     |  13 +-
 packages/webui/package.json                   |  13 +-
 .../webui/src/components/layout/InputForm.tsx |  81 ++-
 packages/webui/src/followup.ts                |  19 +
 .../webui/src/hooks/useFollowupSuggestions.ts | 121 ++++
 packages/webui/src/index.ts                   |   2 +
 packages/webui/src/styles/components.css      |  16 +
 packages/webui/vite.config.followup.ts        |  52 ++
 packages/webui/vite.config.ts                 |   4 +
 64 files changed, 4951 insertions(+), 41 deletions(-)
 create mode 100644 docs/design/prompt-suggestion/prompt-suggestion-design.md
 create mode 100644 docs/design/prompt-suggestion/prompt-suggestion-implementation.md
 create mode 100644 docs/design/prompt-suggestion/speculation-design.md
 create mode 100644 docs/users/features/followup-suggestions.md
 create mode 100644 packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
 create mode 100644 packages/core/src/followup/followupState.test.ts
 create mode 100644 packages/core/src/followup/followupState.ts
 create mode 100644 packages/core/src/followup/forkedQuery.test.ts
 create mode 100644 packages/core/src/followup/forkedQuery.ts
 create mode 100644 packages/core/src/followup/index.ts
 create mode 100644 packages/core/src/followup/overlayFs.test.ts
 create mode 100644 packages/core/src/followup/overlayFs.ts
 create mode 100644 packages/core/src/followup/smoke.test.ts
 create mode 100644 packages/core/src/followup/speculation.test.ts
 create mode 100644 packages/core/src/followup/speculation.ts
 create mode 100644 packages/core/src/followup/speculationToolGate.test.ts
 create mode 100644 packages/core/src/followup/speculationToolGate.ts
 create mode 100644 packages/core/src/followup/suggestionGenerator.test.ts
 create mode 100644 packages/core/src/followup/suggestionGenerator.ts
 create mode 100644 packages/webui/src/followup.ts
 create mode 100644 packages/webui/src/hooks/useFollowupSuggestions.ts
 create mode 100644 packages/webui/vite.config.followup.ts

diff --git a/docs/design/prompt-suggestion/prompt-suggestion-design.md b/docs/design/prompt-suggestion/prompt-suggestion-design.md
new file mode 100644
index 000000000..1636db6cf
--- /dev/null
+++ b/docs/design/prompt-suggestion/prompt-suggestion-design.md
@@ -0,0 +1,211 @@
+# Prompt Suggestion (NES) Design
+
+> Predicts what the user would naturally type next after the AI completes a response, showing it as ghost text in the input prompt.
+>
+> Implementation status: `prompt-suggestion-implementation.md`. Speculation engine: `speculation-design.md`.
+
+## Overview
+
+A **prompt suggestion** (Next-step Suggestion / NES) is a short prediction (2-12 words) of the user's next input, generated by an LLM call after each AI response. It appears as ghost text in the input prompt. The user can accept it with Tab/Enter/Right Arrow or dismiss it by typing.
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  AppContainer (CLI)                                         │
+│                                                             │
+│  Responding → Idle transition                               │
+│       │                                                     │
+│       ▼                                                     │
+│  ┌─────────────────────────────────────────────────────┐    │
+│  │  Guard Conditions (11 categories)                   │    │
+│  │  settings, interactive, sdk, plan mode, dialogs,    │    │
+│  │  elicitation, API error                             │    │
+│  └────────────────────┬────────────────────────────────┘    │
+│                       │                                     │
+│                       ▼                                     │
+│  ┌─────────────────────────────────────────────────────┐    │
+│  │  generatePromptSuggestion()                         │    │
+│  │                                                     │    │
+│  │  ┌─── CacheSafeParams available? ───┐               │    │
+│  │  │                                  │               │    │
+│  │  ▼ YES                         NO ▼                 │    │
+│  │  runForkedQuery()      BaseLlmClient.generateJson() │    │
+│  │  (cache-aware)         (standalone fallback)        │    │
+│  │                                                     │    │
+│  │  ──── SUGGESTION_PROMPT ────                        │    │
+│  │  ──── 12 filter rules ──────                        │    │
+│  │  ──── getFilterReason() ────                        │    │
+│  └────────────────────┬────────────────────────────────┘    │
+│                       │                                     │
+│                       ▼                                     │
+│  ┌─────────────────────────────────────────────────────┐    │
+│  │  FollowupController (framework-agnostic)            │    │
+│  │  300ms delay → show as ghost text                   │    │
+│  │                                                     │    │
+│  │  Tab    → accept (fill input)                       │    │
+│  │  Enter  → accept + submit                           │    │
+│  │  Right  → accept (fill input)                       │    │
+│  │  Type   → dismiss + abort speculation               │    │
+│  └─────────────────────────────────────────────────────┘    │
+│                                                             │
+│  ┌─────────────────────────────────────────────────────┐    │
+│  │  Telemetry (PromptSuggestionEvent)                  │    │
+│  │  outcome, accept_method, timing, similarity,        │    │
+│  │  keystroke, focus, suppression reason, prompt_id    │    │
+│  └─────────────────────────────────────────────────────┘    │
+└─────────────────────────────────────────────────────────────┘
+```
+
+## Suggestion Generation
+
+### LLM Prompt
+
+```
+[SUGGESTION MODE: Suggest what the user might naturally type next.]
+
+Your job is to predict what THEY would type - not what you think they should do.
+THE TEST: Would they think "I was just about to type that"?
+
+EXAMPLES:
+User asked "fix the bug and run tests", bug is fixed → "run the tests"
+After code written → "try it out"
+Task complete, obvious follow-up → "commit this" or "push it"
+
+Format: 2-12 words, match the user's style. Or nothing.
+Reply with ONLY the suggestion, no quotes or explanation.
+```
+
+### Filter Rules (12)
+
+| Rule               | Example blocked                                  |
+| ------------------ | ------------------------------------------------ |
+| done               | "done"                                           |
+| meta_text          | "nothing found", "no suggestion", "silence"      |
+| meta_wrapped       | "(silence)", "[no suggestion]"                   |
+| error_message      | "api error: 500"                                 |
+| prefixed_label     | "Suggestion: commit"                             |
+| too_few_words      | "hmm" (but allows "yes", "commit", "push" etc.)  |
+| too_many_words     | > 12 words                                       |
+| too_long           | >= 100 chars                                     |
+| multiple_sentences | "Run tests. Then commit."                        |
+| has_formatting     | newlines, markdown bold                          |
+| evaluative         | "looks good", "thanks" (with \b word boundaries) |
+| ai_voice           | "Let me...", "I'll...", "Here's..."              |
+
+### Guard Conditions
+
+**AppContainer useEffect (13 checks in code):**
+
+| Guard                | Check                                               |
+| -------------------- | --------------------------------------------------- |
+| Settings toggle      | `enableFollowupSuggestions`                         |
+| Non-interactive      | `config.isInteractive()`                            |
+| SDK mode             | `!config.getSdkMode()`                              |
+| Streaming transition | `Responding → Idle` (2 checks)                      |
+| API error (history)  | `historyManager.history[last]?.type !== 'error'`    |
+| API error (pending)  | `!pendingGeminiHistoryItems.some(type === 'error')` |
+| Confirmation dialogs | shell + general + loop detection (3 checks)         |
+| Permission dialog    | `isPermissionsDialogOpen`                           |
+| Elicitation          | `settingInputRequests.length === 0`                 |
+| Plan mode            | `ApprovalMode.PLAN`                                 |
+
+**Inside generatePromptSuggestion():**
+
+| Guard              | Check            |
+| ------------------ | ---------------- |
+| Early conversation | `modelTurns < 2` |
+
+**Separate feature flags (not in guard block):**
+
+| Flag                 | Controls                                                 |
+| -------------------- | -------------------------------------------------------- |
+| `enableCacheSharing` | Whether to use forked query or fall back to generateJson |
+| `enableSpeculation`  | Whether to start speculation on suggestion display       |
+
+## State Management
+
+### FollowupState
+
+```typescript
+interface FollowupState {
+  suggestion: string | null;
+  isVisible: boolean;
+  shownAt: number; // timestamp for telemetry
+}
+```
+
+### FollowupController
+
+Framework-agnostic controller shared by CLI (Ink) and WebUI (React):
+
+- `setSuggestion(text)` — 300ms delayed show, null clears immediately
+- `accept(method)` — clears state, fires `onAccept` via microtask, 100ms debounce lock
+- `dismiss()` — clears state, logs `ignored` telemetry
+- `clear()` — hard reset all state + timers
+- `Object.freeze(INITIAL_FOLLOWUP_STATE)` prevents accidental mutation
+
+## Keyboard Interaction
+
+| Key         | CLI                         | WebUI                                |
+| ----------- | --------------------------- | ------------------------------------ |
+| Tab         | Fill input (no submit)      | Fill input (no submit)               |
+| Enter       | Fill + submit               | Fill + submit (`explicitText` param) |
+| Right Arrow | Fill input (no submit)      | Fill input (no submit)               |
+| Typing      | Dismiss + abort speculation | Dismiss                              |
+| Paste       | Dismiss + abort speculation | Dismiss                              |
+
+### Key Binding Note
+
+The Tab handler uses `key.name === 'tab'` explicitly (not `ACCEPT_SUGGESTION` matcher) because `ACCEPT_SUGGESTION` also matches Enter, which must fall through to the SUBMIT handler.
+
+## Telemetry
+
+### PromptSuggestionEvent
+
+| Field                      | Type                        | Description                         |
+| -------------------------- | --------------------------- | ----------------------------------- |
+| outcome                    | accepted/ignored/suppressed | Final outcome                       |
+| prompt_id                  | string                      | Default: 'user_intent'              |
+| accept_method              | tab/enter/right             | How user accepted                   |
+| time_to_accept_ms          | number                      | Time from shown to accept           |
+| time_to_ignore_ms          | number                      | Time from shown to dismiss          |
+| time_to_first_keystroke_ms | number                      | Time to first keystroke while shown |
+| suggestion_length          | number                      | Character count                     |
+| similarity                 | number                      | 1.0 for accept, 0.0 for ignore      |
+| was_focused_when_shown     | boolean                     | Terminal had focus                  |
+| reason                     | string                      | For suppressed: filter rule name    |
+
+### SpeculationEvent
+
+| Field                    | Type                    | Description               |
+| ------------------------ | ----------------------- | ------------------------- |
+| outcome                  | accepted/aborted/failed | Speculation result        |
+| turns_used               | number                  | API round-trips           |
+| files_written            | number                  | Files in overlay          |
+| tool_use_count           | number                  | Tools executed            |
+| duration_ms              | number                  | Wall-clock time           |
+| boundary_type            | string                  | What stopped speculation  |
+| had_pipelined_suggestion | boolean                 | Next suggestion generated |
+
+## Feature Flags and Settings
+
+| Setting                     | Type    | Default | Description                                                                      |
+| --------------------------- | ------- | ------- | -------------------------------------------------------------------------------- |
+| `enableFollowupSuggestions` | boolean | true    | Master toggle for prompt suggestions                                             |
+| `enableCacheSharing`        | boolean | true    | Use cache-aware forked queries                                                   |
+| `enableSpeculation`         | boolean | false   | Predictive execution engine                                                      |
+| `fastModel` (top-level)     | string  | ""      | Model for all background tasks (empty = use main model). Set via `/model --fast` |
+
+### Thinking Mode
+
+Thinking/reasoning is explicitly disabled (`thinkingConfig: { includeThoughts: false }`) for all background task paths:
+
+- **Forked query path** (`createForkedChat`) — overrides `thinkingConfig` in the cloned `generationConfig`, covering both suggestion generation and speculation
+- **BaseLlm fallback path** (`generateViaBaseLlm`) — per-request config overrides base content generator's thinking settings
+
+This is safe because:
+
+- Cache prefix is determined by systemInstruction + tools + history, not `thinkingConfig` — cache hits are unaffected
+- All backends (Gemini, OpenAI-compatible, Anthropic) handle `includeThoughts: false` by omitting the thinking field — no API errors on models without thinking support
+- Suggestion generation and speculation don't benefit from reasoning tokens
diff --git a/docs/design/prompt-suggestion/prompt-suggestion-implementation.md b/docs/design/prompt-suggestion/prompt-suggestion-implementation.md
new file mode 100644
index 000000000..72fa56773
--- /dev/null
+++ b/docs/design/prompt-suggestion/prompt-suggestion-implementation.md
@@ -0,0 +1,85 @@
+# Prompt Suggestion Implementation Status
+
+> Tracks the implementation status of the prompt suggestion (NES) feature across all packages.
+
+## Core Module (`packages/core/src/followup/`)
+
+| Component                | Status  | Lines | Description                                                   |
+| ------------------------ | ------- | ----- | ------------------------------------------------------------- |
+| `followupState.ts`       | ✅ Done | ~230  | Framework-agnostic controller with timer/debounce             |
+| `suggestionGenerator.ts` | ✅ Done | ~370  | LLM generation + 12 filter rules + forked query support       |
+| `forkedQuery.ts`         | ✅ Done | ~240  | CacheSafeParams + createForkedChat + runForkedQuery           |
+| `overlayFs.ts`           | ✅ Done | ~140  | Copy-on-write overlay filesystem                              |
+| `speculationToolGate.ts` | ✅ Done | ~150  | Tool boundary enforcement with AST shell parser               |
+| `speculation.ts`         | ✅ Done | ~540  | Speculation engine with pipelined suggestion + model override |
+
+## CLI Integration (`packages/cli/`)
+
+| Component                    | Status  | Description                                                |
+| ---------------------------- | ------- | ---------------------------------------------------------- |
+| `AppContainer.tsx`           | ✅ Done | Suggestion generation, speculation lifecycle, UI rendering |
+| `InputPrompt.tsx`            | ✅ Done | Tab/Enter/Right Arrow acceptance, dismiss + abort          |
+| `Composer.tsx`               | ✅ Done | Props threading                                            |
+| `UIStateContext.tsx`         | ✅ Done | promptSuggestion + dismissPromptSuggestion                 |
+| `useFollowupSuggestions.tsx` | ✅ Done | React hook with telemetry + keystroke tracking             |
+| `settingsSchema.ts`          | ✅ Done | 3 feature flags + fastModel setting                        |
+| `settings.schema.json`       | ✅ Done | VSCode settings schema                                     |
+
+## WebUI Integration (`packages/webui/`)
+
+| Component                   | Status  | Description                                 |
+| --------------------------- | ------- | ------------------------------------------- |
+| `InputForm.tsx`             | ✅ Done | Tab/Enter/Right Arrow + explicitText submit |
+| `useFollowupSuggestions.ts` | ✅ Done | React hook with onOutcome support           |
+| `followup.ts`               | ✅ Done | Subpath entry                               |
+| `components.css`            | ✅ Done | Ghost text styling                          |
+| `vite.config.followup.ts`   | ✅ Done | Separate build config                       |
+
+## Telemetry (`packages/core/src/telemetry/`)
+
+| Component               | Status  | Description          |
+| ----------------------- | ------- | -------------------- |
+| `PromptSuggestionEvent` | ✅ Done | 10 fields            |
+| `SpeculationEvent`      | ✅ Done | 7 fields             |
+| `logPromptSuggestion()` | ✅ Done | OpenTelemetry logger |
+| `logSpeculation()`      | ✅ Done | OpenTelemetry logger |
+
+## Test Coverage
+
+| Test File                     | Tests | Description                                                     |
+| ----------------------------- | ----- | --------------------------------------------------------------- |
+| `followupState.test.ts`       | 14    | Controller timer, debounce, accept callback, onOutcome, clear   |
+| `suggestionGenerator.test.ts` | 16    | All 12 filter rules + edge cases + false positives              |
+| `overlayFs.test.ts`           | 15    | COW write, read resolution, apply, cleanup, path traversal      |
+| `speculationToolGate.test.ts` | 27    | Tool categories, approval mode, shell AST, path rewrite         |
+| `forkedQuery.test.ts`         | 6     | Cache params save/get/clear, deep clone, version detection      |
+| `speculation.test.ts`         | 7     | ensureToolResultPairing edge cases                              |
+| `smoke.test.ts`               | 21    | Cross-module E2E: filter + overlay + toolGate + cache + pairing |
+| `InputPrompt.test.tsx`        | 4     | Tab, Enter+submit, Right Arrow, completion guard                |
+
+## Audit History
+
+| Round           | Issues Found | Issues Fixed                                             |
+| --------------- | ------------ | -------------------------------------------------------- |
+| R1-R4           | 10           | 10 (rule engine → LLM, state simplification)             |
+| R5-R6           | 2            | 2 (Enter keybinding conflict, Right Arrow telemetry)     |
+| R7-R8           | 3            | 3 (WebUI telemetry, dead type, test coverage)            |
+| R9              | 0            | — (convergence)                                          |
+| R10-R11         | 1            | 1 (historyManager dep)                                   |
+| R12-R13         | 1            | 1 (evaluative regex word boundaries)                     |
+| Phase 1+2 R1-R4 | 20+          | 20+ (permission bypass, overlay safety, race conditions) |
+| **Total**       | **37+**      | **37+**                                                  |
+
+## Claude Code Alignment
+
+| Feature                          | Alignment | Notes                                 |
+| -------------------------------- | --------- | ------------------------------------- |
+| Prompt text                      | 100%      | Identical except for brand name       |
+| 12 filter rules                  | 100%+     | \b word boundaries improvement        |
+| UI interaction (Tab/Enter/Right) | 100%      |                                       |
+| Guard conditions                 | 100%      | 13 checks                             |
+| Telemetry                        | 100%      | 10+7 fields                           |
+| Cache sharing                    | ✅        | DashScope cache_control               |
+| Speculation                      | ✅        | COW overlay + tool gating             |
+| Pipelined suggestion             | ✅        | Generated after speculation completes |
+| State management                 | 100%+     | Controller pattern, Object.freeze     |
diff --git a/docs/design/prompt-suggestion/speculation-design.md b/docs/design/prompt-suggestion/speculation-design.md
new file mode 100644
index 000000000..5a4ee2c56
--- /dev/null
+++ b/docs/design/prompt-suggestion/speculation-design.md
@@ -0,0 +1,215 @@
+# Speculation Engine Design
+
+> Speculatively executes the displayed suggestion before the user accepts it, using copy-on-write file isolation. If the speculation has completed, results appear instantly when the user accepts (Tab / Enter).
+
+## Overview
+
+When a prompt suggestion is shown, the **speculation engine** immediately starts executing it in the background using a forked GeminiChat. File writes go to a temporary overlay directory. If the user accepts the suggestion and the speculation has completed, overlay files are copied to the real filesystem and the speculated conversation is injected into the main chat history; if it stopped at a boundary instead, the speculation is discarded and the query is submitted normally. If the user types something else, the speculation is aborted and the overlay is cleaned up.
+
+## Architecture
+
+```
+User sees suggestion "commit this"
+           │
+           ▼
+┌──────────────────────────────────────────────────────────────┐
+│  startSpeculation()                                          │
+│                                                              │
+│  ┌─────────────────┐    ┌────────────────────┐               │
+│  │ Forked GeminiChat│    │  OverlayFs          │              │
+│  │ (cache-shared)   │    │  /tmp/qwen-         │              │
+│  │                  │    │   speculation/       │              │
+│  │  systemInstruction│   │   {pid}/{id}/        │              │
+│  │  + tools          │   │                      │              │
+│  │  + history prefix │   │  COW: first write    │              │
+│  │                  │    │  copies original     │              │
+│  └────────┬─────────┘    └──────────┬───────────┘             │
+│           │                         │                         │
+│           ▼                         │                         │
+│  ┌──────────────────────────────────┴──────────────────────┐  │
+│  │  Speculative Loop (max 20 turns, 100 messages)          │  │
+│  │                                                         │  │
+│  │  Model response                                         │  │
+│  │       │                                                 │  │
+│  │       ▼                                                 │  │
+│  │  ┌──────────────────────────────────────────────────┐   │  │
+│  │  │  speculationToolGate                             │   │  │
+│  │  │                                                  │   │  │
+│  │  │  Read/Grep/Glob/LS/LSP → allow (+ overlay read) │   │  │
+│  │  │  Edit/WriteFile → redirect to overlay            │   │  │
+│  │  │    (only in auto-edit/yolo mode)                 │   │  │
+│  │  │  Shell → AST check read-only? allow : boundary   │   │  │
+│  │  │  WebFetch/WebSearch → boundary                   │   │  │
+│  │  │  Agent/Skill/Memory/Ask → boundary               │   │  │
+│  │  │  Unknown/MCP → boundary                          │   │  │
+│  │  └──────────────────────────────────────────────────┘   │  │
+│  │       │                                                 │  │
+│  │       ▼                                                 │  │
+│  │  Tool execution: toolRegistry.getTool → build → execute │  │
+│  │  (bypasses CoreToolScheduler — gated by toolGate)       │  │
+│  │                                                         │  │
+│  └─────────────────────────────────────────────────────────┘  │
+│                                                              │
+│  On completion → generatePipelinedSuggestion()               │
+└──────────────────────────────────────────────────────────────┘
+           │
+           │  User presses Tab / Enter
+           ▼
+     ┌─── status === 'completed'? ───┐
+     │ YES                      NO (boundary) │
+     ▼                                ▼
+┌─────────────────────────┐  ┌────────────────────────┐
+│  acceptSpeculation()    │  │  Discard speculation    │
+│                         │  │  abort + cleanup        │
+│  1. applyToReal()       │  │  Submit query normally  │
+│  2. ensureToolPairing() │  │  (addMessage)           │
+│  3. addHistory()        │  └────────────────────────┘
+│  4. render tool_group   │
+│  5. cleanup overlay     │
+│  6. pipelined suggest   │
+└─────────────────────────┘
+           │
+           │  User types instead
+           ▼
+┌──────────────────────────────────────────────────────────────┐
+│  abortSpeculation()                                          │
+│                                                              │
+│  1. abortController.abort() — cancel LLM call               │
+│  2. overlayFs.cleanup() — delete temp directory              │
+│  3. Update speculation state (no telemetry on abort)         │
+└──────────────────────────────────────────────────────────────┘
+```
+
+## Copy-on-Write Overlay
+
+```
+Real CWD: /home/user/project/
+Overlay:  /tmp/qwen-speculation/12345/a1b2c3d4/
+
+Write to src/app.ts:
+  1. Copy /home/user/project/src/app.ts → overlay/src/app.ts (first time only)
+  2. Tool writes to overlay/src/app.ts
+
+Read from src/app.ts:
+  - If in writtenFiles → read from overlay/src/app.ts
+  - Otherwise → read from /home/user/project/src/app.ts
+
+New file (src/new.ts):
+  - Create overlay/src/new.ts directly (no original to copy)
+
+Accept:
+  - copyFile(overlay/src/app.ts → /home/user/project/src/app.ts)
+  - copyFile(overlay/src/new.ts → /home/user/project/src/new.ts)
+  - rm -rf overlay/
+
+Abort:
+  - rm -rf overlay/
+```
+
+## Tool Gate Security
+
+| Tool                                                       | Action   | Condition                                    |
+| ---------------------------------------------------------- | -------- | -------------------------------------------- |
+| read_file, grep, glob, ls, lsp                             | allow    | Read paths resolved through overlay          |
+| edit, write_file                                           | redirect | Only in auto-edit / yolo approval mode       |
+| edit, write_file                                           | boundary | In default / plan approval mode              |
+| shell                                                      | allow    | `isShellCommandReadOnlyAST()` returns true   |
+| shell                                                      | boundary | Non-read-only commands                       |
+| web_fetch, web_search                                      | boundary | Network requests require user consent        |
+| agent, skill, memory, ask_user, todo_write, exit_plan_mode | boundary | Cannot interact with user during speculation |
+| Unknown / MCP tools                                        | boundary | Safe default                                 |
+
+### Path Rewrite
+
+- **Write tools**: `rewritePathArgs()` redirects `file_path` to overlay via `overlayFs.redirectWrite()`
+- **Read tools**: `resolveReadPaths()` redirects `file_path` to overlay via `overlayFs.resolveReadPath()` if previously written
+- **Rewrite failure**: Treated as boundary (e.g., absolute path outside cwd throws in `redirectWrite`)
+
+## Boundary Handling
+
+When a boundary is hit mid-turn:
+
+1. Already-executed tool calls are preserved (index-based tracking, not name-based)
+2. Unexecuted function calls are stripped from the model message
+3. Partial tool responses are added to history
+4. `ensureToolResultPairing()` validates completeness before injection
+
+## Pipelined Suggestion
+
+After speculation completes (no boundary), a second LLM call generates the **next** suggestion:
+
+```
+Context: original conversation + "commit this" + speculated messages
+→ LLM predicts: "push it"
+→ Stored in state.pipelinedSuggestion
+→ On accept: setPromptSuggestion("push it") — appears instantly
+```
+
+This enables Tab-Tab-Tab workflows where each acceptance immediately shows the next step.
+
+The pipelined suggestion reuses the exported `SUGGESTION_PROMPT` constant from `suggestionGenerator.ts` (not a local copy) to ensure consistent quality with initial suggestions.
+
+## Fast Model
+
+`startSpeculation` accepts an optional `options.model` parameter, threaded through `runSpeculativeLoop` and `generatePipelinedSuggestion` to `runForkedQuery`. Configured via the top-level `fastModel` setting (empty = use main model). The same `fastModel` is used for all background tasks: suggestion generation, speculation, and pipelined suggestions. Set via `/model --fast <name>` or `settings.json`.
+
+## UI Rendering
+
+When speculation completes, `acceptSpeculation` renders results via `historyManager.addItem()`:
+
+- **User messages**: rendered as `type: 'user'` items
+- **Model text**: rendered as `type: 'gemini'` items
+- **Tool calls**: rendered as `type: 'tool_group'` items with structured `IndividualToolCallDisplay` entries (tool name, argument description, result text, status)
+
+This shows the user the full speculation output including tool call details, not just plain text.
+
+## Forked Query (Cache Sharing)
+
+### CacheSafeParams
+
+```typescript
+interface CacheSafeParams {
+  generationConfig: GenerateContentConfig; // systemInstruction + tools
+  history: Content[]; // curated, max 40 entries
+  model: string;
+  version: number; // increments on config changes
+}
+```
+
+- Saved after each successful main turn in `GeminiClient.sendMessageStream()`
+- Cleared on `startChat()` / `resetChat()` to prevent cross-session leakage
+- History truncated to 40 entries; `createForkedChat` uses shallow copies (params are already deep-cloned snapshots)
+- Thinking mode explicitly disabled (`thinkingConfig: { includeThoughts: false }`) — reasoning tokens are not needed for speculation and would waste cost/latency. This does not affect cache prefix matching (determined by systemInstruction + tools + history only)
+- Version detection via `JSON.stringify` comparison of systemInstruction + tools
+
+### Cache Mechanism
+
+DashScope already enables prefix caching via:
+
+- `X-DashScope-CacheControl: enable` header
+- `cache_control: { type: 'ephemeral' }` annotations on messages and tools
+
+The forked `GeminiChat` uses identical `generationConfig` (including tools) and history prefix, so DashScope's existing cache mechanism produces cache hits automatically.
+
+## Constants
+
+| Constant                 | Value | Description                              |
+| ------------------------ | ----- | ---------------------------------------- |
+| MAX_SPECULATION_TURNS    | 20    | Maximum API round-trips                  |
+| MAX_SPECULATION_MESSAGES | 100   | Maximum messages in speculated history   |
+| SUGGESTION_DELAY_MS      | 300   | Delay before showing suggestion          |
+| ACCEPT_DEBOUNCE_MS       | 100   | Debounce lock for rapid accepts          |
+| MAX_HISTORY_FOR_CACHE    | 40    | History entries saved in CacheSafeParams |
+
+## File Structure
+
+```
+packages/core/src/followup/
+├── followupState.ts          # Framework-agnostic state controller
+├── suggestionGenerator.ts    # LLM-based suggestion generation + 12 filter rules
+├── forkedQuery.ts            # Cache-aware forked query infrastructure
+├── overlayFs.ts              # Copy-on-write overlay filesystem
+├── speculationToolGate.ts    # Tool boundary enforcement
+├── speculation.ts            # Speculation engine (start/accept/abort)
+└── index.ts                  # Module exports
+```
diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md
index 1c7c20404..9389ba8f5 100644
--- a/docs/users/configuration/settings.md
+++ b/docs/users/configuration/settings.md
@@ -109,6 +109,9 @@ Settings are organized into categories. All settings should be placed within the
 | `ui.accessibility.enableLoadingPhrases` | boolean          | Enable loading phrases (disable for accessibility).                                                                                                                                                                                                                                                                                                                                                                 | `true`      |
 | `ui.accessibility.screenReader`         | boolean          | Enables screen reader mode, which adjusts the TUI for better compatibility with screen readers.                                                                                                                                                                                                                                                                                                                     | `false`     |
 | `ui.customWittyPhrases`                 | array of strings | A list of custom phrases to display during loading states. When provided, the CLI will cycle through these phrases instead of the default ones.                                                                                                                                                                                                                                                                     | `[]`        |
+| `ui.enableFollowupSuggestions`          | boolean          | Enable [followup suggestions](../features/followup-suggestions) that predict what you want to type next after the model responds. Suggestions appear as ghost text and can be accepted with Tab, Enter, or Right Arrow.                                                                                                                                                                                             | `true`      |
+| `ui.enableCacheSharing`                 | boolean          | Use cache-aware forked queries for suggestion generation. Reduces cost on providers that support prefix caching (experimental).                                                                                                                                                                                                                                                                                     | `true`      |
+| `ui.enableSpeculation`                  | boolean          | Speculatively execute accepted suggestions before submission. Results appear instantly when you accept (experimental).                                                                                                                                                                                                                                                                                              | `false`     |
 
 #### ide
 
@@ -185,6 +188,12 @@ The `extra_body` field allows you to add custom parameters to the request body s
 - `"./custom-logs"` - Logs to `./custom-logs` relative to current directory
 - `"/tmp/openai-logs"` - Logs to absolute path `/tmp/openai-logs`
 
+#### fastModel
+
+| Setting     | Type   | Description                                                                                                                                                                                                                                           | Default |
+| ----------- | ------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
+| `fastModel` | string | Model for background tasks ([suggestion generation](../features/followup-suggestions), speculation). Leave empty to use the main model. A smaller/faster model (e.g., `qwen3.5-flash`) reduces latency and cost. Can also be set via `/model --fast`. | `""`    |
+
 #### context
 
 | Setting                                           | Type                       | Description                                                                                                                                                                                                                                                                                                                                                           | Default     |
diff --git a/docs/users/features/_meta.ts b/docs/users/features/_meta.ts
index b4f3acbfc..4c793f589 100644
--- a/docs/users/features/_meta.ts
+++ b/docs/users/features/_meta.ts
@@ -1,5 +1,6 @@
 export default {
   commands: 'Commands',
+  'followup-suggestions': 'Followup Suggestions',
   'sub-agents': 'SubAgents',
   arena: 'Agent Arena',
   skills: 'Skills',
diff --git a/docs/users/features/commands.md b/docs/users/features/commands.md
index 05311e957..d99e3a647 100644
--- a/docs/users/features/commands.md
+++ b/docs/users/features/commands.md
@@ -56,19 +56,20 @@ Commands specifically for controlling interface and output language.
 
 Commands for managing AI tools and models.
 
-| Command          | Description                                   | Usage Examples                                |
-| ---------------- | --------------------------------------------- | --------------------------------------------- |
-| `/mcp`           | List configured MCP servers and tools         | `/mcp`, `/mcp desc`                           |
-| `/tools`         | Display currently available tool list         | `/tools`, `/tools desc`                       |
-| `/skills`        | List and run available skills                 | `/skills`, `/skills <name>`                   |
-| `/approval-mode` | Change approval mode for tool usage           | `/approval-mode <mode (auto-edit)> --project` |
-| →`plan`          | Analysis only, no execution                   | Secure review                                 |
-| →`default`       | Require approval for edits                    | Daily use                                     |
-| →`auto-edit`     | Automatically approve edits                   | Trusted environment                           |
-| →`yolo`          | Automatically approve all                     | Quick prototyping                             |
-| `/model`         | Switch model used in current session          | `/model`                                      |
-| `/extensions`    | List all active extensions in current session | `/extensions`                                 |
-| `/memory`        | Manage AI's instruction context               | `/memory add Important Info`                  |
+| Command          | Description                                       | Usage Examples                                |
+| ---------------- | ------------------------------------------------- | --------------------------------------------- |
+| `/mcp`           | List configured MCP servers and tools             | `/mcp`, `/mcp desc`                           |
+| `/tools`         | Display currently available tool list             | `/tools`, `/tools desc`                       |
+| `/skills`        | List and run available skills                     | `/skills`, `/skills <name>`                   |
+| `/approval-mode` | Change approval mode for tool usage               | `/approval-mode <mode (auto-edit)> --project` |
+| →`plan`          | Analysis only, no execution                       | Secure review                                 |
+| →`default`       | Require approval for edits                        | Daily use                                     |
+| →`auto-edit`     | Automatically approve edits                       | Trusted environment                           |
+| →`yolo`          | Automatically approve all                         | Quick prototyping                             |
+| `/model`         | Switch model used in current session              | `/model`                                      |
+| `/model --fast`  | Set or select the fast model for background tasks | `/model --fast qwen3.5-flash`                 |
+| `/extensions`    | List all active extensions in current session     | `/extensions`                                 |
+| `/memory`        | Manage AI's instruction context                   | `/memory add Important Info`                  |
 
 ### 1.5 Side Question (`/btw`)
 
diff --git a/docs/users/features/followup-suggestions.md b/docs/users/features/followup-suggestions.md
new file mode 100644
index 000000000..3dbf11df5
--- /dev/null
+++ b/docs/users/features/followup-suggestions.md
@@ -0,0 +1,109 @@
+# Followup Suggestions
+
+Qwen Code can predict what you want to type next and show it as ghost text in the input area. This feature uses an LLM call to analyze the conversation context and generate a natural next-step suggestion.
+
+This feature works end-to-end in the CLI. In the WebUI, the hook and UI plumbing are available, but host applications must trigger suggestion generation and wire the followup state for suggestions to appear.
+
+## How It Works
+
+After Qwen Code finishes responding, a suggestion appears as dimmed text in the input area after a short delay (~300ms). For example, after fixing a bug, you might see:
+
+```
+> run the tests
+```
+
+The suggestion is generated by sending the conversation history to the model, which predicts what you would naturally type next.
+
+## Accepting Suggestions
+
+| Key           | Action                                           |
+| ------------- | ------------------------------------------------ |
+| `Tab`         | Accept the suggestion and fill it into the input |
+| `Enter`       | Accept the suggestion and submit it immediately  |
+| `Right Arrow` | Accept the suggestion and fill it into the input |
+| Any typing    | Dismiss the suggestion and type normally         |
+
+## When Suggestions Appear
+
+Suggestions are generated when all of the following conditions are met:
+
+- The model has completed its response (not during streaming)
+- At least 2 model turns have occurred in the conversation
+- There are no errors in the most recent response
+- No confirmation dialogs are pending (e.g., shell confirmation, permissions)
+- The approval mode is not set to `plan`
+- The feature is enabled in settings (enabled by default)
+
+Suggestions will not appear in non-interactive mode (e.g., headless/SDK mode).
+
+Suggestions are automatically dismissed when:
+
+- You start typing
+- A new model turn begins
+- The suggestion is accepted
+
+## Fast Model
+
+By default, suggestions use the same model as your main conversation. For faster and cheaper suggestions, configure a dedicated fast model:
+
+### Via command
+
+```
+/model --fast qwen3.5-flash
+```
+
+Or use `/model --fast` (without a model name) to open a selection dialog.
+
+### Via settings.json
+
+```json
+{
+  "fastModel": "qwen3.5-flash"
+}
+```
+
+The fast model is used for background tasks like suggestion generation. When not configured, the main conversation model is used as fallback.
+
+Thinking/reasoning mode is automatically disabled for all background tasks (suggestion generation and speculation), regardless of your main model's thinking configuration. This avoids wasting tokens on internal reasoning that isn't needed for these tasks.
+
+## Configuration
+
+These settings can be configured in `settings.json`:
+
+| Setting                        | Type    | Default | Description                                                        |
+| ------------------------------ | ------- | ------- | ------------------------------------------------------------------ |
+| `ui.enableFollowupSuggestions` | boolean | `true`  | Enable or disable followup suggestions                             |
+| `ui.enableCacheSharing`        | boolean | `true`  | Use cache-aware forked queries to reduce cost (experimental)       |
+| `ui.enableSpeculation`         | boolean | `false` | Speculatively execute suggestions before submission (experimental) |
+| `fastModel`                    | string  | `""`    | Model for background tasks (suggestion generation, speculation)    |
+
+### Example
+
+```json
+{
+  "fastModel": "qwen3.5-flash",
+  "ui": {
+    "enableFollowupSuggestions": true,
+    "enableCacheSharing": true
+  }
+}
+```
+
+## Monitoring
+
+Suggestion model usage appears in `/stats` output, showing tokens consumed by the fast model for suggestion generation.
+
+The fast model is also shown in `/about` output under "Fast Model".
+
+## Suggestion Quality
+
+Suggestions go through quality filters to ensure they are useful:
+
+- Must be 2-12 words (CJK: 2-30 characters) and under 100 characters total, except for the single-word cases listed below
+- Cannot be evaluative ("looks good", "thanks")
+- Cannot use AI voice ("Let me...", "I'll...")
+- Cannot be multiple sentences or contain formatting (markdown, newlines)
+- Cannot be meta-commentary ("nothing to suggest", "silence")
+- Cannot be error messages or prefixed labels ("Suggestion: ...")
+- Single-word suggestions are only allowed for common commands (yes, commit, push, etc.)
+- Slash commands (e.g., `/commit`) are always allowed as single-word suggestions
diff --git a/docs/users/overview.md b/docs/users/overview.md
index f3c52be91..b61e8aa80 100644
--- a/docs/users/overview.md
+++ b/docs/users/overview.md
@@ -56,6 +56,7 @@ You'll be prompted to log in on first use. That's it! [Continue with Quickstart
 - **Debug and fix issues**: Describe a bug or paste an error message. Qwen Code will analyze your codebase, identify the problem, and implement a fix.
 - **Navigate any codebase**: Ask anything about your team's codebase, and get a thoughtful answer back. Qwen Code maintains awareness of your entire project structure, can find up-to-date information from the web, and with [MCP](./features/mcp) can pull from external datasources like Google Drive, Figma, and Slack.
 - **Automate tedious tasks**: Fix fiddly lint issues, resolve merge conflicts, and write release notes. Do all this in a single command from your developer machines, or automatically in CI.
+- **[Followup suggestions](./features/followup-suggestions)**: Qwen Code predicts what you want to type next and shows it as ghost text. Press Tab to accept, or just keep typing to dismiss.
 
 ## Why developers love Qwen Code
 
diff --git a/package-lock.json b/package-lock.json
index a5c91d66b..42e4a9297 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -23875,6 +23875,7 @@
         "vite-plugin-dts": "^4.5.4"
       },
       "peerDependencies": {
+        "@qwen-code/qwen-code-core": ">=0.13.0",
         "react": "^18.0.0 || ^19.0.0",
         "react-dom": "^18.0.0 || ^19.0.0"
       }
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
index c445590a8..e765dd801 100644
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -513,6 +513,36 @@ const SETTINGS_SCHEMA = {
           'Show optional feedback dialog after conversations to help improve Qwen performance.',
         showInDialog: true,
       },
+      enableFollowupSuggestions: {
+        type: 'boolean',
+        label: 'Enable Follow-up Suggestions',
+        category: 'UI',
+        requiresRestart: false,
+        default: true,
+        description:
+          'Show context-aware follow-up suggestions after task completion. Press Tab or Right Arrow to accept, Enter to accept and submit.',
+        showInDialog: true,
+      },
+      enableCacheSharing: {
+        type: 'boolean',
+        label: 'Enable Cache Sharing for Suggestions',
+        category: 'UI',
+        requiresRestart: false,
+        default: true,
+        description:
+          'Use cache-aware forked queries for suggestion generation. Reduces cost on providers that support prefix caching (experimental).',
+        showInDialog: false,
+      },
+      enableSpeculation: {
+        type: 'boolean',
+        label: 'Enable Speculative Execution',
+        category: 'UI',
+        requiresRestart: false,
+        default: false,
+        description:
+          'Speculatively execute accepted suggestions before submission. Results appear instantly when you accept (experimental).',
+        showInDialog: false,
+      },
       accessibility: {
         type: 'object',
         label: 'Accessibility',
@@ -616,6 +646,17 @@ const SETTINGS_SCHEMA = {
     showInDialog: false,
   },
 
+  fastModel: {
+    type: 'string',
+    label: 'Fast Model',
+    category: 'Model',
+    requiresRestart: false,
+    default: '',
+    description:
+      'Model for background tasks (suggestion generation, speculation). Leave empty to use the main model. A smaller/faster model (e.g., qwen3.5-flash) reduces latency and cost.',
+    showInDialog: true,
+  },
+
   model: {
     type: 'object',
     label: 'Model',
diff --git a/packages/cli/src/i18n/locales/de.js b/packages/cli/src/i18n/locales/de.js
index dd743e365..92ee2a01e 100644
--- a/packages/cli/src/i18n/locales/de.js
+++ b/packages/cli/src/i18n/locales/de.js
@@ -79,6 +79,7 @@ export default {
   'CLI Version': 'CLI-Version',
   'Git Commit': 'Git-Commit',
   Model: 'Modell',
+  'Fast Model': 'Schnelles Modell',
   Sandbox: 'Sandbox',
   'OS Platform': 'Betriebssystem',
   'OS Arch': 'OS-Architektur',
@@ -989,6 +990,8 @@ export default {
   // Commands - Model
   // ============================================================================
   'Switch the model for this session': 'Modell für diese Sitzung wechseln',
+  'Set fast model for background tasks':
+    'Schnelles Modell für Hintergrundaufgaben festlegen',
   'Content generator configuration not available.':
     'Inhaltsgenerator-Konfiguration nicht verfügbar.',
   'Authentication type not available.':
diff --git a/packages/cli/src/i18n/locales/en.js b/packages/cli/src/i18n/locales/en.js
index 1a2a53e9d..fdc572fcb 100644
--- a/packages/cli/src/i18n/locales/en.js
+++ b/packages/cli/src/i18n/locales/en.js
@@ -98,6 +98,7 @@ export default {
   'CLI Version': 'CLI Version',
   'Git Commit': 'Git Commit',
   Model: 'Model',
+  'Fast Model': 'Fast Model',
   Sandbox: 'Sandbox',
   'OS Platform': 'OS Platform',
   'OS Arch': 'OS Arch',
@@ -1151,6 +1152,7 @@ export default {
   // Commands - Model
   // ============================================================================
   'Switch the model for this session': 'Switch the model for this session',
+  'Set fast model for background tasks': 'Set fast model for background tasks',
   'Content generator configuration not available.':
     'Content generator configuration not available.',
   'Authentication type not available.': 'Authentication type not available.',
diff --git a/packages/cli/src/i18n/locales/ja.js b/packages/cli/src/i18n/locales/ja.js
index 6bfd14168..838214f6e 100644
--- a/packages/cli/src/i18n/locales/ja.js
+++ b/packages/cli/src/i18n/locales/ja.js
@@ -66,6 +66,7 @@ export default {
   'CLI Version': 'CLIバージョン',
   'Git Commit': 'Gitコミット',
   Model: 'モデル',
+  'Fast Model': '高速モデル',
   Sandbox: 'サンドボックス',
   'OS Platform': 'OSプラットフォーム',
   'OS Arch': 'OSアーキテクチャ',
@@ -743,6 +744,8 @@ export default {
     'サマリーの生成に失敗 - LLMレスポンスからテキストコンテンツを受信できませんでした',
   // Model
   'Switch the model for this session': 'このセッションのモデルを切り替え',
+  'Set fast model for background tasks':
+    'バックグラウンドタスク用の高速モデルを設定',
   'Content generator configuration not available.':
     'コンテンツジェネレーター設定が利用できません',
   'Authentication type not available.': '認証タイプが利用できません',
diff --git a/packages/cli/src/i18n/locales/pt.js b/packages/cli/src/i18n/locales/pt.js
index b8f40740f..8fd6a79d1 100644
--- a/packages/cli/src/i18n/locales/pt.js
+++ b/packages/cli/src/i18n/locales/pt.js
@@ -91,6 +91,7 @@ export default {
   'CLI Version': 'Versão da CLI',
   'Git Commit': 'Commit do Git',
   Model: 'Modelo',
+  'Fast Model': 'Modelo Rápido',
   Sandbox: 'Sandbox',
   'OS Platform': 'Plataforma do SO',
   'OS Arch': 'Arquitetura do SO',
@@ -996,6 +997,8 @@ export default {
   // Commands - Model
   // ============================================================================
   'Switch the model for this session': 'Trocar o modelo para esta sessão',
+  'Set fast model for background tasks':
+    'Definir modelo rápido para tarefas em segundo plano',
   'Content generator configuration not available.':
     'Configuração do gerador de conteúdo não disponível.',
   'Authentication type not available.': 'Tipo de autenticação não disponível.',
diff --git a/packages/cli/src/i18n/locales/ru.js b/packages/cli/src/i18n/locales/ru.js
index 77f748189..6b910ff48 100644
--- a/packages/cli/src/i18n/locales/ru.js
+++ b/packages/cli/src/i18n/locales/ru.js
@@ -99,6 +99,7 @@ export default {
   'CLI Version': 'Версия CLI',
   'Git Commit': 'Git-коммит',
   Model: 'Модель',
+  'Fast Model': 'Быстрая модель',
   Sandbox: 'Песочница',
   'OS Platform': 'Платформа ОС',
   'OS Arch': 'Архитектура ОС',
@@ -997,6 +998,8 @@ export default {
   // Команды - Модель
   // ============================================================================
   'Switch the model for this session': 'Переключение модели для этой сессии',
+  'Set fast model for background tasks':
+    'Установить быструю модель для фоновых задач',
   'Content generator configuration not available.':
     'Конфигурация генератора содержимого недоступна.',
   'Authentication type not available.': 'Тип авторизации недоступен.',
diff --git a/packages/cli/src/i18n/locales/zh.js b/packages/cli/src/i18n/locales/zh.js
index f0440108a..357a4ccd3 100644
--- a/packages/cli/src/i18n/locales/zh.js
+++ b/packages/cli/src/i18n/locales/zh.js
@@ -96,6 +96,7 @@ export default {
   'CLI Version': 'CLI 版本',
   'Git Commit': 'Git 提交',
   Model: '模型',
+  'Fast Model': '快速模型',
   Sandbox: '沙箱',
   'OS Platform': '操作系统平台',
   'OS Arch': '操作系统架构',
@@ -1092,6 +1093,7 @@ export default {
   // Commands - Model
   // ============================================================================
   'Switch the model for this session': '切换此会话的模型',
+  'Set fast model for background tasks': '设置后台任务的快速模型',
   'Content generator configuration not available.': '内容生成器配置不可用',
   'Authentication type not available.': '认证类型不可用',
   'No models available for the current authentication type ({{authType}}).':
diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx
index 728484945..5f5e29e9c 100644
--- a/packages/cli/src/ui/AppContainer.tsx
+++ b/packages/cli/src/ui/AppContainer.tsx
@@ -41,6 +41,17 @@ import {
   Storage,
   SessionEndReason,
   SessionStartSource,
+  generatePromptSuggestion,
+  logPromptSuggestion,
+  PromptSuggestionEvent,
+  logSpeculation,
+  SpeculationEvent,
+  startSpeculation,
+  acceptSpeculation,
+  abortSpeculation,
+  type SpeculationState,
+  IDLE_SPECULATION,
+  ApprovalMode,
   type PermissionMode,
 } from '@qwen-code/qwen-code-core';
 import { buildResumedHistoryItems } from './utils/resumeHistoryUtils.js';
@@ -519,8 +530,12 @@ export const AppContainer = (props: AppContainerProps) => {
   const { isSettingsDialogOpen, openSettingsDialog, closeSettingsDialog } =
     useSettingsCommand();
 
-  const { isModelDialogOpen, openModelDialog, closeModelDialog } =
-    useModelCommand();
+  const {
+    isModelDialogOpen,
+    isFastModelMode,
+    openModelDialog,
+    closeModelDialog,
+  } = useModelCommand();
   const { activeArenaDialog, openArenaDialog, closeArenaDialog } =
     useArenaCommand();
 
@@ -735,7 +750,22 @@ export const AppContainer = (props: AppContainerProps) => {
 
   const agentViewState = useAgentViewState();
 
+  // Prompt suggestion state
+  const [promptSuggestion, setPromptSuggestion] = useState<string | null>(null);
+  const prevStreamingStateRef = useRef<StreamingState>(StreamingState.Idle);
+  const speculationRef = useRef<SpeculationState>(IDLE_SPECULATION);
+  const suggestionAbortRef = useRef<AbortController | null>(null);
+
+  // Dismiss callback — clears suggestion + aborts in-flight generation/speculation
+  const dismissPromptSuggestion = useCallback(() => {
+    setPromptSuggestion(null);
+    suggestionAbortRef.current?.abort();
+    suggestionAbortRef.current = null;
+  }, []);
+
   // Auto-accept indicator — disabled on agent tabs (agents handle their own)
+  const geminiClient = config.getGeminiClient();
+
   const showAutoAcceptIndicator = useAutoAcceptIndicator({
     config,
     addItem: historyManager.addItem,
@@ -769,9 +799,136 @@ export const AppContainer = (props: AppContainerProps) => {
         void submitQuery(submittedValue);
         return;
       }
+
+      // Check if speculation has results for this submission
+      const spec = speculationRef.current;
+      if (
+        spec.status !== 'idle' &&
+        spec.suggestion === submittedValue &&
+        spec.status === 'completed'
+      ) {
+        // Accept completed speculation: inject messages and apply files
+        acceptSpeculation(spec, geminiClient)
+          .then((result) => {
+            logSpeculation(
+              config,
+              new SpeculationEvent({
+                outcome: 'accepted',
+                turns_used: spec.messages.filter((m) => m.role === 'model')
+                  .length,
+                files_written: result.filesApplied.length,
+                tool_use_count: spec.toolUseCount,
+                duration_ms: Date.now() - spec.startTime,
+                boundary_type: spec.boundary?.type,
+                had_pipelined_suggestion: !!result.nextSuggestion,
+              }),
+            );
+            // Speculation completed fully (no boundary) — render results in UI
+            {
+              const now = Date.now();
+
+              // Render each speculated message as the appropriate HistoryItem
+              for (let mi = 0; mi < result.messages.length; mi++) {
+                const msg = result.messages[mi];
+                if (msg.role === 'user' && msg.parts) {
+                  // Check if this is a tool result (functionResponse) or user text
+                  const hasText = msg.parts.some(
+                    (p) => p.text && !p.functionResponse,
+                  );
+                  if (hasText) {
+                    const text = msg.parts
+                      .map((p) => p.text ?? '')
+                      .filter(Boolean)
+                      .join('');
+                    if (text) {
+                      historyManager.addItem(
+                        { type: 'user' as const, text },
+                        now,
+                      );
+                    }
+                  }
+                  // functionResponse parts are rendered as part of the tool_group below
+                } else if (msg.role === 'model' && msg.parts) {
+                  // Extract text and tool calls separately
+                  const textParts = msg.parts
+                    .filter((p) => p.text && !p.functionCall)
+                    .map((p) => p.text!)
+                    .join('');
+                  const toolCalls = msg.parts.filter((p) => p.functionCall);
+
+                  if (textParts) {
+                    historyManager.addItem(
+                      { type: 'gemini' as const, text: textParts },
+                      now,
+                    );
+                  }
+
+                  if (toolCalls.length > 0) {
+                    // Find matching tool results from the next message
+                    const nextMsg = result.messages[mi + 1];
+                    const toolResults =
+                      nextMsg?.parts?.filter((p) => p.functionResponse) ?? [];
+
+                    const tools = toolCalls.map((tc, i) => {
+                      const name = tc.functionCall?.name ?? 'unknown';
+                      const args = tc.functionCall?.args ?? {};
+                      const resp = toolResults[i]?.functionResponse?.response;
+                      const resultText =
+                        typeof resp === 'object' && resp
+                          ? ((resp as Record<string, unknown>)['output'] ??
+                            JSON.stringify(resp))
+                          : String(resp ?? '');
+                      return {
+                        callId: `spec-${name}-${i}`,
+                        name,
+                        description:
+                          Object.entries(args)
+                            .map(([k, v]) => `${k}: ${String(v).slice(0, 80)}`)
+                            .join(', ') || name,
+                        resultDisplay: String(resultText).slice(0, 500),
+                        status: ToolCallStatus.Success,
+                        confirmationDetails: undefined,
+                      };
+                    });
+
+                    const toolGroupItem: HistoryItemWithoutId = {
+                      type: 'tool_group' as const,
+                      tools,
+                    };
+                    historyManager.addItem(toolGroupItem, now);
+                  }
+                }
+              }
+            }
+            if (result.nextSuggestion) {
+              setPromptSuggestion(result.nextSuggestion);
+            }
+          })
+          .catch(() => {
+            // Fallback: submit normally
+            addMessage(submittedValue);
+          });
+        speculationRef.current = IDLE_SPECULATION;
+        return;
+      }
+
+      // Abort any running speculation since we're submitting something different
+      if (spec.status === 'running') {
+        abortSpeculation(spec).catch(() => {});
+        speculationRef.current = IDLE_SPECULATION;
+      }
+
       addMessage(submittedValue);
     },
-    [addMessage, agentViewState, streamingState, submitQuery],
+    [
+      addMessage,
+      agentViewState,
+      streamingState,
+      submitQuery,
+      config,
+      geminiClient,
+      historyManager,
+    ],
   );
 
   const handleArenaModelsSelected = useCallback(
@@ -894,7 +1051,6 @@ export const AppContainer = (props: AppContainerProps) => {
   // Initial prompt handling
   const initialPrompt = useMemo(() => config.getQuestion(), [config]);
   const initialPromptSubmitted = useRef(false);
-  const geminiClient = config.getGeminiClient();
 
   useEffect(() => {
     if (activePtyId) {
@@ -935,6 +1091,132 @@ export const AppContainer = (props: AppContainerProps) => {
     geminiClient,
   ]);
 
+  // Generate prompt suggestions when streaming completes
+  const followupSuggestionsEnabled =
+    settings.merged.ui?.enableFollowupSuggestions !== false;
+
+  useEffect(() => {
+    // Clear suggestion when feature is disabled at runtime
+    if (!followupSuggestionsEnabled) {
+      suggestionAbortRef.current?.abort();
+      setPromptSuggestion(null);
+      if (speculationRef.current.status === 'running') {
+        abortSpeculation(speculationRef.current).catch(() => {});
+        speculationRef.current = IDLE_SPECULATION;
+      }
+    }
+
+    // Clear suggestion and abort pending generation/speculation when a new turn starts
+    if (
+      prevStreamingStateRef.current === StreamingState.Idle &&
+      streamingState === StreamingState.Responding
+    ) {
+      suggestionAbortRef.current?.abort();
+      setPromptSuggestion(null);
+      if (speculationRef.current.status !== 'idle') {
+        abortSpeculation(speculationRef.current).catch(() => {});
+        speculationRef.current = IDLE_SPECULATION;
+      }
+    }
+
+    // Only trigger when transitioning from Responding to Idle (and enabled)
+    // Skip when dialogs are active, in plan mode, elicitation pending, or last response was error
+    if (
+      followupSuggestionsEnabled &&
+      config.isInteractive() &&
+      !config.getSdkMode() &&
+      prevStreamingStateRef.current === StreamingState.Responding &&
+      streamingState === StreamingState.Idle &&
+      // Check both committed history and pending items for errors
+      // (API errors go to pendingGeminiHistoryItems, not historyManager.history)
+      historyManager.history[historyManager.history.length - 1]?.type !==
+        'error' &&
+      !pendingGeminiHistoryItems.some((item) => item.type === 'error') &&
+      !shellConfirmationRequest &&
+      !confirmationRequest &&
+      !loopDetectionConfirmationRequest &&
+      !isPermissionsDialogOpen &&
+      settingInputRequests.length === 0 &&
+      config.getApprovalMode() !== ApprovalMode.PLAN
+    ) {
+      const ac = new AbortController();
+      suggestionAbortRef.current = ac;
+
+      // Use curated history to avoid invalid/empty entries causing API errors
+      const fullHistory = geminiClient.getChat().getHistory(true);
+      const conversationHistory =
+        fullHistory.length > 40 ? fullHistory.slice(-40) : fullHistory;
+      generatePromptSuggestion(config, conversationHistory, ac.signal, {
+        enableCacheSharing: settings.merged.ui?.enableCacheSharing === true,
+        model: settings.merged.fastModel || undefined,
+      })
+        .then((result) => {
+          if (ac.signal.aborted) return;
+          if (result.suggestion) {
+            setPromptSuggestion(result.suggestion);
+            // Start speculation if enabled (runs in background)
+            if (settings.merged.ui?.enableSpeculation) {
+              startSpeculation(config, result.suggestion, ac.signal, {
+                model: settings.merged.fastModel || undefined,
+              })
+                .then((state) => {
+                  speculationRef.current = state;
+                })
+                .catch(() => {
+                  // Speculation failure is non-blocking
+                });
+            }
+          } else if (result.filterReason) {
+            // Log suppressed suggestion for analytics
+            logPromptSuggestion(
+              config,
+              new PromptSuggestionEvent({
+                outcome: 'suppressed',
+                reason: result.filterReason,
+              }),
+            );
+          }
+        })
+        .catch(() => {
+          // Silently degrade — don't disrupt the user experience
+        });
+    }
+
+    // Only update prev ref when streamingState actually changes, so that
+    // dialog-dependency re-runs don't cause us to miss a Responding→Idle transition.
+    if (prevStreamingStateRef.current !== streamingState) {
+      prevStreamingStateRef.current = streamingState;
+    }
+
+    return () => {
+      suggestionAbortRef.current?.abort();
+      // Abort speculation on effect teardown — dependency change or unmount (#21)
+      if (speculationRef.current.status !== 'idle') {
+        abortSpeculation(speculationRef.current).catch(() => {});
+        speculationRef.current = IDLE_SPECULATION;
+      }
+    };
+    // eslint-disable-next-line react-hooks/exhaustive-deps -- guards may change independently
+  }, [
+    streamingState,
+    followupSuggestionsEnabled,
+    shellConfirmationRequest,
+    confirmationRequest,
+    loopDetectionConfirmationRequest,
+    isPermissionsDialogOpen,
+    settingInputRequests,
+  ]);
+
+  // Abort speculation when promptSuggestion is cleared (new turn, feature toggle, or
+  // user-initiated dismiss via typing/paste). InputPrompt calls onPromptSuggestionDismiss
+  // on user input, which clears promptSuggestion, triggering this effect to abort speculation.
+  useEffect(() => {
+    if (!promptSuggestion && speculationRef.current.status !== 'idle') {
+      abortSpeculation(speculationRef.current).catch(() => {});
+      speculationRef.current = IDLE_SPECULATION;
+    }
+  }, [promptSuggestion]);
+
   const [idePromptAnswered, setIdePromptAnswered] = useState(false);
   const [currentIDE, setCurrentIDE] = useState<IdeInfo | null>(null);
 
@@ -1505,6 +1787,7 @@ export const AppContainer = (props: AppContainerProps) => {
       quittingMessages,
       isSettingsDialogOpen,
       isModelDialogOpen,
+      isFastModelMode,
       isTrustDialogOpen,
       activeArenaDialog,
       isPermissionsDialogOpen,
@@ -1590,6 +1873,9 @@ export const AppContainer = (props: AppContainerProps) => {
       isFeedbackDialogOpen,
       // Per-task token tracking
       taskStartTokens,
+      // Prompt suggestion
+      promptSuggestion,
+      dismissPromptSuggestion,
     }),
     [
       isThemeDialogOpen,
@@ -1607,6 +1893,7 @@ export const AppContainer = (props: AppContainerProps) => {
       quittingMessages,
       isSettingsDialogOpen,
       isModelDialogOpen,
+      isFastModelMode,
       isTrustDialogOpen,
       activeArenaDialog,
       isPermissionsDialogOpen,
@@ -1693,6 +1980,9 @@ export const AppContainer = (props: AppContainerProps) => {
       isFeedbackDialogOpen,
       // Per-task token tracking
       taskStartTokens,
+      // Prompt suggestion
+      promptSuggestion,
+      dismissPromptSuggestion,
     ],
   );
 
diff --git a/packages/cli/src/ui/commands/bugCommand.test.ts b/packages/cli/src/ui/commands/bugCommand.test.ts
index d8d8e83a0..f9a766045 100644
--- a/packages/cli/src/ui/commands/bugCommand.test.ts
+++ b/packages/cli/src/ui/commands/bugCommand.test.ts
@@ -65,6 +65,7 @@ Runtime: Node.js v20.0.0 / npm 10.0.0
 IDE Client: VSCode
 OS: test-platform x64 (22.0.0)
 Model: qwen3-coder-plus
+Fast Model: qwen3-coder-plus
 Session ID: test-session-id
 Sandbox: test
 Proxy: no proxy
@@ -99,6 +100,7 @@ Runtime: Node.js v20.0.0 / npm 10.0.0
 IDE Client: VSCode
 OS: test-platform x64 (22.0.0)
 Model: qwen3-coder-plus
+Fast Model: qwen3-coder-plus
 Session ID: test-session-id
 Sandbox: test
 Proxy: no proxy
@@ -153,6 +155,7 @@ OS: test-platform x64 (22.0.0)
 Auth: API Key - ${AuthType.USE_OPENAI}
 Base URL: https://api.openai.com/v1
 Model: qwen3-coder-plus
+Fast Model: qwen3-coder-plus
 Session ID: test-session-id
 Sandbox: test
 Proxy: no proxy
diff --git a/packages/cli/src/ui/commands/modelCommand.ts b/packages/cli/src/ui/commands/modelCommand.ts
index 4dcc9a518..2ccbda323 100644
--- a/packages/cli/src/ui/commands/modelCommand.ts
+++ b/packages/cli/src/ui/commands/modelCommand.ts
@@ -12,6 +12,7 @@ import type {
 } from './types.js';
 import { CommandKind } from './types.js';
 import { t } from '../../i18n/index.js';
+import { getPersistScopeForModelSelection } from '../../config/modelProvidersScope.js';
 
 export const modelCommand: SlashCommand = {
   name: 'model',
@@ -19,11 +20,22 @@ export const modelCommand: SlashCommand = {
     return t('Switch the model for this session');
   },
   kind: CommandKind.BUILT_IN,
+  completion: async (_context, partialArg) => {
+    if ('--fast'.startsWith(partialArg)) {
+      return [
+        {
+          value: '--fast',
+          description: t('Set fast model for background tasks'),
+        },
+      ];
+    }
+    return null;
+  },
   action: async (
     context: CommandContext,
   ): Promise<OpenDialogActionReturn | MessageActionReturn> => {
     const { services } = context;
-    const { config } = services;
+    const { config, settings } = services;
 
     if (!config) {
       return {
@@ -33,6 +45,37 @@ export const modelCommand: SlashCommand = {
       };
     }
 
+    // Handle --fast flag: /model --fast <modelName>
+    const args = context.invocation?.args?.trim() ?? '';
+    if (args.startsWith('--fast')) {
+      const modelName = args.replace('--fast', '').trim();
+      if (!modelName) {
+        // Open model dialog in fast-model mode
+        return {
+          type: 'dialog',
+          dialog: 'fast-model',
+        };
+      }
+      // Set fast model
+      if (!settings) {
+        return {
+          type: 'message',
+          messageType: 'error',
+          content: t('Settings service not available.'),
+        };
+      }
+      settings.setValue(
+        getPersistScopeForModelSelection(settings),
+        'fastModel',
+        modelName,
+      );
+      return {
+        type: 'message',
+        messageType: 'info',
+        content: t('Fast Model') + ': ' + modelName,
+      };
+    }
+
     const contentGeneratorConfig = config.getContentGeneratorConfig();
     if (!contentGeneratorConfig) {
       return {
diff --git a/packages/cli/src/ui/commands/types.ts b/packages/cli/src/ui/commands/types.ts
index 2bd798054..9c66fec89 100644
--- a/packages/cli/src/ui/commands/types.ts
+++ b/packages/cli/src/ui/commands/types.ts
@@ -157,6 +157,7 @@ export interface OpenDialogActionReturn {
     | 'editor'
     | 'settings'
     | 'model'
+    | 'fast-model'
     | 'subagent_create'
     | 'subagent_list'
     | 'trust'
diff --git a/packages/cli/src/ui/components/Composer.tsx b/packages/cli/src/ui/components/Composer.tsx
index 530b57046..4dca07f0b 100644
--- a/packages/cli/src/ui/components/Composer.tsx
+++ b/packages/cli/src/ui/components/Composer.tsx
@@ -110,6 +110,8 @@ export const Composer = () => {
               ? '  ' + t("Press 'i' for INSERT mode and 'Esc' for NORMAL mode.")
               : '  ' + t('Type your message or @path/to/file')
           }
+          promptSuggestion={uiState.promptSuggestion}
+          onPromptSuggestionDismiss={uiState.dismissPromptSuggestion}
         />
       )}
 
diff --git a/packages/cli/src/ui/components/DialogManager.tsx b/packages/cli/src/ui/components/DialogManager.tsx
index e2f1256ff..bd6e30dae 100644
--- a/packages/cli/src/ui/components/DialogManager.tsx
+++ b/packages/cli/src/ui/components/DialogManager.tsx
@@ -241,7 +241,12 @@ export const DialogManager = ({
     );
   }
   if (uiState.isModelDialogOpen) {
-    return <ModelDialog onClose={uiActions.closeModelDialog} />;
+    return (
+      <ModelDialog
+        onClose={uiActions.closeModelDialog}
+        isFastModelMode={uiState.isFastModelMode}
+      />
+    );
   }
   if (uiState.activeArenaDialog === 'start') {
     return (
diff --git a/packages/cli/src/ui/components/InputPrompt.test.tsx b/packages/cli/src/ui/components/InputPrompt.test.tsx
index 347a1e918..cd33395f8 100644
--- a/packages/cli/src/ui/components/InputPrompt.test.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.test.tsx
@@ -211,6 +211,71 @@ describe('InputPrompt', () => {
 
   const wait = (ms = 50) => new Promise((resolve) => setTimeout(resolve, ms));
 
+  describe('prompt suggestions', () => {
+    it('accepts the visible prompt suggestion on tab when the buffer is empty', async () => {
+      const { stdin, unmount } = renderWithProviders(
+        <InputPrompt {...props} promptSuggestion="commit this" />,
+      );
+      await wait(350);
+
+      stdin.write('\t');
+      await wait();
+
+      expect(mockBuffer.insert).toHaveBeenCalledWith('commit this');
+      unmount();
+    });
+
+    it('accepts and submits the prompt suggestion on Enter when the buffer is empty', async () => {
+      const { stdin, unmount } = renderWithProviders(
+        <InputPrompt {...props} promptSuggestion="commit this" />,
+      );
+      await wait(350);
+
+      stdin.write('\r');
+      await wait();
+
+      expect(props.onSubmit).toHaveBeenCalledWith('commit this');
+      unmount();
+    });
+
+    it('fills the prompt suggestion on right arrow without submitting', async () => {
+      const { stdin, unmount } = renderWithProviders(
+        <InputPrompt {...props} promptSuggestion="commit this" />,
+      );
+      await wait(350);
+
+      stdin.write('\u001B[C'); // right arrow
+      await wait();
+
+      expect(mockBuffer.insert).toHaveBeenCalledWith('commit this');
+      expect(props.onSubmit).not.toHaveBeenCalled();
+      unmount();
+    });
+
+    it('does not accept a prompt suggestion while command completion is active', async () => {
+      mockCommandCompletion.showSuggestions = true;
+      mockCommandCompletion.suggestions = [
+        {
+          value: '/clear',
+          label: '/clear',
+          description: 'Clear screen',
+        },
+      ] as UseCommandCompletionReturn['suggestions'];
+
+      const { stdin, unmount } = renderWithProviders(
+        <InputPrompt {...props} promptSuggestion="commit this" />,
+      );
+      await wait(350);
+
+      stdin.write('\t');
+      await wait();
+
+      expect(mockBuffer.insert).not.toHaveBeenCalledWith('commit this');
+      expect(mockCommandCompletion.handleAutocomplete).toHaveBeenCalled();
+      unmount();
+    });
+  });
+
   it('should call shellHistory.getPreviousCommand on up arrow in shell mode', async () => {
     props.shellModeActive = true;
     const { stdin, unmount } = renderWithProviders(<InputPrompt {...props} />);
diff --git a/packages/cli/src/ui/components/InputPrompt.tsx b/packages/cli/src/ui/components/InputPrompt.tsx
index 52add983b..56448f85b 100644
--- a/packages/cli/src/ui/components/InputPrompt.tsx
+++ b/packages/cli/src/ui/components/InputPrompt.tsx
@@ -17,10 +17,11 @@ import chalk from 'chalk';
 import { useShellHistory } from '../hooks/useShellHistory.js';
 import { useReverseSearchCompletion } from '../hooks/useReverseSearchCompletion.js';
 import { useCommandCompletion } from '../hooks/useCommandCompletion.js';
+import { useFollowupSuggestionsCLI } from '../hooks/useFollowupSuggestions.js';
+import type { Config } from '@qwen-code/qwen-code-core';
 import type { Key } from '../hooks/useKeypress.js';
 import { keyMatchers, Command } from '../keyMatchers.js';
 import type { CommandContext, SlashCommand } from '../commands/types.js';
-import type { Config } from '@qwen-code/qwen-code-core';
 import {
   ApprovalMode,
   Storage,
@@ -81,6 +82,10 @@ export interface InputPromptProps {
   onSuggestionsVisibilityChange?: (visible: boolean) => void;
   vimHandleInput?: (key: Key) => boolean;
   isEmbeddedShellFocused?: boolean;
+  /** Prompt suggestion text to display after response completes */
+  promptSuggestion?: string | null;
+  /** Called when prompt suggestion is dismissed (user typed or pasted) */
+  onPromptSuggestionDismiss?: () => void;
 }
 
 // Re-export from shared utils for backwards compatibility
@@ -110,6 +115,8 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
   onSuggestionsVisibilityChange,
   vimHandleInput,
   isEmbeddedShellFocused,
+  promptSuggestion,
+  onPromptSuggestionDismiss,
 }) => {
   const isShellFocused = useShellFocusState();
   const uiState = useUIState();
@@ -210,6 +217,15 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
     commandSearchActive,
   );
 
+  // Prompt suggestion hook
+  const followup = useFollowupSuggestionsCLI({
+    onAccept: (suggestion) => {
+      buffer.insert(suggestion);
+    },
+    config,
+    isFocused: isShellFocused,
+  });
+
   const resetCompletionState = completion.resetCompletionState;
   const resetReverseSearchCompletionState =
     reverseSearchCompletion.resetCompletionState;
@@ -304,6 +320,9 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
       buffer.setText('');
       onSubmit(finalValue);
 
+      // Dismiss follow-up suggestion after submit
+      followup.dismiss();
+
       // Clear attachments after submit
       setAttachments([]);
       setIsAttachmentMode(false);
@@ -322,6 +341,7 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
       attachments,
       config,
       pendingPastes,
+      followup,
     ],
   );
 
@@ -441,6 +461,12 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
       }
 
       if (key.paste) {
+        // Dismiss follow-up suggestion when user starts typing/pasting
+        if (buffer.text.length === 0 && followup.state.isVisible) {
+          followup.dismiss();
+          onPromptSuggestionDismiss?.();
+        }
+
         // Record paste time to prevent accidental auto-submission
         setRecentPasteTime(Date.now());
 
@@ -698,6 +724,35 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
         return true;
       }
 
+      // Handle Tab for prompt suggestions (when buffer is empty and no completion/search active)
+      // Use explicit key.name === 'tab' instead of ACCEPT_SUGGESTION matcher,
+      // because ACCEPT_SUGGESTION also matches Enter which must fall through to SUBMIT.
+      if (
+        key.name === 'tab' &&
+        buffer.text.length === 0 &&
+        !completion.showSuggestions &&
+        !reverseSearchActive &&
+        !commandSearchActive &&
+        followup.state.isVisible &&
+        followup.state.suggestion
+      ) {
+        followup.accept('tab');
+        return true;
+      }
+
+      // Right arrow fills suggestion into input without submitting
+      if (
+        key.name === 'right' &&
+        !key.ctrl &&
+        !key.meta &&
+        buffer.text.length === 0 &&
+        followup.state.isVisible &&
+        followup.state.suggestion
+      ) {
+        followup.accept('right');
+        return true;
+      }
+
       if (completion.showSuggestions) {
         if (completion.suggestions.length > 1) {
           if (keyMatchers[Command.COMPLETION_UP](key)) {
@@ -830,6 +885,17 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
       }
 
       if (keyMatchers[Command.SUBMIT](key)) {
+        // Accept and submit prompt suggestion on Enter when input is truly empty
+        if (
+          buffer.text.length === 0 &&
+          followup.state.isVisible &&
+          followup.state.suggestion
+        ) {
+          const text = followup.state.suggestion;
+          followup.accept('enter');
+          handleSubmitAndClear(text);
+          return true;
+        }
         if (buffer.text.trim()) {
           // Check if a paste operation occurred recently to prevent accidental auto-submission.
           // Only applies when pasteWorkaround is enabled (Windows or Node < 20), where bracketed
@@ -909,6 +975,19 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
       }
 
       // All remaining keys (readline shortcuts, text input) handled by BaseTextInput
+      // Dismiss follow-up suggestion only on printable character input
+      if (
+        buffer.text.length === 0 &&
+        followup.state.isVisible &&
+        key.sequence &&
+        key.sequence.length === 1 &&
+        !key.ctrl &&
+        !key.meta
+      ) {
+        followup.recordKeystroke();
+        followup.dismiss();
+        onPromptSuggestionDismiss?.();
+      }
       return false;
     },
     [
@@ -950,6 +1029,8 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
       agentTabBarFocused,
       hasAgents,
       setAgentTabBarFocused,
+      followup,
+      onPromptSuggestionDismiss,
     ],
   );
 
@@ -1047,6 +1128,12 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
     }
   }, [shouldShowSuggestions, onSuggestionsVisibilityChange]);
 
+  // Trigger prompt suggestion when prop changes
+  useEffect(() => {
+    followup.setSuggestion(promptSuggestion ?? null);
+    // eslint-disable-next-line react-hooks/exhaustive-deps -- only trigger on prop change
+  }, [promptSuggestion]);
+
   const showAutoAcceptStyling =
     !shellModeActive && approvalMode === ApprovalMode.AUTO_EDIT;
   const showYoloStyling =
@@ -1117,7 +1204,11 @@ export const InputPrompt: React.FC<InputPromptProps> = ({
         onSubmit={handleSubmitAndClear}
         onKeypress={handleInput}
         showCursor={showCursor}
-        placeholder={placeholder}
+        placeholder={
+          followup.state.isVisible && followup.state.suggestion
+            ? followup.state.suggestion
+            : placeholder
+        }
         prefix={prefixNode}
         borderColor={borderColor}
         isActive={!isEmbeddedShellFocused}
diff --git a/packages/cli/src/ui/components/ModelDialog.tsx b/packages/cli/src/ui/components/ModelDialog.tsx
index 09723dcdd..e01172f99 100644
--- a/packages/cli/src/ui/components/ModelDialog.tsx
+++ b/packages/cli/src/ui/components/ModelDialog.tsx
@@ -38,6 +38,7 @@ function formatModalities(modalities?: InputModalities): string {
 
 interface ModelDialogProps {
   onClose: () => void;
+  isFastModelMode?: boolean;
 }
 
 function maskApiKey(apiKey: string | undefined): string {
@@ -130,7 +131,10 @@ function DetailRow({
   );
 }
 
-export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
+export function ModelDialog({
+  onClose,
+  isFastModelMode,
+}: ModelDialogProps): React.JSX.Element {
   const config = useContext(ConfigContext);
   const uiState = useContext(UIStateContext);
   const settings = useSettings();
@@ -243,10 +247,17 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
     [availableModelEntries],
   );
 
-  const preferredModelId = config?.getModel() || MAINLINE_CODER_MODEL;
+  // In fast model mode, default to the currently configured fast model
+  const fastModelSetting = settings?.merged?.fastModel as string | undefined;
+  const preferredModelId =
+    isFastModelMode && fastModelSetting
+      ? fastModelSetting
+      : config?.getModel() || MAINLINE_CODER_MODEL;
   // Check if current model is a runtime model
   // Runtime snapshot ID is already in $runtime|${authType}|${modelId} format
-  const activeRuntimeSnapshot = config?.getActiveRuntimeModelSnapshot?.();
+  const activeRuntimeSnapshot = isFastModelMode
+    ? undefined // fast model is never a runtime model
+    : config?.getActiveRuntimeModelSnapshot?.();
   const preferredKey = activeRuntimeSnapshot
     ? activeRuntimeSnapshot.id
     : authType
@@ -287,6 +298,31 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
     async (selected: string) => {
       setErrorMessage(null);
 
+      // Fast model mode: just save the model ID and close
+      if (isFastModelMode) {
+        // Extract model ID from selection key (format: "authType::modelId" or "$runtime|authType|modelId")
+        let modelId: string;
+        if (selected.includes('::')) {
+          modelId = selected.split('::').slice(1).join('::');
+        } else if (selected.startsWith('$runtime|')) {
+          const parts = selected.split('|');
+          modelId = parts[2] ?? selected;
+        } else {
+          modelId = selected;
+        }
+        const scope = getPersistScopeForModelSelection(settings);
+        settings.setValue(scope, 'fastModel', modelId);
+        uiState?.historyManager.addItem(
+          {
+            type: 'success',
+            text: `${t('Fast Model')}: ${modelId}`,
+          },
+          Date.now(),
+        );
+        onClose();
+        return;
+      }
+
       let after: ContentGeneratorConfig | undefined;
       let effectiveAuthType: AuthType | undefined;
       let effectiveModelId = selected;
@@ -362,7 +398,15 @@ export function ModelDialog({ onClose }: ModelDialogProps): React.JSX.Element {
       });
       onClose();
     },
-    [authType, config, onClose, settings, uiState, setErrorMessage],
+    [
+      authType,
+      config,
+      onClose,
+      settings,
+      uiState,
+      setErrorMessage,
+      isFastModelMode,
+    ],
   );
 
   const hasModels = MODEL_OPTIONS.length > 0;
diff --git a/packages/cli/src/ui/contexts/UIStateContext.tsx b/packages/cli/src/ui/contexts/UIStateContext.tsx
index 9c8446368..13146ac0b 100644
--- a/packages/cli/src/ui/contexts/UIStateContext.tsx
+++ b/packages/cli/src/ui/contexts/UIStateContext.tsx
@@ -54,6 +54,7 @@ export interface UIState {
   quittingMessages: HistoryItem[] | null;
   isSettingsDialogOpen: boolean;
   isModelDialogOpen: boolean;
+  isFastModelMode: boolean;
   isTrustDialogOpen: boolean;
   activeArenaDialog: ArenaDialogType;
   isPermissionsDialogOpen: boolean;
@@ -142,6 +143,10 @@ export interface UIState {
   isFeedbackDialogOpen: boolean;
   // Per-task token tracking
   taskStartTokens: number;
+  // Prompt suggestion
+  promptSuggestion: string | null;
+  /** Dismiss prompt suggestion (clears state, aborts speculation) */
+  dismissPromptSuggestion: () => void;
 }
 
 export const UIStateContext = createContext<UIState | null>(null);
diff --git a/packages/cli/src/ui/hooks/slashCommandProcessor.ts b/packages/cli/src/ui/hooks/slashCommandProcessor.ts
index c0c3fac07..b0d7806e7 100644
--- a/packages/cli/src/ui/hooks/slashCommandProcessor.ts
+++ b/packages/cli/src/ui/hooks/slashCommandProcessor.ts
@@ -74,7 +74,7 @@ interface SlashCommandProcessorActions {
   openThemeDialog: () => void;
   openEditorDialog: () => void;
   openSettingsDialog: () => void;
-  openModelDialog: () => void;
+  openModelDialog: (options?: { fastModelMode?: boolean }) => void;
   openTrustDialog: () => void;
   openPermissionsDialog: () => void;
   openApprovalModeDialog: () => void;
@@ -509,6 +509,9 @@ export const useSlashCommandProcessor = (
                     case 'model':
                       actions.openModelDialog();
                       return { type: 'handled' };
+                    case 'fast-model':
+                      actions.openModelDialog({ fastModelMode: true });
+                      return { type: 'handled' };
                     case 'trust':
                       actions.openTrustDialog();
                       return { type: 'handled' };
diff --git a/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx b/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
new file mode 100644
index 000000000..81773f0fb
--- /dev/null
+++ b/packages/cli/src/ui/hooks/useFollowupSuggestions.tsx
@@ -0,0 +1,162 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Prompt Suggestion Hook for CLI
+ *
+ * Thin React wrapper around the framework-agnostic controller from core.
+ */
+
+import { useState, useMemo, useRef, useEffect, useCallback } from 'react';
+import {
+  INITIAL_FOLLOWUP_STATE,
+  createFollowupController,
+  logPromptSuggestion,
+  PromptSuggestionEvent,
+} from '@qwen-code/qwen-code-core';
+import type { FollowupState, Config } from '@qwen-code/qwen-code-core';
+
+// Re-export for consumers that import from here
+export type { FollowupState } from '@qwen-code/qwen-code-core';
+
+/**
+ * Options for the hook; every field is optional
+ */
+export interface UseFollowupSuggestionsOptions {
+  /** Whether the feature is enabled (defaults to true) */
+  enabled?: boolean;
+  /** Called with the suggestion text after it has been accepted */
+  onAccept?: (suggestion: string) => void;
+  /** Config for telemetry logging; without it no outcome is logged */
+  config?: Config;
+  /** Whether the terminal is focused (for telemetry; defaults to true) */
+  isFocused?: boolean;
+}
+
+/**
+ * Result returned by the hook
+ */
+export interface UseFollowupSuggestionsReturn {
+  /** Latest state published by the controller */
+  state: FollowupState;
+  /** Set suggestion text (called by parent component); null clears it */
+  setSuggestion: (text: string | null) => void;
+  /** Accept the current suggestion; `method` is reported to telemetry */
+  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  /** Dismiss the current suggestion (logged as 'ignored' if it was visible) */
+  dismiss: () => void;
+  /** Clear all state and pending timers without logging an outcome */
+  clear: () => void;
+  /**
+   * Notify that the user typed while suggestion was visible.
+   * Call from the input handler; only the first call per suggestion counts.
+   */
+  recordKeystroke: () => void;
+}
+
+/**
+ * Hook for managing prompt suggestions in CLI.
+ *
+ * Delegates all timer/debounce/state logic to the shared
+ * `createFollowupController` from core and adds outcome telemetry.
+ */
+export function useFollowupSuggestionsCLI(
+  options: UseFollowupSuggestionsOptions = {},
+): UseFollowupSuggestionsReturn {
+  const { enabled = true, onAccept, config, isFocused = true } = options;
+
+  const [state, setState] = useState<FollowupState>(INITIAL_FOLLOWUP_STATE);
+
+  // Keep mutable refs so the controller always sees the latest callbacks
+  const onAcceptRef = useRef(onAccept);
+  onAcceptRef.current = onAccept;
+  const configRef = useRef(config);
+  configRef.current = config;
+
+  // Engagement tracking refs (a timestamp of 0 means "not recorded")
+  const firstKeystrokeAtRef = useRef(0);
+  const prevShownAtRef = useRef(0);
+  const wasFocusedWhenShownRef = useRef(true);
+
+  // Track when a new suggestion appears (in useEffect to avoid render-time side effects)
+  useEffect(() => {
+    if (state.shownAt > 0 && state.shownAt !== prevShownAtRef.current) {
+      prevShownAtRef.current = state.shownAt;
+      wasFocusedWhenShownRef.current = isFocused;
+      firstKeystrokeAtRef.current = 0;
+    } else if (state.shownAt === 0) {
+      prevShownAtRef.current = 0;
+    }
+  }, [state.shownAt, isFocused]);
+
+  const recordKeystroke = useCallback(() => {
+    if (firstKeystrokeAtRef.current === 0 && state.isVisible) {
+      firstKeystrokeAtRef.current = Date.now();
+    }
+  }, [state.isVisible]);
+
+  // Telemetry callback from controller (accept/dismiss); no-op without config
+  const onOutcome = useCallback(
+    (params: {
+      outcome: 'accepted' | 'ignored';
+      accept_method?: 'tab' | 'enter' | 'right';
+      time_ms: number;
+      suggestion_length: number;
+    }) => {
+      const cfg = configRef.current;
+      if (!cfg) return;
+      logPromptSuggestion(
+        cfg,
+        new PromptSuggestionEvent({
+          outcome: params.outcome,
+          accept_method: params.accept_method,
+          ...(params.outcome === 'accepted'
+            ? { time_to_accept_ms: params.time_ms }
+            : { time_to_ignore_ms: params.time_ms }),
+          ...(firstKeystrokeAtRef.current > 0 &&
+            prevShownAtRef.current > 0 && {
+              time_to_first_keystroke_ms:
+                firstKeystrokeAtRef.current - prevShownAtRef.current,
+            }),
+          suggestion_length: params.suggestion_length,
+          similarity: params.outcome === 'accepted' ? 1.0 : 0.0,
+          was_focused_when_shown: wasFocusedWhenShownRef.current,
+        }),
+      );
+    },
+    [],
+  );
+
+  // Memoized controller — re-created when `enabled` changes (onOutcome is stable)
+  const controller = useMemo(
+    () =>
+      createFollowupController({
+        enabled,
+        onStateChange: setState,
+        getOnAccept: () => onAcceptRef.current,
+        onOutcome,
+      }),
+    [enabled, onOutcome],
+  );
+
+  // Clear state when disabled; clean up timers on unmount or controller swap
+  useEffect(() => {
+    if (!enabled) {
+      controller.clear();
+    }
+    return () => controller.cleanup();
+  }, [controller, enabled]);
+
+  return useMemo(
+    () => ({
+      state,
+      setSuggestion: controller.setSuggestion,
+      accept: controller.accept,
+      dismiss: controller.dismiss,
+      clear: controller.clear,
+      recordKeystroke,
+    }),
+    [state, controller, recordKeystroke],
+  );
+}
diff --git a/packages/cli/src/ui/hooks/useModelCommand.ts b/packages/cli/src/ui/hooks/useModelCommand.ts
index c26dcf95a..af182a2f3 100644
--- a/packages/cli/src/ui/hooks/useModelCommand.ts
+++ b/packages/cli/src/ui/hooks/useModelCommand.ts
@@ -8,23 +8,31 @@ import { useState, useCallback } from 'react';
 
 interface UseModelCommandReturn {
   isModelDialogOpen: boolean;
-  openModelDialog: () => void;
+  isFastModelMode: boolean;
+  openModelDialog: (options?: { fastModelMode?: boolean }) => void;
   closeModelDialog: () => void;
 }
 
 export const useModelCommand = (): UseModelCommandReturn => {
   const [isModelDialogOpen, setIsModelDialogOpen] = useState(false);
+  const [isFastModelMode, setIsFastModelMode] = useState(false);
 
-  const openModelDialog = useCallback(() => {
-    setIsModelDialogOpen(true);
-  }, []);
+  const openModelDialog = useCallback(
+    (options?: { fastModelMode?: boolean }) => {
+      setIsFastModelMode(options?.fastModelMode ?? false);
+      setIsModelDialogOpen(true);
+    },
+    [],
+  );
 
   const closeModelDialog = useCallback(() => {
     setIsModelDialogOpen(false);
+    setIsFastModelMode(false);
   }, []);
 
   return {
     isModelDialogOpen,
+    isFastModelMode,
     openModelDialog,
     closeModelDialog,
   };
diff --git a/packages/cli/src/utils/systemInfo.ts b/packages/cli/src/utils/systemInfo.ts
index 4ea281210..856da53d7 100644
--- a/packages/cli/src/utils/systemInfo.ts
+++ b/packages/cli/src/utils/systemInfo.ts
@@ -41,6 +41,7 @@ export interface ExtendedSystemInfo extends SystemInfo {
   apiKeyEnvKey?: string;
   gitCommit?: string;
   proxy?: string;
+  fastModel?: string;
 }
 
 /**
@@ -170,6 +171,9 @@ export async function getExtendedSystemInfo(
       ? GIT_COMMIT_INFO
       : undefined;
 
+  // Get fast model from settings
+  const fastModel = context.services.settings?.merged?.fastModel || undefined;
+
   return {
     ...baseInfo,
     sandboxEnv,
@@ -177,5 +181,6 @@ export async function getExtendedSystemInfo(
     baseUrl,
     apiKeyEnvKey,
     gitCommit,
+    fastModel,
   };
 }
diff --git a/packages/cli/src/utils/systemInfoFields.test.ts b/packages/cli/src/utils/systemInfoFields.test.ts
index fb8624781..0225fb587 100644
--- a/packages/cli/src/utils/systemInfoFields.test.ts
+++ b/packages/cli/src/utils/systemInfoFields.test.ts
@@ -38,6 +38,7 @@ describe('getAboutSystemInfoFields', () => {
       'OS',
       'Auth',
       'Model',
+      'Fast Model',
       'Session ID',
       'Sandbox',
       'Proxy',
diff --git a/packages/cli/src/utils/systemInfoFields.ts b/packages/cli/src/utils/systemInfoFields.ts
index 17062b66a..ec40afada 100644
--- a/packages/cli/src/utils/systemInfoFields.ts
+++ b/packages/cli/src/utils/systemInfoFields.ts
@@ -33,6 +33,7 @@ export function getSystemInfoFields(
   addField(fields, t('Auth'), formatAuth(info));
   addField(fields, t('Base URL'), formatBaseUrl(info));
   addField(fields, t('Model'), info.modelVersion);
+  addField(fields, t('Fast Model'), info.fastModel || info.modelVersion);
   addField(fields, t('Session ID'), info.sessionId);
   addField(fields, t('Sandbox'), info.sandboxEnv);
   addField(fields, t('Proxy'), formatProxy(info.proxy));
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
index 7d0cd34b8..0f985364b 100644
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -55,6 +55,12 @@ import {
 } from '../telemetry/index.js';
 import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
 
+// Forked query cache
+import {
+  saveCacheSafeParams,
+  clearCacheSafeParams,
+} from '../followup/forkedQuery.js';
+
 // Utilities
 import {
   getDirectoryContextString,
@@ -234,6 +240,8 @@ export class GeminiClient {
   async startChat(extraHistory?: Content[]): Promise<GeminiChat> {
     this.forceFullIdeContext = true;
     this.hasFailedCompressionAttempt = false;
+    // Clear stale cache params on session reset to prevent cross-session leakage
+    clearCacheSafeParams();
 
     const history = await getInitialChatHistory(this.config, extraHistory);
 
@@ -846,6 +854,27 @@ export class GeminiClient {
       await arenaAgentClient.reportCancelled();
     }
 
+    // Save cache-safe params on successful completion (non-abort) for forked queries
+    if (!signal?.aborted && this.isInitialized()) {
+      try {
+        const chat = this.getChat();
+        // Clone history then truncate to last 40 entries to avoid full-session deep copy overhead
+        const fullHistory = chat.getHistory(true);
+        const maxHistoryForCache = 40;
+        const cachedHistory =
+          fullHistory.length > maxHistoryForCache
+            ? fullHistory.slice(-maxHistoryForCache)
+            : fullHistory;
+        saveCacheSafeParams(
+          chat.getGenerationConfig(),
+          cachedHistory,
+          this.config.getModel(),
+        );
+      } catch {
+        // Best-effort — don't block the main flow
+      }
+    }
+
     return turn;
   }
 
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 522deb039..12dfcb080 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -594,6 +594,11 @@ export class GeminiChat {
     this.generationConfig.tools = tools;
   }
 
+  /** Returns a shallow copy of the current generation config (for cache param snapshots); nested values stay shared. */
+  getGenerationConfig(): GenerateContentConfig {
+    return { ...this.generationConfig };
+  }
+
   async maybeIncludeSchemaDepthContext(error: StructuredError): Promise<void> {
     // Check for potentially problematic cyclic tools with cyclic schemas
     // and include a recommendation to remove potentially problematic tools.
diff --git a/packages/core/src/followup/followupState.test.ts b/packages/core/src/followup/followupState.test.ts
new file mode 100644
index 000000000..325c967b0
--- /dev/null
+++ b/packages/core/src/followup/followupState.test.ts
@@ -0,0 +1,312 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  INITIAL_FOLLOWUP_STATE,
+  createFollowupController,
+} from './followupState.js';
+import type { FollowupState } from './followupState.js';
+
+describe('createFollowupController', () => {
+  beforeEach(() => {
+    vi.useFakeTimers();
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it('sets suggestion after delay', () => {
+    const onStateChange = vi.fn();
+    const ctrl = createFollowupController({ onStateChange });
+
+    ctrl.setSuggestion('commit this');
+
+    // Not yet — delay hasn't elapsed
+    expect(onStateChange).not.toHaveBeenCalled();
+
+    vi.advanceTimersByTime(300);
+
+    expect(onStateChange).toHaveBeenCalledTimes(1);
+    const state = onStateChange.mock.calls[0][0] as FollowupState;
+    expect(state.isVisible).toBe(true);
+    expect(state.suggestion).toBe('commit this');
+
+    ctrl.cleanup();
+  });
+
+  it('clears immediately when given null', () => {
+    const onStateChange = vi.fn();
+    const ctrl = createFollowupController({ onStateChange });
+
+    ctrl.setSuggestion(null);
+
+    expect(onStateChange).toHaveBeenCalledTimes(1);
+    expect(onStateChange.mock.calls[0][0]).toEqual(INITIAL_FOLLOWUP_STATE);
+
+    ctrl.cleanup();
+  });
+
+  it('does not set suggestion when disabled', () => {
+    const onStateChange = vi.fn();
+    const ctrl = createFollowupController({
+      enabled: false,
+      onStateChange,
+    });
+
+    ctrl.setSuggestion('commit this');
+    vi.advanceTimersByTime(300);
+
+    expect(onStateChange).not.toHaveBeenCalled();
+
+    ctrl.cleanup();
+  });
+
+  it('accept invokes onAccept callback and clears state', async () => {
+    const onStateChange = vi.fn();
+    const onAccept = vi.fn();
+    const ctrl = createFollowupController({
+      onStateChange,
+      getOnAccept: () => onAccept,
+    });
+
+    ctrl.setSuggestion('commit this');
+    vi.advanceTimersByTime(300);
+    onStateChange.mockClear();
+
+    ctrl.accept();
+
+    // State should be cleared
+    expect(onStateChange).toHaveBeenCalledWith(INITIAL_FOLLOWUP_STATE);
+
+    // Callback fires via microtask — flush it
+    await Promise.resolve();
+
+    expect(onAccept).toHaveBeenCalledTimes(1);
+    expect(onAccept).toHaveBeenCalledWith('commit this');
+
+    ctrl.cleanup();
+  });
+
+  it('dismiss clears state', () => {
+    const onStateChange = vi.fn();
+    const ctrl = createFollowupController({ onStateChange });
+
+    ctrl.setSuggestion('commit this');
+    vi.advanceTimersByTime(300);
+    onStateChange.mockClear();
+
+    ctrl.dismiss();
+
+    expect(onStateChange).toHaveBeenCalledWith(INITIAL_FOLLOWUP_STATE);
+
+    ctrl.cleanup();
+  });
+
+  it('accept recovers when onAccept callback throws', async () => {
+    const onStateChange = vi.fn();
+    const consoleErrorSpy = vi
+      .spyOn(console, 'error')
+      .mockImplementation(() => {});
+
+    let callCount = 0;
+    const onAccept = vi.fn().mockImplementation(() => {
+      callCount++;
+      if (callCount === 1) {
+        throw new Error('callback error');
+      }
+    });
+    const ctrl = createFollowupController({
+      onStateChange,
+      getOnAccept: () => onAccept,
+    });
+
+    ctrl.setSuggestion('commit this');
+    vi.advanceTimersByTime(300);
+
+    // First accept — callback throws, but lock should still be released
+    ctrl.accept();
+    await Promise.resolve();
+
+    expect(consoleErrorSpy).toHaveBeenCalledWith(
+      '[followup] onAccept callback threw:',
+      expect.any(Error),
+    );
+
+    // Advance past debounce timer to release the accepting lock
+    vi.advanceTimersByTime(100);
+
+    // Set suggestion again for second accept
+    ctrl.setSuggestion('run tests');
+    vi.advanceTimersByTime(300);
+
+    // Second accept — should NOT be blocked
+    ctrl.accept();
+    await Promise.resolve();
+
+    expect(onAccept).toHaveBeenCalledTimes(2);
+    expect(onAccept).toHaveBeenNthCalledWith(1, 'commit this');
+    expect(onAccept).toHaveBeenNthCalledWith(2, 'run tests');
+
+    ctrl.cleanup();
+    consoleErrorSpy.mockRestore();
+  });
+
+  it('cleanup prevents pending timers from firing', () => {
+    const onStateChange = vi.fn();
+    const ctrl = createFollowupController({ onStateChange });
+
+    ctrl.setSuggestion('commit this');
+    ctrl.cleanup();
+
+    vi.advanceTimersByTime(300);
+
+    expect(onStateChange).not.toHaveBeenCalled();
+  });
+
+  it('onOutcome fires with accepted on accept', async () => {
+    const onStateChange = vi.fn();
+    const onOutcome = vi.fn();
+    const ctrl = createFollowupController({ onStateChange, onOutcome });
+
+    ctrl.setSuggestion('commit this');
+    vi.advanceTimersByTime(300);
+
+    ctrl.accept('tab');
+
+    expect(onOutcome).toHaveBeenCalledTimes(1);
+    expect(onOutcome).toHaveBeenCalledWith(
+      expect.objectContaining({
+        outcome: 'accepted',
+        accept_method: 'tab',
+        suggestion_length: 11,
+      }),
+    );
+
+    ctrl.cleanup();
+  });
+
+  it('onOutcome fires with ignored on dismiss', () => {
+    const onStateChange = vi.fn();
+    const onOutcome = vi.fn();
+    const ctrl = createFollowupController({ onStateChange, onOutcome });
+
+    ctrl.setSuggestion('commit this');
+    vi.advanceTimersByTime(300);
+
+    ctrl.dismiss();
+
+    expect(onOutcome).toHaveBeenCalledTimes(1);
+    expect(onOutcome).toHaveBeenCalledWith(
+      expect.objectContaining({
+        outcome: 'ignored',
+        suggestion_length: 11,
+      }),
+    );
+
+    ctrl.cleanup();
+  });
+
+  it('onOutcome error does not block state clear', () => {
+    const onStateChange = vi.fn();
+    const consoleErrorSpy = vi
+      .spyOn(console, 'error')
+      .mockImplementation(() => {});
+    const onOutcome = vi.fn().mockImplementation(() => {
+      throw new Error('telemetry crash');
+    });
+    const ctrl = createFollowupController({ onStateChange, onOutcome });
+
+    ctrl.setSuggestion('test');
+    vi.advanceTimersByTime(300);
+    onStateChange.mockClear();
+
+    ctrl.accept('enter');
+
+    // State should still be cleared despite onOutcome throwing
+    expect(onStateChange).toHaveBeenCalledWith(INITIAL_FOLLOWUP_STATE);
+    expect(consoleErrorSpy).toHaveBeenCalled();
+
+    ctrl.cleanup();
+    consoleErrorSpy.mockRestore();
+  });
+
+  it('dismiss does not fire onOutcome when already cleared', () => {
+    const onStateChange = vi.fn();
+    const onOutcome = vi.fn();
+    const ctrl = createFollowupController({ onStateChange, onOutcome });
+
+    // No suggestion set — dismiss should be a no-op
+    ctrl.dismiss();
+
+    expect(onOutcome).not.toHaveBeenCalled();
+
+    ctrl.cleanup();
+  });
+
+  it('clear resets the accepting lock', async () => {
+    const onStateChange = vi.fn();
+    const onAccept = vi.fn();
+    const ctrl = createFollowupController({
+      onStateChange,
+      getOnAccept: () => onAccept,
+    });
+
+    ctrl.setSuggestion('first');
+    vi.advanceTimersByTime(300);
+
+    ctrl.accept();
+    // clear before debounce timeout releases lock
+    ctrl.clear();
+
+    // Set new suggestion and accept again — should work
+    ctrl.setSuggestion('second');
+    vi.advanceTimersByTime(300);
+    ctrl.accept();
+    await Promise.resolve();
+
+    expect(onAccept).toHaveBeenCalledTimes(2);
+
+    ctrl.cleanup();
+  });
+
+  it('double accept is blocked by debounce lock', async () => {
+    const onStateChange = vi.fn();
+    const onAccept = vi.fn();
+    const ctrl = createFollowupController({
+      onStateChange,
+      getOnAccept: () => onAccept,
+    });
+
+    ctrl.setSuggestion('text');
+    vi.advanceTimersByTime(300);
+
+    ctrl.accept();
+    ctrl.accept(); // second call should be blocked
+    await Promise.resolve();
+
+    expect(onAccept).toHaveBeenCalledTimes(1);
+
+    ctrl.cleanup();
+  });
+
+  it('setSuggestion replaces a pending suggestion', () => {
+    const onStateChange = vi.fn();
+    const ctrl = createFollowupController({ onStateChange });
+
+    ctrl.setSuggestion('first');
+    vi.advanceTimersByTime(150); // halfway through delay
+    ctrl.setSuggestion('second'); // replace
+    vi.advanceTimersByTime(300);
+
+    // Only 'second' should have fired
+    expect(onStateChange).toHaveBeenCalledTimes(1);
+    expect(onStateChange.mock.calls[0][0].suggestion).toBe('second');
+
+    ctrl.cleanup();
+  });
+});
diff --git a/packages/core/src/followup/followupState.ts b/packages/core/src/followup/followupState.ts
new file mode 100644
index 000000000..8430e206f
--- /dev/null
+++ b/packages/core/src/followup/followupState.ts
@@ -0,0 +1,229 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Shared Follow-up Suggestions State Logic
+ *
+ * Framework-agnostic state management for prompt suggestions,
+ * shared between CLI (Ink) and WebUI (React) hooks.
+ */
+
+/**
+ * State for prompt suggestion display.
+ */
+export interface FollowupState {
+  /** Current suggestion text, or null when there is none */
+  suggestion: string | null;
+  /** Whether to show suggestion */
+  isVisible: boolean;
+  /** `Date.now()` when the suggestion was shown, 0 if none (for telemetry) */
+  shownAt: number;
+}
+
+/** Initial empty state; frozen, and reused whenever the state is cleared */
+export const INITIAL_FOLLOWUP_STATE: Readonly<FollowupState> = Object.freeze({
+  suggestion: null,
+  isVisible: false,
+  shownAt: 0,
+});
+
+// ---------------------------------------------------------------------------
+// Framework-agnostic controller
+// ---------------------------------------------------------------------------
+
+/** Delay (ms) from setSuggestion(text) until the suggestion is shown */
+const SUGGESTION_DELAY_MS = 300;
+/** Accept lock duration (ms) after onAccept runs; blocks rapid-fire accepts */
+const ACCEPT_DEBOUNCE_MS = 100;
+
+/**
+ * Options for creating a followup controller
+ */
+export interface FollowupControllerOptions {
+  /** Feature switch (default true); only gates setSuggestion(text) */
+  enabled?: boolean;
+  /** Called with the new state on every transition, even a redundant clear */
+  onStateChange: (state: FollowupState) => void;
+  /**
+   * Returns the current onAccept callback.
+   * A getter is used so the controller always invokes the latest callback
+   * without requiring re-creation when the callback reference changes.
+   */
+  getOnAccept?: () => ((text: string) => void) | undefined;
+  /**
+   * Called when a suggestion outcome is determined (accepted or ignored).
+   * Used for telemetry; errors it throws are caught and logged. 'suppressed'
+   * outcomes are logged at the generation site, not through this callback.
+   */
+  onOutcome?: (params: {
+    outcome: 'accepted' | 'ignored';
+    accept_method?: 'tab' | 'enter' | 'right';
+    time_ms: number;
+    suggestion_length: number;
+  }) => void;
+}
+
+/**
+ * Actions returned by createFollowupController.
+ * These are stable (never change identity) and safe to call from any context.
+ */
+export interface FollowupControllerActions {
+  /** Set suggestion text (with delayed show). Null clears immediately. */
+  setSuggestion: (text: string | null) => void;
+  /** Accept the current suggestion; onAccept is invoked in a microtask */
+  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  /** Dismiss the suggestion; reports 'ignored' if one was visible */
+  dismiss: () => void;
+  /** Hard-clear all state, timers and the accept lock; reports no outcome */
+  clear: () => void;
+  /** Cancel timers without publishing a state change — call on unmount */
+  cleanup: () => void;
+}
+
+/**
+ * Creates a framework-agnostic followup suggestion controller.
+ *
+ * Encapsulates timer management, accept debounce, and state transitions so
+ * that React hooks (CLI and WebUI) only need thin wrappers around
+ * `useState` + this controller. `options` is read once at creation.
+ */
+export function createFollowupController(
+  options: FollowupControllerOptions,
+): FollowupControllerActions {
+  const { enabled = true, onStateChange, getOnAccept, onOutcome } = options;
+
+  let currentState: FollowupState = INITIAL_FOLLOWUP_STATE;
+  let timeoutId: ReturnType<typeof setTimeout> | null = null; // show delay
+  let accepting = false; // accept lock (re-entrancy guard)
+  let acceptTimeoutId: ReturnType<typeof setTimeout> | null = null; // unlock
+  // Bumped by clear()/cleanup(); lets a queued accept detect the reset
+  let resetEpoch = 0;
+
+  function applyState(next: FollowupState): void {
+    currentState = next;
+    onStateChange(next);
+  }
+
+  function cancelPendingShow(): void {
+    if (timeoutId) {
+      clearTimeout(timeoutId);
+      timeoutId = null;
+    }
+  }
+
+  function clearTimers(): void {
+    resetEpoch++;
+    cancelPendingShow();
+    if (acceptTimeoutId) {
+      clearTimeout(acceptTimeoutId);
+      acceptTimeoutId = null;
+    }
+  }
+
+  const setSuggestion = (text: string | null): void => {
+    cancelPendingShow();
+
+    if (!text) {
+      applyState(INITIAL_FOLLOWUP_STATE);
+      return;
+    }
+
+    // Only schedule new suggestions when enabled
+    if (!enabled) {
+      return;
+    }
+
+    timeoutId = setTimeout(() => {
+      applyState({ suggestion: text, isVisible: true, shownAt: Date.now() });
+    }, SUGGESTION_DELAY_MS);
+  };
+
+  const accept = (method?: 'tab' | 'enter' | 'right'): void => {
+    if (accepting) {
+      return;
+    }
+
+    cancelPendingShow();
+
+    const { suggestion: text, shownAt } = currentState;
+    if (!text) {
+      return;
+    }
+
+    accepting = true;
+    const epochAtAccept = resetEpoch;
+
+    try {
+      onOutcome?.({
+        outcome: 'accepted',
+        accept_method: method,
+        time_ms: shownAt > 0 ? Date.now() - shownAt : 0,
+        suggestion_length: text.length,
+      });
+    } catch (e: unknown) {
+      // eslint-disable-next-line no-console
+      console.error('[followup] onOutcome callback threw:', e);
+    }
+
+    applyState(INITIAL_FOLLOWUP_STATE);
+
+    queueMicrotask(() => {
+      try {
+        getOnAccept?.()?.(text);
+      } catch (error: unknown) {
+        // eslint-disable-next-line no-console
+        console.error('[followup] onAccept callback threw:', error);
+      } finally {
+        // A clear()/cleanup() since accept() already released the lock;
+        // arming the release timer now would outlive that teardown.
+        if (epochAtAccept === resetEpoch) {
+          if (acceptTimeoutId) {
+            clearTimeout(acceptTimeoutId);
+          }
+          acceptTimeoutId = setTimeout(() => {
+            accepting = false;
+          }, ACCEPT_DEBOUNCE_MS);
+        }
+      }
+    });
+  };
+
+  const dismiss = (): void => {
+    cancelPendingShow();
+
+    // Skip if already cleared (e.g., accept already ran)
+    if (!currentState.isVisible && !currentState.suggestion) {
+      return;
+    }
+
+    // Log ignored outcome if a suggestion was visible
+    if (currentState.isVisible && currentState.suggestion) {
+      try {
+        onOutcome?.({
+          outcome: 'ignored',
+          time_ms:
+            currentState.shownAt > 0 ? Date.now() - currentState.shownAt : 0,
+          suggestion_length: currentState.suggestion.length,
+        });
+      } catch (e: unknown) {
+        // eslint-disable-next-line no-console
+        console.error('[followup] onOutcome callback threw:', e);
+      }
+    }
+
+    applyState(INITIAL_FOLLOWUP_STATE);
+  };
+
+  const cleanup = (): void => {
+    clearTimers();
+    accepting = false;
+  };
+
+  const clear = (): void => {
+    cleanup();
+    applyState(INITIAL_FOLLOWUP_STATE);
+  };
+
+  return { setSuggestion, accept, dismiss, clear, cleanup };
+}
diff --git a/packages/core/src/followup/forkedQuery.test.ts b/packages/core/src/followup/forkedQuery.test.ts
new file mode 100644
index 000000000..862d9b9e6
--- /dev/null
+++ b/packages/core/src/followup/forkedQuery.test.ts
@@ -0,0 +1,115 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeEach } from 'vitest';
+import {
+  saveCacheSafeParams,
+  getCacheSafeParams,
+  clearCacheSafeParams,
+} from './forkedQuery.js';
+import type { GenerateContentConfig } from '@google/genai';
+
+describe('CacheSafeParams', () => {
+  beforeEach(() => {
+    clearCacheSafeParams();
+  });
+
+  describe('saveCacheSafeParams / getCacheSafeParams', () => {
+    it('saves and retrieves params', () => {
+      const config: GenerateContentConfig = {
+        systemInstruction: 'You are helpful',
+        tools: [{ functionDeclarations: [] }],
+      };
+
+      saveCacheSafeParams(config, [], 'qwen-max');
+
+      const params = getCacheSafeParams();
+      expect(params).not.toBeNull();
+      expect(params!.model).toBe('qwen-max');
+      expect(params!.history).toEqual([]);
+      expect(params!.version).toBeGreaterThan(0);
+    });
+
+    it('deep clones generationConfig', () => {
+      const config: GenerateContentConfig = {
+        systemInstruction: 'test',
+        tools: [{ functionDeclarations: [{ name: 'tool1' }] }],
+      };
+
+      saveCacheSafeParams(config, [], 'model');
+
+      // Mutate original — should not affect saved params
+      (
+        config.tools![0] as { functionDeclarations: unknown[] }
+      ).functionDeclarations.push({ name: 'tool2' });
+
+      const params = getCacheSafeParams();
+      const savedTools = params!.generationConfig.tools as Array<{
+        functionDeclarations: unknown[];
+      }>;
+      expect(savedTools[0].functionDeclarations).toHaveLength(1);
+    });
+  });
+
+  describe('clearCacheSafeParams', () => {
+    it('clears saved params', () => {
+      saveCacheSafeParams({}, [], 'model');
+      expect(getCacheSafeParams()).not.toBeNull();
+
+      clearCacheSafeParams();
+      expect(getCacheSafeParams()).toBeNull();
+    });
+  });
+
+  describe('version detection', () => {
+    it('increments version when systemInstruction changes', () => {
+      saveCacheSafeParams({ systemInstruction: 'version1' }, [], 'model');
+      const v1 = getCacheSafeParams()!.version;
+
+      saveCacheSafeParams({ systemInstruction: 'version2' }, [], 'model');
+      const v2 = getCacheSafeParams()!.version;
+
+      expect(v2).toBeGreaterThan(v1);
+    });
+
+    it('increments version when tools change', () => {
+      saveCacheSafeParams(
+        { tools: [{ functionDeclarations: [{ name: 'a' }] }] },
+        [],
+        'model',
+      );
+      const v1 = getCacheSafeParams()!.version;
+
+      saveCacheSafeParams(
+        { tools: [{ functionDeclarations: [{ name: 'a' }, { name: 'b' }] }] },
+        [],
+        'model',
+      );
+      const v2 = getCacheSafeParams()!.version;
+
+      expect(v2).toBeGreaterThan(v1);
+    });
+
+    it('does not increment version when only history changes', () => {
+      const config: GenerateContentConfig = {
+        systemInstruction: 'stable',
+        tools: [],
+      };
+
+      saveCacheSafeParams(config, [], 'model');
+      const v1 = getCacheSafeParams()!.version;
+
+      saveCacheSafeParams(
+        config,
+        [{ role: 'user', parts: [{ text: 'hi' }] }],
+        'model',
+      );
+      const v2 = getCacheSafeParams()!.version;
+
+      expect(v2).toBe(v1);
+    });
+  });
+});
diff --git a/packages/core/src/followup/forkedQuery.ts b/packages/core/src/followup/forkedQuery.ts
new file mode 100644
index 000000000..798374c73
--- /dev/null
+++ b/packages/core/src/followup/forkedQuery.ts
@@ -0,0 +1,249 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Forked Query Infrastructure
+ *
+ * Enables cache-aware secondary LLM calls that share the main conversation's
+ * prompt prefix (systemInstruction + tools + history) for cache hits.
+ *
+ * DashScope already enables cache_control via X-DashScope-CacheControl header.
+ * By constructing the forked GeminiChat with identical generationConfig and
+ * history prefix, the fork automatically benefits from prefix caching.
+ */
+
+import type {
+  Content,
+  GenerateContentConfig,
+  GenerateContentResponseUsageMetadata,
+} from '@google/genai';
+import { GeminiChat, StreamEventType } from '../core/geminiChat.js';
+import type { Config } from '../config/config.js';
+
+/**
+ * Snapshot of the main conversation's cache-critical parameters.
+ * Captured after each successful main turn so forked queries share the same prefix.
+ */
+export interface CacheSafeParams {
+  /** Full generation config including systemInstruction and tools */
+  generationConfig: GenerateContentConfig;
+  /** Curated history; stored as passed in, so callers must supply a clone */
+  history: Content[];
+  /** Model identifier */
+  model: string;
+  /** Version — bumped when systemInstruction/tools differ from the prior save */
+  version: number;
+}
+
+/**
+ * Result from a forked query.
+ */
+export interface ForkedQueryResult {
+  /** Concatenated response text, trimmed; null when it is empty */
+  text: string | null;
+  /** Left undefined unless a schema was given and the text parsed as JSON */
+  jsonResult?: Record<string, unknown>;
+  /** Token counts from the last chunk carrying usageMetadata (0 if none) */
+  usage: {
+    inputTokens: number;
+    outputTokens: number;
+    cacheHitTokens: number;
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Global cache params slot
+// ---------------------------------------------------------------------------
+
+let currentCacheSafeParams: CacheSafeParams | null = null;
+let currentVersion = 0;
+
+/**
+ * Store the main conversation's cache-critical parameters as the snapshot
+ * read by forked queries, bumping the version if the prefix inputs changed.
+ * Called from GeminiClient.sendMessageStream() on successful completion.
+ */
+export function saveCacheSafeParams(
+  generationConfig: GenerateContentConfig,
+  history: Content[],
+  model: string,
+): void {
+  const previous = currentCacheSafeParams?.generationConfig;
+  // A field counts as changed when there is no earlier snapshot or its JSON
+  // serialization differs from the one saved last time.
+  const differs = (key: 'systemInstruction' | 'tools'): boolean =>
+    !previous ||
+    JSON.stringify(previous[key]) !== JSON.stringify(generationConfig[key]);
+  const systemInstructionChanged = differs('systemInstruction');
+  const toolsChanged = differs('tools');
+  if (systemInstructionChanged || toolsChanged) {
+    currentVersion += 1;
+  }
+
+  // Only generationConfig is cloned; caller passes an already-cloned history.
+  currentCacheSafeParams = {
+    generationConfig: structuredClone(generationConfig),
+    history,
+    model,
+    version: currentVersion,
+  };
+}
+
+/**
+ * Hand out a deep copy of the saved snapshot so callers cannot mutate it.
+ * Returns null when no snapshot has been saved (or it was cleared).
+ */
+export function getCacheSafeParams(): CacheSafeParams | null {
+  if (!currentCacheSafeParams) return null;
+  return structuredClone(currentCacheSafeParams);
+}
+
+/**
+ * Drop the saved snapshot (e.g., on session reset); currentVersion is kept.
+ */
+export function clearCacheSafeParams(): void {
+  currentCacheSafeParams = null;
+}
+
+// ---------------------------------------------------------------------------
+// Forked chat creation
+// ---------------------------------------------------------------------------
+
+/**
+ * Build a standalone GeminiChat seeded with the snapshot's generationConfig
+ * (systemInstruction + tools) and history, so that requests sent through it
+ * start with the same prompt prefix as the main conversation.
+ *
+ * No chatRecordingService or telemetryService is attached, which keeps the
+ * fork out of the main session's recordings and token counts.
+ */
+export function createForkedChat(
+  config: Config,
+  params: CacheSafeParams,
+): GeminiChat {
+  // Cap the replayed history to bound cost: keep only the newest entries.
+  // slice() with a negative start returns a fresh array holding at most that
+  // many trailing elements, so shorter histories are copied whole.
+  const maxHistoryEntries = 40;
+  const recentHistory = params.history.slice(-maxHistoryEntries);
+
+  // The copy is shallow, so Content entries are shared with `params`. Callers
+  // are expected to pass an already deep-cloned snapshot, and the original
+  // author notes that GeminiChat only reads history entries, never mutates.
+  // NOTE(review): once older entries are dropped the history no longer starts
+  // like the main conversation's — confirm prefix caching still hits then.
+  const forkConfig = {
+    ...params.generationConfig,
+    // Thinking is switched off for forks: suggestions/speculation do not need
+    // reasoning tokens, and they add cost and latency on the fast model path.
+    // The cached prefix (system + tools + history) is unaffected by this.
+    thinkingConfig: { includeThoughts: false },
+  };
+  return new GeminiChat(
+    config,
+    forkConfig,
+    recentHistory,
+    undefined, // no chatRecordingService
+    undefined, // no telemetryService
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Forked query execution
+// ---------------------------------------------------------------------------
+
+function extractUsage(
+  metadata?: GenerateContentResponseUsageMetadata,
+): ForkedQueryResult['usage'] {
+  // Translate SDK usage metadata into our counters; missing values become 0.
+  const inputTokens = metadata?.promptTokenCount ?? 0;
+  const outputTokens = metadata?.candidatesTokenCount ?? 0;
+  const cacheHitTokens = metadata?.cachedContentTokenCount ?? 0;
+  return { inputTokens, outputTokens, cacheHitTokens };
+}
+
+/**
+ * Run a single-turn query (no tool execution loop) on a forked GeminiChat
+ * that shares the main conversation's cache prefix.
+ * @param config - App config
+ * @param userMessage - The user message to send (e.g., SUGGESTION_PROMPT)
+ * @param options - Optional abort signal, JSON schema, and model override
+ * @returns Trimmed text, `jsonResult` (object-valued JSON only), and usage
+ * @throws Error when no CacheSafeParams snapshot has been saved
+ */
+export async function runForkedQuery(
+  config: Config,
+  userMessage: string,
+  options?: {
+    abortSignal?: AbortSignal;
+    /** JSON schema for structured output */
+    jsonSchema?: Record<string, unknown>;
+    /** Override model (e.g., for speculation with a cheaper model) */
+    model?: string;
+  },
+): Promise<ForkedQueryResult> {
+  const params = getCacheSafeParams();
+  if (!params) {
+    throw new Error('CacheSafeParams not available');
+  }
+
+  const model = options?.model ?? params.model;
+  const chat = createForkedChat(config, params);
+
+  // Per-request overrides only; omitted entirely when nothing is set
+  const requestConfig: GenerateContentConfig = {};
+  if (options?.abortSignal) {
+    requestConfig.abortSignal = options.abortSignal;
+  }
+  if (options?.jsonSchema) {
+    requestConfig.responseMimeType = 'application/json';
+    requestConfig.responseJsonSchema = options.jsonSchema;
+  }
+
+  const stream = await chat.sendMessageStream(
+    model,
+    {
+      message: [{ text: userMessage }],
+      config: Object.keys(requestConfig).length > 0 ? requestConfig : undefined,
+    },
+    'forked_query',
+  );
+
+  // Accumulate streamed text; usage stays at zero until metadata arrives
+  let fullText = '';
+  let usage = extractUsage();
+
+  for await (const event of stream) {
+    if (event.type !== StreamEventType.CHUNK) continue;
+    const response = event.value;
+    // Only the first candidate's text parts are collected
+    const text = response.candidates?.[0]?.content?.parts
+      ?.map((p) => p.text ?? '')
+      .join('');
+    if (text) {
+      fullText += text;
+    }
+    if (response.usageMetadata) {
+      usage = extractUsage(response.usageMetadata);
+    }
+  }
+
+  const trimmed = fullText.trim() || null;
+
+  // Parse JSON if schema was provided
+  let jsonResult: Record<string, unknown> | undefined;
+  if (options?.jsonSchema && trimmed) {
+    try {
+      const parsed: unknown = JSON.parse(trimmed);
+      // null and primitives are valid JSON but not a Record; skip them.
+      if (typeof parsed === 'object' && parsed !== null) {
+        jsonResult = parsed as Record<string, unknown>;
+      }
+    } catch {
+      // Model returned non-JSON despite schema constraint — treat as text
+    }
+  }
+
+  return { text: trimmed, jsonResult, usage };
+}
diff --git a/packages/core/src/followup/index.ts b/packages/core/src/followup/index.ts
new file mode 100644
index 000000000..d05fa52fd
--- /dev/null
+++ b/packages/core/src/followup/index.ts
@@ -0,0 +1,16 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Prompt Suggestion Module
+ *
+ * Exports for the prompt suggestion feature.
+ */
+
+export * from './followupState.js';
+export * from './suggestionGenerator.js';
+export * from './forkedQuery.js';
+export * from './overlayFs.js';
+export * from './speculationToolGate.js';
+export * from './speculation.js';
diff --git a/packages/core/src/followup/overlayFs.test.ts b/packages/core/src/followup/overlayFs.test.ts
new file mode 100644
index 000000000..b31c4f872
--- /dev/null
+++ b/packages/core/src/followup/overlayFs.test.ts
@@ -0,0 +1,193 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { OverlayFs } from './overlayFs.js';
+import { writeFile, readFile, mkdir, rm } from 'node:fs/promises';
+import { existsSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { randomUUID } from 'node:crypto';
+
+describe('OverlayFs', () => {
+  let testDir: string;
+  let overlay: OverlayFs;
+
+  beforeEach(async () => {
+    testDir = join(tmpdir(), `overlay-test-${randomUUID().slice(0, 8)}`);
+    await mkdir(testDir, { recursive: true });
+    overlay = new OverlayFs(testDir);
+  });
+
+  afterEach(async () => {
+    await overlay.cleanup();
+    await rm(testDir, { recursive: true, force: true });
+  });
+
+  describe('redirectWrite', () => {
+    it('copies existing file to overlay on first write', async () => {
+      // Create a real file
+      const realFile = join(testDir, 'src', 'app.ts');
+      await mkdir(join(testDir, 'src'), { recursive: true });
+      await writeFile(realFile, 'original content');
+
+      const overlayPath = await overlay.redirectWrite(realFile);
+
+      // Overlay file should exist with original content
+      expect(existsSync(overlayPath)).toBe(true);
+      const content = await readFile(overlayPath, 'utf-8');
+      expect(content).toBe('original content');
+    });
+
+    it('returns same overlay path on subsequent writes', async () => {
+      const realFile = join(testDir, 'file.ts');
+      await writeFile(realFile, 'content');
+
+      const path1 = await overlay.redirectWrite(realFile);
+      const path2 = await overlay.redirectWrite(realFile);
+
+      expect(path1).toBe(path2);
+    });
+
+    it('creates overlay path for new files without copying', async () => {
+      const newFile = join(testDir, 'new-file.ts');
+
+      const overlayPath = await overlay.redirectWrite(newFile);
+
+      // Overlay directory should be created but file may not exist yet
+      // (the tool will write to it)
+      expect(overlayPath).toContain('new-file.ts');
+      expect(overlay.getWrittenFiles().has('new-file.ts')).toBe(true);
+    });
+
+    it('throws for paths outside cwd', async () => {
+      await expect(overlay.redirectWrite('/etc/passwd')).rejects.toThrow(
+        'Cannot redirect write outside cwd',
+      );
+    });
+
+    it('throws for path traversal attempts', async () => {
+      await expect(
+        overlay.redirectWrite(join(testDir, '..', '..', 'etc', 'passwd')),
+      ).rejects.toThrow('Cannot redirect write outside cwd');
+    });
+  });
+
+  describe('resolveReadPath', () => {
+    it('returns overlay path for previously written files', async () => {
+      const realFile = join(testDir, 'file.ts');
+      await writeFile(realFile, 'original');
+
+      const overlayPath = await overlay.redirectWrite(realFile);
+      const resolved = overlay.resolveReadPath(realFile);
+
+      expect(resolved).toBe(overlayPath);
+    });
+
+    it('returns real path for files not in overlay', () => {
+      const realFile = join(testDir, 'untouched.ts');
+
+      const resolved = overlay.resolveReadPath(realFile);
+
+      expect(resolved).toBe(realFile);
+    });
+
+    it('returns real path for files outside cwd', () => {
+      const outsidePath = '/etc/hosts';
+
+      const resolved = overlay.resolveReadPath(outsidePath);
+
+      expect(resolved).toBe(outsidePath);
+    });
+  });
+
+  describe('resolveReadPath with relative paths', () => {
+    it('resolves relative paths against realCwd', async () => {
+      const realFile = join(testDir, 'src', 'app.ts');
+      await mkdir(join(testDir, 'src'), { recursive: true });
+      await writeFile(realFile, 'content');
+
+      const overlayPath = await overlay.redirectWrite(realFile);
+      // Pass a path relative to realCwd; it must map to the same overlay entry
+      const resolved = overlay.resolveReadPath(join('src', 'app.ts'));
+
+      expect(resolved).toBe(overlayPath);
+      expect(resolved).not.toBe(realFile);
+    });
+  });
+
+  describe('applyToReal', () => {
+    it('copies overlay files back to real filesystem', async () => {
+      const realFile = join(testDir, 'file.ts');
+      await writeFile(realFile, 'original');
+
+      const overlayPath = await overlay.redirectWrite(realFile);
+      await writeFile(overlayPath, 'modified in overlay');
+
+      const applied = await overlay.applyToReal();
+
+      expect(applied).toContain(realFile);
+      const content = await readFile(realFile, 'utf-8');
+      expect(content).toBe('modified in overlay');
+    });
+
+    it('creates directories for new files during apply', async () => {
+      const newFile = join(testDir, 'new', 'deep', 'file.ts');
+      const overlayPath = await overlay.redirectWrite(newFile);
+      await writeFile(overlayPath, 'new file content');
+
+      const applied = await overlay.applyToReal();
+
+      expect(applied).toContain(newFile);
+      const content = await readFile(newFile, 'utf-8');
+      expect(content).toBe('new file content');
+    });
+
+    it('returns empty array when no files written', async () => {
+      const applied = await overlay.applyToReal();
+
+      expect(applied).toEqual([]);
+    });
+  });
+
+  describe('cleanup', () => {
+    it('removes the overlay directory', async () => {
+      const realFile = join(testDir, 'file.ts');
+      await writeFile(realFile, 'content');
+      await overlay.redirectWrite(realFile);
+
+      const overlayDir = overlay.getOverlayDir();
+      expect(existsSync(overlayDir)).toBe(true);
+
+      await overlay.cleanup();
+
+      expect(existsSync(overlayDir)).toBe(false);
+    });
+
+    it('does not throw if overlay directory does not exist', async () => {
+      await overlay.cleanup();
+      // Should not throw on double cleanup
+      await expect(overlay.cleanup()).resolves.not.toThrow();
+    });
+  });
+
+  describe('getWrittenFiles', () => {
+    it('returns a copy of written files map', async () => {
+      const realFile = join(testDir, 'file.ts');
+      await writeFile(realFile, 'content');
+      await overlay.redirectWrite(realFile);
+
+      const files = overlay.getWrittenFiles();
+
+      expect(files.size).toBe(1);
+      expect(files.has('file.ts')).toBe(true);
+
+      // Modifying returned map should not affect internal state
+      files.clear();
+      expect(overlay.getWrittenFiles().size).toBe(1);
+    });
+  });
+});
diff --git a/packages/core/src/followup/overlayFs.ts b/packages/core/src/followup/overlayFs.ts
new file mode 100644
index 000000000..6e115cc7e
--- /dev/null
+++ b/packages/core/src/followup/overlayFs.ts
@@ -0,0 +1,140 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copy-on-Write Overlay Filesystem
+ *
+ * Provides file isolation for speculative execution. Writes go to a temporary
+ * overlay directory while reads resolve to overlay (if previously written)
+ * or the real filesystem.
+ */
+
+import { mkdir, copyFile, rm } from 'node:fs/promises';
+import { existsSync } from 'node:fs';
+import { join, dirname, relative, isAbsolute } from 'node:path';
+import { tmpdir } from 'node:os';
+import { randomUUID } from 'node:crypto';
+
+/**
+ * Copy-on-write overlay filesystem for speculation safety.
+ */
+export class OverlayFs {
+  private readonly overlayDir: string;
+  private readonly writtenFiles = new Map<string, string>(); // relPath -> overlayPath
+
+  constructor(private readonly realCwd: string) {
+    const id = randomUUID().slice(0, 8);
+    this.overlayDir = join(
+      tmpdir(),
+      'qwen-speculation',
+      String(process.pid),
+      id,
+    );
+  }
+
+  /** Get the overlay directory path */
+  getOverlayDir(): string {
+    return this.overlayDir;
+  }
+
+  /**
+   * Resolve a read path: return overlay path if the file was previously written,
+   * otherwise return the input path unchanged (including paths outside cwd).
+   */
+  resolveReadPath(realPath: string): string {
+    const rel = this.toRelative(realPath);
+    if (rel && this.writtenFiles.has(rel)) {
+      return this.writtenFiles.get(rel)!;
+    }
+    return realPath;
+  }
+
+  /**
+   * Redirect a write to the overlay, copying the original there on first use
+   * if it exists. Returns the overlay path; throws for paths not inside cwd.
+   */
+  async redirectWrite(realPath: string): Promise<string> {
+    const rel = this.toRelative(realPath);
+    if (!rel) {
+      throw new Error(`Cannot redirect write outside cwd: ${realPath}`);
+    }
+
+    // Already in overlay
+    if (this.writtenFiles.has(rel)) {
+      return this.writtenFiles.get(rel)!;
+    }
+
+    const overlayPath = join(this.overlayDir, rel);
+    await mkdir(dirname(overlayPath), { recursive: true });
+
+    // Copy-on-write: copy original to overlay if it exists
+    const originalPath = join(this.realCwd, rel);
+    if (existsSync(originalPath)) {
+      try {
+        await copyFile(originalPath, overlayPath);
+      } catch {
+        // Original may be a directory or unreadable — proceed without copy
+      }
+    }
+    // For new files nothing is copied: only the parent directory exists so far
+
+    this.writtenFiles.set(rel, overlayPath);
+    return overlayPath;
+  }
+
+  /**
+   * Get all files that were written to the overlay (returned as a copy).
+   */
+  getWrittenFiles(): Map<string, string> {
+    return new Map(this.writtenFiles);
+  }
+
+  /**
+   * Copy all overlay files back to the real filesystem.
+   * Returns the list of real paths that were updated.
+   */
+  async applyToReal(): Promise<string[]> {
+    const applied: string[] = [];
+
+    for (const [rel, overlayPath] of this.writtenFiles) {
+      const realPath = join(this.realCwd, rel);
+      try {
+        await mkdir(dirname(realPath), { recursive: true });
+        await copyFile(overlayPath, realPath);
+        applied.push(realPath);
+      } catch {
+        // Best-effort — this file is left out of the result; continue
+      }
+    }
+
+    return applied;
+  }
+
+  /**
+   * Clean up the overlay directory.
+   */
+  async cleanup(): Promise<void> {
+    try {
+      await rm(this.overlayDir, { recursive: true, force: true });
+    } catch {
+      // Best-effort cleanup
+    }
+  }
+
+  /**
+   * Map a path (absolute, or relative to realCwd) to its cwd-relative form.
+   * Returns null if outside cwd; names merely starting with ".." are inside.
+   */
+  private toRelative(inputPath: string): string | null {
+    // Resolve relative paths against realCwd (not process.cwd())
+    const abs = isAbsolute(inputPath)
+      ? inputPath
+      : join(this.realCwd, inputPath);
+    const rel = relative(this.realCwd, abs);
+    if (isAbsolute(rel) || /^\.\.(?:[\\/]|$)/.test(rel)) {
+      return null;
+    }
+    return rel;
+  }
+}
diff --git a/packages/core/src/followup/smoke.test.ts b/packages/core/src/followup/smoke.test.ts
new file mode 100644
index 000000000..f14295f1d
--- /dev/null
+++ b/packages/core/src/followup/smoke.test.ts
@@ -0,0 +1,181 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Smoke Tests — E2E verification of core followup modules working together.
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  shouldFilterSuggestion,
+  getFilterReason,
+} from './suggestionGenerator.js';
+import { OverlayFs } from './overlayFs.js';
+import { evaluateToolCall, rewritePathArgs } from './speculationToolGate.js';
+import {
+  saveCacheSafeParams,
+  getCacheSafeParams,
+  clearCacheSafeParams,
+} from './forkedQuery.js';
+import { ensureToolResultPairing } from './speculation.js';
+import { ToolNames } from '../tools/tool-names.js';
+import { ApprovalMode } from '../config/config.js';
+import { writeFile, mkdir, readFile, rm } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { randomUUID } from 'node:crypto';
+
+describe('SMOKE TESTS — E2E Verification', () => {
+  describe('Smoke 1: Filter against realistic LLM outputs', () => {
+    const good = [
+      'commit this',
+      'run the tests',
+      'try it out',
+      'push it',
+      'yes',
+      '/commit',
+      'create a PR',
+      'run nicely formatted tests',
+      'fix the greatest issue',
+    ];
+    const bad = [
+      'done',
+      'looks good',
+      'Let me check that',
+      'nothing found',
+      '(silence)',
+      'thanks for the help',
+      "I'll run the tests",
+    ];
+
+    it.each(good)('allows: "%s"', (s) => {
+      expect(shouldFilterSuggestion(s)).toBe(false);
+    });
+
+    it.each(bad)('filters: "%s"', (s) => {
+      expect(shouldFilterSuggestion(s)).toBe(true);
+    });
+
+    it('getFilterReason returns named reasons', () => {
+      expect(getFilterReason('done')).toBe('done');
+      expect(getFilterReason('nothing found')).toBe('meta_text');
+      expect(getFilterReason('(no suggestion needed)')).toBe('meta_wrapped');
+      expect(getFilterReason('commit this')).toBeNull();
+    });
+  });
+
+  describe('Smoke 2: OverlayFs full round-trip', () => {
+    it('write → read overlay → apply → verify real file', async () => {
+      const dir = join(tmpdir(), `smoke-${randomUUID().slice(0, 8)}`);
+      await mkdir(dir, { recursive: true });
+      const overlay = new OverlayFs(dir);
+      // try/finally: remove the temp dirs even when an assertion fails
+      try {
+        const realFile = join(dir, 'app.ts');
+        await writeFile(realFile, 'original content');
+        const overlayPath = await overlay.redirectWrite(realFile);
+        await writeFile(overlayPath, 'modified in speculation');
+        expect(overlay.resolveReadPath(realFile)).toBe(overlayPath);
+        expect(await readFile(realFile, 'utf-8')).toBe('original content');
+        const applied = await overlay.applyToReal();
+        expect(applied).toContain(realFile);
+        const finalContent = await readFile(realFile, 'utf-8');
+        expect(finalContent).toBe('modified in speculation');
+      } finally {
+        await overlay.cleanup();
+        await rm(dir, { recursive: true, force: true });
+      }
+    });
+  });
+
+  describe('Smoke 3: ToolGate → OverlayFs integration', () => {
+    it('write redirects to overlay, read resolves from overlay', async () => {
+      const dir = join(tmpdir(), `smoke-gate-${randomUUID().slice(0, 8)}`);
+      await mkdir(dir, { recursive: true });
+      const overlay = new OverlayFs(dir);
+      // try/finally: remove the temp dirs even when an assertion fails
+      try {
+        const filePath = join(dir, 'file.ts');
+        await writeFile(filePath, 'real content');
+        const wr = await evaluateToolCall(
+          ToolNames.EDIT,
+          { file_path: filePath },
+          overlay,
+          ApprovalMode.AUTO_EDIT,
+        );
+        expect(wr.action).toBe('redirect');
+        const writeArgs: Record<string, unknown> = { file_path: filePath };
+        await rewritePathArgs(writeArgs, overlay);
+        const op = writeArgs['file_path'] as string;
+        expect(op).toContain('qwen-speculation');
+        await writeFile(op, 'speculated content');
+        const readArgs: Record<string, unknown> = { file_path: filePath };
+        await evaluateToolCall(
+          ToolNames.READ_FILE,
+          readArgs,
+          overlay,
+          ApprovalMode.AUTO_EDIT,
+        );
+        expect(readArgs['file_path']).toBe(op);
+        expect(await readFile(filePath, 'utf-8')).toBe('real content');
+      } finally {
+        await overlay.cleanup();
+        await rm(dir, { recursive: true, force: true });
+      }
+    });
+  });
+
+  describe('Smoke 4: CacheSafeParams lifecycle', () => {
+    it('save → get → mutate → verify isolation → clear', () => {
+      clearCacheSafeParams();
+
+      const config = {
+        systemInstruction: 'You are helpful',
+        tools: [{ functionDeclarations: [{ name: 'edit' }] }],
+      };
+
+      saveCacheSafeParams(
+        config,
+        [{ role: 'user' as const, parts: [{ text: 'hi' }] }],
+        'qwen-max',
+      );
+
+      const p = getCacheSafeParams();
+      expect(p).not.toBeNull();
+      expect(p!.model).toBe('qwen-max');
+
+      (
+        config.tools[0] as { functionDeclarations: unknown[] }
+      ).functionDeclarations.push({ name: 'shell' });
+      const saved = getCacheSafeParams();
+      const tools = saved!.generationConfig.tools as Array<{
+        functionDeclarations: unknown[];
+      }>;
+      expect(tools[0].functionDeclarations).toHaveLength(1);
+
+      clearCacheSafeParams();
+      expect(getCacheSafeParams()).toBeNull();
+    });
+  });
+
+  describe('Smoke 5: ensureToolResultPairing', () => {
+    it('strips orphaned functionCalls, keeps text', () => {
+      const messages = [
+        { role: 'user' as const, parts: [{ text: 'edit file' }] },
+        {
+          role: 'model' as const,
+          parts: [
+            { text: 'editing...' },
+            { functionCall: { name: 'edit', args: {} } },
+            { functionCall: { name: 'shell', args: {} } },
+          ],
+        },
+      ];
+
+      const result = ensureToolResultPairing(messages);
+      expect(result).toHaveLength(2);
+      expect(result[1].parts).toEqual([{ text: 'editing...' }]);
+    });
+  });
+});
diff --git a/packages/core/src/followup/speculation.test.ts b/packages/core/src/followup/speculation.test.ts
new file mode 100644
index 000000000..e1361bcea
--- /dev/null
+++ b/packages/core/src/followup/speculation.test.ts
@@ -0,0 +1,113 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { ensureToolResultPairing } from './speculation.js';
+import type { Content } from '@google/genai';
+
+describe('ensureToolResultPairing', () => {
+  it('returns empty array unchanged', () => {
+    expect(ensureToolResultPairing([])).toEqual([]);
+  });
+
+  it('preserves complete messages (no function calls)', () => {
+    const messages: Content[] = [
+      { role: 'user', parts: [{ text: 'hello' }] },
+      { role: 'model', parts: [{ text: 'hi there' }] },
+    ];
+    const result = ensureToolResultPairing(messages);
+    expect(result).toEqual(messages);
+  });
+
+  it('preserves paired functionCall + functionResponse', () => {
+    const messages: Content[] = [
+      { role: 'user', parts: [{ text: 'edit file' }] },
+      {
+        role: 'model',
+        parts: [
+          { text: 'editing...' },
+          { functionCall: { name: 'edit', args: { file: 'a.ts' } } },
+        ],
+      },
+      {
+        role: 'user',
+        parts: [
+          {
+            functionResponse: {
+              name: 'edit',
+              response: { output: 'done' },
+            },
+          },
+        ],
+      },
+      { role: 'model', parts: [{ text: 'file edited' }] },
+    ];
+    const result = ensureToolResultPairing(messages);
+    expect(result).toEqual(messages);
+  });
+
+  it('strips unpaired functionCalls from last model message (keeps text)', () => {
+    const messages: Content[] = [
+      { role: 'user', parts: [{ text: 'do something' }] },
+      {
+        role: 'model',
+        parts: [
+          { text: 'I will edit the file' },
+          { functionCall: { name: 'edit', args: {} } },
+        ],
+      },
+      // No functionResponse follows — boundary truncation
+    ];
+    const result = ensureToolResultPairing(messages);
+    expect(result).toHaveLength(2);
+    expect(result[1].parts).toEqual([{ text: 'I will edit the file' }]);
+  });
+
+  it('removes last model message entirely if only functionCalls', () => {
+    const messages: Content[] = [
+      { role: 'user', parts: [{ text: 'do something' }] },
+      {
+        role: 'model',
+        parts: [
+          { functionCall: { name: 'edit', args: {} } },
+          { functionCall: { name: 'shell', args: {} } },
+        ],
+      },
+    ];
+    const result = ensureToolResultPairing(messages);
+    expect(result).toHaveLength(1);
+    expect(result[0].role).toBe('user');
+  });
+
+  it('does not modify messages when last message is user role', () => {
+    const messages: Content[] = [
+      { role: 'user', parts: [{ text: 'hello' }] },
+      { role: 'model', parts: [{ text: 'response' }] },
+      {
+        role: 'user',
+        parts: [
+          {
+            functionResponse: {
+              name: 'tool',
+              response: { output: 'result' },
+            },
+          },
+        ],
+      },
+    ];
+    const result = ensureToolResultPairing(messages);
+    expect(result).toEqual(messages);
+  });
+
+  it('handles model message with no parts', () => {
+    const messages: Content[] = [
+      { role: 'user', parts: [{ text: 'hello' }] },
+      { role: 'model', parts: [] },
+    ];
+    const result = ensureToolResultPairing(messages);
+    expect(result).toEqual(messages);
+  });
+});
diff --git a/packages/core/src/followup/speculation.ts b/packages/core/src/followup/speculation.ts
new file mode 100644
index 000000000..c11b472c6
--- /dev/null
+++ b/packages/core/src/followup/speculation.ts
@@ -0,0 +1,563 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Speculation Engine
+ *
+ * Speculatively executes the suggestion currently shown to the user, before
+ * it is accepted, using a forked GeminiChat with copy-on-write file isolation.
+ *
+ * Flow:
+ * 1. Suggestion shown → startSpeculation() fires
+ * 2. Speculative loop runs in background (read-only tools + overlay writes)
+ * 3. User presses Tab/Enter → acceptSpeculation() copies overlay to real FS
+ * 4. User types → abortSpeculation() cleans up
+ */
+
+import type { Content, Part } from '@google/genai';
+import type { Config } from '../config/config.js';
+import type { GeminiClient } from '../core/client.js';
+import { StreamEventType } from '../core/geminiChat.js';
+import { OverlayFs } from './overlayFs.js';
+import { evaluateToolCall, rewritePathArgs } from './speculationToolGate.js';
+import {
+  getCacheSafeParams,
+  createForkedChat,
+  runForkedQuery,
+} from './forkedQuery.js';
+import { getFilterReason, SUGGESTION_PROMPT } from './suggestionGenerator.js';
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+const MAX_SPECULATION_TURNS = 20; // upper bound on model round-trips per speculation
+const MAX_SPECULATION_MESSAGES = 100; // loop stops once this many messages accumulate
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+export interface BoundaryInfo {
+  type: string; // e.g. 'boundary' (the only value set in this file)
+  detail: string; // e.g. 'speculation_boundary'
+  completedAt: number; // Date.now() timestamp taken when the boundary was hit
+}
+
+export interface SpeculationState {
+  id: string; // short random base-36 identifier ('' for the idle constant)
+  status: 'idle' | 'running' | 'completed' | 'boundary' | 'aborted';
+  suggestion: string; // suggestion text, sent as the first user message
+  overlayFs: OverlayFs | null; // null when no overlay was created
+  abortController: AbortController | null; // aborting it stops the loop
+  messages: Content[]; // speculated turns, filled in when the loop finishes
+  boundary?: BoundaryInfo; // set only when the loop stopped at a boundary
+  startTime: number; // Date.now() at creation (0 for the idle constant)
+  toolUseCount: number; // incremented for every tool the loop executes
+  pipelinedSuggestion?: string; // next suggestion, generated after completion
+}
+
+export interface SpeculationResult {
+  filesApplied: string[]; // result of OverlayFs.applyToReal() ([] without overlay)
+  messages: Content[]; // pairing-cleaned messages added to the main history
+  boundary?: BoundaryInfo; // copied from the speculation state
+  timeSavedMs: number; // ms from speculation start to boundary / accept time
+  nextSuggestion?: string; // the state's pipelinedSuggestion, if any
+}
+
+export const IDLE_SPECULATION: Readonly<SpeculationState> = Object.freeze({
+  id: '', // placeholder state meaning "no speculation"
+  status: 'idle' as const,
+  suggestion: '',
+  overlayFs: null,
+  abortController: null,
+  messages: [], // NOTE: Object.freeze is shallow — this array is not frozen
+  startTime: 0,
+  toolUseCount: 0,
+});
+
+// ---------------------------------------------------------------------------
+// Start speculation
+// ---------------------------------------------------------------------------
+
+/**
+ * Start speculative execution of a suggestion that is shown but not yet accepted.
+ * Returns at once; the returned state is then mutated by a background loop.
+ */
+export async function startSpeculation(
+  config: Config,
+  suggestion: string,
+  parentSignal?: AbortSignal,
+  options?: { model?: string },
+): Promise<SpeculationState> {
+  const cacheSafe = getCacheSafeParams();
+  if (!cacheSafe) {
+    throw new Error('CacheSafeParams not available for speculation');
+  }
+
+  const abortController = new AbortController();
+
+  // If parent was already aborted, return aborted state without starting loop
+  if (parentSignal?.aborted) {
+    return {
+      id: Math.random().toString(36).slice(2, 10),
+      status: 'aborted' as const,
+      suggestion,
+      overlayFs: null, // no overlay is created on this path
+      abortController,
+      messages: [],
+      startTime: Date.now(),
+      toolUseCount: 0,
+    };
+  }
+
+  // Link to parent signal with cleanup to prevent memory leak (#20)
+  let parentAbortHandler: (() => void) | undefined;
+  if (parentSignal) {
+    parentAbortHandler = () => abortController.abort();
+    parentSignal.addEventListener('abort', parentAbortHandler, { once: true });
+  }
+
+  const overlayFs = new OverlayFs(config.getCwd());
+  const startTime = Date.now();
+
+  const state: SpeculationState = {
+    id: Math.random().toString(36).slice(2, 10),
+    status: 'running',
+    suggestion,
+    overlayFs,
+    abortController,
+    messages: [],
+    startTime,
+    toolUseCount: 0,
+  };
+
+  // Run the speculative loop in the background (deliberately not awaited)
+  runSpeculativeLoop(config, state, cacheSafe, options?.model)
+    .then(async (result) => {
+      if (abortController.signal.aborted) {
+        state.status = 'aborted';
+        await overlayFs.cleanup();
+        return;
+      }
+      if (state.status === 'running') { // status may have been changed externally meanwhile
+        state.messages = result.messages;
+        if (result.boundary) {
+          state.boundary = result.boundary;
+          state.status = 'boundary';
+        } else {
+          state.status = 'completed';
+          // Generate pipelined suggestion for the next step
+          if (!abortController.signal.aborted) {
+            try {
+              const next = await generatePipelinedSuggestion(
+                config,
+                suggestion,
+                result.messages,
+                abortController.signal,
+                options?.model,
+              );
+              if (next && state.status === 'completed') {
+                state.pipelinedSuggestion = next;
+              }
+            } catch {
+              // Non-blocking — pipelined suggestion is optional
+            }
+          }
+        }
+      }
+    })
+    .catch(async () => {
+      // Cleanup overlay on error (#16)
+      if (state.status === 'running') {
+        state.status = 'aborted';
+      }
+      await overlayFs.cleanup();
+    })
+    .finally(() => {
+      // Clean up parent signal listener (#20)
+      if (parentSignal && parentAbortHandler) {
+        parentSignal.removeEventListener('abort', parentAbortHandler);
+      }
+    });
+
+  return state; // still 'running' here; later transitions happen on this same object
+}
+
+// ---------------------------------------------------------------------------
+// Speculative execution loop
+// ---------------------------------------------------------------------------
+
+interface LoopResult {
+  messages: Content[]; // suggestion turn followed by the speculated turns
+  boundary?: BoundaryInfo; // present only when the loop stopped at a boundary
+}
+
+/**
+ * Drive the forked chat for up to MAX_SPECULATION_TURNS turns, executing gated
+ * tool calls, and return the speculated messages (plus a boundary, if one hit).
+ */
+async function runSpeculativeLoop(
+  config: Config,
+  state: SpeculationState,
+  cacheSafe: import('./forkedQuery.js').CacheSafeParams,
+  modelOverride?: string,
+): Promise<LoopResult> {
+  const chat = createForkedChat(config, cacheSafe);
+  const model = modelOverride || cacheSafe.model;
+  const approvalMode = config.getApprovalMode();
+  // The suggestion itself is the first user turn of the speculated history
+  const messages: Content[] = [
+    { role: 'user', parts: [{ text: state.suggestion }] },
+  ];
+
+  for (let turn = 0; turn < MAX_SPECULATION_TURNS; turn++) {
+    if (state.abortController?.signal.aborted) break;
+    if (messages.length >= MAX_SPECULATION_MESSAGES) break;
+
+    // Send user message for this turn
+    const lastUserMsg = messages[messages.length - 1];
+    const stream = await chat.sendMessageStream(
+      model,
+      { message: lastUserMsg.parts ?? [] },
+      'speculation',
+    );
+
+    const modelParts: Part[] = [];
+    for await (const event of stream) {
+      if (state.abortController?.signal.aborted) break;
+      if (event.type !== StreamEventType.CHUNK) continue;
+      const response = event.value;
+      const parts = response.candidates?.[0]?.content?.parts ?? [];
+      for (const part of parts) {
+        // Skip thought/reasoning parts — only capture visible text + function calls
+        if (part.text && !(part as Record<string, unknown>)['thought']) {
+          modelParts.push({ text: part.text });
+        }
+        if (part.functionCall && part.functionCall.name) {
+          modelParts.push({
+            functionCall: {
+              name: part.functionCall.name,
+              args: part.functionCall.args,
+            },
+          });
+        }
+      }
+    }
+
+    if (state.abortController?.signal.aborted) break;
+    if (modelParts.length === 0) break;
+
+    const modelMsg: Content = { role: 'model', parts: modelParts };
+    messages.push(modelMsg);
+
+    // Extract function calls from model response
+    const functionCalls = modelParts.filter(
+      (p): p is Part & { functionCall: NonNullable<Part['functionCall']> } =>
+        p.functionCall !== undefined,
+    );
+
+    if (functionCalls.length === 0) {
+      // No tool calls — speculation complete (text-only response)
+      break;
+    }
+
+    // Process each function call through the tool gate
+    const functionResponses: Part[] = [];
+    let hitBoundary = false;
+
+    for (const part of functionCalls) {
+      const fc = part.functionCall;
+      const name = fc.name ?? '';
+      // Work on a shallow copy: the gate and rewritePathArgs mutate args in
+      // place, and overlay paths must not leak into the recorded functionCall.
+      const args = { ...((fc.args ?? {}) as Record<string, unknown>) };
+      const gate = await evaluateToolCall(
+        name,
+        args,
+        state.overlayFs!,
+        approvalMode,
+      );
+
+      if (gate.action === 'boundary') {
+        hitBoundary = true;
+        break;
+      }
+
+      if (gate.action === 'redirect') {
+        try {
+          await rewritePathArgs(args, state.overlayFs!);
+        } catch {
+          // Path rewrite failed (e.g., absolute path outside cwd) — treat as boundary
+          hitBoundary = true;
+          break;
+        }
+      }
+
+      // Execute the tool directly (bypassing CoreToolScheduler)
+      // SECURITY: Only reaches here for read-only tools or writes gated by approvalMode
+      try {
+        const toolRegistry = config.getToolRegistry();
+        const tool = toolRegistry.getTool(name);
+        if (!tool) {
+          functionResponses.push({
+            functionResponse: {
+              name,
+              response: { error: `Tool '${name}' not found` },
+            },
+          });
+          continue;
+        }
+
+        const invocation = tool.build(args);
+        const result = await invocation.execute(state.abortController!.signal);
+        state.toolUseCount++;
+
+        const responseContent =
+          typeof result.llmContent === 'string'
+            ? { output: result.llmContent }
+            : { output: JSON.stringify(result.llmContent) };
+        functionResponses.push({
+          functionResponse: { name, response: responseContent },
+        });
+      } catch (error: unknown) {
+        functionResponses.push({
+          functionResponse: {
+            name,
+            response: {
+              error:
+                error instanceof Error
+                  ? error.message
+                  : 'Tool execution failed',
+            },
+          },
+        });
+      }
+    }
+
+    if (hitBoundary) {
+      // Drop unexecuted function calls so every kept call has a response (#18)
+      if (functionResponses.length > 0) {
+        // Keep the first N functionCall parts (N = functionResponses.length),
+        // matched by order rather than name so duplicate tool names work.
+        let keptFunctionCalls = 0;
+        const keptModelParts = modelParts.filter((p) => {
+          if (!p.functionCall) return true;
+          if (keptFunctionCalls < functionResponses.length) {
+            keptFunctionCalls++;
+            return true;
+          }
+          return false;
+        });
+        if (keptModelParts.length > 0) {
+          messages[messages.length - 1] = {
+            role: 'model',
+            parts: keptModelParts,
+          };
+          // Add the tool results we have
+          messages.push({ role: 'user', parts: functionResponses });
+        } else {
+          messages.pop();
+        }
+      } else {
+        // No tool ran — keep only the text parts, or drop the turn if none
+        const textOnlyParts = modelParts.filter(
+          (p) => p.functionCall === undefined,
+        );
+        if (textOnlyParts.length > 0) {
+          messages[messages.length - 1] = {
+            role: 'model',
+            parts: textOnlyParts,
+          };
+        } else {
+          messages.pop();
+        }
+      }
+
+      return {
+        messages,
+        boundary: {
+          type: 'boundary',
+          detail: 'speculation_boundary',
+          completedAt: Date.now(),
+        },
+      };
+    }
+
+    // Add tool results to history for next turn
+    if (functionResponses.length > 0) {
+      const resultMsg: Content = { role: 'user', parts: functionResponses };
+      messages.push(resultMsg);
+    }
+  }
+
+  return { messages };
+}
+
+// ---------------------------------------------------------------------------
+// Accept speculation
+// ---------------------------------------------------------------------------
+
+/**
+ * Commit a speculation: apply the overlay's files to the real filesystem,
+ * append the speculated (pairing-cleaned) messages to the main conversation
+ * history, and report what was done. The overlay is cleaned up in all cases.
+ */
+export async function acceptSpeculation(
+  state: SpeculationState,
+  geminiClient: GeminiClient,
+): Promise<SpeculationResult> {
+  // Measure up to the boundary when there is one, otherwise up to "now"
+  const endedAt = state.boundary ? state.boundary.completedAt : Date.now();
+  const timeSavedMs = Math.max(0, endedAt - state.startTime);
+
+  try {
+    let filesApplied: string[] = [];
+    if (state.overlayFs) {
+      filesApplied = await state.overlayFs.applyToReal();
+    }
+
+    // Strip dangling function calls before the messages reach the main chat
+    const cleanMessages = ensureToolResultPairing(state.messages);
+    for (const message of cleanMessages) {
+      await geminiClient.addHistory(message);
+    }
+    state.status = 'completed';
+
+    const { boundary, pipelinedSuggestion: nextSuggestion } = state;
+    return {
+      filesApplied,
+      messages: cleanMessages,
+      boundary,
+      timeSavedMs,
+      nextSuggestion,
+    };
+  } finally {
+    // Runs on success and on failure of applyToReal / addHistory alike
+    const overlay = state.overlayFs;
+    if (overlay) {
+      await overlay.cleanup();
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Abort speculation
+// ---------------------------------------------------------------------------
+
+/**
+ * Cancel a speculation (running or already finished) and discard its overlay.
+ */
+export async function abortSpeculation(state: SpeculationState): Promise<void> {
+  const { abortController } = state;
+  if (abortController) abortController.abort();
+  state.status = 'aborted';
+  const overlay = state.overlayFs;
+  if (overlay) await overlay.cleanup();
+}
+
+// ---------------------------------------------------------------------------
+// Utility: ensure tool result pairing
+// ---------------------------------------------------------------------------
+
+/**
+ * Repair a speculated history whose final turn was cut off at a boundary.
+ * Only the LAST message is inspected: when it is a model turn that still
+ * carries functionCall parts (which can have no functionResponse after them),
+ * those parts are stripped so the history stays API-legal.
+ */
+export function ensureToolResultPairing(messages: Content[]): Content[] {
+  if (messages.length === 0) return messages;
+
+  const tailIndex = messages.length - 1;
+  const tail = messages[tailIndex];
+  const repaired = [...messages];
+
+  // Anything other than a model turn with parts is passed through untouched
+  if (tail.role !== 'model' || !tail.parts) return repaired;
+
+  const nonCallParts: Part[] = [];
+  for (const part of tail.parts) {
+    if (part.functionCall === undefined) nonCallParts.push(part);
+  }
+  // No functionCall parts were present — nothing to strip
+  if (nonCallParts.length === tail.parts.length) return repaired;
+
+  if (nonCallParts.length === 0) {
+    repaired.pop(); // the turn consisted solely of dangling calls
+  } else {
+    repaired[tailIndex] = { role: 'model', parts: nonCallParts };
+  }
+  return repaired;
+}
+
+// ---------------------------------------------------------------------------
+// Pipelined suggestion generation
+// ---------------------------------------------------------------------------
+
+// Reuses SUGGESTION_PROMPT from suggestionGenerator.ts (imported above)
+// to ensure pipelined suggestions have the same quality as initial suggestions.
+
+const PIPELINED_SCHEMA: Record<string, unknown> = {
+  type: 'object', // passed to runForkedQuery as `jsonSchema`
+  properties: {
+    suggestion: {
+      type: 'string',
+      description:
+        'The predicted next user input (2-12 words), or empty string.',
+    },
+  },
+  required: ['suggestion'],
+};
+
+/**
+ * Generate the next suggestion from the just-speculated suggestion plus a
+ * summary (first 500 chars) of the model text produced while speculating.
+ * Resolves to null on abort, on empty or filtered output, and on any error.
+ */
+async function generatePipelinedSuggestion(
+  config: Config,
+  suggestionText: string,
+  speculatedMessages: Content[],
+  abortSignal: AbortSignal,
+  modelOverride?: string,
+): Promise<string | null> {
+  try {
+    // Build augmented prompt that includes the speculated context inline
+    const speculatedSummary = speculatedMessages
+      .filter((m) => m.role === 'model')
+      .flatMap((m) => m.parts ?? [])
+      .map((p) => p.text ?? '')
+      .filter(Boolean)
+      .join('\n')
+      .slice(0, 500);
+
+    const augmentedPrompt = `The user just said: "${suggestionText}"
+The assistant responded: ${speculatedSummary || '(tool calls executed)'}
+
+${SUGGESTION_PROMPT}`;
+
+    const result = await runForkedQuery(config, augmentedPrompt, {
+      abortSignal,
+      jsonSchema: PIPELINED_SCHEMA,
+      model: modelOverride,
+    });
+
+    if (abortSignal.aborted) return null;
+
+    let raw: string | null = null;
+    if (result.jsonResult) {
+      const val = result.jsonResult['suggestion'];
+      raw = typeof val === 'string' ? val.trim() : null;
+    } else if (result.text) {
+      raw = result.text.trim(); // trimmed like the JSON branch above
+    }
+
+    if (!raw || getFilterReason(raw)) return null;
+
+    return raw;
+  } catch {
+    return null; // best-effort: the pipelined suggestion is optional
+  }
+}
diff --git a/packages/core/src/followup/speculationToolGate.test.ts b/packages/core/src/followup/speculationToolGate.test.ts
new file mode 100644
index 000000000..b72874e62
--- /dev/null
+++ b/packages/core/src/followup/speculationToolGate.test.ts
@@ -0,0 +1,240 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { evaluateToolCall, rewritePathArgs } from './speculationToolGate.js';
+import { OverlayFs } from './overlayFs.js';
+import { ToolNames } from '../tools/tool-names.js';
+import { ApprovalMode } from '../config/config.js';
+import { mkdir, writeFile, rm } from 'node:fs/promises';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { randomUUID } from 'node:crypto';
+
+describe('speculationToolGate', () => {
+  let testDir: string;
+  let overlayFs: OverlayFs;
+
+  beforeEach(async () => { // fresh temp directory and overlay for every test
+    testDir = join(tmpdir(), `gate-test-${randomUUID().slice(0, 8)}`);
+    await mkdir(testDir, { recursive: true });
+    overlayFs = new OverlayFs(testDir);
+  });
+
+  afterEach(async () => { // remove the overlay first, then the temp directory
+    await overlayFs.cleanup();
+    await rm(testDir, { recursive: true, force: true });
+  });
+
+  describe('SAFE_READ_ONLY_TOOLS', () => {
+    it.each([
+      ToolNames.READ_FILE,
+      ToolNames.GREP,
+      ToolNames.GLOB,
+      ToolNames.LS,
+      ToolNames.LSP,
+    ])('allows %s', async (toolName) => {
+      const result = await evaluateToolCall(
+        toolName,
+        {},
+        overlayFs,
+        ApprovalMode.DEFAULT,
+      );
+      expect(result.action).toBe('allow');
+    });
+  });
+
+  describe('WRITE_TOOLS', () => {
+    it('redirects edit in auto-edit mode', async () => {
+      const result = await evaluateToolCall(
+        ToolNames.EDIT,
+        {},
+        overlayFs,
+        ApprovalMode.AUTO_EDIT,
+      );
+      expect(result.action).toBe('redirect');
+    });
+
+    it('redirects write_file in yolo mode', async () => {
+      const result = await evaluateToolCall(
+        ToolNames.WRITE_FILE,
+        {},
+        overlayFs,
+        ApprovalMode.YOLO,
+      );
+      expect(result.action).toBe('redirect');
+    });
+
+    it('hits boundary for edit in default mode', async () => {
+      const result = await evaluateToolCall(
+        ToolNames.EDIT,
+        {},
+        overlayFs,
+        ApprovalMode.DEFAULT,
+      );
+      expect(result.action).toBe('boundary');
+    });
+
+    it('hits boundary for write_file in plan mode', async () => {
+      const result = await evaluateToolCall(
+        ToolNames.WRITE_FILE,
+        {},
+        overlayFs,
+        ApprovalMode.PLAN,
+      );
+      expect(result.action).toBe('boundary');
+    });
+  });
+
+  describe('SHELL', () => {
+    it('allows read-only shell commands', async () => {
+      const result = await evaluateToolCall(
+        ToolNames.SHELL,
+        { command: 'ls -la' },
+        overlayFs,
+        ApprovalMode.DEFAULT,
+      );
+      expect(result.action).toBe('allow');
+    });
+
+    it('hits boundary for non-read-only shell commands', async () => {
+      const result = await evaluateToolCall(
+        ToolNames.SHELL,
+        { command: 'rm -rf /' },
+        overlayFs,
+        ApprovalMode.DEFAULT,
+      );
+      expect(result.action).toBe('boundary');
+    });
+
+    it('hits boundary for empty command', async () => {
+      const result = await evaluateToolCall(
+        ToolNames.SHELL,
+        { command: '' },
+        overlayFs,
+        ApprovalMode.DEFAULT,
+      );
+      expect(result.action).toBe('boundary');
+    });
+  });
+
+  describe('BOUNDARY_TOOLS', () => {
+    it.each([
+      ToolNames.AGENT,
+      ToolNames.SKILL,
+      ToolNames.TODO_WRITE,
+      ToolNames.MEMORY,
+      ToolNames.ASK_USER_QUESTION,
+      ToolNames.EXIT_PLAN_MODE,
+      ToolNames.WEB_FETCH,
+      ToolNames.WEB_SEARCH,
+    ])('hits boundary for %s', async (toolName) => {
+      const result = await evaluateToolCall(
+        toolName,
+        {},
+        overlayFs,
+        ApprovalMode.DEFAULT,
+      );
+      expect(result.action).toBe('boundary');
+    });
+  });
+
+  describe('unknown tools', () => {
+    it('hits boundary for unknown tool names', async () => {
+      const result = await evaluateToolCall(
+        'mcp_custom_tool',
+        {},
+        overlayFs,
+        ApprovalMode.DEFAULT,
+      );
+      expect(result.action).toBe('boundary');
+      expect(result.reason).toContain('unknown_tool');
+    });
+  });
+
+  describe('rewritePathArgs', () => {
+    it('rewrites file_path argument', async () => {
+      const filePath = join(testDir, 'src', 'app.ts');
+      await mkdir(join(testDir, 'src'), { recursive: true });
+      await writeFile(filePath, 'content');
+
+      const args: Record<string, unknown> = { file_path: filePath };
+      await rewritePathArgs(args, overlayFs);
+
+      expect(args['file_path']).not.toBe(filePath);
+      expect(String(args['file_path'])).toContain('qwen-speculation');
+    });
+
+    it('rewrites filePath argument (camelCase)', async () => {
+      const filePath = join(testDir, 'file.ts');
+      await writeFile(filePath, 'content');
+
+      const args: Record<string, unknown> = { filePath };
+      await rewritePathArgs(args, overlayFs);
+
+      expect(args['filePath']).not.toBe(filePath);
+    });
+
+    it('does nothing when no path arguments present', async () => {
+      const args: Record<string, unknown> = { command: 'ls' };
+      await rewritePathArgs(args, overlayFs);
+
+      expect(args['command']).toBe('ls');
+    });
+
+    it('rewrites path argument', async () => {
+      const filePath = join(testDir, 'dir', 'file.ts');
+      await mkdir(join(testDir, 'dir'), { recursive: true });
+      await writeFile(filePath, 'content');
+
+      const args: Record<string, unknown> = { path: filePath };
+      await rewritePathArgs(args, overlayFs);
+
+      expect(String(args['path'])).toContain('qwen-speculation');
+    });
+  });
+
+  describe('read path resolution through overlay', () => {
+    it('resolves read tool path to overlay after a write', async () => {
+      const filePath = join(testDir, 'src', 'app.ts');
+      await mkdir(join(testDir, 'src'), { recursive: true });
+      await writeFile(filePath, 'original');
+
+      // First: redirect a write (puts file in overlay)
+      await overlayFs.redirectWrite(filePath);
+
+      // Then: evaluate a read tool — path should be resolved to overlay
+      const args: Record<string, unknown> = { file_path: filePath };
+      const result = await evaluateToolCall(
+        ToolNames.READ_FILE,
+        args,
+        overlayFs,
+        ApprovalMode.DEFAULT,
+      );
+
+      expect(result.action).toBe('allow');
+      // The file_path arg should now point to the overlay
+      expect(String(args['file_path'])).toContain('qwen-speculation');
+      expect(String(args['file_path'])).not.toBe(filePath);
+    });
+
+    it('does not resolve read path when file was not written to overlay', async () => {
+      const filePath = join(testDir, 'untouched.ts');
+      await writeFile(filePath, 'content');
+
+      const args: Record<string, unknown> = { file_path: filePath };
+      await evaluateToolCall(
+        ToolNames.READ_FILE,
+        args,
+        overlayFs,
+        ApprovalMode.DEFAULT,
+      );
+
+      // Path should remain unchanged
+      expect(args['file_path']).toBe(filePath);
+    });
+  });
+});
diff --git a/packages/core/src/followup/speculationToolGate.ts b/packages/core/src/followup/speculationToolGate.ts
new file mode 100644
index 000000000..98a4452bb
--- /dev/null
+++ b/packages/core/src/followup/speculationToolGate.ts
@@ -0,0 +1,146 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Speculation Tool Gate
+ *
+ * Determines which tool calls are allowed during speculative execution.
+ * Returns 'allow' for safe read-only tools, 'redirect' for write tools
+ * (only when approval mode permits), or 'boundary' to stop speculation.
+ *
+ * SECURITY: Speculation bypasses the normal permission/approval flow.
+ * Write tools are ONLY redirected to overlay when the user's approval mode
+ * already permits automatic edits (auto-edit or yolo). In default/plan mode,
+ * write tools hit boundary — no silent writes without user consent.
+ */
+
+import { ToolNames } from '../tools/tool-names.js';
+import { isShellCommandReadOnlyAST } from '../utils/shellAstParser.js';
+import { ApprovalMode } from '../config/config.js';
+import type { OverlayFs } from './overlayFs.js';
+
+export interface ToolGateResult {
+  action: 'allow' | 'redirect' | 'boundary'; // run as-is / write via overlay / stop
+  reason?: string; // short tag such as 'write_tool:<name>'; not set for 'allow'
+}
+
+/** Read-only tools that are always allowed (their read paths go through the overlay) */
+const SAFE_READ_ONLY_TOOLS = new Set<string>([
+  ToolNames.READ_FILE,
+  ToolNames.GREP,
+  ToolNames.GLOB,
+  ToolNames.LS,
+  ToolNames.LSP,
+  // web_fetch and web_search excluded — they require user confirmation
+  // for external network requests, which speculation bypasses
+]);
+
+/** Tools that produce file writes — redirected to the overlay, or a boundary */
+const WRITE_TOOLS = new Set<string>([ToolNames.EDIT, ToolNames.WRITE_FILE]);
+
+/** Tools that always stop speculation (reported as `denied_tool:<name>`) */
+const BOUNDARY_TOOLS = new Set<string>([
+  ToolNames.AGENT,
+  ToolNames.SKILL,
+  ToolNames.TODO_WRITE,
+  ToolNames.MEMORY,
+  ToolNames.ASK_USER_QUESTION,
+  ToolNames.EXIT_PLAN_MODE,
+  ToolNames.WEB_FETCH,
+  ToolNames.WEB_SEARCH,
+]);
+
+/**
+ * Decide how a single tool call is handled while speculating. Checks run in
+ * this order: read-only tools, write tools, shell, known boundary tools, and
+ * finally the catch-all for unknown tools.
+ *
+ * @param toolName - Internal tool name (a ToolNames value, or anything else)
+ * @param args - Tool call arguments; a read-path entry may be rewritten in place
+ * @param overlayFs - Overlay filesystem used to resolve read paths
+ * @param approvalMode - The user's current approval mode
+ * @returns 'allow', 'redirect' (write goes to the overlay) or 'boundary' (stop)
+ */
+export async function evaluateToolCall(
+  toolName: string,
+  args: Record<string, unknown>,
+  overlayFs: OverlayFs,
+  approvalMode: ApprovalMode,
+): Promise<ToolGateResult> {
+  // Shared constructor for every 'boundary' outcome below
+  const boundary = (reason: string): ToolGateResult => ({
+    action: 'boundary',
+    reason,
+  });
+
+  // 1. Read-only tools always run; reads of overlay-written files are redirected
+  if (SAFE_READ_ONLY_TOOLS.has(toolName)) {
+    await resolveReadPaths(args, overlayFs);
+    return { action: 'allow' };
+  }
+
+  // 2. Write tools go to the overlay only when the approval mode already
+  //    permits automatic edits; otherwise speculation stops here
+  if (WRITE_TOOLS.has(toolName)) {
+    const autoEditsPermitted =
+      approvalMode === ApprovalMode.AUTO_EDIT ||
+      approvalMode === ApprovalMode.YOLO;
+    return autoEditsPermitted
+      ? { action: 'redirect', reason: `write_tool:${toolName}` }
+      : boundary(`write_tool_no_auto:${toolName}`);
+  }
+
+  // 3. Shell commands run only if the AST parser deems them read-only
+  if (toolName === ToolNames.SHELL) {
+    const rawCommand = args['command'];
+    const command = typeof rawCommand === 'string' ? rawCommand : '';
+    if (command !== '' && (await isShellCommandReadOnlyAST(command))) {
+      return { action: 'allow' };
+    }
+    return boundary(`shell:${command.slice(0, 50) || 'empty'}`);
+  }
+
+  // 4. Everything else stops speculation; only the reason tag differs between
+  //    explicitly denied tools and unknown ones (including MCP/discovered)
+  if (BOUNDARY_TOOLS.has(toolName)) {
+    return boundary(`denied_tool:${toolName}`);
+  }
+  return boundary(`unknown_tool:${toolName}`);
+}
+
+/**
+ * Point a read tool at the overlay copy of a file when one exists.
+ * Only the first string-valued path argument found (in pathKeys order) is
+ * passed through overlayFs.resolveReadPath; args is mutated in place.
+ */
+async function resolveReadPaths(
+  args: Record<string, unknown>,
+  overlayFs: OverlayFs,
+): Promise<void> {
+  const pathKeys = ['file_path', 'filePath', 'path', 'notebook_path'];
+  const firstPathKey = pathKeys.find((key) => typeof args[key] === 'string');
+  if (firstPathKey === undefined) {
+    return; // no path-like argument — nothing to resolve
+  }
+  const requestedPath = args[firstPathKey] as string;
+  args[firstPathKey] = overlayFs.resolveReadPath(requestedPath);
+}
+
+/**
+ * Redirect a write tool's target into the overlay. The first string-valued
+ * path argument (in pathKeys order) is replaced, in place, by the result of
+ * overlayFs.redirectWrite; a rejection from redirectWrite propagates.
+ */
+export async function rewritePathArgs(
+  args: Record<string, unknown>,
+  overlayFs: OverlayFs,
+): Promise<void> {
+  // Argument names under which Edit / WriteFile style tools carry a path
+  const pathKeys = ['file_path', 'filePath', 'path', 'notebook_path'];
+  const targetKey = pathKeys.find((key) => typeof args[key] === 'string');
+  if (targetKey !== undefined) {
+    const realPath = args[targetKey] as string;
+    args[targetKey] = await overlayFs.redirectWrite(realPath);
+  }
+}
diff --git a/packages/core/src/followup/suggestionGenerator.test.ts b/packages/core/src/followup/suggestionGenerator.test.ts
new file mode 100644
index 000000000..380053182
--- /dev/null
+++ b/packages/core/src/followup/suggestionGenerator.test.ts
@@ -0,0 +1,98 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { shouldFilterSuggestion } from './suggestionGenerator.js';
+
+describe('shouldFilterSuggestion', () => {
+  it('filters "done"', () => {
+    expect(shouldFilterSuggestion('done')).toBe(true);
+  });
+
+  it('filters meta-text', () => {
+    expect(shouldFilterSuggestion('nothing found')).toBe(true);
+    expect(shouldFilterSuggestion('no suggestion needed')).toBe(true);
+    expect(shouldFilterSuggestion('silence')).toBe(true);
+    expect(shouldFilterSuggestion('staying silent here')).toBe(true);
+  });
+
+  it('filters meta-wrapped text', () => {
+    expect(shouldFilterSuggestion('(silence)')).toBe(true);
+    expect(shouldFilterSuggestion('[no suggestion]')).toBe(true);
+  });
+
+  it('filters error messages', () => {
+    expect(shouldFilterSuggestion('api error: 500')).toBe(true);
+    expect(shouldFilterSuggestion('prompt is too long')).toBe(true);
+  });
+
+  it('filters prefixed labels', () => {
+    expect(shouldFilterSuggestion('Suggestion: commit this')).toBe(true);
+  });
+
+  it('filters single words not in whitelist', () => {
+    expect(shouldFilterSuggestion('hmm')).toBe(true);
+    expect(shouldFilterSuggestion('maybe')).toBe(true);
+  });
+
+  it('allows whitelisted single words', () => {
+    expect(shouldFilterSuggestion('yes')).toBe(false);
+    expect(shouldFilterSuggestion('commit')).toBe(false);
+    expect(shouldFilterSuggestion('push')).toBe(false);
+    expect(shouldFilterSuggestion('no')).toBe(false);
+  });
+
+  it('allows slash commands as single word', () => {
+    expect(shouldFilterSuggestion('/commit')).toBe(false);
+  });
+
+  it('filters too many words', () => {
+    expect(
+      shouldFilterSuggestion(
+        'this is a very long suggestion with way too many words in it to show',
+      ),
+    ).toBe(true);
+  });
+
+  it('filters suggestions >= 100 chars even when word count is ok', () => {
+    expect(shouldFilterSuggestion('abcdefghi '.repeat(10))).toBe(true);
+  });
+
+  it('filters multiple sentences', () => {
+    expect(shouldFilterSuggestion('Run the tests. Then commit.')).toBe(true);
+  });
+
+  it('filters formatting', () => {
+    expect(shouldFilterSuggestion('run the **tests**')).toBe(true);
+    expect(shouldFilterSuggestion('line1\nline2')).toBe(true);
+  });
+
+  it('filters evaluative language', () => {
+    expect(shouldFilterSuggestion('looks good to me')).toBe(true);
+    expect(shouldFilterSuggestion('thanks for the help')).toBe(true);
+    expect(shouldFilterSuggestion('that works perfectly')).toBe(true);
+  });
+
+  it('filters AI-voice patterns', () => {
+    expect(shouldFilterSuggestion('Let me check that')).toBe(true);
+    expect(shouldFilterSuggestion("I'll run the tests")).toBe(true);
+    expect(shouldFilterSuggestion("Here's what I found")).toBe(true);
+  });
+
+  it('does not false-positive on evaluative substrings', () => {
+    expect(shouldFilterSuggestion('run nicely formatted tests')).toBe(false);
+    expect(shouldFilterSuggestion('fix the greatest issue')).toBe(false);
+    expect(shouldFilterSuggestion('create thanksgiving banner')).toBe(false);
+  });
+
+  it('allows good suggestions', () => {
+    expect(shouldFilterSuggestion('run the tests')).toBe(false);
+    expect(shouldFilterSuggestion('commit this')).toBe(false);
+    expect(shouldFilterSuggestion('try it out')).toBe(false);
+    expect(shouldFilterSuggestion('push it')).toBe(false);
+    expect(shouldFilterSuggestion('create a PR')).toBe(false);
+  });
+});
diff --git a/packages/core/src/followup/suggestionGenerator.ts b/packages/core/src/followup/suggestionGenerator.ts
new file mode 100644
index 000000000..e5b25b768
--- /dev/null
+++ b/packages/core/src/followup/suggestionGenerator.ts
@@ -0,0 +1,367 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Prompt Suggestion Generator
+ *
+ * Uses a lightweight LLM call to predict what the user would naturally
+ * type next (Next-step Suggestion / NES).
+ */
+
+import type { Content } from '@google/genai';
+import type { Config } from '../config/config.js';
+import { getCacheSafeParams, runForkedQuery } from './forkedQuery.js';
+import {
+  uiTelemetryService,
+  EVENT_API_RESPONSE,
+} from '../telemetry/uiTelemetry.js';
+import { ApiResponseEvent } from '../telemetry/types.js';
+
+/**
+ * Prompt for suggestion generation.
+ * Instructs the model to predict the user's next input.
+ */
+export const SUGGESTION_PROMPT = `[SUGGESTION MODE: Suggest what the user might naturally type next.]
+
+FIRST: Look at the user's recent messages and original request.
+
+Your job is to predict what THEY would type - not what you think they should do.
+
+THE TEST: Would they think "I was just about to type that"?
+
+EXAMPLES:
+User asked "fix the bug and run tests", bug is fixed → "run the tests"
+After code written → "try it out"
+Model offers options → suggest the one the user would likely pick, based on conversation
+Model asks to continue → "yes" or "go ahead"
+Task complete, obvious follow-up → "commit this" or "push it"
+After error or misunderstanding → silence (let them assess/correct)
+
+Be specific: "run the tests" beats "continue".
+
+NEVER SUGGEST:
+- Evaluative ("looks good", "thanks")
+- Questions ("what about...?")
+- AI-voice ("Let me...", "I'll...", "Here's...")
+- New ideas they didn't ask about
+- Multiple sentences
+
+Stay silent if the next step isn't obvious from what the user said.
+
+Format: 2-12 words, match the user's style. Or nothing.
+
+Reply with ONLY the suggestion, no quotes or explanation.`;
+
+/**
+ * JSON schema for the suggestion response.
+ */
+const SUGGESTION_SCHEMA: Record<string, unknown> = {
+  type: 'object',
+  properties: {
+    suggestion: {
+      type: 'string',
+      description:
+        'The predicted next user input (2-12 words), or empty string if nothing obvious.',
+    },
+  },
+  required: ['suggestion'],
+};
+
+/** Minimum assistant turns before generating suggestions */
+const MIN_ASSISTANT_TURNS = 2;
+
+/**
+ * Generate a prompt suggestion using an LLM call.
+ * @param config - App config (provides the model name and content generator)
+ * @param conversationHistory - Full conversation history as Content[]
+ * @param abortSignal - Signal to cancel the LLM call (e.g., when user types)
+ * @param options - enableCacheSharing: use the forked query when cache-safe params exist; model: model override
+ * @returns `{ suggestion }` on success; otherwise `suggestion` is null with a filterReason (omitted when aborted)
+ */
+export async function generatePromptSuggestion(
+  config: Config,
+  conversationHistory: Content[],
+  abortSignal: AbortSignal,
+  options?: { enableCacheSharing?: boolean; model?: string },
+): Promise<{ suggestion: string | null; filterReason?: string }> {
+  // Don't suggest in very early conversations
+  const modelTurns = conversationHistory.filter(
+    (c) => c.role === 'model',
+  ).length;
+  if (modelTurns < MIN_ASSISTANT_TURNS) {
+    return { suggestion: null, filterReason: 'early_conversation' };
+  }
+
+  try {
+    // Try cache-aware forked query if enabled and params available
+    const cacheSafe = options?.enableCacheSharing ? getCacheSafeParams() : null;
+    const modelOverride = options?.model;
+    const raw = cacheSafe
+      ? await generateViaForkedQuery(config, abortSignal, modelOverride)
+      : await generateViaBaseLlm(
+          config,
+          conversationHistory,
+          abortSignal,
+          modelOverride,
+        );
+
+    const suggestion = typeof raw === 'string' ? raw.trim() : null;
+
+    if (!suggestion) {
+      return { suggestion: null, filterReason: 'empty' };
+    }
+
+    const filterReason = getFilterReason(suggestion);
+    if (filterReason) {
+      return { suggestion: null, filterReason };
+    }
+
+    return { suggestion };
+  } catch {
+    if (abortSignal.aborted) {
+      return { suggestion: null };
+    }
+    return { suggestion: null, filterReason: 'error' };
+  }
+}
+
+/** Generate suggestion via cache-aware forked query */
+async function generateViaForkedQuery(
+  config: Config,
+  abortSignal: AbortSignal,
+  modelOverride?: string,
+): Promise<string | null> {
+  const model = modelOverride || config.getModel();
+  const startTime = Date.now();
+  const result = await runForkedQuery(config, SUGGESTION_PROMPT, {
+    abortSignal,
+    jsonSchema: SUGGESTION_SCHEMA,
+    model,
+  });
+  const durationMs = Date.now() - startTime;
+
+  // Report usage to session stats
+  if (result.usage) {
+    reportSuggestionUsage(
+      model,
+      {
+        promptTokenCount: result.usage.inputTokens,
+        candidatesTokenCount: result.usage.outputTokens,
+        totalTokenCount: result.usage.inputTokens + result.usage.outputTokens,
+        cachedContentTokenCount: result.usage.cacheHitTokens,
+      },
+      durationMs,
+    );
+  }
+
+  if (result.jsonResult) {
+    const raw = result.jsonResult['suggestion'];
+    return typeof raw === 'string' ? raw : null;
+  }
+
+  // Fallback: try parsing text as JSON
+  if (result.text) {
+    try {
+      const parsed = JSON.parse(result.text) as Record<string, unknown>;
+      const raw = parsed['suggestion'];
+      return typeof raw === 'string' ? raw : null;
+    } catch {
+      // Model returned plain text — use it directly
+      return result.text;
+    }
+  }
+
+  return null;
+}
+
+/** Generate via direct ContentGenerator.generateContent (reports usage when usageMetadata is present) */
+async function generateViaBaseLlm(
+  config: Config,
+  conversationHistory: Content[],
+  abortSignal: AbortSignal,
+  modelOverride?: string,
+): Promise<string | null> {
+  const model = modelOverride || config.getModel();
+  const contents: Content[] = [
+    ...conversationHistory,
+    { role: 'user', parts: [{ text: SUGGESTION_PROMPT }] },
+  ];
+
+  const generator = config.getContentGenerator();
+  const startTime = Date.now();
+  const response = await generator.generateContent(
+    {
+      model,
+      contents,
+      config: {
+        abortSignal,
+        // Disable thinking for suggestion generation — not needed and wastes tokens
+        thinkingConfig: { includeThoughts: false },
+      },
+    },
+    'prompt_suggestion',
+  );
+  const durationMs = Date.now() - startTime;
+
+  // Report usage to session stats so /stats tracks suggestion model tokens
+  const usage = response.usageMetadata;
+  if (usage) {
+    reportSuggestionUsage(model, usage, durationMs);
+  }
+
+  const text = response.candidates?.[0]?.content?.parts
+    ?.map((p) => p.text ?? '')
+    .join('')
+    .trim();
+  if (text) {
+    // Try to parse as JSON first (model might return {"suggestion": "..."})
+    try {
+      const parsed = JSON.parse(text) as Record<string, unknown>;
+      const s = parsed['suggestion'];
+      if (typeof s === 'string') return s;
+    } catch {
+      // Not JSON — use raw text as the suggestion
+    }
+    return text;
+  }
+
+  return null;
+}
+
+/** Single-word suggestions allowed through the too_few_words filter */
+const ALLOWED_SINGLE_WORDS = new Set([
+  'yes',
+  'yeah',
+  'yep',
+  'yea',
+  'yup',
+  'sure',
+  'ok',
+  'okay',
+  'push',
+  'commit',
+  'deploy',
+  'stop',
+  'continue',
+  'check',
+  'exit',
+  'quit',
+  'no',
+]);
+
+/**
+ * Returns the filter reason if the suggestion should be suppressed, or null if it passes.
+ * AI-voice prefixes must end at a word boundary, so "this issue" is not read as "this is".
+ */
+export function getFilterReason(suggestion: string): string | null {
+  const lower = suggestion.toLowerCase();
+  const wordCount = suggestion.trim().split(/\s+/).length;
+
+  if (lower === 'done') return 'done';
+
+  if (
+    lower === 'nothing found' ||
+    lower === 'nothing found.' ||
+    lower.startsWith('nothing to suggest') ||
+    lower.startsWith('no suggestion') ||
+    /\bsilence is\b|\bstay(s|ing)? silent\b/.test(lower) ||
+    /^\W*silence\W*$/.test(lower)
+  ) {
+    return 'meta_text';
+  }
+
+  if (/^\(.*\)$|^\[.*\]$/.test(suggestion)) return 'meta_wrapped';
+
+  if (
+    lower.startsWith('api error:') ||
+    lower.startsWith('prompt is too long') ||
+    lower.startsWith('request timed out') ||
+    lower.startsWith('invalid api key') ||
+    lower.startsWith('image was too large')
+  ) {
+    return 'error_message';
+  }
+
+  if (/^\w+:\s/.test(suggestion)) return 'prefixed_label';
+
+  // CJK text has no spaces — use character count instead of word count
+  const hasCJK = /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]/.test(
+    suggestion,
+  );
+  if (!hasCJK) {
+    if (wordCount < 2) {
+      if (suggestion.startsWith('/')) return null; // slash commands ok
+      if (!ALLOWED_SINGLE_WORDS.has(lower)) return 'too_few_words';
+    }
+    if (wordCount > 12) return 'too_many_words';
+  } else {
+    // For CJK: filter if too short (< 2 chars) or too long (> 30 chars)
+    if (suggestion.length < 2) return 'too_few_words';
+    if (suggestion.length > 30) return 'too_many_words';
+  }
+  if (suggestion.length >= 100) return 'too_long';
+  if (/[.!?]\s+[A-Z]/.test(suggestion)) return 'multiple_sentences';
+  if (/[\n*]|\*\*/.test(suggestion)) return 'has_formatting';
+
+  if (
+    /\bthanks\b|\bthank you\b|\blooks good\b|\bsounds good\b|\bthat works\b|\bthat worked\b|\bthat's all\b|\bnice\b|\bgreat\b|\bperfect\b|\bmakes sense\b|\bawesome\b|\bexcellent\b/.test(
+      lower,
+    )
+  ) {
+    return 'evaluative';
+  }
+
+  if (
+    /^(let me|i'll|i've|i'm|i can|i would|i think|i notice|here's|here is|here are|that's|this is|this will|you can|you should|you could|sure,|of course|certainly)(?!\w)/i.test(
+      suggestion,
+    )
+  ) {
+    return 'ai_voice';
+  }
+
+  return null;
+}
+
+/**
+ * Returns true if the suggestion should be filtered out.
+ * Convenience wrapper around getFilterReason for tests and simple checks.
+ */
+export function shouldFilterSuggestion(suggestion: string): boolean {
+  return getFilterReason(suggestion) !== null;
+}
+
+/**
+ * Report suggestion API usage to the UI telemetry service so it appears in /stats.
+ */
+function reportSuggestionUsage(
+  model: string,
+  usage: {
+    promptTokenCount?: number;
+    candidatesTokenCount?: number;
+    totalTokenCount?: number;
+    cachedContentTokenCount?: number;
+    thoughtsTokenCount?: number;
+  },
+  durationMs: number,
+): void {
+  const event = new ApiResponseEvent(
+    'suggestion-' + Date.now(),
+    model,
+    durationMs,
+    'prompt_suggestion',
+    undefined,
+    {
+      promptTokenCount: usage.promptTokenCount ?? 0,
+      candidatesTokenCount: usage.candidatesTokenCount ?? 0,
+      totalTokenCount: usage.totalTokenCount ?? 0,
+      cachedContentTokenCount: usage.cachedContentTokenCount ?? 0,
+      thoughtsTokenCount: usage.thoughtsTokenCount ?? 0,
+    },
+  );
+  // Override event.name to match UiEvent type (UiTelemetryService switch)
+  const uiEvent = Object.assign(event, {
+    'event.name': EVENT_API_RESPONSE as typeof EVENT_API_RESPONSE,
+  });
+  uiTelemetryService.addEvent(uiEvent);
+}
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 9fbf78002..4ae368af8 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -172,6 +172,8 @@ export {
   logExtensionEnable,
   logIdeConnection,
   logModelSlashCommand,
+  logPromptSuggestion,
+  logSpeculation,
 } from './telemetry/loggers.js';
 export {
   AuthEvent,
@@ -182,6 +184,8 @@ export {
   IdeConnectionEvent,
   IdeConnectionType,
   ModelSlashCommandEvent,
+  PromptSuggestionEvent,
+  SpeculationEvent,
 } from './telemetry/types.js';
 
 // ============================================================================
@@ -194,6 +198,12 @@ export * from './skills/index.js';
 export * from './subagents/index.js';
 export * from './agents/index.js';
 
+// ============================================================================
+// Follow-up Suggestions
+// ============================================================================
+
+export * from './followup/index.js';
+
 // ============================================================================
 // Utilities
 // ============================================================================
diff --git a/packages/core/src/telemetry/constants.ts b/packages/core/src/telemetry/constants.ts
index 6de60015b..1bd3db3b4 100644
--- a/packages/core/src/telemetry/constants.ts
+++ b/packages/core/src/telemetry/constants.ts
@@ -39,6 +39,10 @@ export const EVENT_SKILL_LAUNCH = 'qwen-code.skill_launch';
 export const EVENT_AUTH = 'qwen-code.auth';
 export const EVENT_USER_FEEDBACK = 'qwen-code.user_feedback';
 
+// Prompt Suggestion Events
+export const EVENT_PROMPT_SUGGESTION = 'qwen-code.prompt_suggestion';
+export const EVENT_SPECULATION = 'qwen-code.speculation';
+
 // Arena Events
 export const EVENT_ARENA_SESSION_STARTED = 'qwen-code.arena_session_started';
 export const EVENT_ARENA_AGENT_COMPLETED = 'qwen-code.arena_agent_completed';
diff --git a/packages/core/src/telemetry/loggers.ts b/packages/core/src/telemetry/loggers.ts
index da2499008..b7c18c9d3 100644
--- a/packages/core/src/telemetry/loggers.ts
+++ b/packages/core/src/telemetry/loggers.ts
@@ -44,6 +44,8 @@ import {
   EVENT_ARENA_SESSION_STARTED,
   EVENT_ARENA_AGENT_COMPLETED,
   EVENT_ARENA_SESSION_ENDED,
+  EVENT_PROMPT_SUGGESTION,
+  EVENT_SPECULATION,
 } from './constants.js';
 import {
   recordApiErrorMetrics,
@@ -101,6 +103,8 @@ import type {
   ArenaSessionStartedEvent,
   ArenaAgentCompletedEvent,
   ArenaSessionEndedEvent,
+  PromptSuggestionEvent,
+  SpeculationEvent,
 } from './types.js';
 import type { HookCallEvent } from './types.js';
 import type { UiEvent } from './uiTelemetry.js';
@@ -1068,3 +1072,79 @@ export function logArenaSessionEnded(
     event.winner_model_id,
   );
 }
+
+export function logPromptSuggestion(
+  config: Config,
+  event: PromptSuggestionEvent,
+): void {
+  if (!isTelemetrySdkInitialized()) return;
+
+  const attributes: LogAttributes = {
+    ...getCommonAttributes(config),
+    'event.name': EVENT_PROMPT_SUGGESTION,
+    'event.timestamp': event['event.timestamp'],
+    outcome: event.outcome,
+  };
+
+  if (event.prompt_id) {
+    attributes['prompt_id'] = event.prompt_id;
+  }
+  if (event.accept_method) {
+    attributes['accept_method'] = event.accept_method;
+  }
+  if (event.time_to_accept_ms !== undefined) {
+    attributes['time_to_accept_ms'] = event.time_to_accept_ms;
+  }
+  if (event.time_to_ignore_ms !== undefined) {
+    attributes['time_to_ignore_ms'] = event.time_to_ignore_ms;
+  }
+  if (event.time_to_first_keystroke_ms !== undefined) {
+    attributes['time_to_first_keystroke_ms'] = event.time_to_first_keystroke_ms;
+  }
+  if (event.suggestion_length !== undefined) {
+    attributes['suggestion_length'] = event.suggestion_length;
+  }
+  if (event.similarity !== undefined) {
+    attributes['similarity'] = event.similarity;
+  }
+  if (event.was_focused_when_shown !== undefined) {
+    attributes['was_focused_when_shown'] = event.was_focused_when_shown;
+  }
+  if (event.reason) {
+    attributes['reason'] = event.reason;
+  }
+
+  const logger = logs.getLogger(SERVICE_NAME);
+  const logRecord: LogRecord = {
+    body: `Prompt suggestion: ${event.outcome}.`,
+    attributes,
+  };
+  logger.emit(logRecord);
+}
+
+export function logSpeculation(config: Config, event: SpeculationEvent): void {
+  if (!isTelemetrySdkInitialized()) return;
+
+  const attributes: LogAttributes = {
+    ...getCommonAttributes(config),
+    'event.name': EVENT_SPECULATION,
+    'event.timestamp': event['event.timestamp'],
+    outcome: event.outcome,
+    turns_used: event.turns_used,
+    files_written: event.files_written,
+    tool_use_count: event.tool_use_count,
+    duration_ms: event.duration_ms,
+    had_pipelined_suggestion: event.had_pipelined_suggestion,
+  };
+
+  if (event.boundary_type) {
+    attributes['boundary_type'] = event.boundary_type;
+  }
+
+  const logger = logs.getLogger(SERVICE_NAME);
+  const logRecord: LogRecord = {
+    body: `Speculation: ${event.outcome}.`,
+    attributes,
+  };
+  logger.emit(logRecord);
+}
diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts
index a44f20ef9..575e4c1b1 100644
--- a/packages/core/src/telemetry/types.ts
+++ b/packages/core/src/telemetry/types.ts
@@ -1062,3 +1062,76 @@ export class ExtensionDisableEvent implements BaseTelemetryEvent {
     this.setting_scope = settingScope;
   }
 }
+
+export class PromptSuggestionEvent implements BaseTelemetryEvent {
+  'event.name': 'qwen-code.prompt_suggestion';
+  'event.timestamp': string;
+  outcome: 'accepted' | 'ignored' | 'suppressed';
+  prompt_id?: string;
+  accept_method?: 'tab' | 'enter' | 'right';
+  time_to_accept_ms?: number;
+  time_to_ignore_ms?: number;
+  time_to_first_keystroke_ms?: number;
+  suggestion_length?: number;
+  similarity?: number;
+  was_focused_when_shown?: boolean;
+  reason?: string;
+
+  constructor(params: {
+    outcome: 'accepted' | 'ignored' | 'suppressed';
+    prompt_id?: string;
+    accept_method?: 'tab' | 'enter' | 'right';
+    time_to_accept_ms?: number;
+    time_to_ignore_ms?: number;
+    time_to_first_keystroke_ms?: number;
+    suggestion_length?: number;
+    similarity?: number;
+    was_focused_when_shown?: boolean;
+    reason?: string;
+  }) {
+    this['event.name'] = 'qwen-code.prompt_suggestion';
+    this['event.timestamp'] = new Date().toISOString();
+    this.outcome = params.outcome;
+    this.prompt_id = params.prompt_id ?? 'user_intent';
+    this.accept_method = params.accept_method;
+    this.time_to_accept_ms = params.time_to_accept_ms;
+    this.time_to_ignore_ms = params.time_to_ignore_ms;
+    this.time_to_first_keystroke_ms = params.time_to_first_keystroke_ms;
+    this.suggestion_length = params.suggestion_length;
+    this.similarity = params.similarity;
+    this.was_focused_when_shown = params.was_focused_when_shown;
+    this.reason = params.reason;
+  }
+}
+
+export class SpeculationEvent implements BaseTelemetryEvent {
+  'event.name': 'qwen-code.speculation';
+  'event.timestamp': string;
+  outcome: 'accepted' | 'aborted' | 'failed';
+  turns_used: number;
+  files_written: number;
+  tool_use_count: number;
+  duration_ms: number;
+  boundary_type?: string;
+  had_pipelined_suggestion: boolean;
+
+  constructor(params: {
+    outcome: 'accepted' | 'aborted' | 'failed';
+    turns_used: number;
+    files_written: number;
+    tool_use_count: number;
+    duration_ms: number;
+    boundary_type?: string;
+    had_pipelined_suggestion: boolean;
+  }) {
+    this['event.name'] = 'qwen-code.speculation';
+    this['event.timestamp'] = new Date().toISOString();
+    this.outcome = params.outcome;
+    this.turns_used = params.turns_used;
+    this.files_written = params.files_written;
+    this.tool_use_count = params.tool_use_count;
+    this.duration_ms = params.duration_ms;
+    this.boundary_type = params.boundary_type;
+    this.had_pipelined_suggestion = params.had_pipelined_suggestion;
+  }
+}
diff --git a/packages/vscode-ide-companion/esbuild.js b/packages/vscode-ide-companion/esbuild.js
index 69381bafc..fe3001722 100644
--- a/packages/vscode-ide-companion/esbuild.js
+++ b/packages/vscode-ide-companion/esbuild.js
@@ -175,6 +175,11 @@ async function main() {
     sourcesContent: false,
     platform: 'browser',
     outfile: 'dist/webview.js',
+    // @qwen-code/qwen-code-core is a peer dependency of @qwen-code/webui.
+    // Since @qwen-code/webui marks it as external in its own Vite build, the
+    // browser bundle must also mark it external to avoid bundling Node.js-only
+    // modules (undici, @grpc/grpc-js, fs, stream, etc.) into the webview.
+    external: ['@qwen-code/qwen-code-core'],
     logLevel: 'silent',
     plugins: [reactDedupPlugin, cssInjectPlugin, esbuildProblemMatcherPlugin],
     jsx: 'automatic', // Use new JSX transform (React 17+)
diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json
index 61e3f7ecd..4f92b74d7 100644
--- a/packages/vscode-ide-companion/schemas/settings.schema.json
+++ b/packages/vscode-ide-companion/schemas/settings.schema.json
@@ -180,6 +180,21 @@
           "type": "boolean",
           "default": true
         },
+        "enableFollowupSuggestions": {
+          "description": "Show context-aware follow-up suggestions after task completion. Press Tab or Right Arrow to accept, Enter to accept and submit.",
+          "type": "boolean",
+          "default": true
+        },
+        "enableCacheSharing": {
+          "description": "Use cache-aware forked queries for suggestion generation. Reduces cost on providers that support prefix caching (experimental).",
+          "type": "boolean",
+          "default": true
+        },
+        "enableSpeculation": {
+          "description": "Speculatively execute accepted suggestions before submission. Results appear instantly when you accept (experimental).",
+          "type": "boolean",
+          "default": false
+        },
         "accessibility": {
           "description": "Accessibility settings.",
           "type": "object",
@@ -234,6 +249,11 @@
       "type": "object",
       "additionalProperties": true
     },
+    "fastModel": {
+      "description": "Model for background tasks (suggestion generation, speculation). Leave empty to use the main model. A smaller/faster model (e.g., qwen3.5-flash) reduces latency and cost.",
+      "type": "string",
+      "default": ""
+    },
     "model": {
       "description": "Settings related to the generative model.",
       "type": "object",
diff --git a/packages/vscode-ide-companion/src/webview/App.tsx b/packages/vscode-ide-companion/src/webview/App.tsx
index ebdc61350..3f01f30e5 100644
--- a/packages/vscode-ide-companion/src/webview/App.tsx
+++ b/packages/vscode-ide-companion/src/webview/App.tsx
@@ -782,7 +782,7 @@ export const App: React.FC = () => {
 
   // When user sends a message after scrolling up, re-pin and jump to the bottom
   const handleSubmitWithScroll = useCallback(
-    (e: React.FormEvent) => {
+    (e: React.FormEvent | React.KeyboardEvent, explicitText?: string) => {
       setPinnedToBottom(true);
 
       const container = messagesContainerRef.current;
@@ -791,7 +791,7 @@ export const App: React.FC = () => {
         container.scrollTo({ top });
       }
 
-      submitMessage(e);
+      submitMessage(e, explicitText);
     },
     [submitMessage],
   );
diff --git a/packages/vscode-ide-companion/src/webview/hooks/useMessageSubmit.ts b/packages/vscode-ide-companion/src/webview/hooks/useMessageSubmit.ts
index 3145e9d15..dbcd04be7 100644
--- a/packages/vscode-ide-companion/src/webview/hooks/useMessageSubmit.ts
+++ b/packages/vscode-ide-companion/src/webview/hooks/useMessageSubmit.ts
@@ -72,12 +72,15 @@ export const useMessageSubmit = ({
   messageHandling,
 }: UseMessageSubmitProps) => {
   const handleSubmit = useCallback(
-    (e: React.FormEvent) => {
+    (e: React.FormEvent | React.KeyboardEvent, explicitText?: string) => {
       e.preventDefault();
 
+      // Use explicit text if provided (e.g., from prompt suggestion Enter accept)
+      const textToSend = explicitText ?? inputText;
+
       if (
         !shouldSendMessage({
-          inputText,
+          inputText: textToSend,
           attachedImages,
           isStreaming,
           isWaitingForResponse,
@@ -87,7 +90,7 @@ export const useMessageSubmit = ({
       }
 
       // Handle /login command - show inline loading while extension authenticates
-      if (inputText.trim() === '/login') {
+      if (textToSend.trim() === '/login') {
         setInputText('');
         if (inputFieldRef.current) {
           // Use a zero-width space to maintain the height of the contentEditable element
@@ -121,7 +124,7 @@ export const useMessageSubmit = ({
       const fileRefPattern = /@([^\s]+)/g;
       let match;
 
-      while ((match = fileRefPattern.exec(inputText)) !== null) {
+      while ((match = fileRefPattern.exec(textToSend)) !== null) {
         const fileName = match[1];
         const filePath = fileContext.getFileReference(fileName);
 
@@ -171,7 +174,7 @@ export const useMessageSubmit = ({
       vscode.postMessage({
         type: 'sendMessage',
         data: {
-          text: inputText,
+          text: textToSend,
           context: context.length > 0 ? context : undefined,
           fileContext: fileContextForMessage,
           attachments: attachedImages.length > 0 ? attachedImages : undefined,
diff --git a/packages/webui/package.json b/packages/webui/package.json
index 6d1f8e513..de531b80d 100644
--- a/packages/webui/package.json
+++ b/packages/webui/package.json
@@ -12,6 +12,11 @@
       "import": "./dist/index.js",
       "require": "./dist/index.cjs"
     },
+    "./followup": {
+      "types": "./dist/followup.d.ts",
+      "import": "./dist/followup.js",
+      "require": "./dist/followup.cjs"
+    },
     "./icons": {
       "types": "./dist/components/icons/index.d.ts",
       "import": "./dist/components/icons/index.js",
@@ -32,7 +37,7 @@
   },
   "scripts": {
     "dev": "vite build --watch",
-    "build": "vite build",
+    "build": "vite build && vite build --config vite.config.followup.ts",
     "typecheck": "tsc --noEmit",
     "lint": "eslint src --ext .ts,.tsx",
     "lint:fix": "eslint src --ext .ts,.tsx --fix",
@@ -40,9 +45,15 @@
     "build-storybook": "storybook build"
   },
   "peerDependencies": {
+    "@qwen-code/qwen-code-core": ">=0.13.1",
     "react": "^18.0.0 || ^19.0.0",
     "react-dom": "^18.0.0 || ^19.0.0"
   },
+  "peerDependenciesMeta": {
+    "@qwen-code/qwen-code-core": {
+      "optional": true
+    }
+  },
   "dependencies": {
     "markdown-it": "^14.1.0"
   },
diff --git a/packages/webui/src/components/layout/InputForm.tsx b/packages/webui/src/components/layout/InputForm.tsx
index e73700e12..f34436843 100644
--- a/packages/webui/src/components/layout/InputForm.tsx
+++ b/packages/webui/src/components/layout/InputForm.tsx
@@ -22,6 +22,18 @@ import { CompletionMenu } from './CompletionMenu.js';
 import { ContextIndicator } from './ContextIndicator.js';
 import type { CompletionItem } from '../../types/completion.js';
 import type { ContextUsage } from './ContextIndicator.js';
+/**
+ * Minimal follow-up state shape used by InputForm.
+ * Defined locally to avoid pulling @qwen-code/qwen-code-core into the
+ * root entry's type declarations. The full FollowupState lives in
+ * '@qwen-code/webui/followup'.
+ */
+interface InputFormFollowupState {
+  /** Current suggestion text */
+  suggestion: string | null;
+  /** Whether to show suggestion */
+  isVisible: boolean;
+}
 
 /**
  * Edit mode display information
@@ -91,8 +103,11 @@ export interface InputFormProps {
   onCompositionEnd: () => void;
   /** Key down callback */
   onKeyDown: (e: React.KeyboardEvent) => void;
-  /** Submit callback */
-  onSubmit: (e: React.FormEvent) => void;
+  /** Submit callback. When explicitText is provided, submit that value instead of reading from input state. */
+  onSubmit(
+    e: React.FormEvent | React.KeyboardEvent,
+    explicitText?: string,
+  ): void;
   /** Cancel callback */
   onCancel: () => void;
   /** Toggle edit mode callback */
@@ -125,6 +140,12 @@ export interface InputFormProps {
   placeholder?: string;
   /** Whether the current draft is eligible to submit */
   canSubmit?: boolean;
+  /** Prompt suggestion state */
+  followupState?: InputFormFollowupState;
+  /** Callback to accept prompt suggestion */
+  onAcceptFollowup?: (method?: 'tab' | 'enter' | 'right') => void;
+  /** Callback to dismiss prompt suggestion */
+  onDismissFollowup?: () => void;
 }
 
 /**
@@ -184,6 +205,9 @@ export const InputForm: FC<InputFormProps> = ({
   extraContent,
   placeholder = 'Ask Qwen Code …',
   canSubmit,
+  followupState,
+  onAcceptFollowup,
+  onDismissFollowup,
 }) => {
   const composerDisabled = isStreaming || isWaitingForResponse;
   const hasDraftContent =
@@ -195,6 +219,17 @@ export const InputForm: FC<InputFormProps> = ({
     !!onCompletionSelect &&
     !!onCompletionClose;
 
+  // Prompt suggestion handling
+  const followupSuggestion =
+    followupState?.isVisible && followupState.suggestion
+      ? followupState.suggestion
+      : null;
+  const hasFollowup = !!followupSuggestion;
+
+  // Compute actual placeholder
+  const actualPlaceholder =
+    hasFollowup && !inputText ? followupSuggestion! : placeholder;
+
   const handleKeyDown = (e: React.KeyboardEvent) => {
     // Let the completion menu handle Escape when it's active.
     if (completionActive && e.key === 'Escape') {
@@ -209,12 +244,46 @@ export const InputForm: FC<InputFormProps> = ({
       onCancel();
       return;
     }
+    // Tab to accept prompt suggestion (only when callback is wired)
+    if (
+      e.key === 'Tab' &&
+      hasFollowup &&
+      onAcceptFollowup &&
+      !inputText &&
+      !completionActive
+    ) {
+      e.preventDefault();
+      e.stopPropagation();
+      onAcceptFollowup('tab');
+      return;
+    }
+    // Right arrow to accept prompt suggestion (only when callback is wired)
+    if (
+      e.key === 'ArrowRight' &&
+      hasFollowup &&
+      onAcceptFollowup &&
+      !inputText &&
+      !completionActive
+    ) {
+      e.preventDefault();
+      onAcceptFollowup?.('right');
+      return;
+    }
     // If composing (Chinese IME input), don't process Enter key
     if (e.key === 'Enter' && !e.shiftKey && !isComposing) {
       // If CompletionMenu is open, let it handle Enter key
       if (completionActive) {
         return;
       }
+      // Accept and submit prompt suggestion on Enter when input is empty
+      if (hasFollowup && !inputText && followupSuggestion) {
+        e.preventDefault();
+        onAcceptFollowup?.('enter');
+        // Pass suggestion text explicitly — onInputChange is async (React setState)
+        // so onSubmit cannot rely on reading inputText from the closure.
+        onSubmit(e, followupSuggestion);
+        return;
+      }
       e.preventDefault();
       onSubmit(e);
     }
@@ -269,7 +338,9 @@ export const InputForm: FC<InputFormProps> = ({
               role="textbox"
               aria-label="Message input"
               aria-multiline="true"
-              data-placeholder={placeholder}
+              data-placeholder={actualPlaceholder}
+              // Indicate when a prompt suggestion is active
+              data-has-suggestion={hasFollowup ? 'true' : 'false'}
               // Use a data flag so CSS can show placeholder even if the browser
               // inserts an invisible <br> into contentEditable (so :empty no longer matches)
               data-empty={
@@ -282,6 +353,10 @@ export const InputForm: FC<InputFormProps> = ({
                 // Filter out zero-width space that we use to maintain height
                 const text = target.textContent?.replace(/\u200B/g, '') || '';
                 onInputChange(text);
+                // Dismiss follow-up suggestion when user starts typing
+                if (hasFollowup && !inputText && text) {
+                  onDismissFollowup?.();
+                }
               }}
               onCompositionStart={onCompositionStart}
               onCompositionEnd={onCompositionEnd}
diff --git a/packages/webui/src/followup.ts b/packages/webui/src/followup.ts
new file mode 100644
index 000000000..028159051
--- /dev/null
+++ b/packages/webui/src/followup.ts
@@ -0,0 +1,19 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Prompt Suggestion Subpath Entry
+ *
+ * Separated from the root entry to avoid forcing all @qwen-code/webui
+ * consumers to install @qwen-code/qwen-code-core as a dependency.
+ *
+ * Usage: import { useFollowupSuggestions } from '@qwen-code/webui/followup';
+ */
+
+export { useFollowupSuggestions } from './hooks/useFollowupSuggestions';
+export type {
+  FollowupState,
+  UseFollowupSuggestionsOptions,
+  UseFollowupSuggestionsReturn,
+} from './hooks/useFollowupSuggestions';
diff --git a/packages/webui/src/hooks/useFollowupSuggestions.ts b/packages/webui/src/hooks/useFollowupSuggestions.ts
new file mode 100644
index 000000000..60fa84ce3
--- /dev/null
+++ b/packages/webui/src/hooks/useFollowupSuggestions.ts
@@ -0,0 +1,121 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Prompt Suggestion Hook
+ *
+ * Thin React wrapper around the framework-agnostic controller from core.
+ *
+ * Note: For browser environments, the parent component should handle
+ * suggestion generation and pass the results to this hook.
+ */
+
+import { useState, useCallback, useMemo, useRef, useEffect } from 'react';
+import {
+  INITIAL_FOLLOWUP_STATE,
+  createFollowupController,
+} from '@qwen-code/qwen-code-core';
+import type { FollowupState } from '@qwen-code/qwen-code-core';
+
+// Re-export types from core for convenience
+export type { FollowupState } from '@qwen-code/qwen-code-core';
+
+/**
+ * Options accepted by `useFollowupSuggestions`. All fields are optional.
+ */
+export interface UseFollowupSuggestionsOptions {
+  /** Whether the feature is enabled (the hook defaults this to true) */
+  enabled?: boolean;
+  /** Called with the suggestion text when a suggestion is accepted */
+  onAccept?: (suggestion: string) => void;
+  /** Called when a suggestion outcome ('accepted' or 'ignored') is reported */
+  onOutcome?: (params: {
+    outcome: 'accepted' | 'ignored';
+    accept_method?: 'tab' | 'enter' | 'right';
+    time_ms: number;
+    suggestion_length: number;
+  }) => void;
+}
+
+/**
+ * Value returned by `useFollowupSuggestions`.
+ */
+export interface UseFollowupSuggestionsReturn {
+  /** Current follow-up state (kept in React state by the hook) */
+  state: FollowupState;
+  /** Visible suggestion text, else the given `defaultPlaceholder` */
+  getPlaceholder: (defaultPlaceholder: string) => string;
+  /** Set suggestion text (called by parent component) */
+  setSuggestion: (text: string | null) => void;
+  /** Accept the current suggestion; `method` names the key that triggered it */
+  accept: (method?: 'tab' | 'enter' | 'right') => void;
+  /** Dismiss the current suggestion */
+  dismiss: () => void;
+  /** Clear all state */
+  clear: () => void;
+}
+
+/**
+ * React hook that exposes prompt-suggestion state to the Web UI.
+ *
+ * Timer/debounce/state logic is delegated to the shared
+ * `createFollowupController` from core; this hook mirrors the controller's
+ * state into React and adds a `getPlaceholder` helper for the input form.
+ *
+ * @param options - Optional flags and callbacks (`enabled` defaults to true).
+ * @returns The current state, the controller actions and `getPlaceholder`.
+ */
+export function useFollowupSuggestions(
+  options: UseFollowupSuggestionsOptions = {},
+): UseFollowupSuggestionsReturn {
+  const { enabled = true, onAccept, onOutcome } = options;
+
+  const [state, setState] = useState<FollowupState>(INITIAL_FOLLOWUP_STATE);
+  const { isVisible, suggestion } = state;
+
+  // Mirror the latest callbacks into refs so the controller can look them up
+  // at call time instead of capturing a stale closure.
+  const latestOnAccept = useRef(onAccept);
+  const latestOnOutcome = useRef(onOutcome);
+  latestOnAccept.current = onAccept;
+  latestOnOutcome.current = onOutcome;
+
+  // The controller is memoized on `enabled`: it stays the same object across
+  // renders and is only rebuilt when `enabled` flips. Callback changes never
+  // rebuild it because they are read through the refs above.
+  const controller = useMemo(() => {
+    return createFollowupController({
+      enabled,
+      onStateChange: setState,
+      getOnAccept: () => latestOnAccept.current,
+      onOutcome: (params) => latestOnOutcome.current?.(params),
+    });
+  }, [enabled]);
+
+  // While disabled, reset any existing state. The returned cleanup lets the
+  // controller clean up on unmount and before the effect re-runs.
+  useEffect(() => {
+    if (!enabled) {
+      controller.clear();
+    }
+    return () => {
+      controller.cleanup();
+    };
+  }, [controller, enabled]);
+
+  // WebUI-specific helper: a visible, non-empty suggestion takes the place of
+  // the caller's default placeholder.
+  const getPlaceholder = useCallback(
+    (defaultPlaceholder: string) =>
+      isVisible && suggestion ? suggestion : defaultPlaceholder,
+    [isVisible, suggestion],
+  );
+
+  // Memoize the returned object so its identity only changes when the state,
+  // the placeholder helper, or the controller changes.
+  return useMemo(() => {
+    const { setSuggestion, accept, dismiss, clear } = controller;
+    return { state, getPlaceholder, setSuggestion, accept, dismiss, clear };
+  }, [state, getPlaceholder, controller]);
+}
diff --git a/packages/webui/src/index.ts b/packages/webui/src/index.ts
index 777d2cced..f0b6807ef 100644
--- a/packages/webui/src/index.ts
+++ b/packages/webui/src/index.ts
@@ -231,6 +231,8 @@ export { StopIcon } from './components/icons/StopIcon';
 // Hooks
 export { useTheme } from './hooks/useTheme';
 export { useLocalStorage } from './hooks/useLocalStorage';
+// NOTE: useFollowupSuggestions is exported from '@qwen-code/webui/followup'
+// subpath to avoid forcing all consumers to install @qwen-code/qwen-code-core.
 
 // Types
 export type { Theme } from './types/theme';
diff --git a/packages/webui/src/styles/components.css b/packages/webui/src/styles/components.css
index 7ef3cd237..2065af1a6 100644
--- a/packages/webui/src/styles/components.css
+++ b/packages/webui/src/styles/components.css
@@ -441,6 +441,22 @@
   max-width: calc(100% - 28px);
 }
 
+/* Prompt suggestion styling - different from normal placeholder; emphasized further on hover */
+.composer-input[data-has-suggestion='true']:empty::before,
+.composer-input[data-has-suggestion='true'][data-empty='true']::before {
+  color: var(--app-primary, #3b82f6);
+  opacity: 0.7;
+  font-style: italic;
+}
+
+.composer-input[data-has-suggestion='true']:hover:empty::before,
+.composer-input[data-has-suggestion='true']:hover[data-empty='true']::before {
+  opacity: 0.9;
+  text-decoration: underline;
+  text-decoration-style: dotted;
+  text-underline-offset: 2px;
+}
+
 .composer-input:focus {
   outline: none;
 }
diff --git a/packages/webui/vite.config.followup.ts b/packages/webui/vite.config.followup.ts
new file mode 100644
index 000000000..8040c545d
--- /dev/null
+++ b/packages/webui/vite.config.followup.ts
@@ -0,0 +1,52 @@
+/**
+ * @license
+ * Copyright 2025 Qwen Team
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Separate Vite config for the @qwen-code/webui/followup subpath entry.
+ *
+ * Built independently so that the root entry (vite.config.ts) stays free
+ * of @qwen-code/qwen-code-core and can retain UMD output.
+ */
+
+import { defineConfig } from 'vite';
+import react from '@vitejs/plugin-react';
+import dts from 'vite-plugin-dts';
+import { resolve } from 'path';
+
+export default defineConfig({
+  plugins: [
+    react(),
+    dts({
+      include: ['src/followup.ts', 'src/hooks/useFollowupSuggestions.ts'],
+      outDir: 'dist',
+      rollupTypes: false,
+      // No types entry here, so the main build's index.d.ts is not clobbered
+      insertTypesEntry: false,
+    }),
+  ],
+  build: {
+    lib: {
+      entry: resolve(__dirname, 'src/followup.ts'),
+      formats: ['es', 'cjs'],
+      fileName: (format) => {
+        // CJS gets an explicit .cjs extension; any other format (only 'es'
+        // is configured above) is emitted as followup.js.
+        return format === 'cjs' ? 'followup.cjs' : 'followup.js';
+      },
+    },
+    outDir: 'dist',
+    emptyOutDir: false, // dist/ is shared with the main build; do not wipe it
+    rollupOptions: {
+      external: [
+        'react',
+        'react-dom',
+        'react/jsx-runtime',
+        '@qwen-code/qwen-code-core',
+      ],
+    },
+    sourcemap: true,
+    minify: false,
+    cssCodeSplit: false,
+  },
+});
diff --git a/packages/webui/vite.config.ts b/packages/webui/vite.config.ts
index 9a571eab3..b85d1ad2c 100644
--- a/packages/webui/vite.config.ts
+++ b/packages/webui/vite.config.ts
@@ -18,6 +18,10 @@ import { resolve } from 'path';
  * - UMD: dist/index.umd.js (for CDN usage)
  * - TypeScript declarations: dist/index.d.ts
  * - CSS: dist/styles.css (optional styles)
+ *
+ * The followup subpath (@qwen-code/webui/followup) is built separately
+ * via vite.config.followup.ts so that the root entry stays free of
+ * @qwen-code/qwen-code-core dependencies.
  */
 export default defineConfig({
   plugins: [