diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index dfe293133..1f71493c9 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -589,7 +589,10 @@ For authentication-related variables (like `OPENAI_*`) and the recommended `.qwe | `CODE_ASSIST_ENDPOINT` | Specifies the endpoint for the code assist server. | This is useful for development and testing. | | `QWEN_CODE_MAX_OUTPUT_TOKENS` | Overrides the default maximum output tokens per response. When not set, Qwen Code uses an adaptive strategy: starts with 8K tokens and automatically retries with 64K if the response is truncated. Set this to a specific value (e.g., `16000`) to use a fixed limit instead. | Takes precedence over the capped default (8K) but is overridden by `samplingParams.max_tokens` in settings. Disables automatic escalation when set. Example: `export QWEN_CODE_MAX_OUTPUT_TOKENS=16000` | | `QWEN_CODE_UNATTENDED_RETRY` | Set to `true` or `1` to enable persistent retry mode. When enabled, transient API capacity errors (HTTP 429 Rate Limit and 529 Overloaded) are retried indefinitely with exponential backoff (capped at 5 minutes per retry) and heartbeat keepalives every 30 seconds on stderr. | Designed for CI/CD pipelines and background automation where long-running tasks should survive temporary API outages. Must be set explicitly — `CI=true` alone does **not** activate this mode. See [Headless Mode](../features/headless#persistent-retry-mode) for details. Example: `export QWEN_CODE_UNATTENDED_RETRY=1` | -| `QWEN_CODE_PROFILE_STARTUP` | Set to `1` to enable startup performance profiling. Writes a JSON timing report to `~/.qwen/startup-perf/` with per-phase durations. | Only active inside the sandbox child process. Zero overhead when not set. Example: `export QWEN_CODE_PROFILE_STARTUP=1` | +| `QWEN_CODE_PROFILE_STARTUP` | Set to `1` to enable startup performance profiling. Writes a JSON timing report to `~/.qwen/startup-perf/` with per-phase durations. | Only active inside the sandbox child process (or with `QWEN_CODE_PROFILE_STARTUP_OUTER=1`). Zero overhead when not set. Example: `export QWEN_CODE_PROFILE_STARTUP=1` | +| `QWEN_CODE_PROFILE_STARTUP_OUTER` | Set to `1` together with `QWEN_CODE_PROFILE_STARTUP=1` to also collect a startup profile in the outer (pre-sandbox) process. Outer-process reports get an `outer-` filename prefix to keep them distinct from the sandbox child's report. | Off by default — only the sandbox child collects, to avoid duplicate reports. Useful for local development where the cli isn't relaunched into a sandbox. | +| `QWEN_CODE_PROFILE_STARTUP_NO_HEAP` | Set to `1` together with `QWEN_CODE_PROFILE_STARTUP=1` to skip the per-checkpoint `process.memoryUsage()` snapshots. Useful when measuring the profiler's own Heisenberg overhead. | Off by default. Heap snapshots cost ~50 µs each (well below 1% of total startup) so most users should leave this alone. | +| `QWEN_CODE_LEGACY_MCP_BLOCKING` | Set to `1` to restore the pre-progressive-MCP behavior where `Config.initialize()` waits synchronously for every configured MCP server's discover handshake before returning. | Off by default. Modern qwen-code lets MCP servers come online in the background while the UI is already interactive; the model sees each batch of new tools within ~16 ms of the server settling. This flag is kept as a rollback escape hatch for ≥ 1 release. Example: `export QWEN_CODE_LEGACY_MCP_BLOCKING=1` | When both user-level `.env` files define the same variable, the Qwen-specific file wins: `/.env` (or `~/.qwen/.env` when `QWEN_HOME` is unset) is diff --git a/docs/users/features/mcp.md b/docs/users/features/mcp.md index 4e0b8b14e..0fda97a76 100644 --- a/docs/users/features/mcp.md +++ b/docs/users/features/mcp.md @@ -147,6 +147,62 @@ CLI: qwen mcp add --transport sse sseServer http://localhost:8080/sse --timeout 30000 ``` +## Progressive availability and discovery timeouts + +Qwen Code discovers MCP servers in the background after the UI is already +interactive. You see the cli's first prompt within a few hundred +milliseconds even when one of your MCP servers takes several seconds +(or never responds), and the model's tool list updates within roughly +one frame (~16 ms) of each server completing its discover handshake. + +- **Interactive mode**: the UI appears immediately; an MCP status pill in + the bottom-right shows `N/M MCP servers ready` while discovery is in + flight. Sending a prompt before MCP finishes simply means the model + sees the tools that are ready _at that moment_; subsequent prompts see + more tools as servers come online. +- **Non-interactive mode** (`--prompt`, stream-json, ACP): the cli still + waits for MCP discovery to settle before sending the first prompt, so + scripted / piped invocations see the same complete tool set the + legacy synchronous behavior produced. + +### Per-server `discoveryTimeoutMs` + +Each MCP server gets a discovery-only timeout that caps how long the +initial handshake (`connect` + `tools/list` + `prompts/list` + +`resources/list`) is allowed to take. Defaults: + +- **stdio servers**: 30 s +- **remote HTTP / SSE servers**: 5 s (network risk is higher) + +Override per server when needed: + +```jsonc +{ + "mcpServers": { + "slow-stdio": { + "command": "node", + "args": ["./slow-server.js"], + "discoveryTimeoutMs": 60000, + }, + "flaky-remote": { + "httpUrl": "https://example.com/mcp", + "discoveryTimeoutMs": 10000, + }, + }, +} +``` + +The existing `timeout` field is **tool-call** timeout (used for each +`tools/call` request, default 10 minutes) and is unaffected by +`discoveryTimeoutMs` — a long-running tool invocation is not a startup +pathology. + +### Rolling back progressive MCP + +If you need the old synchronous behavior (cli waits for every MCP server +before showing any UI), set `QWEN_CODE_LEGACY_MCP_BLOCKING=1` in your +environment. This is kept as an escape hatch for at least one release. + ## Safety and control ### Trust (skip confirmations) diff --git a/packages/cli/src/acp-integration/acpAgent.test.ts b/packages/cli/src/acp-integration/acpAgent.test.ts index 59a6382e9..ae24a17f3 100644 --- a/packages/cli/src/acp-integration/acpAgent.test.ts +++ b/packages/cli/src/acp-integration/acpAgent.test.ts @@ -166,6 +166,7 @@ describe('runAcpAgent shutdown cleanup', () => { // Reset mockConfig after clearAllMocks mockConfig = { initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getHookSystem: vi.fn().mockReturnValue(undefined), getDisableAllHooks: vi.fn().mockReturnValue(false), hasHooksForEvent: vi.fn().mockReturnValue(false), @@ -343,6 +344,7 @@ describe('runAcpAgent SessionEnd hooks', () => { }; mockConfig = { initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getHookSystem: vi.fn().mockReturnValue(mockHookSystem), getDisableAllHooks: vi.fn().mockReturnValue(false), hasHooksForEvent: vi.fn().mockReturnValue(true), @@ -684,6 +686,7 @@ describe('QwenAgent MCP SSE/HTTP support', () => { mockConfig = { initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getHookSystem: vi.fn().mockReturnValue(undefined), getDisableAllHooks: vi.fn().mockReturnValue(false), hasHooksForEvent: vi.fn().mockReturnValue(false), @@ -744,6 +747,7 @@ describe('QwenAgent MCP SSE/HTTP support', () => { function makeInnerConfig() { return { initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getModelsConfig: vi.fn().mockReturnValue({ getCurrentAuthType: vi.fn().mockReturnValue('api-key'), }), @@ -759,6 +763,7 @@ describe('QwenAgent MCP SSE/HTTP support', () => { getGeminiClient: vi.fn().mockReturnValue({ isInitialized: vi.fn().mockReturnValue(true), initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), }), getFileSystemService: vi.fn().mockReturnValue(undefined), setFileSystemService: vi.fn(), @@ -1122,6 +1127,92 @@ describe('QwenAgent MCP SSE/HTTP support', () => { await agentPromise; }); + it('per-session newSession surfaces MCP failures to stderr (round-7 fix: was silent before)', async () => { + // Round-7 regression: `QwenAgent.initializeConfig()` (per-session ACP + // path) calls `waitForMcpReady()` but the round-4 fix only added the + // failure warning to the top-level `runAcpAgent` path. Per-session + // configs with failed MCP servers silently fell back to built-in + // tools with zero user-visible indication, despite the inline comment + // claiming "Same reasoning as the top-level runAcpAgent path." + const innerConfig = await setupSessionMocks('session-failed-mcp'); + ( + innerConfig as unknown as { getFailedMcpServerNames: () => string[] } + ).getFailedMcpServerNames = vi + .fn() + .mockReturnValue(['broken-server-a', 'broken-server-b']); + const stderrWrite = vi + .spyOn(process.stderr, 'write') + .mockImplementation(() => true); + + const agentPromise = runAcpAgent( + mockConfig, + makeSessionSettings(), + mockArgv, + ); + await vi.waitFor(() => expect(capturedAgentFactory).toBeDefined()); + + const agent = capturedAgentFactory!({ + get closed() { + return mockConnectionState.promise; + }, + }) as AgentLike; + + await agent.newSession({ cwd: '/tmp', mcpServers: [] }); + + // The warning must list both failed servers and mention "Warning:" + // exactly like the top-level path and the other non-interactive + // entry points (`gemini.tsx`, `session.ts`). + const matchingWrite = stderrWrite.mock.calls.find( + ([msg]) => + typeof msg === 'string' && + msg.includes('Warning: MCP server(s) failed to start') && + msg.includes('broken-server-a') && + msg.includes('broken-server-b'), + ); + expect(matchingWrite).toBeDefined(); + + stderrWrite.mockRestore(); + mockConnectionState.resolve(); + await agentPromise; + }); + + it('per-session newSession is safe when Config lacks getFailedMcpServerNames (defensive typeof check)', async () => { + // Tests pass stubbed Configs without `getFailedMcpServerNames` — the + // round-7 fix uses `typeof config.getFailedMcpServerNames === + // 'function'` so it must not throw, and must not write to stderr. + await setupSessionMocks('session-stubbed-config'); + const stderrWrite = vi + .spyOn(process.stderr, 'write') + .mockImplementation(() => true); + + const agentPromise = runAcpAgent( + mockConfig, + makeSessionSettings(), + mockArgv, + ); + await vi.waitFor(() => expect(capturedAgentFactory).toBeDefined()); + + const agent = capturedAgentFactory!({ + get closed() { + return mockConnectionState.promise; + }, + }) as AgentLike; + + await expect( + agent.newSession({ cwd: '/tmp', mcpServers: [] }), + ).resolves.not.toThrow(); + const surfacedWarning = stderrWrite.mock.calls.find( + ([msg]) => + typeof msg === 'string' && + msg.includes('Warning: MCP server(s) failed to start'), + ); + expect(surfacedWarning).toBeUndefined(); + + stderrWrite.mockRestore(); + mockConnectionState.resolve(); + await agentPromise; + }); + it('newSession with SSE MCP server and empty headers passes undefined for headers', async () => { await setupSessionMocks('session-sse-noheaders'); @@ -1206,6 +1297,7 @@ describe('QwenAgent extMethod renameSession routing', () => { mockConfig = { initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getHookSystem: vi.fn().mockReturnValue(undefined), getDisableAllHooks: vi.fn().mockReturnValue(false), hasHooksForEvent: vi.fn().mockReturnValue(false), @@ -1229,6 +1321,7 @@ describe('QwenAgent extMethod renameSession routing', () => { ) { return { initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getModelsConfig: vi.fn().mockReturnValue({ getCurrentAuthType: vi.fn().mockReturnValue('api-key'), }), @@ -1244,6 +1337,7 @@ describe('QwenAgent extMethod renameSession routing', () => { getGeminiClient: vi.fn().mockReturnValue({ isInitialized: vi.fn().mockReturnValue(true), initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), }), getFileSystemService: vi.fn().mockReturnValue(undefined), setFileSystemService: vi.fn(), diff --git a/packages/cli/src/acp-integration/acpAgent.ts b/packages/cli/src/acp-integration/acpAgent.ts index 999f695b3..386d6ee11 100644 --- a/packages/cli/src/acp-integration/acpAgent.ts +++ b/packages/cli/src/acp-integration/acpAgent.ts @@ -84,6 +84,27 @@ export async function runAcpAgent( // Initialize config to set up hookSystem (required for SessionStart/SessionEnd hooks) // This is needed because gemini.tsx calls runAcpAgent without calling config.initialize() await config.initialize(); + // ACP forwards session messages straight to the model; under progressive + // MCP availability `initialize()` returns before MCP servers settle, so + // we wait here to keep the first session's tool surface consistent with + // the legacy synchronous behavior. + await config.waitForMcpReady(); + // Surface MCP failures to stderr. ACP's stdout is the protocol channel + // so info/log writes are already redirected to stderr below, but we + // emit this BEFORE that redirection takes effect to keep the message + // visible regardless of how the host process is wired. + // Defensive against tests that pass a stubbed Config without + // `getFailedMcpServerNames`. + const failedMcpServers = + typeof config.getFailedMcpServerNames === 'function' + ? config.getFailedMcpServerNames() + : []; + if (failedMcpServers.length > 0) { + process.stderr.write( + `Warning: MCP server(s) failed to start: ${failedMcpServers.join(', ')}. ` + + `Continuing with built-in tools and any servers that did connect.\n`, + ); + } const stdout = Writable.toWeb(process.stdout) as WritableStream; const stdin = Readable.toWeb(process.stdin) as ReadableStream; @@ -673,6 +694,26 @@ class QwenAgent implements Agent { }, ); await config.initialize(); + // Same reasoning as the top-level runAcpAgent path: ACP feeds session + // messages to the model immediately, so we cannot return a Config whose + // MCP discovery is still in flight. + await config.waitForMcpReady(); + // Surface MCP failures to stderr — mirrors `runAcpAgent` (lines 95-107) + // and the other non-interactive entry points (`gemini.tsx`, + // `session.ts`). Without this, per-session ACP configs that lose MCP + // servers fall back to built-in-tools-only with no user-visible + // indication. Defensive against tests that pass a stubbed Config + // without `getFailedMcpServerNames`. + const failedMcpServers = + typeof config.getFailedMcpServerNames === 'function' + ? config.getFailedMcpServerNames() + : []; + if (failedMcpServers.length > 0) { + process.stderr.write( + `Warning: MCP server(s) failed to start: ${failedMcpServers.join(', ')}. ` + + `Continuing with built-in tools and any servers that did connect.\n`, + ); + } return config; } diff --git a/packages/cli/src/gemini.test.tsx b/packages/cli/src/gemini.test.tsx index 1c55ecd9c..fab881622 100644 --- a/packages/cli/src/gemini.test.tsx +++ b/packages/cli/src/gemini.test.tsx @@ -183,6 +183,7 @@ describe('gemini.tsx main function', () => { getListExtensions: () => false, getMcpServers: () => ({}), initialize: vi.fn(), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getIdeMode: () => false, getExperimentalZedIntegration: () => false, getScreenReader: () => false, @@ -261,6 +262,7 @@ describe('gemini.tsx main function', () => { getListExtensions: () => false, getMcpServers: () => ({}), initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getIdeMode: () => false, getExperimentalZedIntegration: () => false, getScreenReader: () => false, @@ -436,6 +438,7 @@ describe('gemini.tsx main function', () => { getListExtensions: () => false, getMcpServers: () => ({}), initialize: vi.fn().mockResolvedValue(undefined), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getIdeMode: () => false, getExperimentalZedIntegration: () => false, getScreenReader: () => false, @@ -475,9 +478,16 @@ describe('gemini.tsx main function', () => { } expect(runStreamJsonSpy).toHaveBeenCalledTimes(1); - const [configArg, inputArg] = runStreamJsonSpy.mock.calls[0]; + const [configArg, inputArg, settingsArg] = runStreamJsonSpy.mock.calls[0]; expect(configArg).toBe(validatedConfig); expect(inputArg).toBe('hello stream'); + // Regression guard: PR-A's progressive-MCP refactor previously + // dropped the `settings` argument here, which silently fell back to + // `createMinimalSettings()` inside `runNonInteractiveStreamJson`. + // The parallel `runNonInteractive` path still received settings, so + // stream-json sessions lost any user-configured permission / + // approval / hook setup. + expect(settingsArg).toBeDefined(); expect(validateAuthSpy).toHaveBeenCalledWith( undefined, @@ -561,6 +571,7 @@ describe('gemini.tsx main function kitty protocol', () => { getListExtensions: () => false, getMcpServers: () => ({}), initialize: vi.fn(), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getIdeMode: () => false, getExperimentalZedIntegration: () => false, getScreenReader: () => false, @@ -669,6 +680,7 @@ describe('gemini.tsx main function kitty protocol', () => { getListExtensions: () => false, getMcpServers: () => ({}), initialize: vi.fn(), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getIdeMode: () => false, getExperimentalZedIntegration: () => false, getScreenReader: () => false, @@ -754,6 +766,7 @@ describe('gemini.tsx main function kitty protocol', () => { getListExtensions: () => false, getMcpServers: () => ({}), initialize: vi.fn(), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getIdeMode: () => false, getExperimentalZedIntegration: () => false, getScreenReader: () => false, diff --git a/packages/cli/src/gemini.tsx b/packages/cli/src/gemini.tsx index 5f0aafbef..b8da69498 100644 --- a/packages/cli/src/gemini.tsx +++ b/packages/cli/src/gemini.tsx @@ -13,6 +13,7 @@ import { QWEN_CODE_SIMPLE_ENV_VAR, Storage, SessionService, + setStartupEventSink, type Config, createDebugLogger, writeRuntimeStatus, @@ -64,7 +65,10 @@ import { handleAutoUpdate } from './utils/handleAutoUpdate.js'; import { readStdin } from './utils/readStdin.js'; import { profileCheckpoint, + recordStartupEvent, + setInteractiveMode, finalizeStartupProfile, + isStartupProfilerEnabled, } from './utils/startupProfiler.js'; import { relaunchAppInChildProcess, @@ -222,7 +226,9 @@ export async function startInteractiveUI( // tools (terminal multiplexers, IDE integrations, status daemons) can // map the running PID back to its session id and work directory. // Best-effort: a read-only filesystem must not prevent the UI from - // starting up. + // starting up. Marking the runtime status as enabled is what arms the + // session-swap refresh in `Config.refreshSessionId()` — without this + // call, the sidecar would never update on `/clear` or `/resume`. try { const sessionId = config.getSessionId(); const runtimeStatusPath = config.storage.getRuntimeStatusPath(sessionId); @@ -231,14 +237,11 @@ export async function startInteractiveUI( workDir: config.getTargetDir(), qwenVersion: version, }); - // Mark this process as the runtime.json owner so subsequent - // session swaps (/clear, /resume, etc.) refresh the sidecar. - // Non-interactive entry points never reach here, so they won't - // trample a sibling shell's sidecar on the same session id. config.markRuntimeStatusEnabled(); } catch { // ignored: best-effort, never block UI startup. } + const restoreTerminalRedrawOptimizer = process.stdout.isTTY && !config.getScreenReader() ? installTerminalRedrawOptimizer(process.stdout) @@ -350,6 +353,15 @@ export async function startInteractiveUI( isScreenReaderEnabled: config.getScreenReader(), }, ); + // Records the moment Ink's `render()` call has returned, which is + // synchronous and happens before React reconciliation actually pushes + // bytes to the terminal. We intentionally keep the legacy name + // `first_paint` for backward compatibility with previously-collected + // profile files; the value is best read as "render call returned" + // rather than literal pixel paint. AppContainer's mount effect runs + // after this — it carries the `config_initialize_*` and + // `input_enabled` checkpoints that complete the first-screen picture. + profileCheckpoint('first_paint'); // Check for updates only if enableAutoUpdate is not explicitly disabled. // Using !== false ensures updates are enabled by default when undefined. @@ -379,6 +391,16 @@ export async function startInteractiveUI( export async function main() { profileCheckpoint('main_entry'); + // Bridge core-package startup events (Config.initialize, MCP discovery, + // GeminiClient.setTools) into the cli's startup profiler. Gated on + // `isStartupProfilerEnabled()` so that when QWEN_CODE_PROFILE_STARTUP is + // unset (the common case) every core-side `recordStartupEvent()` call + // sees a null sink and short-circuits at the first comparison, instead + // of going through this arrow wrapper and the profiler's own enabled + // check. + if (isStartupProfilerEnabled()) { + setStartupEventSink((name, attrs) => recordStartupEvent(name, attrs)); + } setupUnhandledRejectionHandler(); if (process.argv.includes('--bare')) { @@ -708,7 +730,6 @@ export async function main() { // Render UI, passing necessary config values. Check that there is no command line question. profileCheckpoint('before_render'); - finalizeStartupProfile(config.getSessionId()); if (config.isInteractive()) { // --json-schema is a headless-only contract: the synthetic @@ -729,6 +750,11 @@ export async function main() { await runExitCleanup(); process.exit(1); } + // For the interactive path, the profile is finalized by AppContainer + // after `config.initialize()` and `input_enabled` are recorded — that's + // the only way `first_paint`, `config_initialize_*`, `input_enabled`, + // and the MCP events are captured. See AppContainer's mount effect. + setInteractiveMode(true); // Need kitty detection to be complete before we can start the interactive UI. await kittyProtocolDetectionComplete; // Drain the auto-theme probe before render so the OSC 11 response is @@ -746,6 +772,10 @@ export async function main() { return; } + // Non-interactive: defer finalize until after `config.initialize()` runs + // so MCP discovery events (mcp_first_tool_registered, mcp_all_servers_settled, + // gemini_tools_updated) are captured in the profile. + // Print debug mode notice to stderr for non-interactive mode if (config.getDebugMode()) { writeStderrLine('Debug mode enabled'); @@ -759,9 +789,57 @@ export async function main() { } } - // For non-stream-json mode, initialize config here + // For non-stream-json mode, initialize config here. Stream-json defers + // `config.initialize()` to inside `Session.ensureConfigInitialized` + // because the initial control_request may register SDK MCP servers + // that must be in place before discovery runs (see session.ts). if (inputFormat !== InputFormat.STREAM_JSON) { + profileCheckpoint('config_initialize_start'); await config.initialize(); + profileCheckpoint('config_initialize_end'); + // Non-interactive paths feed a prompt to the model immediately after + // init. Under PR-A's progressive MCP availability, + // `config.initialize()` returns BEFORE MCP servers settle, so + // without this wait the first sendMessage would see only built-in + // tools — a silent regression versus the legacy synchronous + // behavior. Interactive paths skip this (AppContainer's batch-flush + // subscriber updates the tool list as MCP servers come online). + await config.waitForMcpReady(); + // Surface MCP server failures on stderr so non-interactive runs + // (--prompt / piped stdin / scripts) don't silently regress to + // built-in-tools-only when a server cannot connect. The legacy + // synchronous MCP path was visibly noisy on failures because + // per-server errors logged to stderr during the blocking + // `discoverAllMcpTools` call; PR-A moves discovery to a + // background promise whose per-server errors are caught inside + // `discoverAllMcpToolsIncremental` and never reach a TTY. This + // helper closes that gap without re-introducing blocking. + // Defensive against tests that pass a stubbed Config without + // `getFailedMcpServerNames` — the warning is best-effort visibility + // and never gates startup. + const failedMcpServers = + typeof config.getFailedMcpServerNames === 'function' + ? config.getFailedMcpServerNames() + : []; + if (failedMcpServers.length > 0) { + writeStderrLine( + `Warning: MCP server(s) failed to start: ${failedMcpServers.join(', ')}. ` + + `Continuing with built-in tools and any servers that did connect. ` + + `Re-run with QWEN_CODE_DEBUG=1 to see per-server reasons.`, + ); + } + // Finalize the non-interactive startup profile here so MCP events + // emitted during initialize() / waitForMcpReady() are captured. + // Subsequent stdin reads / auth checks / prompt execution are not + // part of the "first-screen" budget. + // + // For stream-json we deliberately do NOT finalize here: the profile + // is finalized inside Session.ensureConfigInitialized() after MCP + // settles, so its `config_initialize_*` and MCP events make it into + // the file. Finalizing here would write an empty profile and the + // module-level `finalized` guard would suppress every subsequent + // event. + finalizeStartupProfile(config.getSessionId()); } // Only read stdin if NOT in stream-json mode diff --git a/packages/cli/src/nonInteractive/session.test.ts b/packages/cli/src/nonInteractive/session.test.ts index 6461f9fbe..6cc127753 100644 --- a/packages/cli/src/nonInteractive/session.test.ts +++ b/packages/cli/src/nonInteractive/session.test.ts @@ -77,6 +77,7 @@ function createConfig(overrides: ConfigOverrides = {}): Config { getApprovalMode: () => 'auto', getOutputFormat: () => 'stream-json', initialize: vi.fn(), + waitForMcpReady: vi.fn().mockResolvedValue(undefined), getMonitorRegistry: () => mockMonitorRegistry, getBackgroundShellRegistry: () => mockBackgroundShellRegistry, getBackgroundTaskRegistry: () => mockBackgroundTaskRegistry, diff --git a/packages/cli/src/nonInteractive/session.ts b/packages/cli/src/nonInteractive/session.ts index 4d01d9d4f..f5e32aebb 100644 --- a/packages/cli/src/nonInteractive/session.ts +++ b/packages/cli/src/nonInteractive/session.ts @@ -34,6 +34,10 @@ import { import { createMinimalSettings } from '../config/settings.js'; import type { LoadedSettings } from '../config/settings.js'; import { runNonInteractive } from '../nonInteractiveCli.js'; +import { + finalizeStartupProfile, + profileCheckpoint, +} from '../utils/startupProfiler.js'; const debugLogger = createDebugLogger('NON_INTERACTIVE_SESSION'); @@ -135,7 +139,46 @@ class Session { debugLogger.debug('[Session] Initializing config'); try { + // Bracket `config.initialize()` with the same profiler checkpoints + // the non-stream-json branch in `gemini.tsx` uses so the + // `config_initialize_dur` derived phase shows up in stream-json + // startup profiles. `profileCheckpoint` is a no-op when + // `QWEN_CODE_PROFILE_STARTUP` is unset, so this adds zero overhead + // off the profiling path. Without these, stream-json profiles read + // as missing the initialize phase entirely, which made the MCP + // discovery timings look like they happened "before init". + profileCheckpoint('config_initialize_start'); await this.config.initialize(options); + profileCheckpoint('config_initialize_end'); + // Stream-json sessions feed prompts straight to the model after init. + // Under progressive MCP availability `initialize()` returns before + // MCP servers settle, so we must explicitly await discovery here — + // otherwise the first prompt would see only built-in tools. + await this.config.waitForMcpReady(); + // Surface MCP failures on stderr — same rationale as gemini.tsx's + // non-interactive branch: per-server errors are caught inside + // `discoverAllMcpToolsIncremental` and never reach a TTY otherwise, + // so a script using stream-json with broken MCP config would + // silently run with only built-in tools. + // Defensive against tests that pass a stubbed Config without + // `getFailedMcpServerNames`. + const failedMcpServers = + typeof this.config.getFailedMcpServerNames === 'function' + ? this.config.getFailedMcpServerNames() + : []; + if (failedMcpServers.length > 0) { + process.stderr.write( + `Warning: MCP server(s) failed to start: ${failedMcpServers.join(', ')}. ` + + `Continuing with built-in tools and any servers that did connect.\n`, + ); + } + // Finalize the startup profile here so `config_initialize_*` and the + // MCP discovery events captured during init/discovery make it into + // the on-disk profile. gemini.tsx's stream-json branch deliberately + // skips finalize because the profiler's `finalized` guard would + // otherwise suppress every event emitted during the + // `Session.ensureConfigInitialized` flow above. + finalizeStartupProfile(this.config.getSessionId()); this.configInitialized = true; this.registerMonitorRegistrations(); this.registerMonitorNotifications(); diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx index d6cbfd82e..15c21560c 100644 --- a/packages/cli/src/ui/AppContainer.tsx +++ b/packages/cli/src/ui/AppContainer.tsx @@ -55,6 +55,7 @@ import { IDLE_SPECULATION, ApprovalMode, ConditionalRulesRegistry, + MCPDiscoveryState, type PermissionMode, ToolConfirmationOutcome, type WaitingToolCall, @@ -69,7 +70,30 @@ import { } from './utils/todoSnapshot.js'; import type { TodoItem } from './components/TodoDisplay.js'; import { loadHierarchicalGeminiMemory } from '../config/config.js'; +import { + profileCheckpoint, + finalizeStartupProfile, +} from '../utils/startupProfiler.js'; +import { appEvents } from '../utils/events.js'; import process from 'node:process'; + +/** + * Window in which mcp-client-update events are coalesced before the cli calls + * `setTools()`. Matches Claude Code's `MCP_BATCH_FLUSH_MS` (16 ≈ one 60Hz + * frame). Smaller windows would refresh the model tool list more often + * without user benefit; larger windows would let multiple servers settle + * before the model sees them. 16ms is the sweet spot validated by Claude's + * production deployment (see design.md § 8.3 + § 3.2 Round 2). + */ +const MCP_BATCH_FLUSH_MS = 16; + +/** + * Maximum time we keep the startup profile open waiting for MCP discovery to + * settle. Slightly longer than the default 30s per-server discovery timeout + * so a server that times out can still log its `outcome: failed` event into + * the profile. After this cap the profile file is written regardless. + */ +const STARTUP_PROFILE_FINALIZE_CAP_MS = 35_000; import { useHistory } from './hooks/useHistoryManager.js'; import { useMemoryMonitor } from './hooks/useMemoryMonitor.js'; import { useThemeCommand } from './hooks/useThemeCommand.js'; @@ -384,7 +408,6 @@ export const AppContainer = (props: AppContainerProps) => { // Terminal and layout hooks const { columns: terminalWidth, rows: terminalHeight } = useTerminalSize(); - const previousTerminalWidthRef = useRef(terminalWidth); const { stdin, setRawMode } = useStdin(); const { stdout } = useStdout(); @@ -419,8 +442,19 @@ export const AppContainer = (props: AppContainerProps) => { (async () => { // Note: the program will not work if this fails so let errors be // handled by the global catch. + profileCheckpoint('config_initialize_start'); await config.initialize(); + profileCheckpoint('config_initialize_end'); setConfigInitialized(true); + profileCheckpoint('input_enabled'); + // Profile finalize is intentionally NOT here. With PR-A's background + // MCP discovery, MCP-related events (`mcp_server_ready:*`, + // `mcp_first_tool_registered`, `mcp_all_servers_settled`, + // `gemini_tools_updated`) arrive AFTER `input_enabled`. The dedicated + // `useEffect` below (gated by `configInitialized`) defers finalize + // until MCP discovery settles or the 35s hard cap elapses — that way + // the profile captures the full MCP timeline without holding back + // the user-facing TTI. const resumedSessionData = config.getResumedSessionData(); if (resumedSessionData) { @@ -500,6 +534,138 @@ export const AppContainer = (props: AppContainerProps) => { // eslint-disable-next-line react-hooks/exhaustive-deps }, [config]); + /** + * PR-A wiring: progressive MCP availability. + * + * This effect does two coupled things, both gated on `configInitialized`: + * + * 1. **16ms batch-flush of `setTools()`**: as each MCP server completes + * discover, `McpClientManager` emits `mcp-client-update`. We coalesce + * these into at most one `GeminiClient.setTools()` call per ~16ms + * window. With three MCP servers settling within a few ms of each + * other, the model sees one consolidated tool refresh instead of + * three back-to-back; with a server stream over 1s, the model sees + * each batch with at most one frame of lag (this is the gap the + * baseline measured at 6235 ms in three-mixed-mcp before PR-A). + * + * 2. **Deferred startup-profile finalize**: in PR-A's default mode + * MCP discovery runs in the background, so MCP-related profiler + * events arrive AFTER `input_enabled`. The profile file is held open + * until either the manager's discovery state reaches `COMPLETED` + * (all servers ready or failed) or `STARTUP_PROFILE_FINALIZE_CAP_MS` + * elapses (so a hung server doesn't keep the profile open forever). + * + * In legacy blocking mode (`QWEN_CODE_LEGACY_MCP_BLOCKING=1`) MCP + * discovery already completed inside `config.initialize()`, so this + * effect observes `MCPDiscoveryState.COMPLETED` immediately and finalizes + * without waiting. + */ + useEffect(() => { + if (!isConfigInitialized) return undefined; + const geminiClient = config.getGeminiClient(); + if (!geminiClient) return undefined; + + const manager = config.getToolRegistry().getMcpClientManager(); + let flushTimer: NodeJS.Timeout | null = null; + let finalized = false; + + const finalizeOnce = () => { + if (finalized) return; + finalized = true; + finalizeStartupProfile(config.getSessionId()); + }; + + // Runs the pending batched setTools() immediately and clears the timer. + // Returns a promise that resolves when setTools() finishes so callers + // can sequence subsequent work after `gemini_tools_updated` is + // recorded into the startup profile. + const flushNow = (): Promise => { + if (flushTimer !== null) { + clearTimeout(flushTimer); + flushTimer = null; + } + // GeminiClient.setTools() has no try/catch around warmAll() / + // getFunctionDeclarations() / getChat().setTools(). A silent + // discard here would make production tool-registration regressions + // invisible, so route the error through debugLogger. + return geminiClient.setTools().catch((err) => { + debugLogger.error( + `setTools() batch-flush failed: ${err instanceof Error ? err.message : String(err)}`, + ); + }); + }; + + const scheduleFlush = () => { + if (flushTimer !== null) return; + flushTimer = setTimeout(() => { + flushTimer = null; + void flushNow(); + }, MCP_BATCH_FLUSH_MS); + }; + + // Match the non-interactive entry points (`gemini.tsx`, `session.ts`, + // `acpAgent.ts`) which warn to stderr when MCP discovery completes with + // failed servers. The interactive path can't use stderr (it would + // collide with Ink's rendered output), so we route through + // `debugLogger.warn` so it shows up under `QWEN_CODE_DEBUG=1` and in + // the debug log file — matching the channel `setTools()` errors above + // use. The MCP status footer pill already surfaces failures + // continuously in the UI; this log is the actionable-on-debug record + // wenshao asked for in round 7. + let failureSurfaced = false; + const surfaceFailuresOnce = () => { + if (failureSurfaced) return; + failureSurfaced = true; + const failedNames = + typeof config.getFailedMcpServerNames === 'function' + ? config.getFailedMcpServerNames() + : []; + if (failedNames.length > 0) { + debugLogger.warn( + `MCP server(s) failed to start: ${failedNames.join(', ')}. ` + + `Continuing with built-in tools and any servers that did connect.`, + ); + } + }; + + const onMcpUpdate = () => { + if (manager.getDiscoveryState() === MCPDiscoveryState.COMPLETED) { + // Discovery has settled. Flush the pending setTools() NOW (rather + // than waiting for the 16ms batch timer) and only finalize after + // it runs — `setTools()` emits the `gemini_tools_updated` event, + // and finalizing before it fires would drop that event because + // the module-level `finalized` guard suppresses every subsequent + // record. That dropped event is what `gemini_tools_lag` is + // derived from in the profile summary. + surfaceFailuresOnce(); + void flushNow().finally(finalizeOnce); + } else { + scheduleFlush(); + } + }; + + // Legacy / no-MCP path: discovery already finished synchronously + // inside config.initialize(), so finalize immediately and only keep + // the flush listener around for late refreshes (e.g. SkillTool's + // post-construction refreshSkills triggering setTools). + if (manager.getDiscoveryState() === MCPDiscoveryState.COMPLETED) { + surfaceFailuresOnce(); + finalizeOnce(); + } + + appEvents.on('mcp-client-update', onMcpUpdate); + const finalizeCap = setTimeout( + finalizeOnce, + STARTUP_PROFILE_FINALIZE_CAP_MS, + ); + + return () => { + appEvents.off('mcp-client-update', onMcpUpdate); + if (flushTimer !== null) clearTimeout(flushTimer); + clearTimeout(finalizeCap); + }; + }, [isConfigInitialized, config]); + // Track idle state via ref so the update handler can defer notifications // while the model is streaming, without triggering re-renders. // Note: isIdleRef.current is assigned after streamingState becomes available @@ -582,12 +748,21 @@ export const AppContainer = (props: AppContainerProps) => { }, [remountStaticHistory, stdout]); // Targeted repaint for resize events: move cursor to top-left and erase - // downward instead of a full clearTerminal, avoiding the full-screen flash. + // downward instead of a full clearTerminal, avoiding the full-screen + // flash. Ink's region is append-only, so when terminal width + // changes (tmux split, fullscreen toggle, font size change) we must + // explicitly re-emit the static history at the new width — otherwise + // header content stays at the old width and visibly tears. const repaintStaticViewport = useCallback(() => { stdout.write(`${ansiEscapes.cursorTo(0, 0)}${ansiEscapes.eraseDown}`); remountStaticHistory(); }, [remountStaticHistory, stdout]); + // Track previous terminal width across renders so we only repaint when + // the width actually changes. Initialized to the current width to avoid + // a spurious repaint on first mount. + const previousTerminalWidthRef = useRef(terminalWidth); + // Keep the static header in sync with model changes without polling. // Ink's output is append-only, so model changes must explicitly // clear and remount the static region to redraw the banner at the top. @@ -1935,6 +2110,11 @@ export const AppContainer = (props: AppContainerProps) => { } }, [terminalWidth, availableTerminalHeight, activePtyId]); + // Repaint static header on terminal resize. Without this, tmux pane + // resizes and fullscreen toggles leave the static region rendered at the + // old width — header content visibly tears until the next refreshStatic + // (e.g. /model). Cheap repaint (cursor-to + erase-down) rather than a + // full clearTerminal to avoid the full-screen flash. useEffect(() => { if (previousTerminalWidthRef.current === terminalWidth) { return; diff --git a/packages/cli/src/utils/startupProfiler.test.ts b/packages/cli/src/utils/startupProfiler.test.ts index adfb4bf99..64eb28d77 100644 --- a/packages/cli/src/utils/startupProfiler.test.ts +++ b/packages/cli/src/utils/startupProfiler.test.ts @@ -3,6 +3,8 @@ import * as fs from 'node:fs'; import { initStartupProfiler, profileCheckpoint, + recordStartupEvent, + setInteractiveMode, finalizeStartupProfile, getStartupReport, resetStartupProfiler, @@ -32,8 +34,15 @@ describe('startupProfiler', () => { beforeEach(() => { resetStartupProfiler(); vi.restoreAllMocks(); - saveEnv('QWEN_CODE_PROFILE_STARTUP', 'SANDBOX'); + saveEnv( + 'QWEN_CODE_PROFILE_STARTUP', + 'QWEN_CODE_PROFILE_STARTUP_OUTER', + 'QWEN_CODE_PROFILE_STARTUP_NO_HEAP', + 'SANDBOX', + ); delete process.env['QWEN_CODE_PROFILE_STARTUP']; + delete process.env['QWEN_CODE_PROFILE_STARTUP_OUTER']; + delete process.env['QWEN_CODE_PROFILE_STARTUP_NO_HEAP']; delete process.env['SANDBOX']; }); @@ -217,5 +226,153 @@ describe('startupProfiler', () => { resetStartupProfiler(); expect(getStartupReport()).toBeNull(); }); + + it('records startup events as a separate list with attrs', () => { + initStartupProfiler(); + profileCheckpoint('main_entry'); + recordStartupEvent('mcp_server_ready:foo', { outcome: 'ready' }); + recordStartupEvent('mcp_server_ready:bar', { outcome: 'failed' }); + + const report = getStartupReport()!; + expect(report.events.map((e) => e.name)).toEqual([ + 'mcp_server_ready:foo', + 'mcp_server_ready:bar', + ]); + expect(report.events[0]!.tMs).toBeGreaterThanOrEqual(0); + expect(report.events[0]!.attrs).toEqual({ outcome: 'ready' }); + }); + + it('drops events recorded after finalize to keep memory bounded', () => { + vi.mocked(fs.mkdirSync).mockReturnValue(undefined); + vi.mocked(fs.writeFileSync).mockReturnValue(undefined); + vi.spyOn(process.stderr, 'write').mockReturnValue(true); + + initStartupProfiler(); + profileCheckpoint('main_entry'); + recordStartupEvent('first'); + finalizeStartupProfile('s1'); + // Post-finalize emissions (e.g. setTools refresh during a long + // interactive session) must NOT accumulate. + recordStartupEvent('after_finalize'); + profileCheckpoint('after_finalize_cp'); + + const written = JSON.parse( + vi.mocked(fs.writeFileSync).mock.calls[0]![1] as string, + ); + expect(written.events.map((e: { name: string }) => e.name)).toEqual([ + 'first', + ]); + expect(written.phases.map((p: { name: string }) => p.name)).toEqual([ + 'main_entry', + ]); + }); + + it('marks interactiveMode and computes derived phases', () => { + vi.mocked(fs.mkdirSync).mockReturnValue(undefined); + vi.mocked(fs.writeFileSync).mockReturnValue(undefined); + vi.spyOn(process.stderr, 'write').mockReturnValue(true); + + initStartupProfiler(); + setInteractiveMode(true); + profileCheckpoint('main_entry'); + profileCheckpoint('after_load_settings'); + profileCheckpoint('after_load_cli_config'); + profileCheckpoint('after_initialize_app'); + profileCheckpoint('before_render'); + profileCheckpoint('first_paint'); + profileCheckpoint('config_initialize_start'); + profileCheckpoint('config_initialize_end'); + profileCheckpoint('input_enabled'); + recordStartupEvent('mcp_first_tool_registered'); + recordStartupEvent('gemini_tools_updated'); + recordStartupEvent('mcp_all_servers_settled'); + finalizeStartupProfile('iaa'); + + const written = JSON.parse( + vi.mocked(fs.writeFileSync).mock.calls[0]![1] as string, + ); + expect(written.interactiveMode).toBe(true); + expect(written.outerProcess).toBe(false); + // Every derived phase should appear when its checkpoint/event was recorded. + const dp = written.derivedPhases; + expect(dp).toHaveProperty('module_load'); + expect(dp).toHaveProperty('settings_time'); + expect(dp).toHaveProperty('config_time'); + expect(dp).toHaveProperty('init_time'); + expect(dp).toHaveProperty('pre_render'); + expect(dp).toHaveProperty('to_first_paint'); + expect(dp).toHaveProperty('to_input_enabled'); + expect(dp).toHaveProperty('config_initialize_dur'); + expect(dp).toHaveProperty('mcp_first_tool'); + expect(dp).toHaveProperty('mcp_all_settled'); + expect(dp).toHaveProperty('gemini_tools_lag'); + // gemini_tools_lag is the gap between mcp_first_tool_registered and + // gemini_tools_updated; should be non-negative. + expect(dp.gemini_tools_lag).toBeGreaterThanOrEqual(0); + }); + + it('caps the events array at MAX_EVENTS and flags truncation', () => { + initStartupProfiler(); + profileCheckpoint('main_entry'); + // Force well past the cap. + for (let i = 0; i < 2000; i++) { + recordStartupEvent(`evt:${i}`); + } + const report = getStartupReport()!; + // Cap is 1024; report should reflect both the cap and the truncated flag. + expect(report.events.length).toBeLessThanOrEqual(1024); + expect(report.events.length).toBeGreaterThan(1000); + expect(report.eventsTruncated).toBe(true); + }); + + it('captures heap snapshots at each checkpoint by default', () => { + initStartupProfiler(); + profileCheckpoint('phase_a'); + const report = getStartupReport()!; + expect(report.phases[0]!.heapUsedMb).toBeGreaterThan(0); + }); + + it('omits heap snapshots when QWEN_CODE_PROFILE_STARTUP_NO_HEAP=1', () => { + process.env['QWEN_CODE_PROFILE_STARTUP_NO_HEAP'] = '1'; + initStartupProfiler(); + profileCheckpoint('phase_a'); + const report = getStartupReport()!; + expect(report.phases[0]!.heapUsedMb).toBeUndefined(); + }); + }); + + describe('outer-process opt-in (QWEN_CODE_PROFILE_STARTUP_OUTER=1)', () => { + it('does NOT collect outside sandbox without OUTER opt-in', () => { + process.env['QWEN_CODE_PROFILE_STARTUP'] = '1'; + delete process.env['SANDBOX']; + + initStartupProfiler(); + profileCheckpoint('test'); + expect(getStartupReport()).toBeNull(); + }); + + it('collects outside sandbox when OUTER=1 and writes outer-prefixed file', () => { + process.env['QWEN_CODE_PROFILE_STARTUP'] = '1'; + process.env['QWEN_CODE_PROFILE_STARTUP_OUTER'] = '1'; + delete process.env['SANDBOX']; + + vi.mocked(fs.mkdirSync).mockReturnValue(undefined); + vi.mocked(fs.writeFileSync).mockReturnValue(undefined); + vi.spyOn(process.stderr, 'write').mockReturnValue(true); + + initStartupProfiler(); + profileCheckpoint('main_entry'); + finalizeStartupProfile('outer-session'); + + const written = JSON.parse( + vi.mocked(fs.writeFileSync).mock.calls[0]![1] as string, + ); + expect(written.outerProcess).toBe(true); + + const writtenPath = vi.mocked(fs.writeFileSync).mock + .calls[0]![0] as string; + // outer-prefixed filename keeps it distinct from sandbox-child reports. + expect(writtenPath).toMatch(/[\\/]outer-/); + }); }); }); diff --git a/packages/cli/src/utils/startupProfiler.ts b/packages/cli/src/utils/startupProfiler.ts index d9884bf84..6b0cf5a73 100644 --- a/packages/cli/src/utils/startupProfiler.ts +++ b/packages/cli/src/utils/startupProfiler.ts @@ -1,3 +1,9 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + /** * Lightweight startup performance profiler. * @@ -7,10 +13,15 @@ * * Usage (already wired in index.ts / gemini.tsx): * initStartupProfiler() — call once at process start to record T0 - * profileCheckpoint('name') — call at each phase boundary + * profileCheckpoint('name') — call at each phase boundary (sequential) + * recordStartupEvent('name', attrs?) — record a discrete event (multi-fire allowed) * finalizeStartupProfile(id) — call after last checkpoint to write report * - * Only profiles inside the sandbox child process to avoid duplicate reports. + * By default profiles only inside the sandbox child process to avoid duplicate + * reports. Set QWEN_CODE_PROFILE_STARTUP_OUTER=1 to also profile the outer + * (pre-sandbox) process; outer reports are written with an `outer-` filename + * prefix to keep them separate from sandbox-child reports. + * * Zero overhead when disabled (single env var check). */ import * as fs from 'node:fs'; @@ -18,35 +29,108 @@ import * as os from 'node:os'; import * as path from 'node:path'; import { performance } from 'node:perf_hooks'; +import type { StartupEventAttrs } from '@qwen-code/qwen-code-core'; + interface Checkpoint { name: string; timestamp: number; + heapUsedMb?: number; } export interface StartupPhase { name: string; startMs: number; durationMs: number; + heapUsedMb?: number; } +export interface StartupEvent { + name: string; + tMs: number; + heapUsedMb?: number; + attrs?: StartupEventAttrs; +} + +/** + * Derived phase summary, keyed by phase name. Values are absolute ms from T0. + * Mirrors the spirit of Claude Code's PHASE_DEFINITIONS for nightly CI thresholds. + * Only phases for which the underlying checkpoint/event was recorded appear. + */ +export type DerivedPhases = Partial<{ + /** Time from process start to T0 (covers V8 module-eval). */ + module_load: number; + /** T0 → after_load_settings. */ + settings_time: number; + /** after_load_settings → after_load_cli_config. */ + config_time: number; + /** after_load_cli_config → after_initialize_app. */ + init_time: number; + /** T0 → before_render. */ + pre_render: number; + /** T0 → first_paint. */ + to_first_paint: number; + /** T0 → input_enabled. (Real TTI.) */ + to_input_enabled: number; + /** Duration of `config.initialize()` (interactive only). */ + config_initialize_dur: number; + /** T0 → mcp_first_tool_registered. */ + mcp_first_tool: number; + /** T0 → mcp_all_servers_settled. */ + mcp_all_settled: number; + /** mcp_first_tool_registered → gemini_tools_updated lag. */ + gemini_tools_lag: number; +}>; + export interface StartupReport { timestamp: string; sessionId: string; + /** Whether this run was an interactive UI startup. */ + interactiveMode: boolean; + /** True when the report was produced by the outer (pre-sandbox) process. */ + outerProcess: boolean; /** Time from Node.js process start to T0 (initStartupProfiler call), covers module loading. */ processUptimeAtT0Ms: number; totalMs: number; phases: StartupPhase[]; + events: StartupEvent[]; + /** True if the events list hit MAX_EVENTS and dropped some entries. */ + eventsTruncated: boolean; + derivedPhases: DerivedPhases; nodeVersion: string; platform: string; arch: string; } let enabled = false; +let captureHeap = false; +let outerProcess = false; +let interactiveMode = false; let t0 = 0; let processUptimeAtT0Ms = 0; let checkpoints: Checkpoint[] = []; +let events: StartupEvent[] = []; +let eventsTruncated = false; let finalized = false; +// Defense-in-depth cap on the events list. Under normal flow `finalized` +// stops new events shortly after `input_enabled`. This bound only matters in +// pathological paths where finalize is bypassed (e.g. crash before the mount +// effect runs while MCP still emits server-ready events). +const MAX_EVENTS = 1024; + +const HEAP_BYTES_TO_MB = 1 / (1024 * 1024); + +function snapshotHeapMb(): number | undefined { + if (!captureHeap) return undefined; + try { + return ( + Math.round(process.memoryUsage().heapUsed * HEAP_BYTES_TO_MB * 100) / 100 + ); + } catch { + return undefined; + } +} + export function initStartupProfiler(): void { // Reset any prior state so the function is idempotent. resetStartupProfiler(); @@ -54,25 +138,151 @@ export function initStartupProfiler(): void { if (process.env['QWEN_CODE_PROFILE_STARTUP'] !== '1') { return; } - // Skip profiling in the outer (pre-sandbox) process — the child will - // re-run index.ts inside the sandbox and collect its own profile. - if (!process.env['SANDBOX']) { + + const inSandboxChild = !!process.env['SANDBOX']; + const outerOptIn = process.env['QWEN_CODE_PROFILE_STARTUP_OUTER'] === '1'; + + // Default behavior is unchanged: only the sandbox child collects. + // Outer (pre-sandbox) collection requires an explicit opt-in to avoid + // accidentally producing duplicate reports. + if (!inSandboxChild && !outerOptIn) { return; } + enabled = true; + outerProcess = !inSandboxChild; + // Default to capturing heap snapshots at every checkpoint. + // Disable with QWEN_CODE_PROFILE_STARTUP_NO_HEAP=1 when measuring the + // Heisenberg overhead of the heap call itself. + captureHeap = process.env['QWEN_CODE_PROFILE_STARTUP_NO_HEAP'] !== '1'; finalized = false; processUptimeAtT0Ms = Math.round(process.uptime() * 1000 * 100) / 100; t0 = performance.now(); checkpoints = []; + events = []; } export function profileCheckpoint(name: string): void { + if (!enabled || finalized) return; + checkpoints.push({ + name, + timestamp: performance.now(), + heapUsedMb: snapshotHeapMb(), + }); +} + +/** + * Records a discrete startup event (allowed to fire multiple times). + * Distinct from `profileCheckpoint` which is sequential and assumed unique. + * + * Once {@link finalizeStartupProfile} runs, further events are dropped to + * keep memory bounded — long-running interactive sessions still call + * `setTools()` (which emits `gemini_tools_updated`) for each MCP refresh. + */ +export function recordStartupEvent( + name: string, + attrs?: StartupEventAttrs, +): void { + if (!enabled || finalized) return; + if (events.length >= MAX_EVENTS) { + eventsTruncated = true; + return; + } + events.push({ + name, + tMs: Math.round((performance.now() - t0) * 100) / 100, + heapUsedMb: snapshotHeapMb(), + ...(attrs ? { attrs } : {}), + }); +} + +/** + * Marks this run as an interactive UI startup. Affects derived phases and + * is recorded in the report for downstream filtering. + */ +export function setInteractiveMode(value: boolean): void { if (!enabled) return; - checkpoints.push({ name, timestamp: performance.now() }); + interactiveMode = value; +} + +function findCheckpointMs(name: string): number | undefined { + for (const cp of checkpoints) { + if (cp.name === name) { + return Math.round((cp.timestamp - t0) * 100) / 100; + } + } + return undefined; +} + +function findEventMs(name: string): number | undefined { + for (const ev of events) { + if (ev.name === name) return ev.tMs; + } + return undefined; +} + +function computeDerivedPhases(): DerivedPhases { + const out: DerivedPhases = {}; + out.module_load = processUptimeAtT0Ms; + + const afterSettings = findCheckpointMs('after_load_settings'); + if (afterSettings !== undefined) out.settings_time = afterSettings; + + const afterCfg = findCheckpointMs('after_load_cli_config'); + if (afterSettings !== undefined && afterCfg !== undefined) { + out.config_time = Math.round((afterCfg - afterSettings) * 100) / 100; + } + + const afterInit = findCheckpointMs('after_initialize_app'); + if (afterCfg !== undefined && afterInit !== undefined) { + out.init_time = Math.round((afterInit - afterCfg) * 100) / 100; + } + + const beforeRender = findCheckpointMs('before_render'); + if (beforeRender !== undefined) out.pre_render = beforeRender; + + const firstPaint = findCheckpointMs('first_paint'); + if (firstPaint !== undefined) out.to_first_paint = firstPaint; + + const inputEnabled = findCheckpointMs('input_enabled'); + if (inputEnabled !== undefined) out.to_input_enabled = inputEnabled; + + const ciStart = findCheckpointMs('config_initialize_start'); + const ciEnd = findCheckpointMs('config_initialize_end'); + if (ciStart !== undefined && ciEnd !== undefined) { + out.config_initialize_dur = Math.round((ciEnd - ciStart) * 100) / 100; + } + + const mcpFirst = findEventMs('mcp_first_tool_registered'); + if (mcpFirst !== undefined) out.mcp_first_tool = mcpFirst; + + const mcpSettled = findEventMs('mcp_all_servers_settled'); + if (mcpSettled !== undefined) out.mcp_all_settled = mcpSettled; + + // gemini_tools_lag = how long after the first MCP server finished + // discover did the model actually receive an updated tool list. We must + // pick the FIRST `gemini_tools_updated` event whose timestamp is >= + // `mcp_first_tool_registered`, because earlier `setTools()` calls fire + // from `GeminiClient.initialize() -> startChat()` (built-in tools only) + // and from `SkillTool` post-construction refresh — both happen BEFORE + // MCP discovery starts under PR-A, so naively taking the first + // `gemini_tools_updated` would give a misleading negative lag. + if (mcpFirst !== undefined) { + for (const ev of events) { + if (ev.name === 'gemini_tools_updated' && ev.tMs >= mcpFirst) { + out.gemini_tools_lag = Math.round((ev.tMs - mcpFirst) * 100) / 100; + break; + } + } + } + + return out; } export function getStartupReport(): StartupReport | null { - if (!enabled || checkpoints.length === 0) return null; + if (!enabled || (checkpoints.length === 0 && events.length === 0)) { + return null; + } const phases: StartupPhase[] = []; let prev = t0; @@ -84,18 +294,27 @@ export function getStartupReport(): StartupReport | null { name: cp.name, startMs: Math.round((prev - t0) * 100) / 100, durationMs: Math.round((cp.timestamp - prev) * 100) / 100, + ...(cp.heapUsedMb !== undefined ? { heapUsedMb: cp.heapUsedMb } : {}), }); prev = cp.timestamp; } - const lastTimestamp = checkpoints[checkpoints.length - 1]!.timestamp; + const lastTimestamp = + checkpoints.length > 0 + ? checkpoints[checkpoints.length - 1]!.timestamp + : performance.now(); return { timestamp: new Date().toISOString(), sessionId: 'unknown', + interactiveMode, + outerProcess, processUptimeAtT0Ms, totalMs: Math.round((lastTimestamp - t0) * 100) / 100, phases, + events: [...events], + eventsTruncated, + derivedPhases: computeDerivedPhases(), nodeVersion: process.version, platform: process.platform, arch: process.arch, @@ -117,7 +336,8 @@ export function finalizeStartupProfile(sessionId?: string): void { const dir = path.join(os.homedir(), '.qwen', 'startup-perf'); fs.mkdirSync(dir, { recursive: true }); - const filename = `${report.timestamp.replace(/[:.]/g, '-')}-${report.sessionId}.json`; + const prefix = report.outerProcess ? 'outer-' : ''; + const filename = `${prefix}${report.timestamp.replace(/[:.]/g, '-')}-${report.sessionId}.json`; const filepath = path.join(dir, filename); fs.writeFileSync(filepath, JSON.stringify(report, null, 2), 'utf-8'); process.stderr.write(`Startup profile written to: ${filepath}\n`); @@ -128,8 +348,21 @@ export function finalizeStartupProfile(sessionId?: string): void { export function resetStartupProfiler(): void { enabled = false; + captureHeap = false; + outerProcess = false; + interactiveMode = false; t0 = 0; processUptimeAtT0Ms = 0; checkpoints = []; + events = []; + eventsTruncated = false; finalized = false; } + +/** + * Test-only: returns whether profiling is currently active. Used by the + * cli to short-circuit the cross-package event sink registration. + */ +export function isStartupProfilerEnabled(): boolean { + return enabled; +} diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index 9f435b4b8..316d64cf3 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -7,7 +7,7 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; import type { Mock } from 'vitest'; import type { ConfigParameters, SandboxConfig } from './config.js'; -import { Config, ApprovalMode } from './config.js'; +import { Config, ApprovalMode, MCPServerConfig } from './config.js'; import * as fs from 'node:fs'; import * as path from 'node:path'; import { setGeminiMdFilename as mockSetGeminiMdFilename } from '../memory/const.js'; @@ -468,6 +468,69 @@ describe('Server Config (config.ts)', () => { ), ).toEqual([ToolNames.READ_FILE, ToolNames.EDIT, ToolNames.SHELL]); }); + + it('skips inline MCP discovery by default (progressive availability)', async () => { + const config = new Config({ ...baseParams, checkpointing: false }); + await config.initialize(); + + // Default path passes `skipDiscovery: true` to createToolRegistry, + // so the synchronous tool-registry construction must NOT invoke + // discoverAllTools. MCP is started in the background instead. + expect(ToolRegistry.prototype.discoverAllTools).not.toHaveBeenCalled(); + }); + + it('honors QWEN_CODE_LEGACY_MCP_BLOCKING=1 by running MCP discovery inline', async () => { + const originalLegacy = process.env['QWEN_CODE_LEGACY_MCP_BLOCKING']; + process.env['QWEN_CODE_LEGACY_MCP_BLOCKING'] = '1'; + try { + const config = new Config({ ...baseParams, checkpointing: false }); + await config.initialize(); + + // Legacy escape hatch must call back into the synchronous discover + // path the cli relied on prior to PR-A. + expect(ToolRegistry.prototype.discoverAllTools).toHaveBeenCalledTimes( + 1, + ); + } finally { + if (originalLegacy === undefined) { + delete process.env['QWEN_CODE_LEGACY_MCP_BLOCKING']; + } else { + process.env['QWEN_CODE_LEGACY_MCP_BLOCKING'] = originalLegacy; + } + } + }); + + it('waitForMcpReady resolves immediately when no MCP discovery was started', async () => { + // No MCP servers + non-bare + default mode: startMcpDiscoveryInBackground + // is called but the registry mock returns no manager, so the discovery + // promise stays undefined and waitForMcpReady is a no-op. + const config = new Config({ ...baseParams, checkpointing: false }); + await config.initialize(); + await expect(config.waitForMcpReady()).resolves.toBeUndefined(); + }); + + it('getFailedMcpServerNames returns an empty array when no MCP servers are configured', () => { + // The helper underpins the non-interactive "Warning: MCP server(s) + // failed to start" emission. Must be a no-op when there's nothing + // to warn about, otherwise --prompt runs with no MCP config would + // emit a spurious warning every time. + const config = new Config({ ...baseParams, checkpointing: false }); + expect(config.getFailedMcpServerNames()).toEqual([]); + }); + + it('getFailedMcpServerNames skips disabled servers', () => { + // A user-disabled server is not "failed" — the user explicitly + // turned it off. Treating it as failed would generate noise on + // every non-interactive run. Disablement is tracked via + // `excludedMcpServers` (see `isMcpServerDisabled`). + const config = new Config({ + ...baseParams, + checkpointing: false, + mcpServers: { off: new MCPServerConfig() }, + excludedMcpServers: ['off'], + } as ConfigParameters); + expect(config.getFailedMcpServerNames()).toEqual([]); + }); }); describe('refreshAuth', () => { diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index b439abf66..676541671 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -46,9 +46,14 @@ import { GitService } from '../services/gitService.js'; import { CronScheduler } from '../services/cronScheduler.js'; // Tools — only lightweight imports; tool classes are lazy-loaded via dynamic import -import type { SendSdkMcpMessage } from '../tools/mcp-client.js'; +import { + MCPServerStatus, + getMCPServerStatus, + type SendSdkMcpMessage, +} from '../tools/mcp-client.js'; import { setGeminiMdFilename } from '../memory/const.js'; import { canUseRipgrep } from '../utils/ripgrepUtils.js'; +import { recordStartupEvent } from '../utils/startupEventSink.js'; import { ToolRegistry, type ToolFactory } from '../tools/tool-registry.js'; import { ToolNames } from '../tools/tool-names.js'; import type { LspClient } from '../lsp/types.js'; @@ -361,6 +366,16 @@ export class MCPServerConfig { readonly targetServiceAccount?: string, // SDK MCP server type - 'sdk' indicates server runs in SDK process readonly type?: 'sdk', + /** + * Per-server cap on the discovery handshake (`connect` + `tools/list` + + * `prompts/list` + `resources/list`). Defaults: 30s for stdio servers, + * 5s for remote HTTP/SSE. Tool-call timeout (`timeout` above) is + * unaffected — a long-running tool invocation is not a startup + * pathology. Appended at the end of the parameter list to avoid + * shifting positional arguments at the many `new MCPServerConfig(...)` + * call sites. + */ + readonly discoveryTimeoutMs?: number, ) {} } @@ -1216,10 +1231,25 @@ export class Config { await this.refreshHierarchicalMemory(); this.debugLogger.debug('Hierarchical memory loaded'); + // Progressive MCP availability: skip MCP discovery in the synchronous + // tool-registry construction path and kick it off in the background + // after the registry exists. This lets `Config.initialize()` (and the + // cli's `input_enabled` checkpoint) resolve without waiting on MCP + // server response time. Users can opt back into the legacy synchronous + // behavior with `QWEN_CODE_LEGACY_MCP_BLOCKING=1` — kept ≥ 1 release as + // an escape hatch. + const legacyBlockingMcp = + process.env['QWEN_CODE_LEGACY_MCP_BLOCKING'] === '1'; + const skipInlineMcpDiscovery = this.getBareMode() || !legacyBlockingMcp; + this.toolRegistry = await this.createToolRegistry( options?.sendSdkMcpMessage, - this.getBareMode() ? { skipDiscovery: true } : undefined, + skipInlineMcpDiscovery ? { skipDiscovery: true } : undefined, ); + recordStartupEvent('tool_registry_created', { + toolCount: this.toolRegistry.getAllToolNames().length, + mcpInline: !skipInlineMcpDiscovery, + }); this.debugLogger.info( `Tool registry initialized with ${this.toolRegistry.getAllToolNames().length} tools`, ); @@ -1234,10 +1264,154 @@ export class Config { // Use strict mode so a broken built-in tool surfaces immediately at startup. await this.toolRegistry.warmAll({ strict: true }); + // Fire-and-forget MCP discovery. Each server's tools land in the + // registry as it becomes ready; the cli's AppContainer debounces + // `setTools()` (~16ms / one frame) so the model sees the new tools + // shortly after each server settles. See `AppContainer.tsx`'s + // `mcp-client-update` subscriber. + if (skipInlineMcpDiscovery && !this.getBareMode()) { + this.startMcpDiscoveryInBackground(); + } + logStartSession(this, new StartSessionEvent(this)); this.debugLogger.info('Config initialization completed'); } + /** + * In-flight background MCP discovery promise. Captured so non-interactive + * code paths can await it before invoking the model (see + * {@link waitForMcpReady}). Undefined when MCP discovery was skipped + * entirely (bare mode, legacy blocking mode, or no MCP servers). + */ + private mcpDiscoveryPromise?: Promise; + + /** + * Kicks off MCP server discovery in the background after the synchronous + * portion of {@link initialize} returns. Errors are logged, never thrown: + * a broken MCP server must not bring down the cli, and per-server + * connect/discover failures are already surfaced through the + * `mcp-client-update` event stream the UI subscribes to. + * + * Defensive against partially-stubbed `ToolRegistry` in some tests, where + * the manager getter is unavailable — we'd rather log-and-skip than crash + * the init path in tests that don't exercise MCP at all. + */ + private startMcpDiscoveryInBackground(): void { + // `getMcpClientManager` is a public method on `ToolRegistry`. The + // cast below is NOT defensive against the production type — it + // exists only because some tests (e.g. those using + // `createMockToolRegistry`) stub `ToolRegistry` as a plain object + // that doesn't implement the method. The optional-chaining call + // (`?.()`) means the stubbed path resolves to `undefined` instead + // of crashing `initialize()` for tests that never exercise MCP. + // + // Crucially, the inner shape is `ReturnType` + // — not a hand-rolled `{ discoverAllMcpToolsIncremental: ... }` — so + // a future rename of `getMcpClientManager` on `ToolRegistry` still + // surfaces here as a type error rather than silently falling + // through to the `if (!manager) return` branch. + const manager = ( + this.toolRegistry as ToolRegistry & { + getMcpClientManager?: () => ReturnType< + ToolRegistry['getMcpClientManager'] + >; + } + ).getMcpClientManager?.(); + if (!manager) { + this.debugLogger.debug( + 'Skipping background MCP discovery: ToolRegistry has no MCP client manager', + ); + return; + } + this.mcpDiscoveryPromise = manager + .discoverAllMcpToolsIncremental(this) + .then(async () => { + // After background discovery completes, push the newly-registered + // MCP tools into the active GeminiChat so the next model request + // sees them. Interactive mode also calls setTools() via + // AppContainer's batch-flush effect — this trailing call is + // idempotent there, but it's the ONLY path that updates + // `chat.tools` for non-interactive runs (no AppContainer). + // Without this, `chat.tools` would be frozen at the built-in-only + // snapshot taken inside `geminiClient.initialize()` → `startChat()`, + // and `runNonInteractive` / stream-json / ACP would silently lose + // every MCP tool — a regression vs the legacy synchronous path. + try { + await this.geminiClient?.setTools(); + } catch (err) { + this.debugLogger.error( + `setTools() after background MCP discovery failed: ${err instanceof Error ? err.message : String(err)}`, + ); + } + }) + .catch((err: unknown) => { + this.debugLogger.error( + `Background MCP discovery failed: ${err instanceof Error ? err.message : String(err)}`, + ); + }); + } + + /** + * Resolves when background MCP discovery has settled (all servers ready, + * failed, or timed out). Non-interactive code paths (`runNonInteractive`, + * stream-json, ACP) MUST await this before invoking the model so the + * first model request sees the same tool surface the legacy + * synchronous-MCP path produced. + * + * Interactive code paths should NOT call this — `AppContainer`'s + * `mcp-client-update` subscriber handles `setTools()` refreshes + * progressively without blocking the UI. + * + * Resolves immediately when: + * - bare mode is on (no MCP discovery is started), + * - `QWEN_CODE_LEGACY_MCP_BLOCKING=1` is set (MCP already discovered + * synchronously inside {@link initialize}), or + * - no MCP servers are configured. + */ + async waitForMcpReady(): Promise { + if (this.mcpDiscoveryPromise) { + await this.mcpDiscoveryPromise; + } + } + + /** + * Returns the names of configured (non-disabled) MCP servers whose + * discovery did NOT end in a CONNECTED state. Intended to be called by + * non-interactive entry points AFTER {@link waitForMcpReady} resolves, + * so they can surface a single user-visible warning summarizing which + * servers failed. + * + * The legacy synchronous MCP path surfaced these failures visibly + * during `config.initialize()` (because they happened on the main + * thread and per-server errors logged to stderr). Under PR-A's + * progressive discovery, per-server errors are caught inside + * `McpClientManager.discoverAllMcpToolsIncremental` and routed to + * profiler events + `mcp-client-update` notifications — both of which + * are invisible to a non-interactive run with only built-in stderr. + * This helper closes that gap WITHOUT re-introducing the blocking + * behavior. + * + * Returns an empty array when MCP discovery was skipped (bare mode / + * legacy blocking / no servers configured) or when every configured + * server settled successfully. + */ + getFailedMcpServerNames(): string[] { + const servers = this.getMcpServers(); + if (!servers) { + return []; + } + const failed: string[] = []; + for (const name of Object.keys(servers)) { + if (this.isMcpServerDisabled(name)) { + continue; + } + if (getMCPServerStatus(name) !== MCPServerStatus.CONNECTED) { + failed.push(name); + } + } + return failed; + } + async refreshHierarchicalMemory(): Promise { const { memoryContent, fileCount, conditionalRules, projectRoot } = await loadServerHierarchicalMemory( diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 6d6d84157..036e2c9c0 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -17,6 +17,7 @@ import { SpanStatusCode } from '@opentelemetry/api'; // Config import { ApprovalMode, type Config } from '../config/config.js'; import { createDebugLogger } from '../utils/debugLogger.js'; +import { recordStartupEvent } from '../utils/startupEventSink.js'; import { microcompactHistory } from '../services/microcompaction/microcompact.js'; const debugLogger = createDebugLogger('CLIENT'); @@ -369,6 +370,9 @@ export class GeminiClient { const toolDeclarations = toolRegistry.getFunctionDeclarations(); const tools: Tool[] = [{ functionDeclarations: toolDeclarations }]; this.getChat().setTools(tools); + recordStartupEvent('gemini_tools_updated', { + toolCount: toolDeclarations.length, + }); } async resetChat(): Promise { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 24fa44777..ae5592fa1 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -371,3 +371,14 @@ export { type PostToolUseFailureHookResult, generateToolUseId, } from './core/toolHookTriggers.js'; + +// ============================================================================ +// Startup profiler — cross-package event sink (first-screen perf observability) +// ============================================================================ + +export { + setStartupEventSink, + recordStartupEvent, + type StartupEventSink, + type StartupEventAttrs, +} from './utils/startupEventSink.js'; diff --git a/packages/core/src/tools/mcp-client-manager.test.ts b/packages/core/src/tools/mcp-client-manager.test.ts index cbc927699..1e2da7dde 100644 --- a/packages/core/src/tools/mcp-client-manager.test.ts +++ b/packages/core/src/tools/mcp-client-manager.test.ts @@ -462,4 +462,470 @@ describe('McpClientManager', () => { expect(vi.mocked(McpClient)).not.toHaveBeenCalled(); }); + + it('discoverAllMcpToolsIncremental enforces a per-server discoveryTimeoutMs', async () => { + // A stdio server whose `connect` hangs forever. The 50ms per-server + // timeout should fire and surface as a swallowed error, leaving the + // manager in COMPLETED state instead of stuck. + let neverResolve!: () => void; + const hung = new Promise((resolve) => { + neverResolve = resolve; + }); + const mockedMcpClient = { + connect: vi.fn().mockReturnValue(hung), + discover: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + getStatus: vi.fn(), + }; + vi.mocked(McpClient).mockReturnValue( + mockedMcpClient as unknown as McpClient, + ); + + const mockConfig = { + isTrustedFolder: () => true, + getMcpServers: () => ({ + broken: { command: 'node', args: [], discoveryTimeoutMs: 50 }, + }), + getMcpServerCommand: () => undefined, + getPromptRegistry: () => ({}) as PromptRegistry, + getWorkspaceContext: () => ({}) as WorkspaceContext, + getDebugMode: () => false, + isMcpServerDisabled: () => false, + } as unknown as Config; + const manager = new McpClientManager(mockConfig, { + removeMcpToolsByServer: vi.fn(), + } as unknown as ToolRegistry); + + const t0 = Date.now(); + await manager.discoverAllMcpToolsIncremental(mockConfig); + const elapsed = Date.now() - t0; + + expect(elapsed).toBeGreaterThanOrEqual(40); + // Generous upper bound — the 50ms timeout should fire well within 2s + // even on a heavily-loaded CI runner. + expect(elapsed).toBeLessThan(2000); + // discoveryAllMcpToolsIncremental must always settle the state, even + // when every server times out. Otherwise the cli's deferred-finalize + // path would hang forever. + expect(manager.getDiscoveryState()).toBe( + (await import('./mcp-client.js')).MCPDiscoveryState.COMPLETED, + ); + + // Cleanup the stuck connect so test doesn't leak a pending promise. + neverResolve(); + }); + + it('discoverAllMcpToolsIncremental skips servers flagged as disabled', async () => { + // PR-A regression guard: the new incremental path used to iterate + // `Object.entries(servers)` without consulting `isMcpServerDisabled`, + // so a server the user had explicitly disabled (e.g. via + // `mcpServers.foo.disabled: true`) would still get connected and its + // tools registered. Mirrors the existing protection in + // `discoverAllMcpTools`. + const mockedMcpClient = { + connect: vi.fn().mockResolvedValue(undefined), + discover: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + getStatus: vi.fn(), + }; + vi.mocked(McpClient).mockReturnValue( + mockedMcpClient as unknown as McpClient, + ); + const mockConfig = { + isTrustedFolder: () => true, + getMcpServers: () => ({ + enabled: { command: 'node', args: [] }, + disabled: { command: 'node', args: [] }, + }), + getMcpServerCommand: () => undefined, + getPromptRegistry: () => ({}) as PromptRegistry, + getWorkspaceContext: () => ({}) as WorkspaceContext, + getDebugMode: () => false, + isMcpServerDisabled: (name: string) => name === 'disabled', + } as unknown as Config; + const manager = new McpClientManager(mockConfig, {} as ToolRegistry); + + await manager.discoverAllMcpToolsIncremental(mockConfig); + + // Only the enabled server should have driven a discover; the disabled + // one is skipped before any connect attempt. + expect(mockedMcpClient.connect).toHaveBeenCalledTimes(1); + expect(mockedMcpClient.discover).toHaveBeenCalledTimes(1); + }); + + it('discoverAllMcpToolsIncremental tears down enabled→disabled transitions', async () => { + // Mid-session, the user disables a previously-connected server (e.g. + // via `/mcp disable foo` or by editing settings). The incremental + // path must tear down the existing client, drop its registered tools, + // stop its health check, and remove its global status — otherwise + // the Footer pill keeps counting it, its tools stay live in the + // ToolRegistry, and the health-check loop keeps probing a server + // the user has told us to ignore. + const mockedMcpClient = { + connect: vi.fn().mockResolvedValue(undefined), + discover: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + getStatus: vi.fn(), + }; + vi.mocked(McpClient).mockReturnValue( + mockedMcpClient as unknown as McpClient, + ); + + const removeMcpToolsByServer = vi.fn(); + const toolRegistryStub = { + removeMcpToolsByServer, + } as unknown as ToolRegistry; + + let disabled = false; + const mockConfig = { + isTrustedFolder: () => true, + getMcpServers: () => ({ foo: { command: 'node', args: [] } }), + getMcpServerCommand: () => undefined, + getPromptRegistry: () => ({}) as PromptRegistry, + getWorkspaceContext: () => ({}) as WorkspaceContext, + getDebugMode: () => false, + isMcpServerDisabled: (name: string) => name === 'foo' && disabled, + } as unknown as Config; + const manager = new McpClientManager(mockConfig, toolRegistryStub); + + // First pass: server enabled, gets connected. + await manager.discoverAllMcpToolsIncremental(mockConfig); + expect(mockedMcpClient.connect).toHaveBeenCalledTimes(1); + expect(mockedMcpClient.disconnect).not.toHaveBeenCalled(); + + // Now disable mid-session and re-run incremental discovery. + disabled = true; + await manager.discoverAllMcpToolsIncremental(mockConfig); + + // The previously-connected client must be disconnected and its tools + // dropped from the registry. + expect(mockedMcpClient.disconnect).toHaveBeenCalledTimes(1); + expect(removeMcpToolsByServer).toHaveBeenCalledWith('foo'); + // And no fresh connect was attempted (the disabled branch fires + // before serversToUpdate is populated). + expect(mockedMcpClient.connect).toHaveBeenCalledTimes(1); + }); + + it('discoverAllMcpToolsIncremental records `failed` outcome for swallowed connect errors', async () => { + // `discoverMcpToolsForServerInternal` catches connect/discover errors + // without re-throwing (best-effort semantics — one broken server + // shouldn't bring down the others). Before this fix, the try block in + // `discoverAllMcpToolsIncremental` therefore resolved even for failed + // servers, and we'd record `mcp_server_ready:` with + // `outcome: 'ready'`. Now we consult the actual server status (set + // to DISCONNECTED by McpClient.connect's catch) and emit `failed` + // instead — otherwise the startup profile claims success for every + // auth error / crashed server. + const events: Array<{ name: string; attrs?: Record }> = []; + const startupEventSink = await import('../utils/startupEventSink.js'); + startupEventSink.setStartupEventSink((name, attrs) => { + events.push({ name, attrs }); + }); + + const mockedMcpClient = { + connect: vi.fn().mockRejectedValue(new Error('auth failed')), + discover: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + getStatus: vi.fn(), + }; + vi.mocked(McpClient).mockReturnValue( + mockedMcpClient as unknown as McpClient, + ); + + const mockConfig = { + isTrustedFolder: () => true, + getMcpServers: () => ({ 'broken-auth': { command: 'node', args: [] } }), + getMcpServerCommand: () => undefined, + getPromptRegistry: () => ({}) as PromptRegistry, + getWorkspaceContext: () => ({}) as WorkspaceContext, + getDebugMode: () => false, + isMcpServerDisabled: () => false, + } as unknown as Config; + const manager = new McpClientManager(mockConfig, {} as ToolRegistry); + await manager.discoverAllMcpToolsIncremental(mockConfig); + + // Cleanup the global sink so it doesn't leak into other tests. + startupEventSink.setStartupEventSink(null); + + const readyEvents = events.filter( + (e) => e.name === 'mcp_server_ready:broken-auth', + ); + expect(readyEvents).toHaveLength(1); + expect(readyEvents[0].attrs?.['outcome']).toBe('failed'); + // And no `mcp_first_tool_registered` was emitted — that metric is + // user-facing ("first MCP server became usable") so a failed server + // must not pollute it. + const firstToolEvents = events.filter( + (e) => e.name === 'mcp_first_tool_registered', + ); + expect(firstToolEvents).toHaveLength(0); + }); + + it('discoveryTimeoutMs is clamped to a minimum and maximum', async () => { + // A 0 or negative override would cause the timeout to fire on the + // very next macrotask, racing the connect() handshake. Combined with + // the lack of disconnect-on-timeout this used to be a silent tool + // registration vector. The clamp puts the floor at 100ms. + const calls: number[] = []; + const realSetTimeout = globalThis.setTimeout; + const spy = vi.spyOn(globalThis, 'setTimeout').mockImplementation((( + cb: () => void, + ms?: number, + ) => { + if (typeof ms === 'number') calls.push(ms); + return realSetTimeout(cb, ms ?? 0); + }) as unknown as typeof setTimeout); + + const mockedMcpClient = { + connect: vi.fn().mockResolvedValue(undefined), + discover: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + getStatus: vi.fn(), + }; + vi.mocked(McpClient).mockReturnValue( + mockedMcpClient as unknown as McpClient, + ); + const mockConfig = { + isTrustedFolder: () => true, + getMcpServers: () => ({ + zero: { command: 'node', args: [], discoveryTimeoutMs: 0 }, + negative: { command: 'node', args: [], discoveryTimeoutMs: -5 }, + huge: { command: 'node', args: [], discoveryTimeoutMs: 10_000_000 }, + }), + getMcpServerCommand: () => undefined, + getPromptRegistry: () => ({}) as PromptRegistry, + getWorkspaceContext: () => ({}) as WorkspaceContext, + getDebugMode: () => false, + isMcpServerDisabled: () => false, + } as unknown as Config; + const manager = new McpClientManager(mockConfig, { + removeMcpToolsByServer: vi.fn(), + } as unknown as ToolRegistry); + await manager.discoverAllMcpToolsIncremental(mockConfig); + spy.mockRestore(); + + // Among the values setTimeout was called with, look only at the ones + // our discoveryTimeoutFor would have produced: 100 (clamped floor) + // and 300_000 (clamped ceiling). Other timers (test infra, vitest) + // may be in `calls` but never both 100 AND 300000 by coincidence. + expect(calls).toContain(100); + expect(calls).toContain(300_000); + expect(calls).not.toContain(0); + expect(calls).not.toContain(-5); + expect(calls).not.toContain(10_000_000); + }); + + it('discoveryTimeoutFor treats websocket (tcp) transport as remote', async () => { + // The remote-vs-stdio classification gates the 5s vs 30s default + // timeout. `tcp` is the WebSocket transport field on MCPServerConfig + // — without it, hung WS handshakes would block `waitForMcpReady()` + // for 30s instead of 5s. + const mockedMcpClient = { + connect: vi.fn().mockReturnValue(new Promise(() => {})), + discover: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + getStatus: vi.fn(), + }; + vi.mocked(McpClient).mockReturnValue( + mockedMcpClient as unknown as McpClient, + ); + + const calls: number[] = []; + const realSetTimeout = globalThis.setTimeout; + const spy = vi.spyOn(globalThis, 'setTimeout').mockImplementation((( + cb: () => void, + ms?: number, + ) => { + if (typeof ms === 'number') calls.push(ms); + // Fire immediately to settle quickly without waiting 5s/30s. + return realSetTimeout(cb, 1); + }) as unknown as typeof setTimeout); + + const mockConfig = { + isTrustedFolder: () => true, + getMcpServers: () => ({ wsServer: { tcp: 'ws://example.test' } }), + getMcpServerCommand: () => undefined, + getPromptRegistry: () => ({}) as PromptRegistry, + getWorkspaceContext: () => ({}) as WorkspaceContext, + getDebugMode: () => false, + isMcpServerDisabled: () => false, + } as unknown as Config; + const manager = new McpClientManager(mockConfig, { + removeMcpToolsByServer: vi.fn(), + } as unknown as ToolRegistry); + await manager.discoverAllMcpToolsIncremental(mockConfig); + spy.mockRestore(); + + expect(calls).toContain(5_000); + expect(calls).not.toContain(30_000); + }); + + it('runWithDiscoveryTimeout disconnects the client AND drops registered tools on timeout', async () => { + // Before this fix, the inner `discoverMcpToolsForServer` kept running + // after the timeout rejected the outer promise. If `client.discover()` + // eventually succeeded it would register the late-arriving server's + // tools into the live toolRegistry (a remote-exploitable silent + // registration). + // + // Disconnecting the client on timeout aborts the handshake, but a + // fire-and-forget `void disconnect()` doesn't help when `discover()` + // already pumped tools into the registry synchronously — the + // transport close lands a tick later. We therefore (a) await the + // disconnect and (b) call `removeMcpToolsByServer()` to drop any + // tools that slipped through the race window. + let resolveConnect!: () => void; + const hungConnect = new Promise((res) => { + resolveConnect = res; + }); + const mockedMcpClient = { + connect: vi.fn().mockReturnValue(hungConnect), + discover: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + getStatus: vi.fn(), + }; + vi.mocked(McpClient).mockReturnValue( + mockedMcpClient as unknown as McpClient, + ); + + const mockConfig = { + isTrustedFolder: () => true, + getMcpServers: () => ({ + slow: { command: 'node', args: [], discoveryTimeoutMs: 100 }, + }), + getMcpServerCommand: () => undefined, + getPromptRegistry: () => ({}) as PromptRegistry, + getWorkspaceContext: () => ({}) as WorkspaceContext, + getDebugMode: () => false, + isMcpServerDisabled: () => false, + } as unknown as Config; + const removeMcpToolsByServer = vi.fn(); + const manager = new McpClientManager(mockConfig, { + removeMcpToolsByServer, + } as unknown as ToolRegistry); + + await manager.discoverAllMcpToolsIncremental(mockConfig); + + // The timeout must have triggered the disconnect — that's what + // aborts the connect() handshake so no tools land. + expect(mockedMcpClient.disconnect).toHaveBeenCalled(); + // And any tools that registered during the disconnect race window + // must have been removed from the registry. + expect(removeMcpToolsByServer).toHaveBeenCalledWith('slow'); + + // Cleanup the hung promise to avoid leaking it across tests. + resolveConnect(); + }); + + it('runWithDiscoveryTimeout drops the client + stops health-check so the auto-reconnect loop cannot resurrect an intentionally timed-out server', async () => { + // Round-7 regression: before this fix, the timeout handler removed + // tools but left the client in `this.clients` and didn't stop its + // health-check timer. `discoverMcpToolsForServerInternal`'s `finally` + // block would then `startHealthCheck`, which (with `autoReconnect`) + // detects `status !== CONNECTED`, increments the failure counter for + // ~maxConsecutiveFailures intervals, and calls `reconnectServer()` → + // `discoverMcpToolsForServer()` directly — bypassing + // `runWithDiscoveryTimeout` entirely. The intentionally slow server + // would silently come back. + let resolveConnect!: () => void; + const hungConnect = new Promise((res) => { + resolveConnect = res; + }); + const mockedMcpClient = { + connect: vi.fn().mockReturnValue(hungConnect), + discover: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + getStatus: vi.fn(), + }; + vi.mocked(McpClient).mockReturnValue( + mockedMcpClient as unknown as McpClient, + ); + + const mockConfig = { + isTrustedFolder: () => true, + getMcpServers: () => ({ + slow: { command: 'node', args: [], discoveryTimeoutMs: 100 }, + }), + getMcpServerCommand: () => undefined, + getPromptRegistry: () => ({}) as PromptRegistry, + getWorkspaceContext: () => ({}) as WorkspaceContext, + getDebugMode: () => false, + isMcpServerDisabled: () => false, + } as unknown as Config; + const manager = new McpClientManager(mockConfig, { + removeMcpToolsByServer: vi.fn(), + } as unknown as ToolRegistry); + + await manager.discoverAllMcpToolsIncremental(mockConfig); + + // The client entry must be gone — otherwise `performHealthCheck` + // would observe it (and the disconnected status) every checkInterval. + expect( + (manager as unknown as { clients: Map }).clients.has( + 'slow', + ), + ).toBe(false); + // And no health-check timer must remain for this server. + expect( + ( + manager as unknown as { + healthCheckTimers: Map; + } + ).healthCheckTimers.has('slow'), + ).toBe(false); + + // Cleanup the hung promise to avoid leaking it across tests. + resolveConnect(); + }); + + it('discoverAllMcpToolsIncremental emits the trailing mcp-client-update after COMPLETED', async () => { + // Without the trailing emit, the cli's deferred-finalize subscriber + // (which polls discoveryState on each `mcp-client-update`) would never + // observe the terminal state. Regression-protect the emit ordering. + const mockedMcpClient = { + connect: vi.fn().mockResolvedValue(undefined), + discover: vi.fn().mockResolvedValue(undefined), + disconnect: vi.fn().mockResolvedValue(undefined), + getStatus: vi.fn(), + }; + vi.mocked(McpClient).mockReturnValue( + mockedMcpClient as unknown as McpClient, + ); + + const mcpClientModule = await import('./mcp-client.js'); + const { MCPDiscoveryState } = mcpClientModule; + const observedStatesAtEmit: Array< + (typeof mcpClientModule.MCPDiscoveryState)[keyof typeof mcpClientModule.MCPDiscoveryState] + > = []; + const events = { + emit: vi.fn((eventName: string) => { + if (eventName === 'mcp-client-update') { + observedStatesAtEmit.push(manager.getDiscoveryState()); + } + return true; + }), + } as unknown as import('node:events').EventEmitter; + + const mockConfig = { + isTrustedFolder: () => true, + getMcpServers: () => ({ srv: { command: 'node', args: [] } }), + getMcpServerCommand: () => undefined, + getPromptRegistry: () => ({}) as PromptRegistry, + getWorkspaceContext: () => ({}) as WorkspaceContext, + getDebugMode: () => false, + isMcpServerDisabled: () => false, + } as unknown as Config; + const manager = new McpClientManager( + mockConfig, + {} as ToolRegistry, + events, + ); + + await manager.discoverAllMcpToolsIncremental(mockConfig); + + // Must include at least one COMPLETED-state emit at the tail. + expect(observedStatesAtEmit.at(-1)).toBe(MCPDiscoveryState.COMPLETED); + // And must have started with an IN_PROGRESS emit (so progress UI shows + // the transition even when there are no servers to update). + expect(observedStatesAtEmit[0]).toBe(MCPDiscoveryState.IN_PROGRESS); + }); }); diff --git a/packages/core/src/tools/mcp-client-manager.ts b/packages/core/src/tools/mcp-client-manager.ts index 7956254e8..54df8adc4 100644 --- a/packages/core/src/tools/mcp-client-manager.ts +++ b/packages/core/src/tools/mcp-client-manager.ts @@ -4,19 +4,21 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { Config } from '../config/config.js'; +import type { Config, MCPServerConfig } from '../config/config.js'; import { isSdkMcpServerConfig } from '../config/config.js'; import type { ToolRegistry } from './tool-registry.js'; import { McpClient, MCPDiscoveryState, MCPServerStatus, + getMCPServerStatus, populateMcpServerCommand, removeMCPServerStatus, } from './mcp-client.js'; import type { SendSdkMcpMessage } from './mcp-client.js'; import { getErrorMessage } from '../utils/errors.js'; import { createDebugLogger } from '../utils/debugLogger.js'; +import { recordStartupEvent } from '../utils/startupEventSink.js'; import type { EventEmitter } from 'node:events'; import type { ReadResourceResult } from '@modelcontextprotocol/sdk/types.js'; @@ -319,6 +321,16 @@ export class McpClientManager { return; } + // Don't arm a health-check timer for a server that no longer has a + // tracked client. The discovery-timeout handler deletes the client + // before the discovery `finally` block runs `startHealthCheck`, and + // without this guard we'd create a timer that fires every + // checkIntervalMs and ultimately reconnects an intentionally + // timed-out server (bypassing `runWithDiscoveryTimeout`). + if (!this.clients.has(serverName)) { + return; + } + // Clear existing timer if any this.stopHealthCheck(serverName); @@ -447,6 +459,20 @@ export class McpClientManager { ); this.discoveryState = MCPDiscoveryState.IN_PROGRESS; + recordStartupEvent('mcp_discovery_start', { + serverCount: Object.keys(servers).length, + incremental: true, + }); + // Mirrors `discoverAllMcpTools`: announce IN_PROGRESS so UI subscribers + // (MCP status pill, AppContainer batch-flush effect) know discovery + // started, even when no servers need updates this pass. + this.eventEmitter?.emit('mcp-client-update', this.clients); + + // Tracks the first successful server discover so we can emit the + // `mcp_first_tool_registered` event exactly once. "First successful + // discover" rather than a tool-count delta — simpler and aligns with the + // user-perceived metric ("first MCP server is ready"). + let firstToolEventFired = false; // Find servers that are new or have changed configuration const serversToUpdate: string[] = []; @@ -455,6 +481,25 @@ export class McpClientManager { // Check for new servers or configuration changes for (const [name] of Object.entries(servers)) { + // Mirror `discoverAllMcpTools` (line ~102): users who explicitly + // disabled a server via `mcpServers..disabled: true` must not + // see it reconnected by the incremental path. Without this, the + // PR-A background path silently re-registers tools the user has + // told us to ignore. + if (cliConfig.isMcpServerDisabled(name)) { + debugLogger.debug(`Skipping disabled MCP server: ${name}`); + // If the server was previously enabled and got connected, we now + // need to tear it down — otherwise its client, registered tools + // and health checks linger after an enabled→disabled mid-session + // transition (e.g. via `/mcp disable `). `removeServer` + // disconnects, drops the client entry, removes tools from the + // registry, stops the health check, and removes the global + // status so the Footer pill stops counting it. + if (this.clients.has(name)) { + await this.removeServer(name); + } + continue; + } const existingClient = this.clients.get(name); if (!existingClient) { // New server @@ -475,11 +520,54 @@ export class McpClientManager { } } - // Update only the servers that need it + // Update only the servers that need it. Each per-server discover is + // wrapped in a discovery-only timeout (stdio default 30s, remote 5s, + // per-server override via `discoveryTimeoutMs`). Tool-call timeout is + // intentionally left alone — a long-running tool invocation is not a + // startup pathology. const discoveryPromises = serversToUpdate.map(async (name) => { + const serverConfig = servers[name]; try { - await this.discoverMcpToolsForServer(name, cliConfig); + await this.runWithDiscoveryTimeout(name, serverConfig, () => + this.discoverMcpToolsForServer(name, cliConfig), + ); + // `discoverMcpToolsForServerInternal` swallows connect/discover + // errors (best-effort discovery semantics — see its catch block), + // so the try here resolves even for failed servers. Only the + // timeout path reaches the catch below. Consult the actual + // server status to decide which outcome to record, otherwise + // every auth failure / crash / "no tools found" looks like + // `ready` in the startup profile. + const client = this.clients.get(name); + const actuallyReady = + !!client && getMCPServerStatus(name) === MCPServerStatus.CONNECTED; + if (actuallyReady) { + if (!firstToolEventFired) { + firstToolEventFired = true; + recordStartupEvent('mcp_first_tool_registered', { + serverName: name, + }); + } + recordStartupEvent(`mcp_server_ready:${name}`, { outcome: 'ready' }); + } else { + recordStartupEvent(`mcp_server_ready:${name}`, { + outcome: 'failed', + reason: 'connect or discover error', + }); + } } catch (error) { + // Defensive cleanup: the dedup Map entry is normally removed by + // `discoverMcpToolsForServer`'s `finally`, but `runWithDiscoveryTimeout` + // can reject before that finally runs (the timeout also disconnects + // the client to abort the underlying handshake). Without this + // explicit delete, a brief window exists where a subsequent + // `discoverMcpToolsForServer(name)` call would short-circuit on + // a now-doomed promise. + this.serverDiscoveryPromises.delete(name); + recordStartupEvent(`mcp_server_ready:${name}`, { + outcome: 'failed', + reason: getErrorMessage(error), + }); debugLogger.error( `Error during incremental discovery for server '${name}': ${getErrorMessage(error)}`, ); @@ -494,6 +582,139 @@ export class McpClientManager { } this.discoveryState = MCPDiscoveryState.COMPLETED; + recordStartupEvent('mcp_all_servers_settled', { + serverCount: Object.keys(servers).length, + incremental: true, + }); + // Trailing `mcp-client-update` AFTER flipping discoveryState to + // COMPLETED. Without this the per-server updates above all fire while + // the state is still IN_PROGRESS, so the AppContainer batch-flush + // subscriber never observes the terminal state. + this.eventEmitter?.emit('mcp-client-update', this.clients); + } + + /** + * Caps how long a single MCP server's discover handshake is allowed to + * take during startup. Local stdio servers default to 30s; remote + * HTTP/SSE servers default to 5s (mirrors Claude Code's + * `CLAUDE_AI_MCP_TIMEOUT_MS`). Per-server override via + * `mcpServers..discoveryTimeoutMs` in settings. + */ + private runWithDiscoveryTimeout( + serverName: string, + serverConfig: MCPServerConfig | undefined, + fn: () => Promise, + ): Promise { + const timeoutMs = this.discoveryTimeoutFor(serverConfig); + let timedOut = false; + return new Promise((resolve, reject) => { + const timer = setTimeout(async () => { + timedOut = true; + // CRITICAL: rejecting `runWithDiscoveryTimeout` does NOT cancel + // the underlying `discoverMcpToolsForServer` — it keeps trying + // to `connect()` / `discover()`, and if the slow server + // eventually responds, `discover()` registers its tools into + // the live `toolRegistry` and re-emits `mcp-client-update`. + // From the user's perspective the server "failed" but its tools + // are silently active, including any that shadow built-ins. + // + // Disconnect the client to abort the handshake so the background + // promise rejects, then drop any tools that DID slip through the + // race window. A fire-and-forget `client.disconnect()` is NOT + // enough: `disconnect()` awaits `transport.close()`, and the + // in-flight `discover()` may have already pumped its `tools/list` + // response through the transport AND iterated + // `toolRegistry.registerTool(tool)` synchronously by the time + // the close lands. The earlier fix's comment described the + // pre-fix state as a "remote-exploitable silent-tool-registration + // vector" — `await` plus `removeMcpToolsByServer` closes it. + const client = this.clients.get(serverName); + if (client) { + try { + await client.disconnect(); + } catch (err) { + debugLogger.debug( + `Forced disconnect of timed-out server '${serverName}' threw: ${getErrorMessage(err)}`, + ); + } + } + // Drop any tools that registered during the disconnect window. No-op + // if the server hadn't reached `discover()` yet, so it's safe to + // always call. + this.toolRegistry.removeMcpToolsByServer(serverName); + // Prevent the discovery `finally` block's `startHealthCheck` from + // resurrecting this server: without removing the client entry, + // `performHealthCheck` would observe `status !== CONNECTED` for + // ~maxConsecutiveFailures intervals and then call + // `reconnectServer()` → `discoverMcpToolsForServer()` directly, + // bypassing `runWithDiscoveryTimeout` entirely. The intentionally + // timed-out server would silently come back. Removing the client + // entry + stopping any pending health-check timer closes that + // loop; `startHealthCheck` early-returns when the client is + // absent, so the trailing `finally`-block call becomes a no-op. + this.stopHealthCheck(serverName); + this.clients.delete(serverName); + reject( + new Error( + `MCP server '${serverName}' discovery timed out after ${timeoutMs}ms`, + ), + ); + }, timeoutMs); + fn().then( + (value) => { + clearTimeout(timer); + // Suppress success after timeout — the timeout already + // rejected the outer promise; resolving it again is a no-op + // but the success path would also re-emit + // `mcp_server_ready:ready` and `mcp_first_tool_registered` + // even though the rest of the system has moved on. + if (!timedOut) resolve(value); + }, + (err) => { + clearTimeout(timer); + if (!timedOut) { + reject(err instanceof Error ? err : new Error(String(err))); + } + }, + ); + }); + } + + /** + * Minimum / maximum discovery timeouts. `0` or a negative value as a + * per-server override would cause every discover to fire its timeout on + * the next tick — combined with the lack of disconnect on timeout this + * was a remote-exploitable silent-tool-registration vector (a + * MITM/attacker-controlled MCP server could land its tools after the + * timeout fired). `Infinity` / very large values would hang + * `waitForMcpReady()` forever for non-interactive paths. The 100ms + * floor is generous (real handshakes start in single-digit ms locally, + * tens of ms remote); the 5-minute ceiling matches the longest tool + * call timeouts we've documented. + */ + private static readonly MIN_DISCOVERY_TIMEOUT_MS = 100; + private static readonly MAX_DISCOVERY_TIMEOUT_MS = 300_000; + + private discoveryTimeoutFor(serverConfig?: MCPServerConfig): number { + const override = serverConfig?.discoveryTimeoutMs; + if (override !== undefined && Number.isFinite(override)) { + return Math.max( + McpClientManager.MIN_DISCOVERY_TIMEOUT_MS, + Math.min(override, McpClientManager.MAX_DISCOVERY_TIMEOUT_MS), + ); + } + // Remote transports (HTTP/SSE/WebSocket) carry network risk and get + // a shorter default; stdio servers we trust the user already runs + // locally. `tcp` is the WebSocket transport field on + // `MCPServerConfig` — without it, websocket servers fall through to + // the stdio default and a hung WS handshake holds back the + // non-interactive `waitForMcpReady()` for 30s instead of 5s. + const isRemote = !!( + serverConfig?.httpUrl || + serverConfig?.url || + serverConfig?.tcp + ); + return isRemote ? 5_000 : 30_000; } /** diff --git a/packages/core/src/tools/mcp-client.test.ts b/packages/core/src/tools/mcp-client.test.ts index fab84f95e..2fe52d62f 100644 --- a/packages/core/src/tools/mcp-client.test.ts +++ b/packages/core/src/tools/mcp-client.test.ts @@ -185,6 +185,57 @@ describe('mcp-client', () => { 'No prompts or tools found on the server.', ); }); + + it('flips status to DISCONNECTED when discover() throws', async () => { + // `Config.getFailedMcpServerNames()` filters by + // `status !== CONNECTED`, so a server that connects successfully + // but whose `discover()` then crashes (e.g. tools/list rejects, or + // the "no prompts or tools found" guard fires) must be marked + // DISCONNECTED before the error propagates. Without this, the + // server stays CONNECTED in the global registry, the non-interactive + // failure banner silently omits it, and the Footer's MCP health + // pill keeps counting it as healthy. + const mockedClient = { + connect: vi.fn(), + discover: vi.fn(), + disconnect: vi.fn(), + getStatus: vi.fn(), + registerCapabilities: vi.fn(), + setRequestHandler: vi.fn(), + getServerCapabilities: vi.fn().mockReturnValue({ prompts: {} }), + request: vi.fn().mockRejectedValue(new Error('tools/list crashed')), + close: vi.fn(), + }; + vi.mocked(ClientLib.Client).mockReturnValue( + mockedClient as unknown as ClientLib.Client, + ); + vi.spyOn(SdkClientStdioLib, 'StdioClientTransport').mockReturnValue( + {} as SdkClientStdioLib.StdioClientTransport, + ); + vi.mocked(GenAiLib.mcpToTool).mockReturnValue({ + tool: () => Promise.resolve({ functionDeclarations: [] }), + } as unknown as GenAiLib.CallableTool); + const serverName = `discover-error-${Date.now()}`; + const client = new McpClient( + serverName, + { + command: 'test-command', + }, + {} as ToolRegistry, + {} as PromptRegistry, + {} as WorkspaceContext, + false, + ); + await client.connect(); + // Sanity: connect succeeded so the status is CONNECTED before the + // discover failure we're about to assert against. + expect(client.getStatus()).toBe(MCPServerStatus.CONNECTED); + + await expect(client.discover({} as Config)).rejects.toThrow(); + + expect(client.getStatus()).toBe(MCPServerStatus.DISCONNECTED); + expect(getMCPServerStatus(serverName)).toBe(MCPServerStatus.DISCONNECTED); + }); }); describe('appendMcpServerCommand', () => { it('should do nothing if no MCP servers or command are configured', () => { @@ -597,6 +648,56 @@ describe('mcp-client', () => { // The entry must remain absent — no resurrection. expect(getAllMCPServerStatuses().has('racy-server')).toBe(false); }); + + it('disconnect() propagates DISCONNECTED to the global registry', async () => { + // Regression: a previous version set `isDisconnecting = true` BEFORE + // calling `updateStatus(DISCONNECTED)`, and `updateStatus`'s guard + // (designed to block stale `connect()` catch updates) silently + // swallowed the write. The global registry stayed CONNECTED forever, + // so `Config.getFailedMcpServerNames()` (which filters + // `status !== CONNECTED`) omitted timeout-disconnected servers from + // the non-interactive failure banner and the Footer's MCP health + // pill kept counting them as healthy. + const mockedClient = { + connect: vi.fn(), + registerCapabilities: vi.fn(), + setRequestHandler: vi.fn(), + close: vi.fn(), + }; + vi.mocked(ClientLib.Client).mockReturnValue( + mockedClient as unknown as ClientLib.Client, + ); + const mockedTransport = { close: vi.fn().mockResolvedValue(undefined) }; + vi.spyOn(SdkClientStdioLib, 'StdioClientTransport').mockReturnValue( + mockedTransport as unknown as SdkClientStdioLib.StdioClientTransport, + ); + + const client = new McpClient( + 'healthy-server', + { command: 'test-command' }, + {} as ToolRegistry, + {} as PromptRegistry, + { getDirectories: () => [] } as unknown as WorkspaceContext, + false, + ); + + await client.connect(); + // After connect, the registry should show CONNECTED. + expect(getMCPServerStatus('healthy-server')).toBe( + MCPServerStatus.CONNECTED, + ); + + await client.disconnect(); + // After an intentional disconnect, the global registry MUST reflect + // DISCONNECTED — otherwise downstream code (failure banner, health + // pill) treats the server as still healthy. + expect(getMCPServerStatus('healthy-server')).toBe( + MCPServerStatus.DISCONNECTED, + ); + + // Cleanup the registry entry so this test doesn't leak. + removeMCPServerStatus('healthy-server'); + }); }); describe('hasNetworkTransport', () => { diff --git a/packages/core/src/tools/mcp-client.ts b/packages/core/src/tools/mcp-client.ts index 413d2cc10..52c82171a 100644 --- a/packages/core/src/tools/mcp-client.ts +++ b/packages/core/src/tools/mcp-client.ts @@ -165,34 +165,62 @@ export class McpClient { /** * Discovers tools and prompts from the MCP server. + * + * On error, the client's status is flipped to DISCONNECTED before the + * error is re-thrown. Without this, a server that connects successfully + * but then crashes (or returns no tools, or whose `tools/list` call + * rejects) would remain `CONNECTED` in the global status registry, and + * `Config.getFailedMcpServerNames()` — which filters by + * `status !== CONNECTED` — would silently omit it from the + * non-interactive failure banner. The caller (manager) still catches + * and logs; we just need the status registry to reflect reality. */ async discover(cliConfig: Config): Promise { if (this.status !== MCPServerStatus.CONNECTED) { throw new Error('Client is not connected.'); } - const prompts = await this.discoverPrompts(); - const tools = await this.discoverTools(cliConfig); + try { + const prompts = await this.discoverPrompts(); + const tools = await this.discoverTools(cliConfig); - if (prompts.length === 0 && tools.length === 0) { - throw new Error('No prompts or tools found on the server.'); - } + if (prompts.length === 0 && tools.length === 0) { + throw new Error('No prompts or tools found on the server.'); + } - for (const tool of tools) { - this.toolRegistry.registerTool(tool); + for (const tool of tools) { + this.toolRegistry.registerTool(tool); + } + } catch (error) { + this.updateStatus(MCPServerStatus.DISCONNECTED); + throw error; } } /** * Disconnects from the MCP server. + * + * The intentional DISCONNECTED status update must reach the global + * registry — `getFailedMcpServerNames()` filters on `status !== CONNECTED` + * and the Footer's MCP health pill subscribes to the registry. Going + * through `updateStatus()` would have it swallowed by the + * `isDisconnecting` guard whose only purpose is to suppress LATE writes + * from a stale `connect()` catch. We therefore write the local field and + * the global registry directly, then flip `isDisconnecting = true` to + * shut down propagation from any in-flight `connect()` / `discover()` + * whose catch will fire after the transport has been torn down. */ async disconnect(): Promise { + // Set the local status BEFORE flipping `isDisconnecting`. A concurrent + // `discover()` reading `this.status` would otherwise see the stale + // CONNECTED value and try to register tools that we're about to drop. + this.status = MCPServerStatus.DISCONNECTED; + updateMCPServerStatus(this.serverName, MCPServerStatus.DISCONNECTED); this.isDisconnecting = true; if (this.transport) { await this.transport.close(); } this.client.close(); - this.updateStatus(MCPServerStatus.DISCONNECTED); } /** diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts index ffd77f219..2f405fe72 100644 --- a/packages/core/src/tools/tool-registry.ts +++ b/packages/core/src/tools/tool-registry.ts @@ -405,6 +405,16 @@ export class ToolRegistry { } } + /** + * Returns the manager that owns MCP client lifecycles. Exposed so + * `Config.initialize()`'s background discovery path can call + * `discoverAllMcpToolsIncremental` directly without going through + * `discoverMcpTools` (which would wipe already-registered tools). + */ + getMcpClientManager(): McpClientManager { + return this.mcpClientManager; + } + /** * Discovers tools from project (if available and configured). * Can be called multiple times to update discovered tools. diff --git a/packages/core/src/utils/startupEventSink.test.ts b/packages/core/src/utils/startupEventSink.test.ts new file mode 100644 index 000000000..75834dd90 --- /dev/null +++ b/packages/core/src/utils/startupEventSink.test.ts @@ -0,0 +1,53 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { recordStartupEvent, setStartupEventSink } from './startupEventSink.js'; + +describe('startupEventSink', () => { + afterEach(() => { + setStartupEventSink(null); + }); + + it('is a no-op when no sink is registered', () => { + // Must not throw, must not interact with anything. + expect(() => recordStartupEvent('foo')).not.toThrow(); + expect(() => recordStartupEvent('bar', { reason: 'x' })).not.toThrow(); + }); + + it('forwards name and attrs to the registered sink', () => { + const sink = vi.fn(); + setStartupEventSink(sink); + + recordStartupEvent('mcp_server_ready:foo', { outcome: 'ready' }); + recordStartupEvent('gemini_tools_updated'); + + expect(sink).toHaveBeenCalledTimes(2); + expect(sink).toHaveBeenNthCalledWith(1, 'mcp_server_ready:foo', { + outcome: 'ready', + }); + expect(sink).toHaveBeenNthCalledWith(2, 'gemini_tools_updated', undefined); + }); + + it('does not bubble sink exceptions into hot paths', () => { + const sink = vi.fn(() => { + throw new Error('sink should not break callers'); + }); + setStartupEventSink(sink); + + expect(() => recordStartupEvent('e')).not.toThrow(); + expect(sink).toHaveBeenCalledTimes(1); + }); + + it('lets the caller unset the sink by passing null', () => { + const sink = vi.fn(); + setStartupEventSink(sink); + recordStartupEvent('one'); + setStartupEventSink(null); + recordStartupEvent('two'); + expect(sink).toHaveBeenCalledTimes(1); + expect(sink).toHaveBeenCalledWith('one', undefined); + }); +}); diff --git a/packages/core/src/utils/startupEventSink.ts b/packages/core/src/utils/startupEventSink.ts new file mode 100644 index 000000000..b57f35302 --- /dev/null +++ b/packages/core/src/utils/startupEventSink.ts @@ -0,0 +1,65 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Cross-package sink for startup-time profiler events. + * + * The cli package owns the actual startup profiler (`packages/cli/src/utils/startupProfiler.ts`) + * but core-package code (config init, MCP discovery, GeminiClient.setTools, etc.) is + * the source of several first-screen / first-paint metrics. To avoid an + * undesirable core → cli dependency, core code records events via this sink, + * and the cli registers a real handler at startup. + * + * When no handler is registered (the common case in tests / non-interactive + * paths / when QWEN_CODE_PROFILE_STARTUP=0), `recordStartupEvent` is a no-op + * with O(1) overhead. + */ + +import { createDebugLogger } from './debugLogger.js'; + +const debugLogger = createDebugLogger('STARTUP_EVENT_SINK'); + +export type StartupEventAttrs = Record; + +export type StartupEventSink = ( + name: string, + attrs?: StartupEventAttrs, +) => void; + +let sink: StartupEventSink | null = null; + +/** + * Registers the active sink. Typically called once at cli entry. + */ +export function setStartupEventSink(handler: StartupEventSink | null): void { + sink = handler; +} + +/** + * Records a startup event. Safe to call from any package; no-op when no sink + * is registered. + */ +export function recordStartupEvent( + name: string, + attrs?: StartupEventAttrs, +): void { + if (sink) { + try { + sink(name, attrs); + } catch (err) { + // Profiler sinks must never throw into hot paths (this is called + // from startup-critical code: config init, MCP discovery, setTools), + // but route the failure through `debugLogger` so a corrupted sink + // doesn't silently drop every subsequent event. `debugLogger` is + // quiet by default and visible under `QWEN_CODE_DEBUG=1` and in the + // debug log file — matching how other "must never throw" sites in + // this PR (e.g. AppContainer's `setTools` flush) surface errors. + debugLogger.error( + `startup event sink threw for '${name}': ${err instanceof Error ? err.message : String(err)}`, + ); + } + } +}