diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 3e8a1891f..95ab1e220 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -169,6 +169,11 @@ export interface CliArgs { inputFile?: string | undefined; } +/** 4 MiB — well above any real schema, well below an accidental + * gigabyte-sized file that would OOM `fs.readFileSync` + `JSON.parse`. + */ +const MAX_JSON_SCHEMA_FILE_BYTES = 4 * 1024 * 1024; + /** * Resolves the `--json-schema` argument into a parsed JSON Schema object. * @@ -191,8 +196,25 @@ export function resolveJsonSchemaArg( if (trimmed.startsWith('@')) { const resolvedPath = resolvePath(trimmed.slice(1)); try { + // Cap the read size at 4 MiB. Real-world JSON schemas are well + // under this (the spec encourages decomposition via `$ref`); any + // multi-MiB file is almost certainly a wrong-path mistake or a + // typo'd argument. Without a cap, a multi-GB file (e.g. accidental + // `--json-schema @./node_modules/.cache/*.json`) loads into memory + // before `JSON.parse` even runs and OOMs the CLI. + const stat = fs.statSync(resolvedPath); + if (stat.size > MAX_JSON_SCHEMA_FILE_BYTES) { + throw new FatalConfigError( + `--json-schema file "${resolvedPath}" is ${stat.size} bytes ` + + `(>${MAX_JSON_SCHEMA_FILE_BYTES}). Refusing to read; this is ` + + 'almost certainly a wrong-path argument. Schemas should be ' + + 'small enough to fit in a few KiB; decompose with `$ref` if ' + + 'you need a large family of types.', + ); + } payload = fs.readFileSync(resolvedPath, 'utf8'); } catch (err) { + if (err instanceof FatalConfigError) throw err; throw new FatalConfigError( `--json-schema could not read "${resolvedPath}": ${ err instanceof Error ? err.message : String(err) diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index a1c2a5d86..9ab632776 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -492,7 +492,35 @@ export async function runNonInteractive( let structuredSubmission: unknown = undefined; let hasStructuredSubmission = false; - for (const requestInfo of toolCallRequests) { + // Pre-scan: when --json-schema is active and the model emitted + // structured_output alongside other tools in the same turn, + // execute structured_output FIRST so its terminal-flag wins + // before sibling tools' side effects (write_file, shell, …) + // get a chance to persist. If structured_output succeeds the + // siblings receive a synthesized "skipped" tool_result instead + // of running. If structured_output fails (validation), the + // siblings still run via the normal loop body — same behavior + // as a turn that didn't issue structured_output at all. + // + // Without this, [write_file, structured_output] runs write_file + // first (irreversible), THEN structured_output sets the flag, + // and the user gets back a "successful" structured result with + // unrelated side-effects already on disk. + let orderedToolCallRequests = toolCallRequests; + if (config.getJsonSchema()) { + const structIdx = orderedToolCallRequests.findIndex( + (r) => r.name === ToolNames.STRUCTURED_OUTPUT, + ); + if (structIdx > 0) { + orderedToolCallRequests = [ + orderedToolCallRequests[structIdx], + ...orderedToolCallRequests.slice(0, structIdx), + ...orderedToolCallRequests.slice(structIdx + 1), + ]; + } + } + + for (const requestInfo of orderedToolCallRequests) { const finalRequestInfo = requestInfo; const inputFormat = @@ -567,14 +595,14 @@ export async function runNonInteractive( modelOverride = toolResponse.modelOverride; } - // Single-shot contract: once structured_output has succeeded - // we terminate immediately, so any *remaining* tool calls in - // this same model batch must not run (a model that emits - // `[structured_output, write_file]` should not see write_file - // execute). Tool calls that came BEFORE structured_output in - // the batch have already executed; preventing those requires - // a pre-scan of toolCallRequests and is tracked as a - // follow-up (overlap with #3598). + // Single-shot contract: structured_output is terminal. + // The pre-scan above hoists it to the front of the batch, + // so once it succeeds the remaining (now reordered) + // entries are guaranteed to be siblings the model + // intended for THIS turn — break and let the terminal + // emitResult fire below. Unpaired tool_use entries in + // the model's record are harmless because no next API + // call happens (the session is over). if (hasStructuredSubmission) { break; } diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 4a983eaa3..fcb7b327e 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -442,14 +442,29 @@ export class GeminiClient { } } } - // Only surface the deferred list if ToolSearch is actually registered. - // It may be filtered out by the permission manager (allow/deny rules), - // in which case telling the model to call ToolSearch would be - // misleading — we'd just be instructing it to invoke an unknown tool. - // Exclude any tools revealed by the resume scan above: their schemas - // are already in the declaration list, so advertising them as - // "reachable via ToolSearch" would invite redundant lookup calls. + // ToolSearch availability gates two things: + // (a) Whether the deferred-tools discovery section appears in the + // prompt (otherwise we'd be telling the model to call a tool + // that isn't registered). + // (b) Whether deferral itself makes sense at all — if the model + // has no way to reveal a deferred tool, the tool is effectively + // hidden + uncallable. Silent disappearance is the worst + // failure mode (user sees no error, just thinks the tool + // doesn't exist), so when ToolSearch is filtered out (e.g. via + // `--exclude-tools tool_search` or a deny rule), reveal every + // deferred tool eagerly so they all land in the declaration + // list. The token-saving rationale of deferral was predicated + // on the discovery surface being available. const toolSearchAvailable = !!toolRegistry.getTool(ToolNames.TOOL_SEARCH); + if (!toolSearchAvailable && deferredSummary.length > 0) { + for (const t of deferredSummary) { + toolRegistry.revealDeferredTool(t.name); + } + } + // Exclude any tools revealed by the resume scan (or the no-ToolSearch + // eager-reveal above): their schemas are already in the declaration + // list, so advertising them as "reachable via ToolSearch" would + // invite redundant lookup calls. const deferredTools = toolSearchAvailable ? deferredSummary.filter( (t) => !toolRegistry.isDeferredToolRevealed(t.name), diff --git a/packages/core/src/tools/tool-registry.test.ts b/packages/core/src/tools/tool-registry.test.ts index 25520ffa4..f2d842a46 100644 --- a/packages/core/src/tools/tool-registry.test.ts +++ b/packages/core/src/tools/tool-registry.test.ts @@ -282,6 +282,31 @@ describe('ToolRegistry', () => { { name: 'bravo', description: 'bravo desc' }, ]); }); + + it('removeMcpToolsByServer also drops revealedDeferred entries', async () => { + // Pin the regression: a server-disconnect-then-reconnect cycle that + // re-registers a tool of the same name must NOT inherit + // `revealed: true` from before the disconnect — that would leak + // into `getFunctionDeclarations` before the model has any way to + // know the tool exists this session. + const mcpCallable = {} as CallableTool; + const tool = new DiscoveredMCPTool( + mcpCallable, + 'slack', + 'send_message', + 'send a message', + {}, + ); + toolRegistry.registerTool(tool); + // Use the actual generated tool name (mcp__slack__send_message) — the + // reveal-state map is keyed by that, not the server-tool-name alone. + const toolName = tool.name; + toolRegistry.revealDeferredTool(toolName); + expect(toolRegistry.isDeferredToolRevealed(toolName)).toBe(true); + + toolRegistry.removeMcpToolsByServer('slack'); + expect(toolRegistry.isDeferredToolRevealed(toolName)).toBe(false); + }); }); describe('getToolsByServer', () => { diff --git a/packages/core/src/tools/tool-registry.ts b/packages/core/src/tools/tool-registry.ts index 1a3903868..3b37729e2 100644 --- a/packages/core/src/tools/tool-registry.ts +++ b/packages/core/src/tools/tool-registry.ts @@ -310,6 +310,10 @@ export class ToolRegistry { for (const tool of this.tools.values()) { if (tool instanceof DiscoveredTool || tool instanceof DiscoveredMCPTool) { this.tools.delete(tool.name); + // Drop reveal state too — see `removeMcpToolsByServer`. Without + // this a re-discovered tool of the same name would inherit + // stale "revealed" state across the disconnect/reconnect. + this.revealedDeferred.delete(tool.name); } } } @@ -322,6 +326,13 @@ export class ToolRegistry { for (const [name, tool] of this.tools.entries()) { if (tool instanceof DiscoveredMCPTool && tool.serverName === serverName) { this.tools.delete(name); + // Drop reveal state for the removed tool. Otherwise a server + // disconnect → reconnect cycle that re-registers a tool of + // the same name would inherit `revealed: true` from the prior + // session — `getFunctionDeclarations` would emit it (since it + // checks reveal state) before the model has any way to know + // the tool exists this session. + this.revealedDeferred.delete(name); } } } @@ -412,6 +423,11 @@ export class ToolRegistry { for (const [name, tool] of this.tools.entries()) { if (tool instanceof DiscoveredMCPTool && tool.serverName === serverName) { this.tools.delete(name); + // Drop reveal state too so a re-discovered tool of the same + // name doesn't inherit a `revealed: true` from before the + // disconnect (would surface in declarations before any + // ToolSearch call this session). + this.revealedDeferred.delete(name); } }