mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-17 12:21:10 +00:00
fix: address #3589 wave 2 — Critical reveal/race + revealed-set hygiene
Critical correctness: - `client.ts`: when ToolSearch is filtered out (allow/deny rules, `--exclude-tools tool_search`), eagerly reveal every deferred tool so they all land in the function declaration list. Without this the user sees those tools just disappear silently — the deferred- tool discovery surface is gone, but the tools are still hidden by the registry filter, so they're effectively invisible AND uncallable. Token-saving rationale of deferral was predicated on the discovery surface being available; if not, eager reveal preserves the invariant "all registered tools are callable". - `config.ts`: `--json-schema` now requires the root schema to declare `type: "object"` (or array containing it). Tool-call args are always validated as objects, so root-only `anyOf` / `oneOf` / `allOf` / `not` would create schemas the model can't consistently satisfy — surface as a startup error instead of mid-session "Model produced plain text" failures users can't easily diagnose. - `nonInteractiveCli.ts`: structured_output + sibling tools in the same turn no longer leaks side effects. Pre-scan reorders structured_output to the front of `toolCallRequests`; once it succeeds, sibling tools (write_file, shell, …) get a synthesized `Skipped: this turn's structured_output contract took precedence as the terminal output. Re-issue this call in a separate turn if needed.` tool_result instead of running. If structured_output fails (e.g. validation), siblings still execute via the normal loop body, same as a turn that didn't issue structured_output at all. Reveal-set hygiene: - `tool-registry.ts`: `removeMcpToolsByServer`, `removeDiscoveredTools`, and `discoverToolsForServer` (the re-discovery path) now also drop the affected tool names from `revealedDeferred`. Without this, an MCP server disconnect / reconnect that re-registers a tool of the same name inherits `revealed: true` from before the disconnect — the schema lands in `getFunctionDeclarations` before the model has any way to know the tool exists this session. Defensive: - `config.ts`: `resolveJsonSchemaArg` caps `@path/to/schema.json` reads at 4 MiB. Real schemas are well under (decompose with `$ref` if needed); the cap catches accidental wrong-path arguments (`@./node_modules/.cache/*.json`) before they OOM `fs.readFileSync` + `JSON.parse`. Tests: - New regression in `tool-registry.test.ts` for the `removeMcpToolsByServer` revealedDeferred prune. - 23/23 tool-search.test.ts, 23/23 tool-registry.test.ts, 226/229 nonInteractiveCli.test.ts (3 skipped pre-existing), 195/197 config.test.ts (2 skipped pre-existing) — all pass. Deferred to follow-up (replied + tracked): - 10-positional-param API on DeclarativeTool (refactor breadth). - `instanceof DiscoveredMCPTool` (needs `toolType` tag). - `structured_result` intersection vs canonical interface. - Resume-scan error/permission-denied filter + early-exit. - `getAllTools()` sort discarded (perf, ~negligible). - DeferredTools section cap. - `setTools` → `warmAll` undercutting deferral (theoretical; factories are nearly empty in practice today).
This commit is contained in:
parent
33a946a286
commit
9588231d77
5 changed files with 122 additions and 16 deletions
|
|
@ -169,6 +169,11 @@ export interface CliArgs {
|
|||
inputFile?: string | undefined;
|
||||
}
|
||||
|
||||
/** 4 MiB — well above any real schema, well below an accidental
|
||||
* gigabyte-sized file that would OOM `fs.readFileSync` + `JSON.parse`.
|
||||
*/
|
||||
const MAX_JSON_SCHEMA_FILE_BYTES = 4 * 1024 * 1024;
|
||||
|
||||
/**
|
||||
* Resolves the `--json-schema` argument into a parsed JSON Schema object.
|
||||
*
|
||||
|
|
@ -191,8 +196,25 @@ export function resolveJsonSchemaArg(
|
|||
if (trimmed.startsWith('@')) {
|
||||
const resolvedPath = resolvePath(trimmed.slice(1));
|
||||
try {
|
||||
// Cap the read size at 4 MiB. Real-world JSON schemas are well
|
||||
// under this (the spec encourages decomposition via `$ref`); any
|
||||
// multi-MiB file is almost certainly a wrong-path mistake or a
|
||||
// typo'd argument. Without a cap, a multi-GB file (e.g. accidental
|
||||
// `--json-schema @./node_modules/.cache/*.json`) loads into memory
|
||||
// before `JSON.parse` even runs and OOMs the CLI.
|
||||
const stat = fs.statSync(resolvedPath);
|
||||
if (stat.size > MAX_JSON_SCHEMA_FILE_BYTES) {
|
||||
throw new FatalConfigError(
|
||||
`--json-schema file "${resolvedPath}" is ${stat.size} bytes ` +
|
||||
`(>${MAX_JSON_SCHEMA_FILE_BYTES}). Refusing to read; this is ` +
|
||||
'almost certainly a wrong-path argument. Schemas should be ' +
|
||||
'small enough to fit in a few KiB; decompose with `$ref` if ' +
|
||||
'you need a large family of types.',
|
||||
);
|
||||
}
|
||||
payload = fs.readFileSync(resolvedPath, 'utf8');
|
||||
} catch (err) {
|
||||
if (err instanceof FatalConfigError) throw err;
|
||||
throw new FatalConfigError(
|
||||
`--json-schema could not read "${resolvedPath}": ${
|
||||
err instanceof Error ? err.message : String(err)
|
||||
|
|
|
|||
|
|
@ -492,7 +492,35 @@ export async function runNonInteractive(
|
|||
let structuredSubmission: unknown = undefined;
|
||||
let hasStructuredSubmission = false;
|
||||
|
||||
for (const requestInfo of toolCallRequests) {
|
||||
// Pre-scan: when --json-schema is active and the model emitted
|
||||
// structured_output alongside other tools in the same turn,
|
||||
// execute structured_output FIRST so its terminal-flag wins
|
||||
// before sibling tools' side effects (write_file, shell, …)
|
||||
// get a chance to persist. If structured_output succeeds the
|
||||
// siblings receive a synthesized "skipped" tool_result instead
|
||||
// of running. If structured_output fails (validation), the
|
||||
// siblings still run via the normal loop body — same behavior
|
||||
// as a turn that didn't issue structured_output at all.
|
||||
//
|
||||
// Without this, [write_file, structured_output] runs write_file
|
||||
// first (irreversible), THEN structured_output sets the flag,
|
||||
// and the user gets back a "successful" structured result with
|
||||
// unrelated side-effects already on disk.
|
||||
let orderedToolCallRequests = toolCallRequests;
|
||||
if (config.getJsonSchema()) {
|
||||
const structIdx = orderedToolCallRequests.findIndex(
|
||||
(r) => r.name === ToolNames.STRUCTURED_OUTPUT,
|
||||
);
|
||||
if (structIdx > 0) {
|
||||
orderedToolCallRequests = [
|
||||
orderedToolCallRequests[structIdx],
|
||||
...orderedToolCallRequests.slice(0, structIdx),
|
||||
...orderedToolCallRequests.slice(structIdx + 1),
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
for (const requestInfo of orderedToolCallRequests) {
|
||||
const finalRequestInfo = requestInfo;
|
||||
|
||||
const inputFormat =
|
||||
|
|
@ -567,14 +595,14 @@ export async function runNonInteractive(
|
|||
modelOverride = toolResponse.modelOverride;
|
||||
}
|
||||
|
||||
// Single-shot contract: once structured_output has succeeded
|
||||
// we terminate immediately, so any *remaining* tool calls in
|
||||
// this same model batch must not run (a model that emits
|
||||
// `[structured_output, write_file]` should not see write_file
|
||||
// execute). Tool calls that came BEFORE structured_output in
|
||||
// the batch have already executed; preventing those requires
|
||||
// a pre-scan of toolCallRequests and is tracked as a
|
||||
// follow-up (overlap with #3598).
|
||||
// Single-shot contract: structured_output is terminal.
|
||||
// The pre-scan above hoists it to the front of the batch,
|
||||
// so once it succeeds the remaining (now reordered)
|
||||
// entries are guaranteed to be siblings the model
|
||||
// intended for THIS turn — break and let the terminal
|
||||
// emitResult fire below. Unpaired tool_use entries in
|
||||
// the model's record are harmless because no next API
|
||||
// call happens (the session is over).
|
||||
if (hasStructuredSubmission) {
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -442,14 +442,29 @@ export class GeminiClient {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Only surface the deferred list if ToolSearch is actually registered.
|
||||
// It may be filtered out by the permission manager (allow/deny rules),
|
||||
// in which case telling the model to call ToolSearch would be
|
||||
// misleading — we'd just be instructing it to invoke an unknown tool.
|
||||
// Exclude any tools revealed by the resume scan above: their schemas
|
||||
// are already in the declaration list, so advertising them as
|
||||
// "reachable via ToolSearch" would invite redundant lookup calls.
|
||||
// ToolSearch availability gates two things:
|
||||
// (a) Whether the deferred-tools discovery section appears in the
|
||||
// prompt (otherwise we'd be telling the model to call a tool
|
||||
// that isn't registered).
|
||||
// (b) Whether deferral itself makes sense at all — if the model
|
||||
// has no way to reveal a deferred tool, the tool is effectively
|
||||
// hidden + uncallable. Silent disappearance is the worst
|
||||
// failure mode (user sees no error, just thinks the tool
|
||||
// doesn't exist), so when ToolSearch is filtered out (e.g. via
|
||||
// `--exclude-tools tool_search` or a deny rule), reveal every
|
||||
// deferred tool eagerly so they all land in the declaration
|
||||
// list. The token-saving rationale of deferral was predicated
|
||||
// on the discovery surface being available.
|
||||
const toolSearchAvailable = !!toolRegistry.getTool(ToolNames.TOOL_SEARCH);
|
||||
if (!toolSearchAvailable && deferredSummary.length > 0) {
|
||||
for (const t of deferredSummary) {
|
||||
toolRegistry.revealDeferredTool(t.name);
|
||||
}
|
||||
}
|
||||
// Exclude any tools revealed by the resume scan (or the no-ToolSearch
|
||||
// eager-reveal above): their schemas are already in the declaration
|
||||
// list, so advertising them as "reachable via ToolSearch" would
|
||||
// invite redundant lookup calls.
|
||||
const deferredTools = toolSearchAvailable
|
||||
? deferredSummary.filter(
|
||||
(t) => !toolRegistry.isDeferredToolRevealed(t.name),
|
||||
|
|
|
|||
|
|
@ -282,6 +282,31 @@ describe('ToolRegistry', () => {
|
|||
{ name: 'bravo', description: 'bravo desc' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('removeMcpToolsByServer also drops revealedDeferred entries', async () => {
|
||||
// Pin the regression: a server-disconnect-then-reconnect cycle that
|
||||
// re-registers a tool of the same name must NOT inherit
|
||||
// `revealed: true` from before the disconnect — that would leak
|
||||
// into `getFunctionDeclarations` before the model has any way to
|
||||
// know the tool exists this session.
|
||||
const mcpCallable = {} as CallableTool;
|
||||
const tool = new DiscoveredMCPTool(
|
||||
mcpCallable,
|
||||
'slack',
|
||||
'send_message',
|
||||
'send a message',
|
||||
{},
|
||||
);
|
||||
toolRegistry.registerTool(tool);
|
||||
// Use the actual generated tool name (mcp__slack__send_message) — the
|
||||
// reveal-state map is keyed by that, not the server-tool-name alone.
|
||||
const toolName = tool.name;
|
||||
toolRegistry.revealDeferredTool(toolName);
|
||||
expect(toolRegistry.isDeferredToolRevealed(toolName)).toBe(true);
|
||||
|
||||
toolRegistry.removeMcpToolsByServer('slack');
|
||||
expect(toolRegistry.isDeferredToolRevealed(toolName)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getToolsByServer', () => {
|
||||
|
|
|
|||
|
|
@ -310,6 +310,10 @@ export class ToolRegistry {
|
|||
for (const tool of this.tools.values()) {
|
||||
if (tool instanceof DiscoveredTool || tool instanceof DiscoveredMCPTool) {
|
||||
this.tools.delete(tool.name);
|
||||
// Drop reveal state too — see `removeMcpToolsByServer`. Without
|
||||
// this a re-discovered tool of the same name would inherit
|
||||
// stale "revealed" state across the disconnect/reconnect.
|
||||
this.revealedDeferred.delete(tool.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -322,6 +326,13 @@ export class ToolRegistry {
|
|||
for (const [name, tool] of this.tools.entries()) {
|
||||
if (tool instanceof DiscoveredMCPTool && tool.serverName === serverName) {
|
||||
this.tools.delete(name);
|
||||
// Drop reveal state for the removed tool. Otherwise a server
|
||||
// disconnect → reconnect cycle that re-registers a tool of
|
||||
// the same name would inherit `revealed: true` from the prior
|
||||
// session — `getFunctionDeclarations` would emit it (since it
|
||||
// checks reveal state) before the model has any way to know
|
||||
// the tool exists this session.
|
||||
this.revealedDeferred.delete(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -412,6 +423,11 @@ export class ToolRegistry {
|
|||
for (const [name, tool] of this.tools.entries()) {
|
||||
if (tool instanceof DiscoveredMCPTool && tool.serverName === serverName) {
|
||||
this.tools.delete(name);
|
||||
// Drop reveal state too so a re-discovered tool of the same
|
||||
// name doesn't inherit a `revealed: true` from before the
|
||||
// disconnect (would surface in declarations before any
|
||||
// ToolSearch call this session).
|
||||
this.revealedDeferred.delete(name);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue