From 62038527ccd2f6cb7a0a5bb886fa100173aa4570 Mon Sep 17 00:00:00 2001 From: wenshao Date: Tue, 5 May 2026 01:00:53 +0800 Subject: [PATCH] fix: address Copilot follow-up review on --json-schema scaffolding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five small but real findings flagged on the latest pass: 1. core/src/index.ts re-exported `SyntheticOutputTool` via `export type`, but it's a runtime class — that erased it from the emitted JS and would break value imports. Split into a value `export { ... }` and a `export type { StructuredOutputParams }`. 2. The structured-output success path returned without flushing `localQueue` notifications or finalising one-shot monitors. If a background task had already emitted `task_started`, exiting here could drop its paired `task_notification` and leave SDK consumers with unpaired lifecycle events. Mirror the regular terminal path's `flushQueuedNotificationsToSdk` + `finalizeOneShotMonitors` calls before `emitResult`. 3. `schemaRootAcceptsObject` ignored the `not` keyword, so `{not:{type:"object"}}` (which forbids every object value) slipped through. Add a best-effort `not` check that rejects when `not.type` directly excludes object. Deeper negated patterns still fall through to Ajv at runtime. 4. `compileStrict`'s JSDoc claimed it errored on "Ajv versions we can't support", but the function doesn't actually check Ajv versions. Reword to "malformed or uses unsupported draft/features for our Ajv configuration" so the contract matches the implementation. 5. The pre-scan suppressed sibling tool calls but only synthesised tool_result events for them on the retry path — the success path left those tool_use blocks unpaired in the emitted JSONL/stream-json event log. Move the synthesis after the for loop so it runs for both the success break and the validation-failure fall-through; the event log is now consistent regardless of which path the run takes. Tests cover the new \`not\`-rejection paths, the success-path tool_result synthesis, and the existing retry-pairing test still passes against the restructured emit ordering. --- packages/cli/src/config/config.ts | 28 +++++++ packages/cli/src/config/jsonSchemaArg.test.ts | 18 +++++ packages/cli/src/nonInteractiveCli.test.ts | 76 +++++++++++++----- packages/cli/src/nonInteractiveCli.ts | 79 +++++++++++-------- packages/core/src/index.ts | 6 +- packages/core/src/utils/schemaValidator.ts | 11 ++- 6 files changed, 158 insertions(+), 60 deletions(-) diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 50cf1e0b6..c9b7e0945 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -231,6 +231,34 @@ function schemaRootAcceptsObject(schema: Record): boolean { if (!everyBranchAcceptsObject) return false; } + // Best-effort `not` handling: when `not` directly forbids object via its + // own `type` keyword (e.g. `{not:{type:"object"}}` or + // `{not:{type:["object","null"]}}`), the schema can never be satisfied + // by an object — reject. We don't try to do full satisfiability analysis + // for arbitrary `not` schemas (e.g. `not:{const:"foo"}` is fine, but + // `not:{anyOf:[{type:"object"},…]}` would also reject objects); those + // fall through to Ajv at runtime. + const notSchema = schema['not']; + if ( + typeof notSchema === 'object' && + notSchema !== null && + !Array.isArray(notSchema) + ) { + const notType = (notSchema as Record)['type']; + if (notType !== undefined) { + const types = Array.isArray(notType) ? notType : [notType]; + // If `not.type` *only* lists types that include "object", every + // object value matches `not.type`, so the negation rejects every + // object. The simplest sound check: if "object" is in the list at + // all and there's no sibling positive constraint that would allow + // an object some other way, reject. We've already validated the + // sibling type / anyOf / oneOf / allOf above — at this point we + // know they at least permit objects, so a `not.type` containing + // "object" still excludes them. + if (types.includes('object')) return false; + } + } + // No narrowing at the root — lenient default, treated as object-compatible. return true; } diff --git a/packages/cli/src/config/jsonSchemaArg.test.ts b/packages/cli/src/config/jsonSchemaArg.test.ts index 2befaa709..7b18b4ee0 100644 --- a/packages/cli/src/config/jsonSchemaArg.test.ts +++ b/packages/cli/src/config/jsonSchemaArg.test.ts @@ -181,4 +181,22 @@ describe('resolveJsonSchemaArg', () => { ); expect(schema).toBeDefined(); }); + + it('rejects a root `not` that directly forbids object', () => { + // `not:{type:"object"}` excludes every object value, so the schema is + // unsatisfiable for tool-call args. Best-effort check — only inspects + // `not.type`; deeper negated patterns fall through to Ajv at runtime. + expect(() => resolveJsonSchemaArg('{"not":{"type":"object"}}')).toThrow( + /must accept object-typed values/, + ); + expect(() => + resolveJsonSchemaArg('{"not":{"type":["object","null"]}}'), + ).toThrow(/must accept object-typed values/); + }); + + it('accepts a root `not` whose negated type does not exclude object', () => { + // `not:{type:"string"}` only forbids strings — objects are still fine. + const schema = resolveJsonSchemaArg('{"not":{"type":"string"}}'); + expect(schema).toBeDefined(); + }); }); diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts index 70b8748fd..13f9c41a6 100644 --- a/packages/cli/src/nonInteractiveCli.test.ts +++ b/packages/cli/src/nonInteractiveCli.test.ts @@ -2421,6 +2421,25 @@ describe('runNonInteractive', () => { }); describe('--json-schema structured output', () => { + // Helper: walk an emitted event and extract the first tool_use_id when + // it represents a tool_result block. Returns undefined for any other + // event shape. + const extractToolResultId = (event: unknown): string | undefined => { + if (typeof event !== 'object' || event === null) return undefined; + const e = event as { + type?: unknown; + message?: { content?: unknown }; + }; + if (e.type !== 'user') return undefined; + const content = e.message?.content; + if (!Array.isArray(content) || content.length === 0) return undefined; + const block = content[0] as { type?: unknown; tool_use_id?: unknown }; + if (block?.type !== 'tool_result') return undefined; + return typeof block.tool_use_id === 'string' + ? block.tool_use_id + : undefined; + }; + it('stops executing remaining tool calls from the same turn once structured_output succeeds', async () => { (mockConfig.getJsonSchema as Mock).mockReturnValue({ type: 'object', @@ -2506,22 +2525,23 @@ describe('runNonInteractive', () => { // torn down before we emit the terminal result. expect(abortAllSpy).toHaveBeenCalledTimes(1); - // The emitted result must carry the submitted args under `result` as - // the JSON-stringified payload (the headless JSON formatter encodes - // the structured submission so SDK consumers always see a string here, - // matching how text-mode `result` is also a string). - const result = writes + const events = writes .join('') .split('\n') .filter((line) => line.trim().length > 0) .map((line) => JSON.parse(line)) - .flat() - .find( - (m: unknown) => - typeof m === 'object' && - m !== null && - (m as { type?: string }).type === 'result', - ); + .flat(); + + // The emitted result must carry the submitted args under `result` as + // the JSON-stringified payload (the headless JSON formatter encodes + // the structured submission so SDK consumers always see a string here, + // matching how text-mode `result` is also a string). + const result = events.find( + (m: unknown) => + typeof m === 'object' && + m !== null && + (m as { type?: string }).type === 'result', + ); expect(result).toBeDefined(); expect(result.is_error).toBe(false); expect(typeof result.result).toBe('string'); @@ -2529,6 +2549,14 @@ describe('runNonInteractive', () => { // The raw object is also exposed under `structured_result` for SDK // consumers that don't want to re-parse the stringified payload. expect(result.structured_result).toEqual(structuredArgs); + + // The suppressed trailing tool_use must have a synthesised + // tool_result so the event log pairs every tool_use with a + // tool_result, even on the success path. + const trailingToolResult = events.find( + (m: unknown) => extractToolResultId(m) === 'tool-trailing', + ); + expect(trailingToolResult).toBeDefined(); }); it('skips side-effecting tool calls that precede structured_output in the same turn', async () => { @@ -2611,21 +2639,29 @@ describe('runNonInteractive', () => { expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(1); expect(abortAllSpy).toHaveBeenCalledTimes(1); - const result = writes + const events = writes .join('') .split('\n') .filter((line) => line.trim().length > 0) .map((line) => JSON.parse(line)) - .flat() - .find( - (m: unknown) => - typeof m === 'object' && - m !== null && - (m as { type?: string }).type === 'result', - ); + .flat(); + const result = events.find( + (m: unknown) => + typeof m === 'object' && + m !== null && + (m as { type?: string }).type === 'result', + ); expect(result).toBeDefined(); expect(result.is_error).toBe(false); expect(result.structured_result).toEqual(structuredArgs); + + // The suppressed leading tool_use must have a synthesised + // tool_result event so the event log pairs every tool_use with a + // tool_result on the success path. + const leadingToolResult = events.find( + (m: unknown) => extractToolResultId(m) === 'tool-leading', + ); + expect(leadingToolResult).toBeDefined(); }); it('keeps the session running when structured_output args fail validation so the model can retry', async () => { diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts index f678dadfb..205e11a99 100644 --- a/packages/cli/src/nonInteractiveCli.ts +++ b/packages/cli/src/nonInteractiveCli.ts @@ -489,12 +489,12 @@ export async function runNonInteractive( // If --json-schema is active and the model emitted a // `structured_output` call in the same assistant turn as other - // tools, the structured call is the terminal contract — pre-scan - // the batch and execute ONLY the first `structured_output`, - // suppressing both leading and trailing tool calls. Without this, + // tools, the structured call is the terminal contract — execute + // ONLY the first `structured_output` and synthesise a "skipped" + // tool_result for every other call in the batch. Without this, // a batch like `[write_file(...), structured_output(...)]` would - // run the side-effecting tool before the run is accepted, and an - // invalid structured_output mid-batch (`[structured_output(bad), + // run the side-effecting tool before the run is accepted, and + // an invalid structured_output mid-batch (`[structured_output(bad), // write_file(...)]`) would still fall through to the trailing // tool before the retry turn. let requestsToExecute = toolCallRequests; @@ -583,22 +583,57 @@ export async function runNonInteractive( !toolResponse.error ) { // Honour the "first valid call ends the session" contract. - // The pre-scan above already filtered the batch to the first - // structured_output, so any other tool_use blocks the model - // emitted in this turn lack a matching tool_result — which is - // consistent with how other terminal paths (max-turns, - // cancellation) leave the stream. The break is after the - // responseParts/modelOverride capture above so future changes - // to SyntheticOutputTool can't silently drop those signals. + // The break is after the responseParts/modelOverride capture + // above so future changes to SyntheticOutputTool can't + // silently drop those signals. structuredSubmission = finalRequestInfo.args; break; } } + + // Synthesise tool_result events + retry parts for any sibling + // tool calls suppressed by the pre-scan. Runs for both the + // success and retry paths so the emitted event log pairs every + // tool_use the model produced with a tool_result, AND the + // retry-turn payload (when reached) doesn't leave Anthropic / + // OpenAI staring at unpaired tool_use blocks. + if (suppressedCalls.length > 0) { + const suppressedOutput = + 'Skipped: structured_output was also requested in this turn and takes precedence as the terminal output contract. Re-issue this call in a separate turn if needed.'; + for (const call of suppressedCalls) { + const responseParts: Part[] = [ + { + functionResponse: { + id: call.callId, + name: call.name, + response: { output: suppressedOutput }, + }, + }, + ]; + adapter.emitToolResult(call, { + callId: call.callId, + responseParts, + resultDisplay: suppressedOutput, + error: undefined, + errorType: undefined, + }); + toolResponseParts.push(...responseParts); + } + } + if (structuredSubmission !== undefined) { // Abort any in-flight background agents so they don't race the // terminal emitResult; structured-output mode is a single-shot // contract and the caller expects a deterministic shutdown. registry.abortAll(); + // Match the regular terminal path's holdback: drain queued + // task notifications to the SDK and finalise one-shot + // monitors before emitResult, so consumers always see a + // task_notification paired with every task_started and don't + // lose monitor lifecycle events when structured-output mode + // exits early. + flushQueuedNotificationsToSdk(localQueue); + finalizeOneShotMonitors(); const metrics = uiTelemetryService.getMetrics(); const usage = computeUsageFromMetrics(metrics); const stats = @@ -616,26 +651,6 @@ export async function runNonInteractive( }); return 0; } - // Retry path: structured_output either wasn't called or its args - // failed validation. If the pre-scan suppressed sibling tool calls - // from this turn, the prior assistant message still contained - // their tool_use blocks — synthesize matching tool_result blocks - // now so the next-turn payload pairs every tool_use with a - // tool_result. Anthropic and OpenAI both reject batches with - // unpaired tool_use entries, which would otherwise turn the - // retry path into an opaque provider-side error. - for (const call of suppressedCalls) { - toolResponseParts.push({ - functionResponse: { - id: call.callId, - name: call.name, - response: { - output: - 'Skipped: structured_output was also requested in this turn and takes precedence as the terminal output contract. Re-issue this call in a separate turn if needed.', - }, - }, - }); - } currentMessages = [{ role: 'user', parts: toolResponseParts }]; } else { // Drain-turns count toward getMaxSessionTurns() for symmetry with the main diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 0ce921db8..5c850964e 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -111,10 +111,8 @@ export type { ExitPlanModeTool, ExitPlanModeParams, } from './tools/exitPlanMode.js'; -export type { - SyntheticOutputTool, - StructuredOutputParams, -} from './tools/syntheticOutput.js'; +export { SyntheticOutputTool } from './tools/syntheticOutput.js'; +export type { StructuredOutputParams } from './tools/syntheticOutput.js'; export type { GlobTool, GlobToolParams, GlobPath } from './tools/glob.js'; export type { GrepTool, GrepToolParams } from './tools/grep.js'; export type { LSTool, LSToolParams, FileEntry } from './tools/ls.js'; diff --git a/packages/core/src/utils/schemaValidator.ts b/packages/core/src/utils/schemaValidator.ts index 74e3c37fb..869ab9fd2 100644 --- a/packages/core/src/utils/schemaValidator.ts +++ b/packages/core/src/utils/schemaValidator.ts @@ -66,10 +66,13 @@ function getValidator(schema: AnySchema): Ajv { export class SchemaValidator { /** * Strictly compiles a schema. Returns an error message if the schema is - * malformed or uses an Ajv version we can't support. Returns null on - * success. Unlike {@link validate}, this does NOT silently skip on - * compile failure — callers (e.g. the CLI's `--json-schema` parser) need - * to surface invalid schemas instead of letting them no-op at runtime. + * malformed or uses unsupported draft/features for our Ajv configuration + * (see {@link getValidator} — `$schema` selects between draft-07 and + * draft-2020-12; anything else falls through to draft-07's compiler). + * Returns null on success. Unlike {@link validate}, this does NOT + * silently skip on compile failure — callers (e.g. the CLI's + * `--json-schema` parser) need to surface invalid schemas instead of + * letting them no-op at runtime. */ static compileStrict(schema: unknown): string | null { if (!schema || typeof schema !== 'object' || Array.isArray(schema)) {