From 62038527ccd2f6cb7a0a5bb886fa100173aa4570 Mon Sep 17 00:00:00 2001
From: wenshao <shaojin.wensj@alibaba-inc.com>
Date: Tue, 5 May 2026 01:00:53 +0800
Subject: [PATCH] fix: address Copilot follow-up review on --json-schema
 scaffolding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Five small but real findings flagged on the latest pass:

1. core/src/index.ts re-exported `SyntheticOutputTool` via `export type`,
   but it's a runtime class — that erased it from the emitted JS and
   would break value imports. Split into a value `export { ... }` and a
   `export type { StructuredOutputParams }`.

2. The structured-output success path returned without flushing
   `localQueue` notifications or finalising one-shot monitors. If a
   background task had already emitted `task_started`, exiting here
   could drop its paired `task_notification` and leave SDK consumers
   with unpaired lifecycle events. Mirror the regular terminal path's
   `flushQueuedNotificationsToSdk` + `finalizeOneShotMonitors` calls
   before `emitResult`.

3. `schemaRootAcceptsObject` ignored the `not` keyword, so
   `{not:{type:"object"}}` (which forbids every object value) slipped
   through. Add a best-effort `not` check that rejects when
   `not.type` directly excludes object. Deeper negated patterns still
   fall through to Ajv at runtime.

4. `compileStrict`'s JSDoc claimed it errored on "Ajv versions we can't
   support", but the function doesn't actually check Ajv versions. Reword
   to "malformed or uses unsupported draft/features for our Ajv
   configuration" so the contract matches the implementation.

5. The pre-scan suppressed sibling tool calls but only synthesised
   tool_result events for them on the retry path — the success path
   left those tool_use blocks unpaired in the emitted JSONL/stream-json
   event log. Move the synthesis after the for loop so it runs for both
   the success break and the validation-failure fall-through; the event
   log is now consistent regardless of which path the run takes.

Tests cover the new \`not\`-rejection paths, the success-path tool_result
synthesis, and the existing retry-pairing test still passes against the
restructured emit ordering.
---
 packages/cli/src/config/config.ts             | 28 +++++++
 packages/cli/src/config/jsonSchemaArg.test.ts | 18 +++++
 packages/cli/src/nonInteractiveCli.test.ts    | 76 +++++++++++++-----
 packages/cli/src/nonInteractiveCli.ts         | 79 +++++++++++--------
 packages/core/src/index.ts                    |  6 +-
 packages/core/src/utils/schemaValidator.ts    | 11 ++-
 6 files changed, 158 insertions(+), 60 deletions(-)

diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
index 50cf1e0b6..c9b7e0945 100755
--- a/packages/cli/src/config/config.ts
+++ b/packages/cli/src/config/config.ts
@@ -231,6 +231,34 @@ function schemaRootAcceptsObject(schema: Record<string, unknown>): boolean {
     if (!everyBranchAcceptsObject) return false;
   }
 
+  // Best-effort `not` handling: when `not` directly forbids object via its
+  // own `type` keyword (e.g. `{not:{type:"object"}}` or
+  // `{not:{type:["object","null"]}}`), the schema can never be satisfied
+  // by an object — reject. We don't try to do full satisfiability analysis
+  // for arbitrary `not` schemas (e.g. `not:{const:"foo"}` is fine, but
+  // `not:{anyOf:[{type:"object"},…]}` would also reject objects); those
+  // fall through to Ajv at runtime.
+  const notSchema = schema['not'];
+  if (
+    typeof notSchema === 'object' &&
+    notSchema !== null &&
+    !Array.isArray(notSchema)
+  ) {
+    const notType = (notSchema as Record<string, unknown>)['type'];
+    if (notType !== undefined) {
+      const types = Array.isArray(notType) ? notType : [notType];
+      // If `not.type` *only* lists types that include "object", every
+      // object value matches `not.type`, so the negation rejects every
+      // object. The simplest sound check: if "object" is in the list at
+      // all and there's no sibling positive constraint that would allow
+      // an object some other way, reject. We've already validated the
+      // sibling type / anyOf / oneOf / allOf above — at this point we
+      // know they at least permit objects, so a `not.type` containing
+      // "object" still excludes them.
+      if (types.includes('object')) return false;
+    }
+  }
+
   // No narrowing at the root — lenient default, treated as object-compatible.
   return true;
 }
diff --git a/packages/cli/src/config/jsonSchemaArg.test.ts b/packages/cli/src/config/jsonSchemaArg.test.ts
index 2befaa709..7b18b4ee0 100644
--- a/packages/cli/src/config/jsonSchemaArg.test.ts
+++ b/packages/cli/src/config/jsonSchemaArg.test.ts
@@ -181,4 +181,22 @@ describe('resolveJsonSchemaArg', () => {
     );
     expect(schema).toBeDefined();
   });
+
+  it('rejects a root `not` that directly forbids object', () => {
+    // `not:{type:"object"}` excludes every object value, so the schema is
+    // unsatisfiable for tool-call args. Best-effort check — only inspects
+    // `not.type`; deeper negated patterns fall through to Ajv at runtime.
+    expect(() => resolveJsonSchemaArg('{"not":{"type":"object"}}')).toThrow(
+      /must accept object-typed values/,
+    );
+    expect(() =>
+      resolveJsonSchemaArg('{"not":{"type":["object","null"]}}'),
+    ).toThrow(/must accept object-typed values/);
+  });
+
+  it('accepts a root `not` whose negated type does not exclude object', () => {
+    // `not:{type:"string"}` only forbids strings — objects are still fine.
+    const schema = resolveJsonSchemaArg('{"not":{"type":"string"}}');
+    expect(schema).toBeDefined();
+  });
 });
diff --git a/packages/cli/src/nonInteractiveCli.test.ts b/packages/cli/src/nonInteractiveCli.test.ts
index 70b8748fd..13f9c41a6 100644
--- a/packages/cli/src/nonInteractiveCli.test.ts
+++ b/packages/cli/src/nonInteractiveCli.test.ts
@@ -2421,6 +2421,25 @@ describe('runNonInteractive', () => {
   });
 
   describe('--json-schema structured output', () => {
+    // Helper: walk an emitted event and extract the first tool_use_id when
+    // it represents a tool_result block. Returns undefined for any other
+    // event shape.
+    const extractToolResultId = (event: unknown): string | undefined => {
+      if (typeof event !== 'object' || event === null) return undefined;
+      const e = event as {
+        type?: unknown;
+        message?: { content?: unknown };
+      };
+      if (e.type !== 'user') return undefined;
+      const content = e.message?.content;
+      if (!Array.isArray(content) || content.length === 0) return undefined;
+      const block = content[0] as { type?: unknown; tool_use_id?: unknown };
+      if (block?.type !== 'tool_result') return undefined;
+      return typeof block.tool_use_id === 'string'
+        ? block.tool_use_id
+        : undefined;
+    };
+
     it('stops executing remaining tool calls from the same turn once structured_output succeeds', async () => {
       (mockConfig.getJsonSchema as Mock).mockReturnValue({
         type: 'object',
@@ -2506,22 +2525,23 @@ describe('runNonInteractive', () => {
       // torn down before we emit the terminal result.
       expect(abortAllSpy).toHaveBeenCalledTimes(1);
 
-      // The emitted result must carry the submitted args under `result` as
-      // the JSON-stringified payload (the headless JSON formatter encodes
-      // the structured submission so SDK consumers always see a string here,
-      // matching how text-mode `result` is also a string).
-      const result = writes
+      const events = writes
         .join('')
         .split('\n')
         .filter((line) => line.trim().length > 0)
         .map((line) => JSON.parse(line))
-        .flat()
-        .find(
-          (m: unknown) =>
-            typeof m === 'object' &&
-            m !== null &&
-            (m as { type?: string }).type === 'result',
-        );
+        .flat();
+
+      // The emitted result must carry the submitted args under `result` as
+      // the JSON-stringified payload (the headless JSON formatter encodes
+      // the structured submission so SDK consumers always see a string here,
+      // matching how text-mode `result` is also a string).
+      const result = events.find(
+        (m: unknown) =>
+          typeof m === 'object' &&
+          m !== null &&
+          (m as { type?: string }).type === 'result',
+      );
       expect(result).toBeDefined();
       expect(result.is_error).toBe(false);
       expect(typeof result.result).toBe('string');
@@ -2529,6 +2549,14 @@ describe('runNonInteractive', () => {
       // The raw object is also exposed under `structured_result` for SDK
       // consumers that don't want to re-parse the stringified payload.
       expect(result.structured_result).toEqual(structuredArgs);
+
+      // The suppressed trailing tool_use must have a synthesised
+      // tool_result so the event log pairs every tool_use with a
+      // tool_result, even on the success path.
+      const trailingToolResult = events.find(
+        (m: unknown) => extractToolResultId(m) === 'tool-trailing',
+      );
+      expect(trailingToolResult).toBeDefined();
     });
 
     it('skips side-effecting tool calls that precede structured_output in the same turn', async () => {
@@ -2611,21 +2639,29 @@ describe('runNonInteractive', () => {
       expect(mockGeminiClient.sendMessageStream).toHaveBeenCalledTimes(1);
       expect(abortAllSpy).toHaveBeenCalledTimes(1);
 
-      const result = writes
+      const events = writes
         .join('')
         .split('\n')
         .filter((line) => line.trim().length > 0)
         .map((line) => JSON.parse(line))
-        .flat()
-        .find(
-          (m: unknown) =>
-            typeof m === 'object' &&
-            m !== null &&
-            (m as { type?: string }).type === 'result',
-        );
+        .flat();
+      const result = events.find(
+        (m: unknown) =>
+          typeof m === 'object' &&
+          m !== null &&
+          (m as { type?: string }).type === 'result',
+      );
       expect(result).toBeDefined();
       expect(result.is_error).toBe(false);
       expect(result.structured_result).toEqual(structuredArgs);
+
+      // The suppressed leading tool_use must have a synthesised
+      // tool_result event so the event log pairs every tool_use with a
+      // tool_result on the success path.
+      const leadingToolResult = events.find(
+        (m: unknown) => extractToolResultId(m) === 'tool-leading',
+      );
+      expect(leadingToolResult).toBeDefined();
     });
 
     it('keeps the session running when structured_output args fail validation so the model can retry', async () => {
diff --git a/packages/cli/src/nonInteractiveCli.ts b/packages/cli/src/nonInteractiveCli.ts
index f678dadfb..205e11a99 100644
--- a/packages/cli/src/nonInteractiveCli.ts
+++ b/packages/cli/src/nonInteractiveCli.ts
@@ -489,12 +489,12 @@ export async function runNonInteractive(
 
           // If --json-schema is active and the model emitted a
           // `structured_output` call in the same assistant turn as other
-          // tools, the structured call is the terminal contract — pre-scan
-          // the batch and execute ONLY the first `structured_output`,
-          // suppressing both leading and trailing tool calls. Without this,
+          // tools, the structured call is the terminal contract — execute
+          // ONLY the first `structured_output` and synthesise a "skipped"
+          // tool_result for every other call in the batch. Without this,
           // a batch like `[write_file(...), structured_output(...)]` would
-          // run the side-effecting tool before the run is accepted, and an
-          // invalid structured_output mid-batch (`[structured_output(bad),
+          // run the side-effecting tool before the run is accepted, and
+          // an invalid structured_output mid-batch (`[structured_output(bad),
           // write_file(...)]`) would still fall through to the trailing
           // tool before the retry turn.
           let requestsToExecute = toolCallRequests;
@@ -583,22 +583,57 @@ export async function runNonInteractive(
               !toolResponse.error
             ) {
               // Honour the "first valid call ends the session" contract.
-              // The pre-scan above already filtered the batch to the first
-              // structured_output, so any other tool_use blocks the model
-              // emitted in this turn lack a matching tool_result — which is
-              // consistent with how other terminal paths (max-turns,
-              // cancellation) leave the stream. The break is after the
-              // responseParts/modelOverride capture above so future changes
-              // to SyntheticOutputTool can't silently drop those signals.
+              // The break is after the responseParts/modelOverride capture
+              // above so future changes to SyntheticOutputTool can't
+              // silently drop those signals.
               structuredSubmission = finalRequestInfo.args;
               break;
             }
           }
+
+          // Synthesise tool_result events + retry parts for any sibling
+          // tool calls suppressed by the pre-scan. Runs for both the
+          // success and retry paths so the emitted event log pairs every
+          // tool_use the model produced with a tool_result, AND the
+          // retry-turn payload (when reached) doesn't leave Anthropic /
+          // OpenAI staring at unpaired tool_use blocks.
+          if (suppressedCalls.length > 0) {
+            const suppressedOutput =
+              'Skipped: structured_output was also requested in this turn and takes precedence as the terminal output contract. Re-issue this call in a separate turn if needed.';
+            for (const call of suppressedCalls) {
+              const responseParts: Part[] = [
+                {
+                  functionResponse: {
+                    id: call.callId,
+                    name: call.name,
+                    response: { output: suppressedOutput },
+                  },
+                },
+              ];
+              adapter.emitToolResult(call, {
+                callId: call.callId,
+                responseParts,
+                resultDisplay: suppressedOutput,
+                error: undefined,
+                errorType: undefined,
+              });
+              toolResponseParts.push(...responseParts);
+            }
+          }
+
           if (structuredSubmission !== undefined) {
             // Abort any in-flight background agents so they don't race the
             // terminal emitResult; structured-output mode is a single-shot
             // contract and the caller expects a deterministic shutdown.
             registry.abortAll();
+            // Match the regular terminal path's holdback: drain queued
+            // task notifications to the SDK and finalise one-shot
+            // monitors before emitResult, so consumers always see a
+            // task_notification paired with every task_started and don't
+            // lose monitor lifecycle events when structured-output mode
+            // exits early.
+            flushQueuedNotificationsToSdk(localQueue);
+            finalizeOneShotMonitors();
             const metrics = uiTelemetryService.getMetrics();
             const usage = computeUsageFromMetrics(metrics);
             const stats =
@@ -616,26 +651,6 @@ export async function runNonInteractive(
             });
             return 0;
           }
-          // Retry path: structured_output either wasn't called or its args
-          // failed validation. If the pre-scan suppressed sibling tool calls
-          // from this turn, the prior assistant message still contained
-          // their tool_use blocks — synthesize matching tool_result blocks
-          // now so the next-turn payload pairs every tool_use with a
-          // tool_result. Anthropic and OpenAI both reject batches with
-          // unpaired tool_use entries, which would otherwise turn the
-          // retry path into an opaque provider-side error.
-          for (const call of suppressedCalls) {
-            toolResponseParts.push({
-              functionResponse: {
-                id: call.callId,
-                name: call.name,
-                response: {
-                  output:
-                    'Skipped: structured_output was also requested in this turn and takes precedence as the terminal output contract. Re-issue this call in a separate turn if needed.',
-                },
-              },
-            });
-          }
           currentMessages = [{ role: 'user', parts: toolResponseParts }];
         } else {
           // Drain-turns count toward getMaxSessionTurns() for symmetry with the main
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 0ce921db8..5c850964e 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -111,10 +111,8 @@ export type {
   ExitPlanModeTool,
   ExitPlanModeParams,
 } from './tools/exitPlanMode.js';
-export type {
-  SyntheticOutputTool,
-  StructuredOutputParams,
-} from './tools/syntheticOutput.js';
+export { SyntheticOutputTool } from './tools/syntheticOutput.js';
+export type { StructuredOutputParams } from './tools/syntheticOutput.js';
 export type { GlobTool, GlobToolParams, GlobPath } from './tools/glob.js';
 export type { GrepTool, GrepToolParams } from './tools/grep.js';
 export type { LSTool, LSToolParams, FileEntry } from './tools/ls.js';
diff --git a/packages/core/src/utils/schemaValidator.ts b/packages/core/src/utils/schemaValidator.ts
index 74e3c37fb..869ab9fd2 100644
--- a/packages/core/src/utils/schemaValidator.ts
+++ b/packages/core/src/utils/schemaValidator.ts
@@ -66,10 +66,13 @@ function getValidator(schema: AnySchema): Ajv {
 export class SchemaValidator {
   /**
    * Strictly compiles a schema. Returns an error message if the schema is
-   * malformed or uses an Ajv version we can't support. Returns null on
-   * success. Unlike {@link validate}, this does NOT silently skip on
-   * compile failure — callers (e.g. the CLI's `--json-schema` parser) need
-   * to surface invalid schemas instead of letting them no-op at runtime.
+   * malformed or uses unsupported draft/features for our Ajv configuration
+   * (see {@link getValidator} — `$schema` selects between draft-07 and
+   * draft-2020-12; anything else falls through to draft-07's compiler).
+   * Returns null on success. Unlike {@link validate}, this does NOT
+   * silently skip on compile failure — callers (e.g. the CLI's
+   * `--json-schema` parser) need to surface invalid schemas instead of
+   * letting them no-op at runtime.
    */
   static compileStrict(schema: unknown): string | null {
     if (!schema || typeof schema !== 'object' || Array.isArray(schema)) {