fix(cli,core): tighten --json-schema validation

Closes 3 #3589 review threads:

- Schemas like `{"type":"string"}` and `{"type":"array"}` compiled
  fine (they're valid JSON Schemas in isolation), but the
  `--json-schema` value becomes the synthetic structured_output tool's
  parameter schema and tool-call arguments are object-shaped. Reject
  any non-undefined top-level type that is not "object" so the user
  sees the contract violation at parse time, not as an unrecoverable
  runtime mismatch.

- `SchemaValidator.compileStrict` accepted arrays since
  `typeof [] === 'object'` — Ajv would later emit a confusing error.
  Add an explicit `Array.isArray` guard so the contract stated by
  the function name is honored at the boundary.

- `compileStrict` shared the project-wide Ajv instances configured
  with `strictSchema: false` (intentionally lenient so MCP servers
  can ship custom keywords without breaking runtime validation).
  That leniency is wrong for the `--json-schema` surface — typos
  like `propertees` were silently ignored. Compile inside a dedicated
  `strict: true` Ajv so user-supplied schemas surface mistakes
  immediately.

Tests:
  - jsonSchemaArg: rejects non-object top-level type ("string"/"array").
  - schemaValidator.compileStrict: rejects arrays; flags unknown
    keywords (typos) under strict mode.
This commit is contained in:
wenshao 2026-05-08 11:54:51 +08:00
parent 88b10c20ee
commit 38726567b9
4 changed files with 64 additions and 7 deletions

View file

@ -231,6 +231,19 @@ export function resolveJsonSchemaArg(
);
}
// The schema becomes the parameter schema of the synthetic
// structured_output tool, and tool-call arguments are object-shaped.
// A schema like `{"type":"string"}` would compile fine but be
// unsatisfiable as a tool-call argument — fail at parse time so the
// user sees the contract violation immediately instead of at runtime.
const schemaType = (parsed as { type?: unknown }).type;
if (schemaType !== undefined && schemaType !== 'object') {
throw new FatalConfigError(
`--json-schema top-level type must be "object" (got "${String(schemaType)}"); ` +
'wrap your value under an object property if you need a non-object payload.',
);
}
return parsed as Record<string, unknown>;
}
@ -540,7 +553,7 @@ export async function parseArguments(): Promise<CliArgs> {
.option('json-schema', {
type: 'string',
description:
'JSON Schema that the model\'s final output must conform to ' +
"JSON Schema that the model's final output must conform to " +
'(headless mode only). Accepts a JSON literal or "@path/to/schema.json". ' +
'Registers a synthetic `structured_output` tool; the session ends on ' +
'the first valid call.',

View file

@ -42,9 +42,7 @@ describe('resolveJsonSchemaArg', () => {
});
it('throws on invalid JSON', () => {
expect(() => resolveJsonSchemaArg('{not json}')).toThrow(
/not valid JSON/,
);
expect(() => resolveJsonSchemaArg('{not json}')).toThrow(/not valid JSON/);
});
it('throws when the parsed value is not an object', () => {
@ -78,4 +76,15 @@ describe('resolveJsonSchemaArg', () => {
);
expect(schema).toBeDefined();
});
it('rejects schemas whose top-level type is not "object"', () => {
// The schema becomes structured_output's parameter schema; tool args
// are object-shaped, so non-object roots can never be satisfied.
expect(() => resolveJsonSchemaArg('{"type":"string"}')).toThrow(
/top-level type must be "object"/,
);
expect(() => resolveJsonSchemaArg('{"type":"array"}')).toThrow(
/top-level type must be "object"/,
);
});
});

View file

@ -478,5 +478,28 @@ describe('SchemaValidator', () => {
expect(SchemaValidator.compileStrict(undefined)).toMatch(/JSON object/);
expect(SchemaValidator.compileStrict('a string')).toMatch(/JSON object/);
});
it('rejects arrays even though typeof === "object"', () => {
// Arrays satisfy `typeof === 'object'` but are not valid JSON Schema
// root values; the prior guard accepted them and let the misleading
// error surface from Ajv much later.
expect(SchemaValidator.compileStrict([])).toMatch(/JSON object/);
expect(SchemaValidator.compileStrict([{ type: 'string' }])).toMatch(
/JSON object/,
);
});
it('flags unknown keywords (typos) under strict mode', () => {
// The shared SchemaValidator.validate is intentionally lenient
// (`strictSchema: false`) so MCP-style custom keywords don't break
// runtime validation. compileStrict is the explicit user-supplied
// surface and should NOT swallow typos like `propertees`.
const err = SchemaValidator.compileStrict({
type: 'object',
propertees: { foo: { type: 'string' } },
});
expect(err).not.toBeNull();
expect(err).toMatch(/propert/i);
});
});
});

View file

@ -72,12 +72,24 @@ export class SchemaValidator {
* to surface invalid schemas instead of letting them no-op at runtime.
*/
static compileStrict(schema: unknown): string | null {
if (!schema || typeof schema !== 'object') {
if (!schema || typeof schema !== 'object' || Array.isArray(schema)) {
return 'schema must be a JSON object';
}
const validator = getValidator(schema as AnySchema);
// Use a dedicated strict-mode Ajv so typos like `propertees` raise
// instead of being silently ignored. The shared ajvDefault/ajv2020
// instances run with `strictSchema: false` so unknown MCP keywords
// don't break runtime validation — that leniency is wrong for
// explicit user-supplied schemas where `compileStrict` is exactly
// the surface meant to surface mistakes.
const isDraft2020 =
'$schema' in schema &&
(schema as { $schema?: string }).$schema === DRAFT_2020_12_SCHEMA;
const strictAjv: Ajv = isDraft2020
? new Ajv2020Class({ strict: true })
: new AjvClass({ strict: true });
addFormatsFunc(strictAjv);
try {
validator.compile(schema as AnySchema);
strictAjv.compile(schema as AnySchema);
return null;
} catch (error) {
return error instanceof Error ? error.message : String(error);