feat(effect-zod): add ZodPreprocess annotation for pre-parse transforms (#23222)

This commit is contained in:
Kit Langton 2026-04-17 22:02:37 -04:00 committed by GitHub
parent 81b7b58a5e
commit 1fae784b81
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 157 additions and 2 deletions

View file

@ -8,6 +8,43 @@ import z from "zod"
*/
export const ZodOverride: unique symbol = Symbol.for("effect-zod/override")
/**
* Annotation key for a pre-parse transform that runs on the raw input before
* the derived Zod schema validates it. The walker emits
* `z.preprocess(fn, inner)` when this annotation is present.
*
* Models zod's `z.preprocess(fn, schema)` pattern — useful when the schema
* needs to inspect the user's raw input (e.g. to capture insertion order)
* before `Schema.Struct` canonicalises the object.
*
* TODO: This exists to paper over a missing Effect Schema feature. The
* parser canonicalises open struct output (known fields first in
* declaration order, then catchall fields) before any user-defined
* transform sees the value, and there is no pre-parse hook, so the
* user's original property insertion order is gone by the time
* `Schema.decodeTo` or `middlewareDecoding` runs.
*
* That canonicalisation is a reasonable default, but `config/permission.ts`
* encodes rule precedence in the user's JSON key order (`evaluate.ts`
* uses `findLast`, so later entries win), which the canonicalisation
* silently destroys.
*
* The cleanest upstream fix would be either:
*
* 1. A `preserveInputOrder` option on `Schema.Struct` /
* `Schema.StructWithRest` that keeps the input's insertion order in
* the parsed object (opt-in; canonical order stays default).
* 2. A generic pre-parse hook (`Schema.preprocess(schema, fn)` or a
* transformation whose decode receives the raw `unknown`).
*
* Either of those would let us delete `ZodPreprocess` and the
* `__originalKeys` hack. Alternatively, the permission model could move
* to specificity-based precedence (exact keys beat wildcards) or an
* explicit ordered array of rules, which removes the ordering
* dependency at the data-model level.
*/
export const ZodPreprocess: unique symbol = Symbol.for("effect-zod/preprocess")
// AST nodes are immutable and frequently shared across schemas (e.g. a single
// Schema.Class embedded in multiple parents). Memoizing by node identity
// avoids rebuilding equivalent Zod subtrees and keeps derived children stable
@ -47,7 +84,9 @@ function walkUncached(ast: SchemaAST.AST): z.ZodTypeAny {
const hasEncoding = ast.encoding?.length && ast._tag !== "Declaration"
const hasTransform = hasEncoding && !(SchemaAST.isOptional(ast) && extractDefault(ast) !== undefined)
const base = hasTransform ? encoded(ast) : body(ast)
const out = ast.checks?.length ? applyChecks(base, ast.checks, ast) : base
const checked = ast.checks?.length ? applyChecks(base, ast.checks, ast) : base
const preprocess = (ast.annotations as { [ZodPreprocess]?: (val: unknown) => unknown } | undefined)?.[ZodPreprocess]
const out = preprocess ? z.preprocess(preprocess, checked) : checked
const desc = SchemaAST.resolveDescription(ast)
const ref = SchemaAST.resolveIdentifier(ast)
const described = desc ? out.describe(desc) : out

View file

@ -2,7 +2,7 @@ import { describe, expect, test } from "bun:test"
import { Effect, Schema, SchemaGetter } from "effect"
import z from "zod"
import { zod, ZodOverride } from "../../src/util/effect-zod"
import { zod, ZodOverride, ZodPreprocess } from "../../src/util/effect-zod"
function json(schema: z.ZodTypeAny) {
const { $schema: _, ...rest } = z.toJSONSchema(schema)
@ -751,4 +751,120 @@ describe("util.effect-zod", () => {
expect(schema.parse({ foo: "hi" })).toEqual({ foo: "hi" })
})
})
describe("ZodPreprocess annotation", () => {
  // Shared pre-parse hook for the cases that model the permission.ts
  // `__originalKeys` pattern: for a non-null, non-array object it records the
  // caller's key insertion order under `__keys` and then spreads the original
  // entries on top (so an input-supplied `__keys` would win). Every other
  // value is handed back untouched.
  const captureKeyOrder = (input: unknown) => {
    if (typeof input !== "object" || input === null || Array.isArray(input)) return input
    return { __keys: Object.keys(input), ...(input as Record<string, unknown>) }
  }

  test("preprocess runs on raw input before the inner schema parses", () => {
    // The hook must see the object exactly as the caller wrote it, i.e.
    // before Schema parsing reorders the keys into declaration order.
    const Tracked = Schema.Struct({
      __keys: Schema.optional(Schema.mutable(Schema.Array(Schema.String))),
      a: Schema.optional(Schema.String),
      b: Schema.optional(Schema.String),
    }).annotate({ [ZodPreprocess]: captureKeyOrder })

    const result = zod(Tracked).parse({ b: "1", a: "2" }) as {
      __keys?: string[]
      a?: string
      b?: string
    }

    // Input order was b, a; the captured key list must reflect that.
    expect(result.__keys).toEqual(["b", "a"])
    expect(result.a).toBe("2")
    expect(result.b).toBe("1")
  })

  test("preprocess does not transform already-shaped input", () => {
    // This hook only injects `__keys` when the incoming object lacks one.
    // An input that already carries `__keys` fails the guard and is passed
    // through as-is, so the caller-supplied list survives parsing.
    const injectKeysIfMissing = (input: unknown) => {
      if (typeof input !== "object" || input === null || "__keys" in input) return input
      return { __keys: Object.keys(input), ...(input as Record<string, unknown>) }
    }
    const Tracked = Schema.Struct({
      __keys: Schema.optional(Schema.mutable(Schema.Array(Schema.String))),
      a: Schema.optional(Schema.String),
    }).annotate({ [ZodPreprocess]: injectKeysIfMissing })

    const result = zod(Tracked).parse({ __keys: ["existing"], a: "hi" }) as {
      __keys?: string[]
      a?: string
    }

    expect(result.__keys).toEqual(["existing"])
  })

  test("preprocess composes with a union (either object or string)", () => {
    // Same shape as permission.ts: the value is either a bare action string
    // or an object of per-tool actions, with the hook annotated on the union.
    const Action = Schema.Literals(["ask", "allow", "deny"])
    const Rules = Schema.Struct({
      __keys: Schema.optional(Schema.mutable(Schema.Array(Schema.String))),
      read: Schema.optional(Action),
      write: Schema.optional(Action),
    })
    const Permission = Schema.Union([Rules, Action]).annotate({ [ZodPreprocess]: captureKeyOrder })
    const schema = zod(Permission)

    // A string is not an object, so the hook returns it unchanged.
    expect(schema.parse("allow")).toBe("allow")

    // An object gets `__keys` injected in the caller's insertion order.
    const result = schema.parse({ write: "allow", read: "deny" }) as {
      __keys?: string[]
      read?: string
      write?: string
    }
    expect(result.__keys).toEqual(["write", "read"])
    expect(result.write).toBe("allow")
    expect(result.read).toBe("deny")
  })

  test("JSON Schema output comes from the inner schema — preprocess is runtime-only", () => {
    // An identity hook is enough here: only the emitted JSON Schema matters.
    const Plain = Schema.Struct({
      a: Schema.optional(Schema.String),
      b: Schema.optional(Schema.Number),
    }).annotate({ [ZodPreprocess]: (v: unknown) => v })

    const derived = zod(Plain)
    const shape = json(derived) as any

    expect(shape.type).toBe("object")
    expect(shape.properties.a.type).toBe("string")
    expect(shape.properties.b.type).toBe("number")
  })

  test("identifier + description propagate through the preprocess wrapper", () => {
    // The identifier and description annotations must still be readable from
    // the derived schema's metadata when a preprocess hook is also present.
    const Named = Schema.Struct({
      x: Schema.optional(Schema.String),
    }).annotate({
      identifier: "WithPreproc",
      description: "A schema with preprocess",
      [ZodPreprocess]: (v: unknown) => v,
    })

    const meta = zod(Named).meta()

    expect(meta?.ref).toBe("WithPreproc")
    expect(meta?.description).toBe("A schema with preprocess")
  })

  test("preprocess inside a struct field applies only to that field", () => {
    // The hook is annotated on the `name` field only; `raw` has no hook and
    // must come back as the number it went in as.
    const Stringified = Schema.String.annotate({
      [ZodPreprocess]: (v: unknown) => (typeof v === "number" ? String(v) : v),
    })
    const schema = zod(Schema.Struct({ name: Stringified, raw: Schema.Number }))

    expect(schema.parse({ name: 42, raw: 7 })).toEqual({ name: "42", raw: 7 })
  })
})
})