refactor(core): gate cache-scope beta on body presence, not just predicate

#4020 review (Copilot): the comment promised "beta and body-side
scope: 'global' field always ship together" but the gate was just
`useGlobalCacheScope()`. In the degenerate case where the predicate is
true but the request body has no system text AND no tools, the beta
would still ship without any matching `cache_control.scope: 'global'`
on the wire — overstating the contract and shipping dead weight.

- New `hasGlobalCacheScopeOnWire(req)` scans the assembled request body
  (system block when shaped as `TextBlockParam[]`, tools array) for any
  `cache_control: { …, scope: 'global' }` entry. `buildPerRequestHeaders`
  gates the `prompt-caching-scope-2026-01-05` beta on this scan, so the
  beta and the body field share a single source of truth. No window
  between sampling the predicate and emitting the body where the two
  could diverge.
- `useGlobalCacheScope()` is still sampled once per `buildRequest` and
  threaded into the converter to decide whether to ATTACH
  `scope: 'global'` to the body. The body-scan downstream then derives
  the beta from what actually landed.

Tests:
- New: empty systemInstruction + no tools + Anthropic-native + cache on
  → beta NOT shipped (degenerate body-scan case).
- New: empty systemInstruction + non-empty tools → beta shipped
  (tool scope:'global' triggers the scan).
- Existing per-request beta tests now include a `systemInstruction` so
  the body has the scope field; degenerate case is covered by the new
  dedicated test.

Also tightened two stale comments (#3217834451, #3217834505) that
claimed `Config.setModel()` mutates both `enableCacheControl` and
`baseUrl` in place — only `enableCacheControl` is hot-mutated (qwen-oauth
path); non-qwen-oauth providers recreate the generator on refresh, so
`baseUrl` is captured fresh at construct time. Comments now describe the
real in-place mutation and note the qwen-oauth boundary.
This commit is contained in:
wenshao 2026-05-11 18:00:38 +08:00
parent ea4ce13dfd
commit 491a441f93
3 changed files with 174 additions and 37 deletions

View file

@ -510,6 +510,13 @@ describe('AnthropicContentGenerator', () => {
schemaCompliance: 'auto',
};
// Default request shape carries a systemInstruction so the converter
// attaches `cache_control: { …, scope: 'global' }` to the system text
// — that's what `buildPerRequestHeaders` scans to decide whether the
// `prompt-caching-scope-2026-01-05` beta ships. Without a system or
// tools the body has nothing to attach scope to, and the beta is
// correctly suppressed (covered by a separate degenerate-case test
// below). Tests can merge their own `requestConfig` to override.
async function callOnce(
config: ContentGeneratorConfig,
requestConfig?: object,
@ -524,7 +531,10 @@ describe('AnthropicContentGenerator', () => {
await generator.generateContent({
model: 'models/ignored',
contents: 'Hi',
...(requestConfig ? { config: requestConfig } : {}),
config: {
systemInstruction: 'sys',
...(requestConfig ?? {}),
},
} as unknown as GenerateContentParameters);
const [, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs;
return ((options as { headers?: Record<string, string> })?.headers ||
@ -680,6 +690,69 @@ describe('AnthropicContentGenerator', () => {
expect(userBlocks2[0]).not.toHaveProperty('cache_control');
});
it('suppresses the cache-scope beta when the body has no scope field (empty system + no tools)', async () => {
// The beta gate is a body-scan over `req.system` / `req.tools` for
// any `cache_control.scope === 'global'` entry, not a re-read of
// the `useGlobalCacheScope()` predicate. So a request with no
// systemInstruction AND no tools — predicate true but no body
// surface to attach scope to — correctly omits the beta.
const { AnthropicContentGenerator } = await importGenerator();
anthropicState.createImpl.mockResolvedValue({
id: 'msg-1',
model: 'claude-test',
content: [{ type: 'text', text: 'ok' }],
});
const generator = new AnthropicContentGenerator(
{ ...baseConfig, reasoning: false },
mockConfig,
);
await generator.generateContent({
model: 'models/ignored',
contents: 'Hi',
// No systemInstruction, no tools.
} as unknown as GenerateContentParameters);
const [, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs;
const reqHeaders = ((options as { headers?: Record<string, string> })
?.headers || {}) as Record<string, string>;
expect(reqHeaders['anthropic-beta']).toBeUndefined();
});
it('ships the cache-scope beta when only tools (no systemInstruction) carry scope:"global"', async () => {
// Mirror of the above: scope:'global' on the last tool is enough
// for the body-scan to fire, even with no systemInstruction.
const { AnthropicContentGenerator } = await importGenerator();
anthropicState.createImpl.mockResolvedValue({
id: 'msg-1',
model: 'claude-test',
content: [{ type: 'text', text: 'ok' }],
});
const generator = new AnthropicContentGenerator(
{ ...baseConfig, reasoning: false },
mockConfig,
);
await generator.generateContent({
model: 'models/ignored',
contents: 'Hi',
config: {
tools: [
{
functionDeclarations: [
{ name: 'get_weather', description: 'Get weather' },
],
},
],
},
} as unknown as GenerateContentParameters);
const [, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs;
const reqHeaders = ((options as { headers?: Record<string, string> })
?.headers || {}) as Record<string, string>;
expect(reqHeaders['anthropic-beta']).toBe(
'prompt-caching-scope-2026-01-05',
);
});
it('strips the cache-scope beta and scope:"global" field on non-Anthropic baseURLs', async () => {
// Symmetry with the auth/identity gate: the
// `prompt-caching-scope-2026-01-05` beta and the body-side
@ -872,6 +945,12 @@ describe('AnthropicContentGenerator', () => {
await generator.generateContent({
model: 'models/ignored',
contents: 'Hi',
// Include a system instruction so the converter attaches
// `cache_control: { …, scope: 'global' }` on the system block —
// the beta-header builder body-scans for that field, so a
// realistic request shape is needed to observe the
// `prompt-caching-scope-2026-01-05` beta.
config: { systemInstruction: 'sys' },
} as unknown as GenerateContentParameters);
// defaultHeaders carries User-Agent and customHeaders (not beta).
@ -917,6 +996,10 @@ describe('AnthropicContentGenerator', () => {
const stream = await generator.generateContentStream({
model: 'models/ignored',
contents: 'Hi',
// See the systemInstruction note in the non-streaming sibling
// test above — the body-scan beta gate needs an actual scope:
// 'global' field on the wire to fire.
config: { systemInstruction: 'sys' },
} as unknown as GenerateContentParameters);
// Drain the stream so create() has been called.
for await (const _chunk of stream) {
@ -1308,6 +1391,11 @@ describe('AnthropicContentGenerator', () => {
await generator.generateContent({
model: 'models/ignored',
contents: 'Hello',
// Include systemInstruction so the body carries a
// `cache_control: { scope: 'global' }` field — the beta gate
// is now a body-scan, so the test needs an actual scope field
// on the wire to observe the `prompt-caching-scope` flag.
config: { systemInstruction: 'sys' },
} as unknown as GenerateContentParameters);
const [req, options] =

View file

@ -349,18 +349,22 @@ export class AnthropicContentGenerator implements ContentGenerator {
betas.push('effort-2025-11-24');
}
// Enable global prompt cache scope so identical system/tool prefixes
// are cached across sessions, greatly improving cross-session cache hit
// rates. The beta and the body-side `scope: 'global'` field always
// ship together — gated on `enableCacheControl !== false` AND
// Anthropic-native baseURL — so a DeepSeek/IdeaLab proxy stays on its
// pre-PR per-session caching shape rather than receiving an
// Anthropic-specific extension it may not understand. Symmetric with
// the auth/identity gate (`useProxyIdentity`) so all Anthropic-only
// wire-shape extensions live behind one predicate. Disabling
// `enableCacheControl` also disables the gate, preserving the
// `betas.length === 0` early-return below for the all-disabled case.
if (this.useGlobalCacheScope()) {
// The `prompt-caching-scope-2026-01-05` beta is meaningful only when
// the body actually carries a `cache_control: { …, scope: 'global' }`
// entry. The converter emits those entries on the system text block
// and the last tool entry when `useGlobalCacheScope` is true (gated
// on `enableCacheControl !== false` AND Anthropic-native baseURL).
// Scan the assembled request body for that field rather than
// re-deriving the gate here, so:
// 1. The beta and the body-side field share a single source of
// truth — there's no window between sampling the predicate and
// emitting the body where the two could diverge.
// 2. The degenerate empty-system + no-tools case (predicate true,
// body has nothing to attach scope to) doesn't ship the beta as
// dead weight.
// 3. Anthropic-compatible proxies that disable cache stay clean —
// no body-side scope field means no beta either.
if (this.hasGlobalCacheScopeOnWire(anthropicRequest)) {
betas.push('prompt-caching-scope-2026-01-05');
}
@ -370,19 +374,23 @@ export class AnthropicContentGenerator implements ContentGenerator {
}
/**
* Whether the global prompt cache scope (the
* `prompt-caching-scope-2026-01-05` beta + the body-side
* `scope: 'global'` field) should be active for this request.
* Requires both `enableCacheControl !== false` AND an Anthropic-native
* baseURL these two outputs always travel together. Computed per
* request because `Config.handleModelChange()` hot-updates
* Whether to ATTACH the body-side `scope: 'global'` field on
* `cache_control` entries this request. Requires both
* `enableCacheControl !== false` AND an Anthropic-native baseURL.
* Computed per request: `Config.handleModelChange()` hot-updates
* `enableCacheControl` in-place on the qwen-oauth path (without
* recreating the ContentGenerator); for non-qwen-oauth providers a
* model change goes through the refresh path which DOES recreate
* the ContentGenerator (so `baseUrl` is captured fresh at construct
* time, not mutated). Either way reading both fields each request
* is the right defensive choice cheap and avoids stale-cache
* surprises if the hot-update list ever expands.
* recreating the ContentGenerator); non-qwen-oauth providers refresh
* via generator recreation, which captures `baseUrl` fresh at
* construct time (not mutated). Reading both fields each request is
* the right defense cheap and avoids stale-cache surprises if the
* hot-update list ever expands.
*
* The matching `prompt-caching-scope-2026-01-05` beta header is NOT
* gated on this predicate directly; instead `buildPerRequestHeaders`
* scans the assembled body via `hasGlobalCacheScopeOnWire` so the beta
* and the body field always agree even in degenerate cases (e.g.
* empty-system + no-tools request predicate true, body has nothing
* to attach scope to, beta correctly suppressed).
*/
private useGlobalCacheScope(): boolean {
return (
@ -391,6 +399,38 @@ export class AnthropicContentGenerator implements ContentGenerator {
);
}
/**
* Whether the assembled request body carries any
* `cache_control: { …, scope: 'global' }` entry. Scans the system
* block (when present as TextBlockParam[]) and the tools array these
* are the only two places the converter attaches scoped cache control.
* Used to gate the `prompt-caching-scope-2026-01-05` beta header so it
* never ships without a matching body field, and conversely so the
* field never ships without the beta declaring it.
*/
private hasGlobalCacheScopeOnWire(
req: MessageCreateParamsWithThinking,
): boolean {
const isGlobalScope = (block: unknown): boolean => {
if (!block || typeof block !== 'object') return false;
const cc = (block as { cache_control?: unknown }).cache_control;
if (!cc || typeof cc !== 'object') return false;
return (cc as { scope?: string }).scope === 'global';
};
if (Array.isArray(req.system)) {
for (const block of req.system) {
if (isGlobalScope(block)) return true;
}
}
if (Array.isArray(req.tools)) {
for (const tool of req.tools) {
if (isGlobalScope(tool)) return true;
}
}
return false;
}
/**
* Read every customHeaders entry whose key (case-insensitively) is
* `anthropic-beta` and yield the comma-separated flags from each. Multiple
@ -445,15 +485,21 @@ export class AnthropicContentGenerator implements ContentGenerator {
const stripAssistantThinking = isDeepSeek && !thinking;
// Sample the live cache-control flags once per request and forward
// them to both the converter (body-side `cache_control`) and the
// per-request beta-header builder. `Config.setModel()` mutates both
// `enableCacheControl` and `baseUrl` in place, so the converter's
// constructor-time value would otherwise diverge from the generator's
// per-request reads. `useGlobalCacheScope` is a strict subset of
// `enableCacheControl` — only true when caching is on AND the resolved
// baseURL is Anthropic-native — and governs whether the body
// `cache_control` entries carry `scope: 'global'`, paired with the
// `prompt-caching-scope-2026-01-05` beta in `buildPerRequestHeaders`.
// them to the converter (body-side `cache_control`). The converter's
// constructor-time value would otherwise diverge from the live value
// on the qwen-oauth path, where `Config.handleModelChange()`
// hot-updates `enableCacheControl` in place without recreating the
// ContentGenerator. (Non-qwen-oauth providers refresh via generator
// recreation, so `baseUrl` is captured fresh at construct time, not
// mutated mid-session — defensive per-request reads on both fields
// cover both paths.) `useGlobalCacheScope` is a strict subset of
// `enableCacheControl` (true only when caching is on AND the resolved
// baseURL is Anthropic-native) and governs whether the body's
// `cache_control` entries carry `scope: 'global'`. The matching
// `prompt-caching-scope-2026-01-05` beta isn't passed through this
// sample — `buildPerRequestHeaders` instead scans the assembled body
// via `hasGlobalCacheScopeOnWire` so beta and body field share a
// single source of truth.
const enableCacheControl =
this.contentGeneratorConfig.enableCacheControl !== false;
const useGlobalCacheScope = this.useGlobalCacheScope();

View file

@ -229,9 +229,12 @@ export class AnthropicContentConverter {
// ship the standard per-session shape so they don't see a scope
// extension they may not recognize.
// Per-call overrides mirror the request-shape gates in
// `convertGeminiRequestToAnthropic` so a hot `Config.setModel()` flip of
// `enableCacheControl` / `baseUrl` doesn't leave the tool body and the
// beta header out of sync.
// `convertGeminiRequestToAnthropic` so a qwen-oauth-style hot flip of
// `enableCacheControl` (the only field `Config.handleModelChange()`
// mutates in place without recreating the generator) doesn't leave
// the tool body and the beta header out of sync. `baseUrl` isn't
// hot-mutated — non-qwen-oauth providers recreate the generator on
// refresh — but the same per-call plumbing covers it for free.
const enableCacheControl =
options.enableCacheControl ?? this.enableCacheControl;
const useGlobalCacheScope = options.useGlobalCacheScope ?? false;