mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-17 03:57:18 +00:00
refactor(core): gate cache-scope beta on body presence, not just predicate
#4020 review (Copilot): the comment promised "beta and body-side scope: 'global' field always ship together" but the gate was just `useGlobalCacheScope()`. In the degenerate case where the predicate is true but the request body has no system text AND no tools, the beta would still ship without any matching `cache_control.scope: 'global'` on the wire — overstating the contract and shipping dead weight. - New `hasGlobalCacheScopeOnWire(req)` scans the assembled request body (system block when shaped as `TextBlockParam[]`, tools array) for any `cache_control: { …, scope: 'global' }` entry. `buildPerRequestHeaders` gates the `prompt-caching-scope-2026-01-05` beta on this scan, so the beta and the body field share a single source of truth. No window between sampling the predicate and emitting the body where the two could diverge. - `useGlobalCacheScope()` is still sampled once per `buildRequest` and threaded into the converter to decide whether to ATTACH `scope: 'global'` to the body. The body-scan downstream then derives the beta from what actually landed. Tests: - New: empty systemInstruction + no tools + Anthropic-native + cache on → beta NOT shipped (degenerate body-scan case). - New: empty systemInstruction + non-empty tools → beta shipped (tool scope:'global' triggers the scan). - Existing per-request beta tests now include a `systemInstruction` so the body has the scope field; degenerate case is covered by the new dedicated test. Also tightened two stale comments (#3217834451, #3217834505) that claimed `Config.setModel()` mutates both `enableCacheControl` and `baseUrl` in place — only `enableCacheControl` is hot-mutated (qwen-oauth path); non-qwen-oauth providers recreate the generator on refresh, so `baseUrl` is captured fresh at construct time. Comments now describe the real in-place mutation and note the qwen-oauth boundary.
This commit is contained in:
parent
ea4ce13dfd
commit
491a441f93
3 changed files with 174 additions and 37 deletions
|
|
@ -510,6 +510,13 @@ describe('AnthropicContentGenerator', () => {
|
|||
schemaCompliance: 'auto',
|
||||
};
|
||||
|
||||
// Default request shape carries a systemInstruction so the converter
|
||||
// attaches `cache_control: { …, scope: 'global' }` to the system text
|
||||
// — that's what `buildPerRequestHeaders` scans to decide whether the
|
||||
// `prompt-caching-scope-2026-01-05` beta ships. Without a system or
|
||||
// tools the body has nothing to attach scope to, and the beta is
|
||||
// correctly suppressed (covered by a separate degenerate-case test
|
||||
// below). Tests can merge their own `requestConfig` to override.
|
||||
async function callOnce(
|
||||
config: ContentGeneratorConfig,
|
||||
requestConfig?: object,
|
||||
|
|
@ -524,7 +531,10 @@ describe('AnthropicContentGenerator', () => {
|
|||
await generator.generateContent({
|
||||
model: 'models/ignored',
|
||||
contents: 'Hi',
|
||||
...(requestConfig ? { config: requestConfig } : {}),
|
||||
config: {
|
||||
systemInstruction: 'sys',
|
||||
...(requestConfig ?? {}),
|
||||
},
|
||||
} as unknown as GenerateContentParameters);
|
||||
const [, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs;
|
||||
return ((options as { headers?: Record<string, string> })?.headers ||
|
||||
|
|
@ -680,6 +690,69 @@ describe('AnthropicContentGenerator', () => {
|
|||
expect(userBlocks2[0]).not.toHaveProperty('cache_control');
|
||||
});
|
||||
|
||||
it('suppresses the cache-scope beta when the body has no scope field (empty system + no tools)', async () => {
|
||||
// The beta gate is a body-scan over `req.system` / `req.tools` for
|
||||
// any `cache_control.scope === 'global'` entry, not a re-read of
|
||||
// the `useGlobalCacheScope()` predicate. So a request with no
|
||||
// systemInstruction AND no tools — predicate true but no body
|
||||
// surface to attach scope to — correctly omits the beta.
|
||||
const { AnthropicContentGenerator } = await importGenerator();
|
||||
anthropicState.createImpl.mockResolvedValue({
|
||||
id: 'msg-1',
|
||||
model: 'claude-test',
|
||||
content: [{ type: 'text', text: 'ok' }],
|
||||
});
|
||||
const generator = new AnthropicContentGenerator(
|
||||
{ ...baseConfig, reasoning: false },
|
||||
mockConfig,
|
||||
);
|
||||
await generator.generateContent({
|
||||
model: 'models/ignored',
|
||||
contents: 'Hi',
|
||||
// No systemInstruction, no tools.
|
||||
} as unknown as GenerateContentParameters);
|
||||
|
||||
const [, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs;
|
||||
const reqHeaders = ((options as { headers?: Record<string, string> })
|
||||
?.headers || {}) as Record<string, string>;
|
||||
expect(reqHeaders['anthropic-beta']).toBeUndefined();
|
||||
});
|
||||
|
||||
it('ships the cache-scope beta when only tools (no systemInstruction) carry scope:"global"', async () => {
|
||||
// Mirror of the above: scope:'global' on the last tool is enough
|
||||
// for the body-scan to fire, even with no systemInstruction.
|
||||
const { AnthropicContentGenerator } = await importGenerator();
|
||||
anthropicState.createImpl.mockResolvedValue({
|
||||
id: 'msg-1',
|
||||
model: 'claude-test',
|
||||
content: [{ type: 'text', text: 'ok' }],
|
||||
});
|
||||
const generator = new AnthropicContentGenerator(
|
||||
{ ...baseConfig, reasoning: false },
|
||||
mockConfig,
|
||||
);
|
||||
await generator.generateContent({
|
||||
model: 'models/ignored',
|
||||
contents: 'Hi',
|
||||
config: {
|
||||
tools: [
|
||||
{
|
||||
functionDeclarations: [
|
||||
{ name: 'get_weather', description: 'Get weather' },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
} as unknown as GenerateContentParameters);
|
||||
|
||||
const [, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs;
|
||||
const reqHeaders = ((options as { headers?: Record<string, string> })
|
||||
?.headers || {}) as Record<string, string>;
|
||||
expect(reqHeaders['anthropic-beta']).toBe(
|
||||
'prompt-caching-scope-2026-01-05',
|
||||
);
|
||||
});
|
||||
|
||||
it('strips the cache-scope beta and scope:"global" field on non-Anthropic baseURLs', async () => {
|
||||
// Symmetry with the auth/identity gate: the
|
||||
// `prompt-caching-scope-2026-01-05` beta and the body-side
|
||||
|
|
@ -872,6 +945,12 @@ describe('AnthropicContentGenerator', () => {
|
|||
await generator.generateContent({
|
||||
model: 'models/ignored',
|
||||
contents: 'Hi',
|
||||
// Include a system instruction so the converter attaches
|
||||
// `cache_control: { …, scope: 'global' }` on the system block —
|
||||
// the beta-header builder body-scans for that field, so a
|
||||
// realistic request shape is needed to observe the
|
||||
// `prompt-caching-scope-2026-01-05` beta.
|
||||
config: { systemInstruction: 'sys' },
|
||||
} as unknown as GenerateContentParameters);
|
||||
|
||||
// defaultHeaders carries User-Agent and customHeaders (not beta).
|
||||
|
|
@ -917,6 +996,10 @@ describe('AnthropicContentGenerator', () => {
|
|||
const stream = await generator.generateContentStream({
|
||||
model: 'models/ignored',
|
||||
contents: 'Hi',
|
||||
// See the systemInstruction note in the non-streaming sibling
|
||||
// test above — the body-scan beta gate needs an actual scope:
|
||||
// 'global' field on the wire to fire.
|
||||
config: { systemInstruction: 'sys' },
|
||||
} as unknown as GenerateContentParameters);
|
||||
// Drain the stream so create() has been called.
|
||||
for await (const _chunk of stream) {
|
||||
|
|
@ -1308,6 +1391,11 @@ describe('AnthropicContentGenerator', () => {
|
|||
await generator.generateContent({
|
||||
model: 'models/ignored',
|
||||
contents: 'Hello',
|
||||
// Include systemInstruction so the body carries a
|
||||
// `cache_control: { scope: 'global' }` field — the beta gate
|
||||
// is now a body-scan, so the test needs an actual scope field
|
||||
// on the wire to observe the `prompt-caching-scope` flag.
|
||||
config: { systemInstruction: 'sys' },
|
||||
} as unknown as GenerateContentParameters);
|
||||
|
||||
const [req, options] =
|
||||
|
|
|
|||
|
|
@ -349,18 +349,22 @@ export class AnthropicContentGenerator implements ContentGenerator {
|
|||
betas.push('effort-2025-11-24');
|
||||
}
|
||||
|
||||
// Enable global prompt cache scope so identical system/tool prefixes
|
||||
// are cached across sessions, greatly improving cross-session cache hit
|
||||
// rates. The beta and the body-side `scope: 'global'` field always
|
||||
// ship together — gated on `enableCacheControl !== false` AND
|
||||
// Anthropic-native baseURL — so a DeepSeek/IdeaLab proxy stays on its
|
||||
// pre-PR per-session caching shape rather than receiving an
|
||||
// Anthropic-specific extension it may not understand. Symmetric with
|
||||
// the auth/identity gate (`useProxyIdentity`) so all Anthropic-only
|
||||
// wire-shape extensions live behind one predicate. Disabling
|
||||
// `enableCacheControl` also disables the gate, preserving the
|
||||
// `betas.length === 0` early-return below for the all-disabled case.
|
||||
if (this.useGlobalCacheScope()) {
|
||||
// The `prompt-caching-scope-2026-01-05` beta is meaningful only when
|
||||
// the body actually carries a `cache_control: { …, scope: 'global' }`
|
||||
// entry. The converter emits those entries on the system text block
|
||||
// and the last tool entry when `useGlobalCacheScope` is true (gated
|
||||
// on `enableCacheControl !== false` AND Anthropic-native baseURL).
|
||||
// Scan the assembled request body for that field rather than
|
||||
// re-deriving the gate here, so:
|
||||
// 1. The beta and the body-side field share a single source of
|
||||
// truth — there's no window between sampling the predicate and
|
||||
// emitting the body where the two could diverge.
|
||||
// 2. The degenerate empty-system + no-tools case (predicate true,
|
||||
// body has nothing to attach scope to) doesn't ship the beta as
|
||||
// dead weight.
|
||||
// 3. Anthropic-compatible proxies that disable cache stay clean —
|
||||
// no body-side scope field means no beta either.
|
||||
if (this.hasGlobalCacheScopeOnWire(anthropicRequest)) {
|
||||
betas.push('prompt-caching-scope-2026-01-05');
|
||||
}
|
||||
|
||||
|
|
@ -370,19 +374,23 @@ export class AnthropicContentGenerator implements ContentGenerator {
|
|||
}
|
||||
|
||||
/**
|
||||
* Whether the global prompt cache scope (the
|
||||
* `prompt-caching-scope-2026-01-05` beta + the body-side
|
||||
* `scope: 'global'` field) should be active for this request.
|
||||
* Requires both `enableCacheControl !== false` AND an Anthropic-native
|
||||
* baseURL — these two outputs always travel together. Computed per
|
||||
* request because `Config.handleModelChange()` hot-updates
|
||||
* Whether to ATTACH the body-side `scope: 'global'` field on
|
||||
* `cache_control` entries this request. Requires both
|
||||
* `enableCacheControl !== false` AND an Anthropic-native baseURL.
|
||||
* Computed per request: `Config.handleModelChange()` hot-updates
|
||||
* `enableCacheControl` in-place on the qwen-oauth path (without
|
||||
* recreating the ContentGenerator); for non-qwen-oauth providers a
|
||||
* model change goes through the refresh path which DOES recreate
|
||||
* the ContentGenerator (so `baseUrl` is captured fresh at construct
|
||||
* time, not mutated). Either way reading both fields each request
|
||||
* is the right defensive choice — cheap and avoids stale-cache
|
||||
* surprises if the hot-update list ever expands.
|
||||
* recreating the ContentGenerator); non-qwen-oauth providers refresh
|
||||
* via generator recreation, which captures `baseUrl` fresh at
|
||||
* construct time (not mutated). Reading both fields each request is
|
||||
* the right defense — cheap and avoids stale-cache surprises if the
|
||||
* hot-update list ever expands.
|
||||
*
|
||||
* The matching `prompt-caching-scope-2026-01-05` beta header is NOT
|
||||
* gated on this predicate directly; instead `buildPerRequestHeaders`
|
||||
* scans the assembled body via `hasGlobalCacheScopeOnWire` so the beta
|
||||
* and the body field always agree even in degenerate cases (e.g.
|
||||
* empty-system + no-tools request — predicate true, body has nothing
|
||||
* to attach scope to, beta correctly suppressed).
|
||||
*/
|
||||
private useGlobalCacheScope(): boolean {
|
||||
return (
|
||||
|
|
@ -391,6 +399,38 @@ export class AnthropicContentGenerator implements ContentGenerator {
|
|||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether the assembled request body carries any
|
||||
* `cache_control: { …, scope: 'global' }` entry. Scans the system
|
||||
* block (when present as TextBlockParam[]) and the tools array — these
|
||||
* are the only two places the converter attaches scoped cache control.
|
||||
* Used to gate the `prompt-caching-scope-2026-01-05` beta header so it
|
||||
* never ships without a matching body field, and conversely so the
|
||||
* field never ships without the beta declaring it.
|
||||
*/
|
||||
private hasGlobalCacheScopeOnWire(
|
||||
req: MessageCreateParamsWithThinking,
|
||||
): boolean {
|
||||
const isGlobalScope = (block: unknown): boolean => {
|
||||
if (!block || typeof block !== 'object') return false;
|
||||
const cc = (block as { cache_control?: unknown }).cache_control;
|
||||
if (!cc || typeof cc !== 'object') return false;
|
||||
return (cc as { scope?: string }).scope === 'global';
|
||||
};
|
||||
|
||||
if (Array.isArray(req.system)) {
|
||||
for (const block of req.system) {
|
||||
if (isGlobalScope(block)) return true;
|
||||
}
|
||||
}
|
||||
if (Array.isArray(req.tools)) {
|
||||
for (const tool of req.tools) {
|
||||
if (isGlobalScope(tool)) return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read every customHeaders entry whose key (case-insensitively) is
|
||||
* `anthropic-beta` and yield the comma-separated flags from each. Multiple
|
||||
|
|
@ -445,15 +485,21 @@ export class AnthropicContentGenerator implements ContentGenerator {
|
|||
const stripAssistantThinking = isDeepSeek && !thinking;
|
||||
|
||||
// Sample the live cache-control flags once per request and forward
|
||||
// them to both the converter (body-side `cache_control`) and the
|
||||
// per-request beta-header builder. `Config.setModel()` mutates both
|
||||
// `enableCacheControl` and `baseUrl` in place, so the converter's
|
||||
// constructor-time value would otherwise diverge from the generator's
|
||||
// per-request reads. `useGlobalCacheScope` is a strict subset of
|
||||
// `enableCacheControl` — only true when caching is on AND the resolved
|
||||
// baseURL is Anthropic-native — and governs whether the body
|
||||
// `cache_control` entries carry `scope: 'global'`, paired with the
|
||||
// `prompt-caching-scope-2026-01-05` beta in `buildPerRequestHeaders`.
|
||||
// them to the converter (body-side `cache_control`). The converter's
|
||||
// constructor-time value would otherwise diverge from the live value
|
||||
// on the qwen-oauth path, where `Config.handleModelChange()`
|
||||
// hot-updates `enableCacheControl` in place without recreating the
|
||||
// ContentGenerator. (Non-qwen-oauth providers refresh via generator
|
||||
// recreation, so `baseUrl` is captured fresh at construct time, not
|
||||
// mutated mid-session — defensive per-request reads on both fields
|
||||
// cover both paths.) `useGlobalCacheScope` is a strict subset of
|
||||
// `enableCacheControl` (true only when caching is on AND the resolved
|
||||
// baseURL is Anthropic-native) and governs whether the body's
|
||||
// `cache_control` entries carry `scope: 'global'`. The matching
|
||||
// `prompt-caching-scope-2026-01-05` beta isn't passed through this
|
||||
// sample — `buildPerRequestHeaders` instead scans the assembled body
|
||||
// via `hasGlobalCacheScopeOnWire` so beta and body field share a
|
||||
// single source of truth.
|
||||
const enableCacheControl =
|
||||
this.contentGeneratorConfig.enableCacheControl !== false;
|
||||
const useGlobalCacheScope = this.useGlobalCacheScope();
|
||||
|
|
|
|||
|
|
@ -229,9 +229,12 @@ export class AnthropicContentConverter {
|
|||
// ship the standard per-session shape so they don't see a scope
|
||||
// extension they may not recognize.
|
||||
// Per-call overrides mirror the request-shape gates in
|
||||
// `convertGeminiRequestToAnthropic` so a hot `Config.setModel()` flip of
|
||||
// `enableCacheControl` / `baseUrl` doesn't leave the tool body and the
|
||||
// beta header out of sync.
|
||||
// `convertGeminiRequestToAnthropic` so a qwen-oauth-style hot flip of
|
||||
// `enableCacheControl` (the only field `Config.handleModelChange()`
|
||||
// mutates in place without recreating the generator) doesn't leave
|
||||
// the tool body and the beta header out of sync. `baseUrl` isn't
|
||||
// hot-mutated — non-qwen-oauth providers recreate the generator on
|
||||
// refresh — but the same per-call plumbing covers it for free.
|
||||
const enableCacheControl =
|
||||
options.enableCacheControl ?? this.enableCacheControl;
|
||||
const useGlobalCacheScope = options.useGlobalCacheScope ?? false;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue