diff --git a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts index 7be26d879..7789f4fb2 100644 --- a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts +++ b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.test.ts @@ -104,7 +104,12 @@ describe('AnthropicContentGenerator', () => { vi.restoreAllMocks(); }); - it('passes a QwenCode User-Agent header to the Anthropic SDK', async () => { + it('uses claude-cli identity (User-Agent + x-app + Bearer auth) for non-Anthropic baseURLs', async () => { + // Non-Anthropic-native baseURL → IdeaLab-style proxy path: + // - User-Agent presents as `claude-cli/ (external, cli)` + // - `x-app: cli` is sent + // - SDK is constructed with `authToken` (sends `Authorization: Bearer`) + // rather than `apiKey` (`x-api-key`), avoiding dual-header conflicts. const { AnthropicContentGenerator } = await importGenerator(); void new AnthropicContentGenerator( { @@ -121,10 +126,364 @@ describe('AnthropicContentGenerator', () => { const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || {}) as Record; - expect(headers['User-Agent']).toContain('QwenCode/1.2.3'); - expect(headers['User-Agent']).toContain( - `(${process.platform}; ${process.arch})`, + expect(headers['User-Agent']).toContain('claude-cli/1.2.3'); + expect(headers['User-Agent']).toContain('(external, cli)'); + expect(headers['x-app']).toBe('cli'); + expect(anthropicState.constructorOptions?.['authToken']).toBe('test-key'); + expect(anthropicState.constructorOptions?.['apiKey']).toBeNull(); + }); + + it('uses QwenCode identity + apiKey auth when baseURL is api.anthropic.com', async () => { + // Anthropic-native baseURL: keep the SDK-default `x-api-key` auth and + // a truthful `QwenCode` User-Agent (no `x-app` header) so usage isn't + // misattributed to Claude CLI in Anthropic's logs/quotas. 
+ const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-opus-4-7', + apiKey: 'test-key', + baseUrl: 'https://api.anthropic.com', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, ); + + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('QwenCode/1.2.3'); + expect(headers['User-Agent']).not.toContain('claude-cli'); + expect(headers['x-app']).toBeUndefined(); + expect(anthropicState.constructorOptions?.['apiKey']).toBe('test-key'); + expect(anthropicState.constructorOptions?.['authToken']).toBeNull(); + }); + + it('treats unset baseURL as Anthropic-native (SDK default targets api.anthropic.com)', async () => { + const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-opus-4-7', + apiKey: 'test-key', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('QwenCode/1.2.3'); + expect(headers['x-app']).toBeUndefined(); + expect(anthropicState.constructorOptions?.['apiKey']).toBe('test-key'); + expect(anthropicState.constructorOptions?.['authToken']).toBeNull(); + }); + + it('treats *.anthropic.com subdomains as Anthropic-native', async () => { + // Anthropic's own subdomains (regional endpoints, internal routes) all + // share the native auth/identity contract — none of them want the + // proxy-flavored Bearer auth or claude-cli UA. 
+ const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-opus-4-7', + apiKey: 'test-key', + baseUrl: 'https://eu.api.anthropic.com', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('QwenCode/1.2.3'); + expect(headers['x-app']).toBeUndefined(); + expect(anthropicState.constructorOptions?.['apiKey']).toBe('test-key'); + expect(anthropicState.constructorOptions?.['authToken']).toBeNull(); + }); + + it('treats malformed baseURL as proxy (URL parse failure falls through to claude-cli identity)', async () => { + // A bogus baseUrl string trips `new URL()`. The detector's catch + // branch must fall through to the proxy path rather than throw or + // silently treat the broken value as Anthropic-native. + const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-test', + apiKey: 'test-key', + baseUrl: 'not a valid url', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('claude-cli/1.2.3'); + expect(headers['x-app']).toBe('cli'); + expect(anthropicState.constructorOptions?.['authToken']).toBe('test-key'); + expect(anthropicState.constructorOptions?.['apiKey']).toBeNull(); + }); + + it('pins DeepSeek anthropic-compatible baseURL onto the proxy auth/identity path', async () => { + // The auth/identity gate uses an Anthropic-native allow-list rather + // than an IdeaLab-only allow-list, so `api.deepseek.com/anthropic` + // gets the same Bearer + claude-cli + x-app bundle that proxies get. 
+ // This documents the assumption — if DeepSeek's anthropic-compatible + // endpoint ever rejects `Authorization: Bearer`, this test pins the + // shape we'd need to flip back, and any future change here surfaces + // the auth contract decision instead of silently flipping behavior. + const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'deepseek-v4-pro', + apiKey: 'test-key', + baseUrl: 'https://api.deepseek.com/anthropic', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('claude-cli/1.2.3'); + expect(headers['x-app']).toBe('cli'); + expect(anthropicState.constructorOptions?.['authToken']).toBe('test-key'); + expect(anthropicState.constructorOptions?.['apiKey']).toBeNull(); + }); + + it('trims whitespace on config.baseUrl before classification', async () => { + // A copy-pasted baseURL with leading/trailing whitespace would + // otherwise trip `new URL(...)` in `isAnthropicNativeBaseUrl` and + // fall through to proxy identity — meaning real api.anthropic.com + // gets Bearer auth + claude-cli UA and 401s. Trim the config side + // before classification, mirroring how the env-side already + // handles whitespace. 
+ const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-opus-4-7', + apiKey: 'test-key', + baseUrl: ' https://api.anthropic.com ', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('QwenCode/1.2.3'); + expect(headers['x-app']).toBeUndefined(); + expect(anthropicState.constructorOptions?.['apiKey']).toBe('test-key'); + expect(anthropicState.constructorOptions?.['authToken']).toBeNull(); + }); + + it('does not match spoofed anthropic.com.evil.com hostnames', async () => { + // Mirror of the DeepSeek hostname-spoof test: a suffix like + // `anthropic.com.evil.com` must NOT be classified as Anthropic-native — + // otherwise an attacker controlling DNS could route real Anthropic + // credentials with `x-api-key` to their endpoint. + const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-test', + apiKey: 'test-key', + baseUrl: 'https://api.anthropic.com.evil.com', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('claude-cli/1.2.3'); + expect(headers['x-app']).toBe('cli'); + expect(anthropicState.constructorOptions?.['authToken']).toBe('test-key'); + expect(anthropicState.constructorOptions?.['apiKey']).toBeNull(); + }); + + // Regression coverage for #4020 review: the SDK destructures with + // defaults (`apiKey = readEnv('ANTHROPIC_API_KEY') ?? null`), which only + // fire for `undefined`. 
Spreading `{ authToken }` alone — without an + // explicit `apiKey: null` — used to let the env back-fill `apiKey`, and + // the SDK's auth resolver then preferred `apiKey` over `authToken`, so a + // user with `ANTHROPIC_API_KEY=sk-ant-…` exported alongside an IdeaLab + // proxy `baseUrl` shipped their real Anthropic key to the proxy as + // `X-Api-Key`. These tests pin the explicit-null suppression on both + // branches, plus the matching baseURL-env resolution. + describe('env back-fill suppression and baseURL env resolution', () => { + const ENV_KEYS = [ + 'ANTHROPIC_API_KEY', + 'ANTHROPIC_AUTH_TOKEN', + 'ANTHROPIC_BASE_URL', + ]; + const savedEnv: Record = {}; + beforeEach(() => { + for (const k of ENV_KEYS) savedEnv[k] = process.env[k]; + }); + afterEach(() => { + for (const k of ENV_KEYS) { + if (savedEnv[k] === undefined) delete process.env[k]; + else process.env[k] = savedEnv[k]; + } + }); + + it('suppresses ANTHROPIC_API_KEY back-fill on the proxy branch (prevents credential leak)', async () => { + // Scenario: user runs Claude Code in the same shell so + // ANTHROPIC_API_KEY is exported with their real Anthropic key, and + // separately configures qwen-code with an IdeaLab proxy + IdeaLab + // token. Pre-fix, the SDK's destructuring default would back-fill + // `apiKey` from the env, then the auth resolver would prefer it + // over our `authToken` and ship `X-Api-Key: ` + // to the third-party proxy. + process.env['ANTHROPIC_API_KEY'] = 'sk-ant-secret-do-not-leak'; + const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-test', + apiKey: 'idealab-token', + baseUrl: 'https://idealab.example/anthropic', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + // The constructor must receive an explicit `null` so the SDK + // destructuring default for ANTHROPIC_API_KEY does NOT fire. 
+ expect(anthropicState.constructorOptions?.['apiKey']).toBeNull(); + expect(anthropicState.constructorOptions?.['authToken']).toBe( + 'idealab-token', + ); + }); + + it('suppresses ANTHROPIC_AUTH_TOKEN back-fill on the Anthropic-native branch', async () => { + // Inverse of the leak: if the user has ANTHROPIC_AUTH_TOKEN set + // (an Anthropic-supported alt) and routes to api.anthropic.com, + // we should still ship our explicit `apiKey` rather than letting + // the env back-fill `authToken` and risk the SDK picking the wrong + // one if precedence flips in a future SDK version. + process.env['ANTHROPIC_AUTH_TOKEN'] = 'env-bearer-token'; + const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-opus-4-7', + apiKey: 'config-api-key', + baseUrl: 'https://api.anthropic.com', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + expect(anthropicState.constructorOptions?.['apiKey']).toBe( + 'config-api-key', + ); + expect(anthropicState.constructorOptions?.['authToken']).toBeNull(); + }); + + it('applies proxy identity when ANTHROPIC_BASE_URL env points to a proxy and config.baseUrl is unset', async () => { + // Symmetric concern: pre-fix, `isAnthropicNativeBaseUrl` only read + // `config.baseUrl`, so a user who set ANTHROPIC_BASE_URL only via + // env (leaving qwen-code's baseUrl unset) had the SDK route to the + // proxy while our predicate thought it was Anthropic-native — wrong + // UA, wrong auth shape, and the cache-scope beta + scope:'global' + // shipped to a proxy that likely doesn't recognize them. + process.env['ANTHROPIC_BASE_URL'] = 'https://idealab.example/anthropic'; + const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-test', + apiKey: 'idealab-token', + // baseUrl intentionally omitted; SDK uses ANTHROPIC_BASE_URL env. 
+ timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('claude-cli/1.2.3'); + expect(headers['x-app']).toBe('cli'); + expect(anthropicState.constructorOptions?.['authToken']).toBe( + 'idealab-token', + ); + expect(anthropicState.constructorOptions?.['apiKey']).toBeNull(); + }); + + it('keeps Anthropic-native identity when ANTHROPIC_BASE_URL is unset (SDK default applies)', async () => { + // With no config.baseUrl and no env, the SDK defaults to + // api.anthropic.com — our predicate must agree and ship the native + // identity bundle (so the SDK default isn't silently misclassified + // as a proxy). + delete process.env['ANTHROPIC_BASE_URL']; + const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-opus-4-7', + apiKey: 'config-key', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('QwenCode/1.2.3'); + expect(headers['x-app']).toBeUndefined(); + expect(anthropicState.constructorOptions?.['apiKey']).toBe('config-key'); + expect(anthropicState.constructorOptions?.['authToken']).toBeNull(); + }); + + it('config.baseUrl wins over ANTHROPIC_BASE_URL when both are set', async () => { + // Mirror the SDK's own resolution: explicit config beats env. A + // user who deliberately points qwen-code at api.anthropic.com + // shouldn't have a stray ANTHROPIC_BASE_URL silently flip them + // onto the proxy path. 
+ process.env['ANTHROPIC_BASE_URL'] = 'https://idealab.example/anthropic'; + const { AnthropicContentGenerator } = await importGenerator(); + void new AnthropicContentGenerator( + { + model: 'claude-opus-4-7', + apiKey: 'config-key', + baseUrl: 'https://api.anthropic.com', + timeout: 10_000, + maxRetries: 2, + samplingParams: {}, + schemaCompliance: 'auto', + }, + mockConfig, + ); + const headers = (anthropicState.constructorOptions?.['defaultHeaders'] || + {}) as Record; + expect(headers['User-Agent']).toContain('QwenCode/1.2.3'); + expect(headers['x-app']).toBeUndefined(); + expect(anthropicState.constructorOptions?.['apiKey']).toBe('config-key'); + expect(anthropicState.constructorOptions?.['authToken']).toBeNull(); + }); }); it('merges customHeaders into defaultHeaders (does not replace defaults)', async () => { @@ -150,7 +509,7 @@ describe('AnthropicContentGenerator', () => { {}) as Record; // Beta headers moved out of defaultHeaders — see PR #3788 review feedback. // Only User-Agent and customHeaders remain at construction time. - expect(headers['User-Agent']).toContain('QwenCode/1.2.3'); + expect(headers['User-Agent']).toContain('claude-cli/1.2.3'); expect(headers['X-Custom']).toBe('1'); expect(headers['anthropic-beta']).toBeUndefined(); }); @@ -163,16 +522,29 @@ describe('AnthropicContentGenerator', () => { // so the wire shape stays consistent when a per-request opt-out drops // `thinking` / `output_config`. See PR #3788 review feedback. describe('per-request anthropic-beta header', () => { + // baseURL points at api.anthropic.com so cache-scope (beta + + // body-side `scope: 'global'`) participates by default. The + // `prompt-caching-scope-2026-01-05` beta is now gated jointly on + // `enableCacheControl` AND `isAnthropicNativeBaseUrl`, so tests that + // want to observe the beta need a native baseURL. Proxy-baseURL + // behavior is covered separately below. 
const baseConfig: ContentGeneratorConfig = { model: 'claude-test', apiKey: 'test-key', - baseUrl: 'https://example.invalid', + baseUrl: 'https://api.anthropic.com', timeout: 10_000, maxRetries: 2, samplingParams: { max_tokens: 100 }, schemaCompliance: 'auto', }; + // Default request shape carries a systemInstruction so the converter + // attaches `cache_control: { …, scope: 'global' }` to the system text + // — that's what `buildPerRequestHeaders` scans to decide whether the + // `prompt-caching-scope-2026-01-05` beta ships. Without a system or + // tools the body has nothing to attach scope to, and the beta is + // correctly suppressed (covered by a separate degenerate-case test + // below). Tests can merge their own `requestConfig` to override. async function callOnce( config: ContentGeneratorConfig, requestConfig?: object, @@ -187,7 +559,10 @@ describe('AnthropicContentGenerator', () => { await generator.generateContent({ model: 'models/ignored', contents: 'Hi', - ...(requestConfig ? { config: requestConfig } : {}), + config: { + systemInstruction: 'sys', + ...(requestConfig ?? {}), + }, } as unknown as GenerateContentParameters); const [, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs; return ((options as { headers?: Record })?.headers || @@ -210,14 +585,260 @@ describe('AnthropicContentGenerator', () => { ...baseConfig, // No reasoning config: thinking defaults to enabled, no effort. 
}); - expect(headers['anthropic-beta']).toBe('interleaved-thinking-2025-05-14'); + expect(headers['anthropic-beta']).toContain( + 'interleaved-thinking-2025-05-14', + ); + expect(headers['anthropic-beta']).toContain( + 'prompt-caching-scope-2026-01-05', + ); }); - it('omits beta header when reasoning is disabled (no thinking, no effort)', async () => { + it('sends only prompt-caching-scope when reasoning is disabled (no thinking, no effort)', async () => { const headers = await callOnce({ ...baseConfig, reasoning: false }); + expect(headers['anthropic-beta']).toBe('prompt-caching-scope-2026-01-05'); + }); + + it('drops the prompt-caching-scope beta when enableCacheControl is false', async () => { + // The cache-scope beta is dead weight (and risks 4xx on backends that + // don't recognize it) when the converter isn't actually attaching + // `cache_control` to the request body. With both cache and reasoning + // disabled, the betas list is empty and no header should be sent. + const headers = await callOnce({ + ...baseConfig, + reasoning: false, + enableCacheControl: false, + } as ContentGeneratorConfig); expect(headers['anthropic-beta']).toBeUndefined(); }); + it('drops only the cache-scope beta when enableCacheControl is false but reasoning is on', async () => { + // With reasoning enabled, `interleaved-thinking` (and `effort` when + // applicable) still ride the per-request header — only the cache-scope + // flag is gated off, since there's no cache_control on the body to + // pair it with. 
+ const headers = await callOnce({ + ...baseConfig, + reasoning: { effort: 'medium' }, + enableCacheControl: false, + } as ContentGeneratorConfig); + expect(headers['anthropic-beta']).toContain( + 'interleaved-thinking-2025-05-14', + ); + expect(headers['anthropic-beta']).toContain('effort-2025-11-24'); + expect(headers['anthropic-beta']).not.toContain( + 'prompt-caching-scope-2026-01-05', + ); + }); + + it('reflects hot enableCacheControl flips between requests (no stale converter cache)', async () => { + // `Config.setModel()` mutates `contentGeneratorConfig.enableCacheControl` + // in place. A constructor-time cache on the converter would let the + // body-side `cache_control` and the per-request `prompt-caching-scope` + // beta header drift apart on a hot flip. Verify all three downstream + // surfaces — system block, last user message, and last tool entry — + // sample the same live value so the wire shape stays coherent. + const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'msg-1', + model: 'claude-test', + content: [{ type: 'text', text: 'ok' }], + }); + + const config: ContentGeneratorConfig = { + ...baseConfig, + reasoning: false, + }; + const generator = new AnthropicContentGenerator(config, mockConfig); + + const requestWithTool = { + model: 'models/ignored', + contents: 'Hi', + config: { + systemInstruction: 'sys', + tools: [ + { + functionDeclarations: [ + { name: 'get_weather', description: 'Get weather' }, + ], + }, + ], + }, + } as unknown as GenerateContentParameters; + + // 1st request: cache on (default). Beta header AND body cache_control + // both present on system + last user msg + last tool. 
+ await generator.generateContent(requestWithTool); + let [req, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs; + let reqHeaders = ((options as { headers?: Record }) + ?.headers || {}) as Record; + expect(reqHeaders['anthropic-beta']).toBe( + 'prompt-caching-scope-2026-01-05', + ); + expect((req as { system?: unknown }).system).toEqual([ + { + type: 'text', + text: 'sys', + cache_control: { type: 'ephemeral', scope: 'global' }, + }, + ]); + const reqTools = (req as { tools?: Array<{ cache_control?: unknown }> }) + .tools; + expect(reqTools).toHaveLength(1); + expect(reqTools?.[0]?.cache_control).toEqual({ + type: 'ephemeral', + scope: 'global', + }); + const reqMessages = (req as { messages?: Array<{ content?: unknown }> }) + .messages; + const userBlocks = reqMessages?.[0]?.content as Array<{ + cache_control?: unknown; + }>; + expect(userBlocks[0].cache_control).toEqual({ type: 'ephemeral' }); + + // Hot-flip enableCacheControl off (Config.setModel mutates in place). + config.enableCacheControl = false; + + // 2nd request: beta header dropped AND body cache_control gone on + // every surface, in lockstep — the converter must not be reading a + // stale constructor value. 
+ await generator.generateContent(requestWithTool); + [req, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs; + reqHeaders = ((options as { headers?: Record }) + ?.headers || {}) as Record; + expect(reqHeaders['anthropic-beta']).toBeUndefined(); + expect((req as { system?: unknown }).system).toBe('sys'); + const reqTools2 = (req as { tools?: Array<{ cache_control?: unknown }> }) + .tools; + expect(reqTools2?.[0]).not.toHaveProperty('cache_control'); + const reqMessages2 = (req as { messages?: Array<{ content?: unknown }> }) + .messages; + const userBlocks2 = reqMessages2?.[0]?.content as Array< + Record + >; + expect(userBlocks2[0]).not.toHaveProperty('cache_control'); + }); + + it('suppresses the cache-scope beta when the body has no scope field (empty system + no tools)', async () => { + // The beta gate is a body-scan over `req.system` / `req.tools` for + // any `cache_control.scope === 'global'` entry, not a re-read of + // the `useGlobalCacheScope()` predicate. So a request with no + // systemInstruction AND no tools — predicate true but no body + // surface to attach scope to — correctly omits the beta. + const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'msg-1', + model: 'claude-test', + content: [{ type: 'text', text: 'ok' }], + }); + const generator = new AnthropicContentGenerator( + { ...baseConfig, reasoning: false }, + mockConfig, + ); + await generator.generateContent({ + model: 'models/ignored', + contents: 'Hi', + // No systemInstruction, no tools. 
+ } as unknown as GenerateContentParameters); + + const [, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs; + const reqHeaders = ((options as { headers?: Record }) + ?.headers || {}) as Record; + expect(reqHeaders['anthropic-beta']).toBeUndefined(); + }); + + it('ships the cache-scope beta when only tools (no systemInstruction) carry scope:"global"', async () => { + // Mirror of the above: scope:'global' on the last tool is enough + // for the body-scan to fire, even with no systemInstruction. + const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'msg-1', + model: 'claude-test', + content: [{ type: 'text', text: 'ok' }], + }); + const generator = new AnthropicContentGenerator( + { ...baseConfig, reasoning: false }, + mockConfig, + ); + await generator.generateContent({ + model: 'models/ignored', + contents: 'Hi', + config: { + tools: [ + { + functionDeclarations: [ + { name: 'get_weather', description: 'Get weather' }, + ], + }, + ], + }, + } as unknown as GenerateContentParameters); + + const [, options] = anthropicState.lastCreateArgs as AnthropicCreateArgs; + const reqHeaders = ((options as { headers?: Record }) + ?.headers || {}) as Record; + expect(reqHeaders['anthropic-beta']).toBe( + 'prompt-caching-scope-2026-01-05', + ); + }); + + it('strips the cache-scope beta and scope:"global" field on non-Anthropic baseURLs', async () => { + // Symmetry with the auth/identity gate: the + // `prompt-caching-scope-2026-01-05` beta and the body-side + // `scope: 'global'` field are Anthropic-only wire-shape extensions. + // DeepSeek / IdeaLab proxies should still get per-session + // `cache_control: { type: 'ephemeral' }` so existing prompt-caching + // behavior is preserved, but without the new beta or scope field + // (their server side likely doesn't understand them, and silently + // ignoring them isn't guaranteed across proxies). 
+ const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'msg-1', + model: 'claude-test', + content: [{ type: 'text', text: 'ok' }], + }); + + const generator = new AnthropicContentGenerator( + { + ...baseConfig, + baseUrl: 'https://api.deepseek.com/anthropic', + reasoning: false, + }, + mockConfig, + ); + await generator.generateContent({ + model: 'models/ignored', + contents: 'Hi', + config: { + systemInstruction: 'sys', + tools: [ + { + functionDeclarations: [ + { name: 'get_weather', description: 'Get weather' }, + ], + }, + ], + }, + } as unknown as GenerateContentParameters); + + const [req, options] = + anthropicState.lastCreateArgs as AnthropicCreateArgs; + const reqHeaders = ((options as { headers?: Record }) + ?.headers || {}) as Record; + // Beta header must not be sent to non-Anthropic baseURL. + expect(reqHeaders['anthropic-beta']).toBeUndefined(); + // Body still carries per-session cache_control (pre-PR behavior). + expect((req as { system?: unknown }).system).toEqual([ + { + type: 'text', + text: 'sys', + cache_control: { type: 'ephemeral' }, + }, + ]); + const reqTools = (req as { tools?: Array<{ cache_control?: unknown }> }) + .tools; + expect(reqTools?.[0]?.cache_control).toEqual({ type: 'ephemeral' }); + }); + it('merges user-supplied customHeaders[anthropic-beta] with computed flags (no overwrite)', async () => { // Users configure additional Anthropic beta flags via customHeaders. // The per-request override must add to that list, not replace it. 
@@ -243,7 +864,10 @@ describe('AnthropicContentGenerator', () => { reasoning: false, customHeaders: { 'anthropic-beta': 'experimental-x' }, }); - expect(headers['anthropic-beta']).toBe('experimental-x'); + expect(headers['anthropic-beta']).toContain('experimental-x'); + expect(headers['anthropic-beta']).toContain( + 'prompt-caching-scope-2026-01-05', + ); }); it('does not leak customHeaders[anthropic-beta] (any casing) into defaultHeaders', async () => { @@ -315,15 +939,15 @@ describe('AnthropicContentGenerator', () => { expect(occurrences).toHaveLength(1); }); - it('omits beta header when per-request thinkingConfig.includeThoughts=false', async () => { + it('sends only prompt-caching-scope when per-request thinkingConfig.includeThoughts=false', async () => { // Even though the global reasoning config sets effort, the per-request // opt-out drops both `thinking` and `output_config` from the body — and - // the beta header must follow. + // the thinking/effort beta flags must not be present. const headers = await callOnce( { ...baseConfig, reasoning: { effort: 'medium' } }, { thinkingConfig: { includeThoughts: false } }, ); - expect(headers['anthropic-beta']).toBeUndefined(); + expect(headers['anthropic-beta']).toBe('prompt-caching-scope-2026-01-05'); }); it('keeps customHeaders + User-Agent in defaultHeaders while sending computed anthropic-beta per-request', async () => { @@ -349,9 +973,18 @@ describe('AnthropicContentGenerator', () => { await generator.generateContent({ model: 'models/ignored', contents: 'Hi', + // Include a system instruction so the converter attaches + // `cache_control: { …, scope: 'global' }` on the system block — + // the beta-header builder body-scans for that field, so a + // realistic request shape is needed to observe the + // `prompt-caching-scope-2026-01-05` beta. + config: { systemInstruction: 'sys' }, } as unknown as GenerateContentParameters); // defaultHeaders carries User-Agent and customHeaders (not beta). 
+ // baseConfig now targets api.anthropic.com, so this asserts the + // Anthropic-native UA (QwenCode) — the claude-cli identity bundle + // is covered by the proxy-baseURL tests at the top of the suite. const defaultHeaders = (anthropicState.constructorOptions?.[ 'defaultHeaders' ] || {}) as Record; @@ -368,6 +1001,9 @@ describe('AnthropicContentGenerator', () => { expect(reqHeaders['anthropic-beta']).toContain( 'interleaved-thinking-2025-05-14', ); + expect(reqHeaders['anthropic-beta']).toContain( + 'prompt-caching-scope-2026-01-05', + ); }); it('also sends the computed beta header on streaming requests', async () => { @@ -388,6 +1024,10 @@ describe('AnthropicContentGenerator', () => { const stream = await generator.generateContentStream({ model: 'models/ignored', contents: 'Hi', + // See the systemInstruction note in the non-streaming sibling + // test above — the body-scan beta gate needs an actual scope: + // 'global' field on the wire to fire. + config: { systemInstruction: 'sys' }, } as unknown as GenerateContentParameters); // Drain the stream so create() has been called. for await (const _chunk of stream) { @@ -401,6 +1041,9 @@ describe('AnthropicContentGenerator', () => { 'interleaved-thinking-2025-05-14', ); expect(headers['anthropic-beta']).toContain('effort-2025-11-24'); + expect(headers['anthropic-beta']).toContain( + 'prompt-caching-scope-2026-01-05', + ); }); }); @@ -654,6 +1297,155 @@ describe('AnthropicContentGenerator', () => { ); }); + describe('adaptive thinking (Claude 4.6+ models)', () => { + // Claude 4.6+ models reject the budget_tokens-shaped thinking config and + // require `{ type: 'adaptive' }`. The detection uses numeric major/minor + // comparison so future families/versions are recognized instead of + // silently falling back to the budget path. 
/**
 * Test helper: build a generator for `model` against the Anthropic-native
 * base URL, issue one non-streaming `generateContent` call, and return the
 * `thinking` field of the request body handed to the mocked
 * `messages.create`.
 *
 * @param model Model id placed in the generator config (and echoed in the
 *   mocked response).
 * @param reasoningOverride Optional `reasoning` config; when omitted (or
 *   nullish) the helper falls back to `{ effort: 'medium' }`.
 * @returns Whatever the generator put in `request.thinking` (may be
 *   `undefined`).
 */
async function thinkingFor(
  model: string,
  reasoningOverride?: ContentGeneratorConfig['reasoning'],
): Promise<unknown> {
  const generatorModule = await importGenerator();

  // Canned SDK response so generateContent() resolves without a network.
  anthropicState.createImpl.mockResolvedValue({
    id: 'anthropic-1',
    model,
    content: [{ type: 'text', text: 'hi' }],
  });

  const generatorConfig: ContentGeneratorConfig = {
    model,
    apiKey: 'test-key',
    baseUrl: 'https://api.anthropic.com',
    timeout: 10_000,
    maxRetries: 2,
    samplingParams: { max_tokens: 500 },
    schemaCompliance: 'auto',
    reasoning: reasoningOverride ?? { effort: 'medium' },
  };
  const generator = new generatorModule.AnthropicContentGenerator(
    generatorConfig,
    mockConfig,
  );

  const params = {
    model: 'models/ignored',
    contents: 'Hello',
  } as unknown as GenerateContentParameters;
  await generator.generateContent(params);

  // First positional argument of the captured create() call is the body.
  const createArgs = anthropicState.lastCreateArgs as AnthropicCreateArgs;
  const sentRequest = createArgs[0] as { thinking?: unknown };
  return sentRequest.thinking;
}
+ expect(await thinkingFor('claude-opus-4-10')).toEqual({ + type: 'adaptive', + }); + }); + + it('selects adaptive for a future major like claude-opus-5-1', async () => { + expect(await thinkingFor('claude-opus-5-1')).toEqual({ + type: 'adaptive', + }); + }); + + it('keeps the budget_tokens config for older 4.x models (e.g. claude-opus-4-5)', async () => { + expect(await thinkingFor('claude-opus-4-5')).toEqual({ + type: 'enabled', + budget_tokens: 32_000, + }); + }); + + it('honors explicit reasoning.budget_tokens before falling back to adaptive', async () => { + // Explicit budget_tokens is a user escape hatch — adaptive thinking + // would otherwise silently drop the user-supplied value because the + // adaptive shape carries no budget field. The explicit branch must + // run first. + expect( + await thinkingFor('claude-opus-4-7', { + effort: 'medium', + budget_tokens: 42_000, + }), + ).toEqual({ type: 'enabled', budget_tokens: 42_000 }); + }); + + it('still ships adaptive (no output_config, no effort beta) when reasoning is undefined on a 4.6+ model', async () => { + // Pins the existing wire shape for the corner case where a 4.6+ + // model runs with no `reasoning` config at all: the thinking field + // takes the adaptive shape, but `resolveEffectiveEffort` returns + // undefined (no effort enum to emit), so `output_config` is + // omitted and the `effort-2025-11-24` beta isn't pushed. + // `prompt-caching-scope-2026-01-05` rides along because + // enableCacheControl defaults to true. If Anthropic ever requires + // `output_config.effort` to accompany adaptive thinking, this + // pinned shape will surface the regression at this test instead + // of at runtime as a server 400. 
+ const { AnthropicContentGenerator } = await importGenerator(); + anthropicState.createImpl.mockResolvedValue({ + id: 'anthropic-1', + model: 'claude-opus-4-7', + content: [{ type: 'text', text: 'hi' }], + }); + const generator = new AnthropicContentGenerator( + { + model: 'claude-opus-4-7', + apiKey: 'test-key', + baseUrl: 'https://api.anthropic.com', + timeout: 10_000, + maxRetries: 2, + samplingParams: { max_tokens: 500 }, + schemaCompliance: 'auto', + // No `reasoning` key at all — different from `reasoning: false`. + }, + mockConfig, + ); + await generator.generateContent({ + model: 'models/ignored', + contents: 'Hello', + // Include systemInstruction so the body carries a + // `cache_control: { scope: 'global' }` field — the beta gate + // is now a body-scan, so the test needs an actual scope field + // on the wire to observe the `prompt-caching-scope` flag. + config: { systemInstruction: 'sys' }, + } as unknown as GenerateContentParameters); + + const [req, options] = + anthropicState.lastCreateArgs as AnthropicCreateArgs; + expect((req as { thinking?: unknown }).thinking).toEqual({ + type: 'adaptive', + }); + expect(req).toEqual( + expect.not.objectContaining({ output_config: expect.anything() }), + ); + const headers = ((options as { headers?: Record }) + ?.headers || {}) as Record; + expect(headers['anthropic-beta']).toContain( + 'interleaved-thinking-2025-05-14', + ); + expect(headers['anthropic-beta']).not.toContain('effort-2025-11-24'); + expect(headers['anthropic-beta']).toContain( + 'prompt-caching-scope-2026-01-05', + ); + }); + }); + it('omits thinking when request.config.thinkingConfig.includeThoughts is false', async () => { const { AnthropicContentGenerator } = await importGenerator(); anthropicState.createImpl.mockResolvedValue({ diff --git a/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts b/packages/core/src/core/anthropicContentGenerator/anthropicContentGenerator.ts index f22990dd9..b8cb8eca2 100644 --- 
/**
 * Resolve the base URL the Anthropic SDK will actually target for this
 * config, so callers can do hostname matching without special-casing the
 * "nothing configured" path.
 *
 * Resolution order mirrors the SDK constructor's destructuring default:
 *   1. An explicitly supplied `baseUrl` (anything other than `undefined`)
 *      wins. It is whitespace-trimmed; if that leaves it empty the SDK
 *      default is returned, because the SDK applies `baseURL || default`
 *      and a destructuring default (the env lookup) fires only for
 *      `undefined`. Falling through to the env here would classify the
 *      client as "proxy" while the SDK still talks to api.anthropic.com.
 *   2. Otherwise `ANTHROPIC_BASE_URL` (trimmed, empty treated as missing).
 *   3. Otherwise the SDK default literal.
 *
 * Trimming the config side keeps a copy-pasted URL with stray whitespace
 * from tripping `new URL(...)` in `isAnthropicNativeBaseUrl`.
 *
 * NOTE(review): step 1 assumes the generator hands
 * `contentGeneratorConfig.baseUrl` to the SDK unmodified — confirm if the
 * constructor ever starts normalizing it.
 *
 * @param contentGeneratorConfig Generator config; only `baseUrl` is read.
 * @returns The effective base URL string (never empty).
 */
function resolveEffectiveBaseUrl(
  contentGeneratorConfig: ContentGeneratorConfig,
): string {
  const sdkDefault = 'https://api.anthropic.com';
  const configured: unknown = contentGeneratorConfig.baseUrl;

  if (configured !== undefined) {
    // Explicit value (including '' or a stray null from JSON settings):
    // the SDK never consults the env in this case.
    const trimmed = typeof configured === 'string' ? configured.trim() : '';
    return trimmed || sdkDefault;
  }

  const fromEnv = process.env['ANTHROPIC_BASE_URL']?.trim();
  return fromEnv || sdkDefault;
}

/**
 * Whether the resolved base URL points at Anthropic's own API
 * (`api.anthropic.com` or any `*.anthropic.com` subdomain, including the
 * SDK default when nothing is configured).
 *
 * Used to gate the proxy-compatibility bundle (`Authorization: Bearer`
 * auth, `claude-cli` User-Agent, `x-app: cli`): native hosts keep the
 * SDK-default `x-api-key` auth and the truthful `QwenCode` User-Agent.
 *
 * The hostname is compared case-insensitively and with a single trailing
 * dot removed, so the absolute-FQDN spelling `api.anthropic.com.` is not
 * misclassified as a third-party proxy.
 *
 * @param contentGeneratorConfig Generator config; only `baseUrl` is read
 *   (via `resolveEffectiveBaseUrl`).
 * @returns `true` for Anthropic-native hosts; `false` for every other host
 *   and for values `new URL` cannot parse.
 */
function isAnthropicNativeBaseUrl(
  contentGeneratorConfig: ContentGeneratorConfig,
): boolean {
  let hostname: string;
  try {
    hostname = new URL(
      resolveEffectiveBaseUrl(contentGeneratorConfig),
    ).hostname.toLowerCase();
  } catch {
    // Unparseable URL: treat as non-native (original behavior).
    return false;
  }

  // `new URL` preserves the root-label dot of an absolute FQDN.
  const host = hostname.endsWith('.') ? hostname.slice(0, -1) : hostname;
  return host === 'api.anthropic.com' || host.endsWith('.anthropic.com');
}
+ const useProxyIdentity = !isAnthropicNativeBaseUrl(contentGeneratorConfig); + const defaultHeaders = this.buildHeaders(useProxyIdentity); const baseURL = contentGeneratorConfig.baseUrl; // Configure runtime options to ensure user-configured timeout works as expected // bodyTimeout is always disabled (0) to let Anthropic SDK timeout control the request @@ -118,8 +179,28 @@ export class AnthropicContentGenerator implements ContentGenerator { this.cliConfig.getProxy(), ); + // IdeaLab-style Anthropic proxies expect `Authorization: Bearer ` + // instead of the SDK-default `x-api-key` header. Use the SDK's + // `authToken` parameter (sends `Authorization: Bearer` natively) only + // when targeting a non-Anthropic-native baseURL — direct + // `api.anthropic.com` users keep the SDK-default `apiKey` (`x-api-key`) + // path so they don't break against the Anthropic API itself. + // + // Pass `null` on the unused side rather than omitting it: the SDK + // destructures with defaults (`apiKey = readEnv('ANTHROPIC_API_KEY') ?? null`, + // same for `authToken`), and destructuring defaults fire ONLY for + // `undefined`. Omitting the field would let `ANTHROPIC_API_KEY` / + // `ANTHROPIC_AUTH_TOKEN` env back-fill it; the SDK's auth resolver + // then prefers `apiKey` over `authToken`, so a user with + // `ANTHROPIC_API_KEY=sk-ant-…` exported (common for anyone who also + // runs Claude Code in the same shell) would ship their real Anthropic + // key as `X-Api-Key` to the IdeaLab proxy — leaking the credential to + // a third-party endpoint. Explicit `null` suppresses the back-fill + // and forces the intended auth path. this.client = new Anthropic({ - apiKey: contentGeneratorConfig.apiKey, + ...(useProxyIdentity + ? 
{ authToken: contentGeneratorConfig.apiKey, apiKey: null } + : { apiKey: contentGeneratorConfig.apiKey, authToken: null }), baseURL, timeout: contentGeneratorConfig.timeout || DEFAULT_TIMEOUT, maxRetries: contentGeneratorConfig.maxRetries, @@ -153,7 +234,7 @@ export class AnthropicContentGenerator implements ContentGenerator { const anthropicRequest = await this.buildRequest(request); const headers = this.buildPerRequestHeaders(anthropicRequest); const streamingRequest: MessageCreateParamsStreaming & { - thinking?: { type: 'enabled'; budget_tokens: number }; + thinking?: AnthropicThinkingParam; } = { ...anthropicRequest, stream: true, @@ -205,19 +286,34 @@ export class AnthropicContentGenerator implements ContentGenerator { return false; } - private buildHeaders(): Record { + private buildHeaders(useProxyIdentity: boolean): Record { // Beta headers are computed per-request in buildPerRequestHeaders so they // stay in sync with what the request body actually carries — see #3788 - // review feedback. Constructor headers carry only User-Agent and any - // user-supplied custom headers EXCEPT anthropic-beta (any casing): the - // per-request path owns that header, and copying it into defaultHeaders - // would cause two physical headers on the wire (one mixed-case, one - // lowercase) when the per-request override fires. + // review feedback. Constructor headers carry User-Agent, the + // proxy-only `x-app: cli` (when useProxyIdentity is true), and any + // user-supplied custom headers EXCEPT anthropic-beta (any casing): + // the per-request path owns that header, and copying it into + // defaultHeaders would cause two physical headers on the wire (one + // mixed-case, one lowercase) when the per-request override fires. 
const version = this.cliConfig.getCliVersion() || 'unknown'; - const userAgent = `QwenCode/${version} (${process.platform}; ${process.arch})`; + // For non-Anthropic-native baseURLs (IdeaLab-style proxies), present as + // `claude-cli` + `x-app: cli` to satisfy proxy Team rules that restrict + // usage by client identity. For api.anthropic.com itself we keep the + // truthful QwenCode User-Agent so usage isn't misattributed to Claude + // CLI in Anthropic's logs/quotas, and we don't ship the proxy-specific + // `x-app` header. Predicate is computed once at construction and shared + // with the auth-mode decision so the bundle stays internally consistent. + const userAgent = useProxyIdentity + ? `claude-cli/${version} (external, cli)` + : `QwenCode/${version} (${process.platform}; ${process.arch})`; const { customHeaders } = this.contentGeneratorConfig; - const headers: Record = { 'User-Agent': userAgent }; + const headers: Record = { + 'User-Agent': userAgent, + }; + if (useProxyIdentity) { + headers['x-app'] = 'cli'; + } if (customHeaders) { for (const [key, value] of Object.entries(customHeaders)) { if (key.toLowerCase() === 'anthropic-beta') continue; @@ -255,11 +351,88 @@ export class AnthropicContentGenerator implements ContentGenerator { betas.push('effort-2025-11-24'); } + // The `prompt-caching-scope-2026-01-05` beta is meaningful only when + // the body actually carries a `cache_control: { …, scope: 'global' }` + // entry. The converter emits those entries on the system text block + // and the last tool entry when `useGlobalCacheScope` is true (gated + // on `enableCacheControl !== false` AND Anthropic-native baseURL). + // Scan the assembled request body for that field rather than + // re-deriving the gate here, so: + // 1. The beta and the body-side field share a single source of + // truth — there's no window between sampling the predicate and + // emitting the body where the two could diverge. + // 2. 
/**
 * Decide whether `cache_control` entries built for the current request
 * should carry the body-side `scope: 'global'` field.
 *
 * Both conditions must hold:
 *   - `enableCacheControl` is not explicitly `false`, and
 *   - the resolved base URL is Anthropic-native.
 *
 * The config is read on every call rather than cached, so an in-place
 * change to `enableCacheControl` on the live config object is picked up
 * by the next request.
 *
 * This predicate only governs the body field. The matching
 * `prompt-caching-scope-2026-01-05` beta header is derived separately by
 * scanning the assembled request body.
 *
 * @returns `true` when scoped cache control should be attached.
 */
private useGlobalCacheScope(): boolean {
  const config = this.contentGeneratorConfig;
  if (config.enableCacheControl === false) {
    return false;
  }
  return isAnthropicNativeBaseUrl(config);
}
/**
 * Report whether the assembled request body contains at least one
 * `cache_control` object whose `scope` is `'global'`.
 *
 * Only two locations are inspected: `req.system` (when it is an array of
 * blocks) and `req.tools`. A string `system` or a missing `tools` array
 * contributes nothing. The result gates the
 * `prompt-caching-scope-2026-01-05` beta header so the header and the
 * body field are always emitted together.
 *
 * @param req The fully built Anthropic request body.
 * @returns `true` if any scanned entry carries `scope: 'global'`.
 */
private hasGlobalCacheScopeOnWire(
  req: MessageCreateParamsWithThinking,
): boolean {
  const carriesGlobalScope = (entry: unknown): boolean => {
    if (typeof entry !== 'object' || entry === null) return false;
    const cacheControl = (entry as { cache_control?: unknown }).cache_control;
    return (
      typeof cacheControl === 'object' &&
      cacheControl !== null &&
      (cacheControl as { scope?: string }).scope === 'global'
    );
  };

  const systemBlocks: readonly unknown[] = Array.isArray(req.system)
    ? req.system
    : [];
  const toolEntries: readonly unknown[] = Array.isArray(req.tools)
    ? req.tools
    : [];

  return (
    systemBlocks.some(carriesGlobalScope) ||
    toolEntries.some(carriesGlobalScope)
  );
}
`useGlobalCacheScope` is a strict subset of + // `enableCacheControl` (true only when caching is on AND the resolved + // baseURL is Anthropic-native) and governs whether the body's + // `cache_control` entries carry `scope: 'global'`. The matching + // `prompt-caching-scope-2026-01-05` beta isn't passed through this + // sample — `buildPerRequestHeaders` instead scans the assembled body + // via `hasGlobalCacheScopeOnWire` so beta and body field share a + // single source of truth. + const enableCacheControl = + this.contentGeneratorConfig.enableCacheControl !== false; + const useGlobalCacheScope = this.useGlobalCacheScope(); + const { system, messages } = this.converter.convertGeminiRequestToAnthropic( request, { @@ -322,11 +515,16 @@ export class AnthropicContentGenerator implements ContentGenerator { normalizeAssistantThinkingSignature: deepseekThinkingOn, injectThinkingOnToolUseTurns: deepseekThinkingOn, stripAssistantThinking, + enableCacheControl, + useGlobalCacheScope, }, ); const tools = request.config?.tools - ? await this.converter.convertGeminiToolsToAnthropic(request.config.tools) + ? await this.converter.convertGeminiToolsToAnthropic( + request.config.tools, + { enableCacheControl, useGlobalCacheScope }, + ) : undefined; return { @@ -438,10 +636,34 @@ export class AnthropicContentGenerator implements ContentGenerator { return effort; } + /** + * Check if the current model supports adaptive thinking (type: 'adaptive'). + * Claude 4.6+ models require adaptive thinking; older models use the + * budget-based config. Uses numeric major/minor comparison rather than a + * single-digit character class so that future families (haiku, opus-4-10, + * opus-5-1, …) are recognized instead of silently falling back to the + * budget path and tripping HTTP 400 with `budget_tokens` they don't + * accept. 
/**
 * Whether the configured model takes the adaptive thinking shape
 * (`{ type: 'adaptive' }`) instead of the budget-based one. True for
 * Claude family models at version 4.6 or later.
 *
 * Version parsing rules:
 *   - Major and minor are compared numerically, so two-digit minors
 *     (`claude-opus-4-10`) and later majors (`claude-opus-5-1`) qualify.
 *   - Each version component is limited to one or two digits. A longer
 *     digit run is a release-date suffix, not a version: without this
 *     bound `claude-opus-4-20250514` / `claude-sonnet-4-20250514`
 *     (Claude 4.0) would parse as minor 20250514 and be sent the adaptive
 *     shape.
 *   - The minor component is optional and defaults to 0, so a major-only
 *     id (`claude-opus-4-20250514`, `claude-opus-5`) is judged on its
 *     major version alone.
 *
 * The pattern is intentionally unanchored so reseller-prefixed names
 * (`bedrock/claude-opus-4-7`, `vertex_ai/claude-sonnet-4-6@…`,
 * `idealab:claude-opus-4-6`) match as well. Do not tighten it to
 * `^claude-` without covering those conventions.
 *
 * @returns `true` when the model version is >= 4.6; `false` otherwise,
 *   including when the model name does not match the pattern.
 */
private modelSupportsAdaptiveThinking(): boolean {
  const model = (this.contentGeneratorConfig.model || '').toLowerCase();
  const match = model.match(
    /claude-(?:opus|sonnet|haiku)-(\d{1,2})(?!\d)(?:-(\d{1,2})(?!\d))?/,
  );
  if (!match) return false;

  const major = Number.parseInt(match[1], 10);
  // Group 2 is absent for major-only ids; treat those as ".0".
  const minorText: string | undefined = match[2];
  const minor = minorText === undefined ? 0 : Number.parseInt(minorText, 10);
  return major > 4 || (major === 4 && minor >= 6);
}
+ if (this.modelSupportsAdaptiveThinking()) { + return { type: 'adaptive' }; + } + // When using interleaved thinking with tools, this budget token limit is the entire context window(200k tokens). // 'max' is the DeepSeek-specific extra-strong tier; bump the budget // accordingly so any client-side budgeting matches the spirit of the diff --git a/packages/core/src/core/anthropicContentGenerator/converter.test.ts b/packages/core/src/core/anthropicContentGenerator/converter.test.ts index c7d4158d9..0cf25c225 100644 --- a/packages/core/src/core/anthropicContentGenerator/converter.test.ts +++ b/packages/core/src/core/anthropicContentGenerator/converter.test.ts @@ -63,6 +63,30 @@ describe('AnthropicContentConverter', () => { ]); }); + it('emits scope:"global" on the system text when useGlobalCacheScope is set', () => { + // Anthropic-native + caching enabled → generator passes + // `useGlobalCacheScope: true` and the system prefix participates in + // cross-session caching under the `prompt-caching-scope-2026-01-05` + // beta. Non-Anthropic backends pass false (or omit) so they see the + // standard per-session shape verified by the test above. 
+ const { system } = converter.convertGeminiRequestToAnthropic( + { + model: 'models/test', + contents: 'hi', + config: { systemInstruction: 'sys' }, + }, + { useGlobalCacheScope: true }, + ); + + expect(system).toEqual([ + { + type: 'text', + text: 'sys', + cache_control: { type: 'ephemeral', scope: 'global' }, + }, + ]); + }); + it('converts a plain string content into a user message', () => { const { messages } = converter.convertGeminiRequestToAnthropic({ model: 'models/test', @@ -1118,6 +1142,29 @@ describe('AnthropicContentConverter', () => { expect(vi.mocked(convertSchema)).toHaveBeenCalledTimes(1); }); + it('emits scope:"global" on the last tool when useGlobalCacheScope is set', async () => { + // Mirror of the system-block scope test: cross-session caching for + // tools (the largest, slowest-changing prefix) only fires for + // Anthropic-native baseURLs. The generator latches the predicate + // once per request and forwards the same value here. + const tools = [ + { + functionDeclarations: [ + { name: 'get_weather', description: 'Get weather' }, + ], + }, + ] as Tool[]; + + const result = await converter.convertGeminiToolsToAnthropic(tools, { + useGlobalCacheScope: true, + }); + + expect(result[0].cache_control).toEqual({ + type: 'ephemeral', + scope: 'global', + }); + }); + it('resolves CallableTool.tool() and converts its functionDeclarations', async () => { const callable = [ { @@ -1378,5 +1425,134 @@ describe('AnthropicContentConverter', () => { }); expect(result[0]).not.toHaveProperty('cache_control'); }); + + describe('per-call options override constructor default', () => { + // The generator latches `contentGeneratorConfig.enableCacheControl` + // per request and forwards the live value to the converter, so a + // `Config.setModel()` flip is reflected without rebuilding the + // converter. These tests exercise the override directly so the + // contract is pinned at the converter level too. 
+ const tools = [ + { + functionDeclarations: [ + { name: 'get_weather', description: 'Get weather' }, + ], + }, + ] as Tool[]; + + it('overrides constructor false → true for system + messages + tools', async () => { + const constructedWithCacheOff = new AnthropicContentConverter( + 'test-model', + 'auto', + false, + ); + + const { system, messages } = + constructedWithCacheOff.convertGeminiRequestToAnthropic( + { + model: 'models/test', + contents: 'Hello', + config: { systemInstruction: 'sys' }, + }, + { enableCacheControl: true, useGlobalCacheScope: true }, + ); + + expect(system).toEqual([ + { + type: 'text', + text: 'sys', + cache_control: { type: 'ephemeral', scope: 'global' }, + }, + ]); + // Last user-text block gets per-session cache_control (no scope). + expect(messages).toEqual([ + { + role: 'user', + content: [ + { + type: 'text', + text: 'Hello', + cache_control: { type: 'ephemeral' }, + }, + ], + }, + ]); + + const result = + await constructedWithCacheOff.convertGeminiToolsToAnthropic(tools, { + enableCacheControl: true, + useGlobalCacheScope: true, + }); + expect(result[0].cache_control).toEqual({ + type: 'ephemeral', + scope: 'global', + }); + }); + + it('overrides constructor true → false (cache fully off)', async () => { + // Default ctor: enableCacheControl true. Per-call override flips to + // false, mirroring a runtime `setModel()` that switches into a + // cache-disabled provider config. 
+ const constructedWithCacheOn = new AnthropicContentConverter( + 'test-model', + 'auto', + true, + ); + + const { system, messages } = + constructedWithCacheOn.convertGeminiRequestToAnthropic( + { + model: 'models/test', + contents: 'Hello', + config: { systemInstruction: 'sys' }, + }, + { enableCacheControl: false }, + ); + + expect(system).toBe('sys'); + expect(messages).toEqual([ + { role: 'user', content: [{ type: 'text', text: 'Hello' }] }, + ]); + + const result = + await constructedWithCacheOn.convertGeminiToolsToAnthropic(tools, { + enableCacheControl: false, + }); + expect(result[0]).not.toHaveProperty('cache_control'); + }); + + it('honors useGlobalCacheScope independently of enableCacheControl source', async () => { + // Cache on (per-call), scope off (per-call default). Verify the + // emitted shape is per-session even though cache_control IS + // attached — non-Anthropic baseURL behavior in one call. + const converterDefault = new AnthropicContentConverter( + 'test-model', + 'auto', + ); + const { system } = converterDefault.convertGeminiRequestToAnthropic( + { + model: 'models/test', + contents: 'Hello', + config: { systemInstruction: 'sys' }, + }, + { + enableCacheControl: true /* useGlobalCacheScope omitted → false */, + }, + ); + expect(system).toEqual([ + { + type: 'text', + text: 'sys', + cache_control: { type: 'ephemeral' }, + }, + ]); + + const result = await converterDefault.convertGeminiToolsToAnthropic( + tools, + { enableCacheControl: true }, + ); + expect(result[0].cache_control).toEqual({ type: 'ephemeral' }); + }); + }); }); }); diff --git a/packages/core/src/core/anthropicContentGenerator/converter.ts b/packages/core/src/core/anthropicContentGenerator/converter.ts index 81f6e908d..81c996b31 100644 --- a/packages/core/src/core/anthropicContentGenerator/converter.ts +++ b/packages/core/src/core/anthropicContentGenerator/converter.ts @@ -26,8 +26,18 @@ import { } from '../../utils/schemaConverter.js'; type AnthropicMessageParam = 
Anthropic.MessageParam; +// `scope: 'global'` is sent under the `prompt-caching-scope-2026-01-05` beta +// to extend prompt caching across sessions (rather than the default +// per-session ephemeral scope). The Anthropic SDK types we depend on still +// model `cache_control` as `{ type: 'ephemeral' }` only, so we widen the +// shape here for the fields where we actually attach it (tool params and +// the system text block). +type AnthropicCacheControl = { type: 'ephemeral'; scope?: 'global' }; type AnthropicToolParam = Anthropic.Tool & { - cache_control?: { type: 'ephemeral' }; + cache_control?: AnthropicCacheControl; +}; +type AnthropicTextBlockParam = Anthropic.TextBlockParam & { + cache_control?: AnthropicCacheControl; }; type AnthropicContentBlockParam = Anthropic.ContentBlockParam; @@ -67,6 +77,28 @@ export interface ConvertGeminiRequestToAnthropicOptions { * spawned with `thinkingConfig.includeThoughts: false`). */ stripAssistantThinking?: boolean; + /** + * Per-call override for `enableCacheControl`. Falls back to the value + * captured at construction. The generator passes the live + * `contentGeneratorConfig.enableCacheControl` here so a hot + * `Config.setModel()` flip is reflected on the next request — otherwise + * the converter's body-side `cache_control` and the generator's + * per-request `prompt-caching-scope-2026-01-05` beta header (which reads + * the live config directly) can disagree. + */ + enableCacheControl?: boolean; + /** + * When `true`, emit `cache_control: { type: 'ephemeral', scope: 'global' }` + * on the system text and last tool entry so prefixes cache across + * sessions; when `false` (or omitted), emit the SDK-standard per-session + * shape `{ type: 'ephemeral' }`. 
Must be a strict subset of + * `enableCacheControl` (no scope without a cache_control entry to + * attach it to) and should mirror the generator's + * `prompt-caching-scope-2026-01-05` beta-header gate — both ship + * together or neither, so anthropic-compatible backends without + * cross-session caching support don't see an unrecognized scope field. + */ + useGlobalCacheScope?: boolean; } export class AnthropicContentConverter { @@ -88,7 +120,7 @@ export class AnthropicContentConverter { request: GenerateContentParameters, options: ConvertGeminiRequestToAnthropicOptions = {}, ): { - system?: Anthropic.TextBlockParam[] | string; + system?: AnthropicTextBlockParam[] | string; messages: AnthropicMessageParam[]; } { const messages: AnthropicMessageParam[] = []; @@ -111,11 +143,22 @@ export class AnthropicContentConverter { this.injectEmptyThinkingOnToolUseTurns(messages); } - // Add cache_control to enable prompt caching (if enabled) - const system = this.enableCacheControl - ? this.buildSystemWithCacheControl(systemText) + // Add cache_control to enable prompt caching (if enabled). Prefer the + // per-call override when the caller (typically the generator) passes + // one — that path latches the live config value alongside the + // per-request beta-header decision so the two stay in sync after + // `Config.setModel()` mutates `enableCacheControl` mid-session. + // `useGlobalCacheScope` is independent of (and a strict subset of) + // `enableCacheControl`: it only controls whether the emitted + // cache_control carries `scope: 'global'`, not whether the + // cache_control itself is emitted. + const enableCacheControl = + options.enableCacheControl ?? this.enableCacheControl; + const useGlobalCacheScope = options.useGlobalCacheScope ?? false; + const system = enableCacheControl + ? 
this.buildSystemWithCacheControl(systemText, useGlobalCacheScope) : systemText; - if (this.enableCacheControl) { + if (enableCacheControl) { this.addCacheControlToMessages(messages); } @@ -127,6 +170,10 @@ export class AnthropicContentConverter { async convertGeminiToolsToAnthropic( geminiTools: ToolListUnion, + options: { + enableCacheControl?: boolean; + useGlobalCacheScope?: boolean; + } = {}, ): Promise { const tools: AnthropicToolParam[] = []; @@ -173,12 +220,31 @@ export class AnthropicContentConverter { } } - // Add cache_control to the last tool for prompt caching (if enabled) - if (this.enableCacheControl && tools.length > 0) { + // Add cache_control to the last tool for prompt caching (if enabled). + // When `useGlobalCacheScope` is set, attach `scope: 'global'` so + // identical tool prefixes are cached across sessions — tools tend to + // be the largest, slowest-changing prefix (often 5K+ tokens), so + // cross-session reuse is where most of the hit-rate improvement under + // `prompt-caching-scope-2026-01-05` shows up. Non-Anthropic baseURLs + // ship the standard per-session shape so they don't see a scope + // extension they may not recognize. + // Per-call overrides mirror the request-shape gates in + // `convertGeminiRequestToAnthropic` so a qwen-oauth-style hot flip of + // `enableCacheControl` (the only field `Config.handleModelChange()` + // mutates in place without recreating the generator) doesn't leave + // the tool body and the beta header out of sync. `baseUrl` isn't + // hot-mutated — non-qwen-oauth providers recreate the generator on + // refresh — but the same per-call plumbing covers it for free. + const enableCacheControl = + options.enableCacheControl ?? this.enableCacheControl; + const useGlobalCacheScope = options.useGlobalCacheScope ?? 
false; + if (enableCacheControl && tools.length > 0) { const lastToolIndex = tools.length - 1; tools[lastToolIndex] = { ...tools[lastToolIndex], - cache_control: { type: 'ephemeral' }, + cache_control: useGlobalCacheScope + ? { type: 'ephemeral', scope: 'global' } + : { type: 'ephemeral' }, }; } @@ -580,10 +646,16 @@ export class AnthropicContentConverter { /** * Build system content blocks with cache_control. * Anthropic prompt caching requires cache_control on system content. + * When `useGlobalCacheScope` is set, attach `scope: 'global'` so the + * system prefix participates in cross-session caching under the + * `prompt-caching-scope-2026-01-05` beta. Otherwise emit the standard + * per-session shape so non-Anthropic baseURLs aren't sent a scope + * extension they may not recognize. */ private buildSystemWithCacheControl( systemText: string, - ): Anthropic.TextBlockParam[] | string { + useGlobalCacheScope: boolean, + ): AnthropicTextBlockParam[] | string { if (!systemText) { return systemText; } @@ -592,7 +664,9 @@ export class AnthropicContentConverter { { type: 'text', text: systemText, - cache_control: { type: 'ephemeral' }, + cache_control: useGlobalCacheScope + ? { type: 'ephemeral', scope: 'global' } + : { type: 'ephemeral' }, }, ]; } @@ -723,6 +797,14 @@ export class AnthropicContentConverter { /** * Add cache_control to the last user message's content. * This enables prompt caching for the conversation context. + * + * Deliberately emits the per-session `{ type: 'ephemeral' }` shape only — + * no `scope: 'global'`. The last user message changes every turn (it's + * the live prompt and any tool_result blocks from the immediately prior + * round), so cross-session reuse here has effectively zero hit rate and + * paying the global-scope overhead would just churn cache. System text + * and tool prefixes (which DO repeat across sessions) carry + * `scope: 'global'` instead. 
*/ private addCacheControlToMessages(messages: Anthropic.MessageParam[]): void { // Find the last user message to add cache_control