diff --git a/open-sse/services/contextManager.ts b/open-sse/services/contextManager.ts
index 4a1e53da..722b9f4d 100644
--- a/open-sse/services/contextManager.ts
+++ b/open-sse/services/contextManager.ts
@@ -109,8 +109,12 @@ export function compressContext(
 
   const provider = options.provider || "default";
   const maxTokens = options.maxTokens || getTokenLimit(provider, (body.model as string) || options.model || null);
-  const reserveTokens = options.reserveTokens || 16000; // Reserve for response
-  const targetTokens = maxTokens - reserveTokens;
+  const defaultReserveTokens = Math.min(16000, Math.max(256, Math.floor(maxTokens * 0.15)));
+  const reserveTokens = Math.min(
+    options.reserveTokens ?? defaultReserveTokens,
+    Math.max(0, maxTokens - 1)
+  );
+  const targetTokens = Math.max(0, maxTokens - reserveTokens);
 
   let messages = [...body.messages];
   let currentTokens = estimateTokens(JSON.stringify(messages));
diff --git a/tests/unit/chatcore-compression-integration.test.ts b/tests/unit/chatcore-compression-integration.test.ts
index 8c08746e..66bccce0 100644
--- a/tests/unit/chatcore-compression-integration.test.ts
+++ b/tests/unit/chatcore-compression-integration.test.ts
@@ -10,14 +10,16 @@ test("chatCore integration: compressContext called proactively when context exce
   const contextLimit = getTokenLimit(provider, model);
   const threshold = Math.floor(contextLimit * 0.85);
 
-  const largeMessage = "x".repeat(threshold * 4 + 1000);
+  const history = Array.from({ length: 24 }, (_, index) => [
+    { role: "user", content: `Question ${index}: ${"context ".repeat(80)}` },
+    { role: "assistant", content: `Answer ${index}: ${"history ".repeat(80)}` },
+  ]).flat();
 
   const body = {
     model,
     messages: [
       { role: "system", content: "You are helpful." },
-      { role: "user", content: "Garbage 1".repeat(1000) },
-      { role: "assistant", content: "Garbage 2".repeat(1000) },
-      { role: "user", content: largeMessage },
+      ...history,
+      { role: "user", content: "Final question?" },
     ],
   };
@@ -38,6 +40,11 @@
     result.stats.final <= contextLimit,
     `Final tokens ${result.stats.final} should fit within limit ${contextLimit}`
   );
+  assert.equal(
+    result.body.messages[result.body.messages.length - 1].content,
+    "Final question?",
+    "Latest user turn should be preserved after compression"
+  );
 });
 
 test("chatCore integration: compressContext NOT called when context is below 85% threshold", async () => {
diff --git a/tests/unit/context-manager.test.ts b/tests/unit/context-manager.test.ts
index 77d20aa0..25e966a2 100644
--- a/tests/unit/context-manager.test.ts
+++ b/tests/unit/context-manager.test.ts
@@ -44,6 +44,20 @@ test("compressContext: returns unchanged if fits", () => {
   assert.equal(result.compressed, false);
 });
 
+test("compressContext: default reserve scales down for smaller context windows", () => {
+  const body = {
+    model: "gpt-4",
+    messages: [
+      { role: "system", content: "You are helpful." },
+      { role: "user", content: "Hello" },
+    ],
+  };
+
+  const result = compressContext(body, { provider: "openai", maxTokens: 8192 });
+  assert.equal(result.compressed, false);
+  assert.equal(result.stats.final, result.stats.original);
+});
+
 test("compressContext: handles null/empty body", () => {
   assert.equal(compressContext(null).compressed, false);
   assert.equal(compressContext({}).compressed, false);
diff --git a/tests/unit/usage-fetcher-antigravity.test.ts b/tests/unit/usage-fetcher-antigravity.test.ts
index d8d1cfe9..5bb58104 100644
--- a/tests/unit/usage-fetcher-antigravity.test.ts
+++ b/tests/unit/usage-fetcher-antigravity.test.ts
@@ -15,9 +15,11 @@ test("usage fetcher retries Antigravity quota discovery across shared fallback U
   globalThis.fetch = async (url, init = {}) => {
     calls.push({ url: String(url), init });
 
-    // Mock the first two to fail with 503
     const urlStr = String(url);
-    if (urlStr.includes("://cloudcode-pa.googleapis.com/") && !urlStr.includes("sandbox")) {
+    if (
+      urlStr === "https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels" ||
+      urlStr === "https://daily-cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels"
+    ) {
       return new Response("unavailable", { status: 503 });
     }