fix(context): scale reserved tokens for smaller model windows

Adjust context compression to derive a smaller default response reserve
from the available token limit and cap manual reserves below the full
window.

This prevents aggressive over-reservation on smaller contexts, keeps the
latest user turn during compression, and updates unit coverage for the
new token budgeting and Antigravity fallback behavior.
This commit is contained in:
diegosouzapw 2026-04-17 20:33:15 -03:00
parent eff2c0beb7
commit e4e7bdebc6
4 changed files with 35 additions and 8 deletions

View file

@@ -44,6 +44,20 @@ test("compressContext: returns unchanged if fits", () => {
assert.equal(result.compressed, false);
});
// Verifies the new token-budget behavior: with a modest 8192-token context
// window, the scaled-down default reserve must leave this tiny two-message
// conversation untouched — no compression, and final token stats equal to
// the original.
test("compressContext: default reserve scales down for smaller context windows", () => {
  const systemMessage = { role: "system", content: "You are helpful." };
  const userMessage = { role: "user", content: "Hello" };
  const body = { model: "gpt-4", messages: [systemMessage, userMessage] };

  const result = compressContext(body, { provider: "openai", maxTokens: 8192 });

  assert.equal(result.compressed, false);
  assert.equal(result.stats.final, result.stats.original);
});
test("compressContext: handles null/empty body", () => {
assert.equal(compressContext(null).compressed, false);
assert.equal(compressContext({}).compressed, false);