diff --git a/open-sse/services/contextManager.ts b/open-sse/services/contextManager.ts
index 4a1e53da..722b9f4d 100644
--- a/open-sse/services/contextManager.ts
+++ b/open-sse/services/contextManager.ts
@@ -109,8 +109,12 @@ export function compressContext(
 
   const provider = options.provider || "default";
   const maxTokens = options.maxTokens || getTokenLimit(provider, (body.model as string) || options.model || null);
-  const reserveTokens = options.reserveTokens || 16000; // Reserve for response
-  const targetTokens = maxTokens - reserveTokens;
+  const defaultReserveTokens = Math.min(16000, Math.max(256, Math.floor(maxTokens * 0.15)));
+  const reserveTokens = Math.min(
+    options.reserveTokens ?? defaultReserveTokens,
+    Math.max(0, maxTokens - 1)
+  );
+  const targetTokens = Math.max(0, maxTokens - reserveTokens);
 
   let messages = [...body.messages];
   let currentTokens = estimateTokens(JSON.stringify(messages));
diff --git a/tests/unit/chatcore-compression-integration.test.ts b/tests/unit/chatcore-compression-integration.test.ts
index 8c08746e..66bccce0 100644
--- a/tests/unit/chatcore-compression-integration.test.ts
+++ b/tests/unit/chatcore-compression-integration.test.ts
@@ -10,14 +10,16 @@ test("chatCore integration: compressContext called proactively when context exce
   const contextLimit = getTokenLimit(provider, model);
   const threshold = Math.floor(contextLimit * 0.85);
 
-  const largeMessage = "x".repeat(threshold * 4 + 1000);
+  const history = Array.from({ length: 24 }, (_, index) => [
+    { role: "user", content: `Question ${index}: ${"context ".repeat(80)}` },
+    { role: "assistant", content: `Answer ${index}: ${"history ".repeat(80)}` },
+  ]).flat();
 
   const body = {
     model,
     messages: [
       { role: "system", content: "You are helpful." },
-      { role: "user", content: "Garbage 1".repeat(1000) },
-      { role: "assistant", content: "Garbage 2".repeat(1000) },
-      { role: "user", content: largeMessage },
+      ...history,
+      { role: "user", content: "Final question?" },
     ],
   };
@@ -38,6 +40,11 @@
     result.stats.final <= contextLimit,
     `Final tokens ${result.stats.final} should fit within limit ${contextLimit}`
   );
+  assert.equal(
+    result.body.messages[result.body.messages.length - 1].content,
+    "Final question?",
+    "Latest user turn should be preserved after compression"
+  );
 });
 
 test("chatCore integration: compressContext NOT called when context is below 85% threshold", async () => {
diff --git a/tests/unit/context-manager.test.ts b/tests/unit/context-manager.test.ts
index 77d20aa0..25e966a2 100644
--- a/tests/unit/context-manager.test.ts
+++ b/tests/unit/context-manager.test.ts
@@ -44,6 +44,20 @@ test("compressContext: returns unchanged if fits", () => {
   assert.equal(result.compressed, false);
 });
 
+test("compressContext: default reserve scales down for smaller context windows", () => {
+  const body = {
+    model: "gpt-4",
+    messages: [
+      { role: "system", content: "You are helpful." },
+      { role: "user", content: "Hello" },
+    ],
+  };
+
+  const result = compressContext(body, { provider: "openai", maxTokens: 8192 });
+  assert.equal(result.compressed, false);
+  assert.equal(result.stats.final, result.stats.original);
+});
+
 test("compressContext: handles null/empty body", () => {
   assert.equal(compressContext(null).compressed, false);
   assert.equal(compressContext({}).compressed, false);
diff --git a/tests/unit/usage-fetcher-antigravity.test.ts b/tests/unit/usage-fetcher-antigravity.test.ts
index d8d1cfe9..5bb58104 100644
--- a/tests/unit/usage-fetcher-antigravity.test.ts
+++ b/tests/unit/usage-fetcher-antigravity.test.ts
@@ -15,9 +15,11 @@ test("usage fetcher retries Antigravity quota discovery across shared fallback U
   globalThis.fetch = async (url, init = {}) => {
     calls.push({ url: String(url), init });
 
-    // Mock the first two to fail with 503
     const urlStr = String(url);
-    if (urlStr.includes("://cloudcode-pa.googleapis.com/") && !urlStr.includes("sandbox")) {
+    if (
+      urlStr === "https://cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels" ||
+      urlStr === "https://daily-cloudcode-pa.googleapis.com/v1internal:fetchAvailableModels"
+    ) {
       return new Response("unavailable", { status: 503 });
     }