From ea3a2d6f5b3a985edadf87e4be1fe198c1348b4f Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Wed, 25 Mar 2026 15:13:10 +0800 Subject: [PATCH 1/2] fix: improve /compress split point selection for tool-heavy conversations When conversation history is near the context window limit and dominated by tool call/response cycles, findCompressSplitPoint would return a near-zero split point because it only considered non-functionResponse user messages as valid split points. This caused /compress to send almost no history to the compression API (e.g. 29 tokens), producing a useless summary that inflated token count instead of reducing it. Changes: - Track tool completion boundaries (positions after functionResponse) as fallback split points in findCompressSplitPoint - Add user-with-functionResponse to the compress-everything safety check - Use Math.max of primary and fallback split points for better coverage - Add minimum content guard (5% threshold) to prevent futile API calls - Add 4 new test cases covering tool-heavy conversation scenarios Fixes #2647 --- .../services/chatCompressionService.test.ts | 169 ++++++++++++++++++ .../src/services/chatCompressionService.ts | 50 +++++- 2 files changed, 217 insertions(+), 2 deletions(-) diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index f3f490214..95d83c354 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -100,6 +100,175 @@ describe('findCompressSplitPoint', () => { ]; expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(2); }); + + it('should compress everything when last message is a functionResponse', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix this bug' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'readFile', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 
'readFile', + response: { result: 'file content' }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'writeFile', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'writeFile', + response: { result: 'ok' }, + }, + }, + ], + }, + ]; + // Last message is functionResponse -> safe to compress everything + expect(findCompressSplitPoint(history, 0.7)).toBe(5); + }); + + it('should use tool completion split point when only one user query exists', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix this' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'read1', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read1', + response: { result: 'a'.repeat(1000) }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'read2', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read2', + response: { result: 'b'.repeat(1000) }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'write1', args: {} } }], + }, + ]; + // Only one non-functionResponse user message (index 0) -> lastSplitPoint=0 + // Last message has functionCall -> can't compress everything + // But tool completion split points exist (at indices 3 and 5) + // Should use lastToolCompletionSplitPoint=5 instead of lastSplitPoint=0 + expect(findCompressSplitPoint(history, 0.7)).toBe(5); + }); + + it('should prefer larger split point between primary and tool completion', () => { + const longContent = 'a'.repeat(10000); + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix bug A' }] }, + { role: 'model', parts: [{ text: 'OK' }] }, + { role: 'user', parts: [{ text: 'Fix bug B' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'read1', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read1', + response: { result: longContent }, + }, + }, + ], + }, + { + 
role: 'model', + parts: [{ functionCall: { name: 'read2', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read2', + response: { result: longContent }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'write1', args: {} } }], + }, + ]; + // Primary split points at 0 and 2 (early, before the bulky tool outputs) + // Tool completion split points at 5 and 7 (after tool call pairs) + // Last message has functionCall -> can't compress everything + // Should use max(lastSplitPoint=2, lastToolCompletionSplitPoint=7) = 7 + expect(findCompressSplitPoint(history, 0.7)).toBe(7); + }); + + it('should still prefer primary split point when it is better', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'resp1' }] }, + { + role: 'user', + parts: [{ text: 'msg2 with some substantial content here' }], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'tool1', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'tool1', + response: { result: 'short' }, + }, + }, + ], + }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'resp3' }] }, + { role: 'user', parts: [{ text: 'msg4' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'tool2', args: {} } }], + }, + ]; + // Primary split points: 0, 2, 5, 7 + // Tool completion: 5 + // Last message has functionCall -> can't compress everything + // At 0.99 fraction, lastSplitPoint should be 7 which is > lastToolCompletion=5 + expect(findCompressSplitPoint(history, 0.99)).toBe(7); + }); }); describe('ChatCompressionService', () => { diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index 610445fb4..84836f72e 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -48,6 +48,11 @@ export 
function findCompressSplitPoint( const targetCharCount = totalCharCount * fraction; let lastSplitPoint = 0; // 0 is always valid (compress nothing) + // In tool-heavy conversations, non-functionResponse user messages are rare. + // Track positions after tool call completions (functionResponse) as fallback + // split points, ensuring all function calls in the compressed portion have + // matching responses. + let lastToolCompletionSplitPoint = 0; let cumulativeCharCount = 0; for (let i = 0; i < contents.length; i++) { const content = contents[i]; @@ -60,6 +65,14 @@ export function findCompressSplitPoint( } lastSplitPoint = i; } + // After a functionResponse, all preceding function calls are properly + // paired, making the next position (i+1) a safe place to split. + if ( + content.role === 'user' && + content.parts?.some((part) => !!part.functionResponse) + ) { + lastToolCompletionSplitPoint = i + 1; + } cumulativeCharCount += charCounts[i]; } @@ -72,9 +85,20 @@ export function findCompressSplitPoint( ) { return contents.length; } + // Also safe to compress everything if the last message completes a tool call + // sequence (all function calls have matching responses). + if ( + lastContent?.role === 'user' && + lastContent?.parts?.some((part) => !!part.functionResponse) + ) { + return contents.length; + } - // Can't compress everything so just compress at last splitpoint. - return lastSplitPoint; + // Use the split point that provides the most content to compress. + // In tool-heavy conversations with few user queries, lastSplitPoint may be + // near the beginning of the history while lastToolCompletionSplitPoint + // provides much better coverage. + return Math.max(lastSplitPoint, lastToolCompletionSplitPoint); } export class ChatCompressionService { @@ -157,6 +181,28 @@ export class ChatCompressionService { }; } + // Guard: if historyToCompress is too small relative to the total history, + // skip compression. 
This prevents futile API calls where the model receives + // almost no context and generates a useless "summary" that inflates tokens. + const compressCharCount = historyToCompress.reduce( + (sum, c) => sum + JSON.stringify(c).length, + 0, + ); + const totalCharCount = curatedHistory.reduce( + (sum, c) => sum + JSON.stringify(c).length, + 0, + ); + if (totalCharCount > 0 && compressCharCount / totalCharCount < 0.05) { + return { + newHistory: null, + info: { + originalTokenCount, + newTokenCount: originalTokenCount, + compressionStatus: CompressionStatus.NOOP, + }, + }; + } + const summaryResponse = await config.getContentGenerator().generateContent( { model, From 501f80a514acea9fbac5c756de9da61513f1296e Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Mon, 30 Mar 2026 18:00:27 +0800 Subject: [PATCH 2/2] fix: handle orphaned funcCall and improve compression logic for tool-heavy conversations - Strip trailing orphaned funcCall (force=true) before split point calculation, so normal compression logic runs cleanly on the remaining history instead of requiring ad-hoc special-casing - Remove redundant lastToolCompletionSplitPoint machinery: after fixing the i+2 index bug, lastSplitPoint already subsumes it, making Math.max redundant - Add MIN_COMPRESSION_FRACTION constant (0.05) to guard against futile API calls when historyToCompress is too small relative to total history - Add tests for orphaned funcCall handling (force=true compresses, force=false NOOP) - Add test for MIN_COMPRESSION_FRACTION guard Fixes #2647 --- .../services/chatCompressionService.test.ts | 206 +++++++++++++++++- .../src/services/chatCompressionService.ts | 61 ++++-- 2 files changed, 233 insertions(+), 34 deletions(-) diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index 95d83c354..55a1e2723 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ 
b/packages/core/src/services/chatCompressionService.test.ts @@ -139,7 +139,7 @@ describe('findCompressSplitPoint', () => { expect(findCompressSplitPoint(history, 0.7)).toBe(5); }); - it('should use tool completion split point when only one user query exists', () => { + it('should return primary split point when tool completions have no subsequent regular user message', () => { const history: Content[] = [ { role: 'user', parts: [{ text: 'Fix this' }] }, { @@ -179,12 +179,12 @@ describe('findCompressSplitPoint', () => { ]; // Only one non-functionResponse user message (index 0) -> lastSplitPoint=0 // Last message has functionCall -> can't compress everything - // But tool completion split points exist (at indices 3 and 5) - // Should use lastToolCompletionSplitPoint=5 instead of lastSplitPoint=0 - expect(findCompressSplitPoint(history, 0.7)).toBe(5); + // historyToKeep must start with a regular user message, so split at 0 + // (compress nothing) is the only valid option. + expect(findCompressSplitPoint(history, 0.7)).toBe(0); }); - it('should prefer larger split point between primary and tool completion', () => { + it('should prefer primary split point when tool completions yield no valid user-starting split', () => { const longContent = 'a'.repeat(10000); const history: Content[] = [ { role: 'user', parts: [{ text: 'Fix bug A' }] }, @@ -225,11 +225,10 @@ describe('findCompressSplitPoint', () => { parts: [{ functionCall: { name: 'write1', args: {} } }], }, ]; - // Primary split points at 0 and 2 (early, before the bulky tool outputs) - // Tool completion split points at 5 and 7 (after tool call pairs) + // Primary split points at 0 and 2 (regular user messages before the bulky tool outputs) // Last message has functionCall -> can't compress everything - // Should use max(lastSplitPoint=2, lastToolCompletionSplitPoint=7) = 7 - expect(findCompressSplitPoint(history, 0.7)).toBe(7); + // Should return lastSplitPoint=2 (last valid primary split point) + 
expect(findCompressSplitPoint(history, 0.7)).toBe(2); }); it('should still prefer primary split point when it is better', () => { @@ -264,9 +263,8 @@ describe('findCompressSplitPoint', () => { }, ]; // Primary split points: 0, 2, 5, 7 - // Tool completion: 5 // Last message has functionCall -> can't compress everything - // At 0.99 fraction, lastSplitPoint should be 7 which is > lastToolCompletion=5 + // At 0.99 fraction, lastSplitPoint should be 7 expect(findCompressSplitPoint(history, 0.99)).toBe(7); }); }); @@ -409,6 +407,47 @@ describe('ChatCompressionService', () => { expect(tokenLimit).not.toHaveBeenCalled(); }); + it('should return NOOP when historyToCompress is below MIN_COMPRESSION_FRACTION of total', async () => { + // Construct a history where the split point lands on the 2nd regular user + // message (index 2), but indices 0-1 are tiny relative to the huge content + // at index 2. historyToCompress = [0,1] will be << 5% of totalCharCount. + const hugeContent = 'x'.repeat(100000); + const history: Content[] = [ + { role: 'user', parts: [{ text: 'hello' }] }, + { role: 'model', parts: [{ text: 'world' }] }, + // Huge user message pushes the cumulative well past the split threshold + { role: 'user', parts: [{ text: hugeContent }] }, + // Pending functionCall prevents returning contents.length, + // so the fallback split at index 2 is used + { + role: 'model', + parts: [{ functionCall: { name: 'process', args: {} } }], + }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(100); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const mockGenerateContent = vi.fn(); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + // force=true bypasses the token threshold gate so we exercise the 5% guard + const result = await service.compress( + mockChat, + mockPromptId, + true, + mockModel, + 
mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(result.newHistory).toBeNull(); + expect(mockGenerateContent).not.toHaveBeenCalled(); + }); + it('should compress if over token threshold', async () => { const history: Content[] = [ { role: 'user', parts: [{ text: 'msg1' }] }, @@ -1103,4 +1142,149 @@ describe('ChatCompressionService', () => { expect(mockFirePreCompactEvent).not.toHaveBeenCalled(); }); }); + + describe('orphaned trailing funcCall handling', () => { + it('should compress everything when force=true and last message is an orphaned funcCall', async () => { + // Issue #2647: tool-heavy conversation interrupted/crashed while a tool + // was still running. The funcCall will never get a response since the agent + // is idle. Manual /compress strips the orphaned funcCall, then compresses + // the remaining history normally. + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'glob', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'glob', + response: { result: 'files...' }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'readFile', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'readFile', + response: { result: 'code...' 
}, + }, + }, + ], + }, + // orphaned funcCall — agent was interrupted before getting a response + { + role: 'model', + parts: [{ functionCall: { name: 'editFile', args: {} } }], + }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 100, + ); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { content: { parts: [{ text: 'Summary of all work done' }] } }, + ], + usageMetadata: { + promptTokenCount: 1100, + candidatesTokenCount: 50, + totalTokenCount: 1150, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + true, // force=true (manual /compress) + mockModel, + mockConfig, + false, + ); + + // Should compress successfully — orphaned funcCall is stripped first, then + // normal compression runs on the remaining history, historyToKeep is empty + expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); + expect(result.newHistory).not.toBeNull(); + // Reconstructed history: [User(summary), Model("Got it...")] — valid structure + expect(result.newHistory).toHaveLength(2); + expect(result.newHistory![0].role).toBe('user'); + expect(result.newHistory![1].role).toBe('model'); + // The orphaned funcCall is stripped before compression, so only the first 5 + // messages are sent, plus the compression instruction (+1) = history.length total. + const callArg = mockGenerateContent.mock.calls[0][0]; + expect(callArg.contents.length).toBe(history.length); // (history.length - 1) messages + 1 instruction + }); + + it('should NOT compress orphaned funcCall when force=false (auto-compress)', async () => { + // Auto-compress fires BEFORE the matching funcResponse is sent back to the + // model. 
Compressing the funcCall away would orphan the upcoming funcResponse + // and cause an API error. So force=false must NOT take this path. + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'glob', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'glob', + response: { result: 'files...' }, + }, + }, + ], + }, + // Pending funcCall: tool is currently executing, funcResponse is coming + { + role: 'model', + parts: [{ functionCall: { name: 'readFile', args: {} } }], + }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + // Use a token count above threshold to ensure auto-compress isn't skipped + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 800, + ); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + model: 'gemini-pro', + contextWindowSize: 1000, + } as unknown as ReturnType<typeof mockConfig.getContentGeneratorConfig>); + + const mockGenerateContent = vi.fn(); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + false, // force=false (auto-compress) + mockModel, + mockConfig, + false, + ); + + // Must return NOOP — compressing would orphan the upcoming funcResponse + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(result.newHistory).toBeNull(); + expect(mockGenerateContent).not.toHaveBeenCalled(); + }); + }); }); diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index 84836f72e..528a57b44 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -29,6 +29,13 @@ export const COMPRESSION_TOKEN_THRESHOLD = 0.7; */ export const COMPRESSION_PRESERVE_THRESHOLD = 0.3; +/** + * Minimum fraction of 
history (by character count) that must be compressible + * to proceed with a compression API call. Prevents futile calls where the + * model receives almost no context and generates a useless summary. + */ +export const MIN_COMPRESSION_FRACTION = 0.05; + /** * Returns the index of the oldest item to keep when compressing. May return * contents.length which indicates that everything should be compressed. @@ -48,11 +55,6 @@ export function findCompressSplitPoint( const targetCharCount = totalCharCount * fraction; let lastSplitPoint = 0; // 0 is always valid (compress nothing) - // In tool-heavy conversations, non-functionResponse user messages are rare. - // Track positions after tool call completions (functionResponse) as fallback - // split points, ensuring all function calls in the compressed portion have - // matching responses. - let lastToolCompletionSplitPoint = 0; let cumulativeCharCount = 0; for (let i = 0; i < contents.length; i++) { const content = contents[i]; @@ -65,14 +67,6 @@ export function findCompressSplitPoint( } lastSplitPoint = i; } - // After a functionResponse, all preceding function calls are properly - // paired, making the next position (i+1) a safe place to split. - if ( - content.role === 'user' && - content.parts?.some((part) => !!part.functionResponse) - ) { - lastToolCompletionSplitPoint = i + 1; - } cumulativeCharCount += charCounts[i]; } @@ -94,11 +88,7 @@ export function findCompressSplitPoint( return contents.length; } - // Use the split point that provides the most content to compress. - // In tool-heavy conversations with few user queries, lastSplitPoint may be - // near the beginning of the history while lastToolCompletionSplitPoint - // provides much better coverage. 
- return Math.max(lastSplitPoint, lastToolCompletionSplitPoint); + return lastSplitPoint; } export class ChatCompressionService { @@ -162,13 +152,31 @@ export class ChatCompressionService { } } + // For manual /compress (force=true), if the last message is an orphaned model + // funcCall (agent interrupted/crashed before the response arrived), strip it + // before computing the split point. After stripping, the history ends cleanly + // (typically with a user funcResponse) and findCompressSplitPoint handles it + // through its normal logic — no special-casing needed. + // + // auto-compress (force=false) must NOT strip: it fires inside + // sendMessageStream() before the matching funcResponse is pushed onto the + // history, so the trailing funcCall is still active, not orphaned. + const lastMessage = curatedHistory[curatedHistory.length - 1]; + const hasOrphanedFuncCall = + force && + lastMessage?.role === 'model' && + lastMessage.parts?.some((p) => !!p.functionCall); + const historyForSplit = hasOrphanedFuncCall + ? curatedHistory.slice(0, -1) + : curatedHistory; + const splitPoint = findCompressSplitPoint( - curatedHistory, + historyForSplit, 1 - COMPRESSION_PRESERVE_THRESHOLD, ); - const historyToCompress = curatedHistory.slice(0, splitPoint); - const historyToKeep = curatedHistory.slice(splitPoint); + const historyToCompress = historyForSplit.slice(0, splitPoint); + const historyToKeep = historyForSplit.slice(splitPoint); if (historyToCompress.length === 0) { return { @@ -184,15 +192,22 @@ export class ChatCompressionService { // Guard: if historyToCompress is too small relative to the total history, // skip compression. This prevents futile API calls where the model receives // almost no context and generates a useless "summary" that inflates tokens. + // + // Note: findCompressSplitPoint already computes charCounts internally but + // returns only the split index. 
We intentionally recompute here to keep + // the function signature simple; this is a minor, acceptable duplication. const compressCharCount = historyToCompress.reduce( (sum, c) => sum + JSON.stringify(c).length, 0, ); - const totalCharCount = curatedHistory.reduce( + const totalCharCount = historyForSplit.reduce( (sum, c) => sum + JSON.stringify(c).length, 0, ); - if (totalCharCount > 0 && compressCharCount / totalCharCount < 0.05) { + if ( + totalCharCount > 0 && + compressCharCount / totalCharCount < MIN_COMPRESSION_FRACTION + ) { return { newHistory: null, info: {