fix(core): correct GPT-5.x input token limit to 272K

GPT-5.x models have a 400K-token total context window, of which 128K is reserved for output, so the effective input limit is 272K (400K - 128K). The `/^gpt-5/` input pattern now maps to a new `272k` limit instead of `400k`. Also updates the GLM-5 and GLM-4.7 output limits from 16K to 128K.

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
2026-06-01 14:21:21 +00:00 · 2026-03-13 14:26:12 +08:00 · 2026-03-13 14:26:12 +08:00 · 613e8b3633
commit 613e8b3633
parent 625c50aae7
2 changed files with 9 additions and 8 deletions
--- a/packages/core/src/core/tokenLimits.test.ts
+++ b/packages/core/src/core/tokenLimits.test.ts
@ -108,11 +108,11 @@ describe('tokenLimit', () => {
  });

  describe('OpenAI', () => {
-    it('should return 400K for GPT-5.x (latest)', () => {
-      expect(tokenLimit('gpt-5')).toBe(400000);
-      expect(tokenLimit('gpt-5-mini')).toBe(400000);
-      expect(tokenLimit('gpt-5.2')).toBe(400000);
-      expect(tokenLimit('gpt-5.2-pro')).toBe(400000);
+    it('should return 272K for GPT-5.x (latest)', () => {
+      expect(tokenLimit('gpt-5')).toBe(272000);
+      expect(tokenLimit('gpt-5-mini')).toBe(272000);
+      expect(tokenLimit('gpt-5.2')).toBe(272000);
+      expect(tokenLimit('gpt-5.2-pro')).toBe(272000);
    });

    it('should return 128K for legacy GPT (fallback)', () => {
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@ -23,6 +23,7 @@ const LIMITS = {
  '128k': 131_072,
  '200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, etc.
  '256k': 262_144,
+  '272k': 272_000, // vendor-declared decimal, GPT-5.x input (400K total - 128K output)
  '400k': 400_000, // vendor-declared decimal, used by OpenAI GPT-5.x
  '512k': 524_288,
  '1m': 1_000_000,
@ -87,7 +88,7 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
  // -------------------
  // OpenAI
  // -------------------
-  [/^gpt-5/, LIMITS['400k']], // GPT-5.x: 400K
+  [/^gpt-5/, LIMITS['272k']], // GPT-5.x: 272K input (400K total - 128K output)
  [/^gpt-/, LIMITS['128k']], // GPT fallback (4o, 4.1, etc.): 128K
  [/^o\d/, LIMITS['200k']], // o-series (o3, o4-mini, etc.): 200K

@ -171,8 +172,8 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
  [/^deepseek-chat/, LIMITS['8k']],

  // Zhipu GLM
-  [/^glm-5/, LIMITS['16k']],
-  [/^glm-4\.7/, LIMITS['16k']],
+  [/^glm-5/, LIMITS['128k']],
+  [/^glm-4\.7/, LIMITS['128k']],

  // MiniMax
  [/^minimax-m2\.5/i, LIMITS['64k']],