Merge pull request #2411 from QwenLM/fix-default-output-limit

Increase DEFAULT_OUTPUT_TOKEN_LIMIT from 8K to 16K
2026-05-01 21:20:44 +00:00 · 2026-03-16 17:34:22 +08:00 · 2026-03-16 17:34:22 +08:00 · b4b0041a34
commit b4b0041a34
parent 02ac895544 b9baf183b0
2 changed files with 4 additions and 3 deletions
--- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
@@ -817,12 +817,12 @@ describe('DashScopeOpenAICompatibleProvider', () => {
      const request: OpenAI.Chat.ChatCompletionCreateParams = {
        model: 'unknown-model',
        messages: [{ role: 'user', content: 'Hello' }],
-        max_tokens: 10000, // Exceeds the default limit
+        max_tokens: 20000, // Exceeds the default limit
      };

      const result = provider.buildRequest(request, 'test-prompt-id');

-      expect(result.max_tokens).toBe(8192); // Should be limited to default output limit (8K)
+      expect(result.max_tokens).toBe(16384); // Should be limited to default output limit (16K)
    });

    it('should preserve other request parameters when limiting max_tokens', () => {
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -9,7 +9,7 @@ type TokenCount = number;
 export type TokenLimitType = 'input' | 'output';

 export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
-export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 8_192; // 8K tokens
+export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 16_384; // 16K tokens

 /**
 * Accurate numeric limits:
@@ -166,6 +166,7 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
  [/^qwen3\.5/, LIMITS['64k']],
  [/^coder-model$/, LIMITS['64k']],
  [/^qwen3-max/, LIMITS['64k']],
+  [/^qwen/, LIMITS['8k']], // Qwen fallback (VL, turbo, plus, etc.): 8K

  // DeepSeek
  [/^deepseek-reasoner/, LIMITS['64k']],