diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
index e1ecb61b6..024e9a28c 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.test.ts
@@ -817,12 +817,12 @@ describe('DashScopeOpenAICompatibleProvider', () => {
       const request: OpenAI.Chat.ChatCompletionCreateParams = {
         model: 'unknown-model',
         messages: [{ role: 'user', content: 'Hello' }],
-        max_tokens: 10000, // Exceeds the default limit
+        max_tokens: 20000, // Exceeds the default limit
       };

       const result = provider.buildRequest(request, 'test-prompt-id');

-      expect(result.max_tokens).toBe(8192); // Should be limited to default output limit (8K)
+      expect(result.max_tokens).toBe(16384); // Should be limited to default output limit (16K)
     });

     it('should preserve other request parameters when limiting max_tokens', () => {
diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts
index 2807e56c1..b566a01dc 100644
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -9,7 +9,7 @@ type TokenCount = number;
 export type TokenLimitType = 'input' | 'output';

 export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
-export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 8_192; // 8K tokens
+export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 16_384; // 16K tokens

 /**
  * Accurate numeric limits:
@@ -166,6 +166,7 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
   [/^qwen3\.5/, LIMITS['64k']],
   [/^coder-model$/, LIMITS['64k']],
   [/^qwen3-max/, LIMITS['64k']],
+  [/^qwen/, LIMITS['8k']], // Qwen fallback (VL, turbo, plus, etc.): 8K

   // DeepSeek
   [/^deepseek-reasoner/, LIMITS['64k']],
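
For context: the `tokenLimits.ts` change presumably relies on `OUTPUT_PATTERNS` being scanned in order, so the new broad `/^qwen/` fallback only catches Qwen models that no earlier, more specific entry (such as `/^qwen3-max/`) already matched, while genuinely unknown models fall through to the raised 16K default. Below is a minimal sketch of that first-match-wins lookup, with a trimmed pattern table and a hypothetical `outputTokenLimit` helper; the repository's actual lookup function may differ.

```ts
// Sketch of an ordered-pattern output-limit lookup (illustrative, not the
// repository's actual implementation). First matching regex wins, so
// specific entries must precede the broad /^qwen/ fallback.

type TokenCount = number;

const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 16_384; // 16K tokens

const LIMITS: Record<string, TokenCount> = {
  '8k': 8_192,
  '64k': 65_536,
};

const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
  [/^qwen3-max/, LIMITS['64k']], // specific entry, checked first
  [/^qwen/, LIMITS['8k']], // broad Qwen fallback, checked last
];

// Hypothetical helper name; returns the first matching pattern's limit,
// or the default for models no pattern recognizes.
function outputTokenLimit(model: string): TokenCount {
  for (const [pattern, limit] of OUTPUT_PATTERNS) {
    if (pattern.test(model)) return limit;
  }
  return DEFAULT_OUTPUT_TOKEN_LIMIT;
}

console.log(outputTokenLimit('qwen3-max-2025')); // 65536 (specific match)
console.log(outputTokenLimit('qwen-vl-plus'));   // 8192  (fallback match)
console.log(outputTokenLimit('unknown-model'));  // 16384 (default)
```

Ordering the table from specific to general is what keeps the new fallback safe: had `/^qwen/` appeared before `/^qwen3-max/`, every Qwen model would be capped at 8K.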