mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-04 06:30:53 +00:00
Merge pull request #2411 from QwenLM/fix-default-output-limit
Increase DEFAULT_OUTPUT_TOKEN_LIMIT from 8K to 16K
This commit is contained in:
commit
b4b0041a34
2 changed files with 4 additions and 3 deletions
|
|
@@ -817,12 +817,12 @@ describe('DashScopeOpenAICompatibleProvider', () => {
|
||||||
const request: OpenAI.Chat.ChatCompletionCreateParams = {
|
const request: OpenAI.Chat.ChatCompletionCreateParams = {
|
||||||
model: 'unknown-model',
|
model: 'unknown-model',
|
||||||
messages: [{ role: 'user', content: 'Hello' }],
|
messages: [{ role: 'user', content: 'Hello' }],
|
||||||
max_tokens: 10000, // Exceeds the default limit
|
max_tokens: 20000, // Exceeds the default limit
|
||||||
};
|
};
|
||||||
|
|
||||||
const result = provider.buildRequest(request, 'test-prompt-id');
|
const result = provider.buildRequest(request, 'test-prompt-id');
|
||||||
|
|
||||||
expect(result.max_tokens).toBe(8192); // Should be limited to default output limit (8K)
|
expect(result.max_tokens).toBe(16384); // Should be limited to default output limit (16K)
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should preserve other request parameters when limiting max_tokens', () => {
|
it('should preserve other request parameters when limiting max_tokens', () => {
|
||||||
|
|
|
||||||
|
|
@@ -9,7 +9,7 @@ type TokenCount = number;
|
||||||
export type TokenLimitType = 'input' | 'output';
|
export type TokenLimitType = 'input' | 'output';
|
||||||
|
|
||||||
export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
|
export const DEFAULT_TOKEN_LIMIT: TokenCount = 131_072; // 128K (power-of-two)
|
||||||
export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 8_192; // 8K tokens
|
export const DEFAULT_OUTPUT_TOKEN_LIMIT: TokenCount = 16_384; // 16K tokens
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accurate numeric limits:
|
* Accurate numeric limits:
|
||||||
|
|
@@ -166,6 +166,7 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
|
||||||
[/^qwen3\.5/, LIMITS['64k']],
|
[/^qwen3\.5/, LIMITS['64k']],
|
||||||
[/^coder-model$/, LIMITS['64k']],
|
[/^coder-model$/, LIMITS['64k']],
|
||||||
[/^qwen3-max/, LIMITS['64k']],
|
[/^qwen3-max/, LIMITS['64k']],
|
||||||
|
[/^qwen/, LIMITS['8k']], // Qwen fallback (VL, turbo, plus, etc.): 8K
|
||||||
|
|
||||||
// DeepSeek
|
// DeepSeek
|
||||||
[/^deepseek-reasoner/, LIMITS['64k']],
|
[/^deepseek-reasoner/, LIMITS['64k']],
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue