diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index eeb751a03..21e468c44 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -101,7 +101,7 @@ Settings are organized into categories. All settings should be placed within the | `model.name` | string | The Qwen model to use for conversations. | `undefined` | | `model.maxSessionTurns` | number | Maximum number of user/model/tool turns to keep in a session. -1 means unlimited. | `-1` | | `model.summarizeToolOutput` | object | Enables or disables the summarization of tool output. You can specify the token budget for the summarization using the `tokenBudget` setting. Note: Currently only the `run_shell_command` tool is supported. For example `{"run_shell_command": {"tokenBudget": 2000}}` | `undefined` | -| `model.generationConfig` | object | Advanced overrides passed to the underlying content generator. Supports request controls such as `timeout`, `maxRetries`, `disableCacheControl`, and `customHeaders` (custom HTTP headers for API requests), along with fine-tuning knobs under `samplingParams` (for example `temperature`, `top_p`, `max_tokens`). Leave unset to rely on provider defaults. | `undefined` | +| `model.generationConfig` | object | Advanced overrides passed to the underlying content generator. Supports request controls such as `timeout`, `maxRetries`, `disableCacheControl`, `contextWindowSize` (overrides the model's context window size), `maxOutputTokens` (overrides the model's maximum output tokens), and `customHeaders` (custom HTTP headers for API requests), along with fine-tuning knobs under `samplingParams` (for example `temperature`, `top_p`, `max_tokens`). Leave unset to rely on provider defaults. | `undefined` | | `model.chatCompression.contextPercentageThreshold` | number | Sets the threshold for chat history compression as a percentage of the model's total token limit. 
This is a value between 0 and 1 that applies to both automatic compression and the manual `/compress` command. For example, a value of `0.6` will trigger compression when the chat history exceeds 60% of the token limit. Use `0` to disable compression entirely. | `0.7` | | `model.skipNextSpeakerCheck` | boolean | Skip the next speaker check. | `false` | | `model.skipLoopDetection` | boolean | Disables loop detection checks. Loop detection prevents infinite loops in AI responses but can generate false positives that interrupt legitimate workflows. Enable this option if you experience frequent false positive loop detection interruptions. | `false` | @@ -118,6 +118,7 @@ Settings are organized into categories. All settings should be placed within the "timeout": 60000, "disableCacheControl": false, "contextWindowSize": 128000, + "maxOutputTokens": 8192, "customHeaders": { "X-Request-ID": "req-123", "X-User-ID": "user-456" @@ -136,7 +137,13 @@ Settings are organized into categories. All settings should be placed within the Overrides the default context window size for the selected model. Qwen Code determines the context window using built-in defaults based on model name matching, with a constant fallback value. Use this setting when a provider's effective context limit differs from Qwen Code's default. This value defines the model's assumed maximum context capacity, not a per-request token limit. -The `customHeaders` field allows you to add custom HTTP headers to all API requests. This is useful for request tracing, monitoring, API gateway routing, or when different models require different headers. If `customHeaders` is defined in `modelProviders[].generationConfig.customHeaders`, it will be used directly; otherwise, headers from `model.generationConfig.customHeaders` will be used. No merging occurs between the two levels. +**maxOutputTokens:** + +Overrides the default maximum output tokens for the selected model. 
Qwen Code determines the maximum output tokens using built-in defaults based on model name matching, with a constant fallback value of 8,192 tokens. Use this setting when a provider's effective output limit differs from Qwen Code's default. This value defines the maximum number of tokens the model can generate in a single response. + +**customHeaders:** + +Allows you to add custom HTTP headers to all API requests. This is useful for request tracing, monitoring, API gateway routing, or when different models require different headers. If `customHeaders` is defined in `modelProviders[].generationConfig.customHeaders`, it will be used directly; otherwise, headers from `model.generationConfig.customHeaders` will be used. No merging occurs between the two levels. **model.openAILoggingDir examples:** diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 00ccc683c..5d1599fec 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -69,7 +69,6 @@ export interface SettingDefinition { default: SettingsValue; description?: string; parentKey?: string; - childKey?: string; key?: string; properties?: SettingsSchema; showInDialog?: boolean; @@ -603,7 +602,6 @@ const SETTINGS_SCHEMA = { default: undefined as number | undefined, description: 'Request timeout in milliseconds.', parentKey: 'generationConfig', - childKey: 'timeout', showInDialog: false, }, maxRetries: { @@ -614,7 +612,6 @@ const SETTINGS_SCHEMA = { default: undefined as number | undefined, description: 'Maximum number of retries for failed requests.', parentKey: 'generationConfig', - childKey: 'maxRetries', showInDialog: false, }, disableCacheControl: { @@ -625,7 +622,6 @@ const SETTINGS_SCHEMA = { default: false, description: 'Disable cache control for DashScope providers.', parentKey: 'generationConfig', - childKey: 'disableCacheControl', showInDialog: false, }, schemaCompliance: { @@ -637,7 +633,6 @@ const 
SETTINGS_SCHEMA = { description: 'The compliance mode for tool schemas sent to the model. Use "openapi_30" for strict OpenAPI 3.0 compatibility (e.g., for Gemini).', parentKey: 'generationConfig', - childKey: 'schemaCompliance', showInDialog: false, options: [ { value: 'auto', label: 'Auto (Default)' }, @@ -653,7 +648,6 @@ const SETTINGS_SCHEMA = { description: "Overrides the default context window size for the selected model. Use this setting when a provider's effective context limit differs from Qwen Code's default. This value defines the model's assumed maximum context capacity, not a per-request token limit.", parentKey: 'generationConfig', - childKey: 'contextWindowSize', showInDialog: false, }, }, diff --git a/packages/cli/src/ui/components/ContextUsageDisplay.tsx b/packages/cli/src/ui/components/ContextUsageDisplay.tsx index 1cb0605a6..56a6f665f 100644 --- a/packages/cli/src/ui/components/ContextUsageDisplay.tsx +++ b/packages/cli/src/ui/components/ContextUsageDisplay.tsx @@ -6,26 +6,22 @@ import { Text } from 'ink'; import { theme } from '../semantic-colors.js'; -import { DEFAULT_TOKEN_LIMIT, type Config } from '@qwen-code/qwen-code-core'; +import { DEFAULT_TOKEN_LIMIT } from '@qwen-code/qwen-code-core'; export const ContextUsageDisplay = ({ promptTokenCount, - model: _model, terminalWidth, - config, + contextWindowSize, }: { promptTokenCount: number; - model: string; terminalWidth: number; - config: Config; + contextWindowSize?: number; }) => { if (promptTokenCount === 0) { return null; } - const contextLimit = - config.getContentGeneratorConfig()?.contextWindowSize ?? - DEFAULT_TOKEN_LIMIT; + const contextLimit = contextWindowSize ?? 
DEFAULT_TOKEN_LIMIT; const percentage = promptTokenCount / contextLimit; const percentageUsed = (percentage * 100).toFixed(1); diff --git a/packages/cli/src/ui/components/Footer.tsx b/packages/cli/src/ui/components/Footer.tsx index 38d4c2f4f..70e2cab12 100644 --- a/packages/cli/src/ui/components/Footer.tsx +++ b/packages/cli/src/ui/components/Footer.tsx @@ -5,6 +5,7 @@ */ import type React from 'react'; +import { useMemo } from 'react'; import { Box, Text } from 'ink'; import { theme } from '../semantic-colors.js'; import { ConsoleSummaryDisplay } from './ConsoleSummaryDisplay.js'; @@ -57,6 +58,12 @@ export const Footer: React.FC = () => { // Check if debug mode is enabled const debugMode = config.getDebugMode(); + // Memoize contextWindowSize to avoid recalculating on every render + const contextWindowSize = useMemo( + () => config.getContentGeneratorConfig()?.contextWindowSize, + [config] + ); + // Left section should show exactly ONE thing at any time, in priority order. const leftContent = uiState.ctrlCPressedOnce ? ( {t('Press Ctrl+C again to exit.')} @@ -95,9 +102,8 @@ export const Footer: React.FC = () => { ), diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index 539fb9840..c18823804 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -30,7 +30,11 @@ import { StrictMissingModelIdError, } from '../models/modelConfigErrors.js'; import { PROVIDER_SOURCED_FIELDS } from '../models/modelsConfig.js'; -import { tokenLimit } from './tokenLimits.js'; +import { + tokenLimit, + DEFAULT_TOKEN_LIMIT, + DEFAULT_OUTPUT_TOKEN_LIMIT, +} from './tokenLimits.js'; /** * Interface abstracting the core functionalities for generating content and counting tokens. 
@@ -179,38 +183,50 @@ export function resolveContentGeneratorConfigWithSources( // Initialize contextWindowSize if not set by user // This ensures contextWindowSize is always available as a model-bound property - if ( - newContentGeneratorConfig.contextWindowSize === undefined && - newContentGeneratorConfig.model - ) { - newContentGeneratorConfig.contextWindowSize = tokenLimit( - newContentGeneratorConfig.model, - 'input', - ); - setSource(sources, 'contextWindowSize', { - kind: 'computed', - detail: 'auto-detected from model', - }); - } else if (newContentGeneratorConfig.contextWindowSize !== undefined) { + if (newContentGeneratorConfig.contextWindowSize === undefined) { + if (newContentGeneratorConfig.model) { + newContentGeneratorConfig.contextWindowSize = tokenLimit( + newContentGeneratorConfig.model, + 'input', + ); + setSource(sources, 'contextWindowSize', { + kind: 'computed', + detail: 'auto-detected from model', + }); + } else { + // Fallback to default when model is not available + newContentGeneratorConfig.contextWindowSize = DEFAULT_TOKEN_LIMIT; + setSource(sources, 'contextWindowSize', { + kind: 'computed', + detail: 'default fallback', + }); + } + } else { // User explicitly set contextWindowSize setSource(sources, 'contextWindowSize', seedOrUnknown('contextWindowSize')); } // Initialize maxOutputTokens if not set by user // This ensures maxOutputTokens is always available as a model-bound property - if ( - newContentGeneratorConfig.maxOutputTokens === undefined && - newContentGeneratorConfig.model - ) { - newContentGeneratorConfig.maxOutputTokens = tokenLimit( - newContentGeneratorConfig.model, - 'output', - ); - setSource(sources, 'maxOutputTokens', { - kind: 'computed', - detail: 'auto-detected from model', - }); - } else if (newContentGeneratorConfig.maxOutputTokens !== undefined) { + if (newContentGeneratorConfig.maxOutputTokens === undefined) { + if (newContentGeneratorConfig.model) { + newContentGeneratorConfig.maxOutputTokens = tokenLimit( + 
newContentGeneratorConfig.model, + 'output', + ); + setSource(sources, 'maxOutputTokens', { + kind: 'computed', + detail: 'auto-detected from model', + }); + } else { + // Fallback to default when model is not available + newContentGeneratorConfig.maxOutputTokens = DEFAULT_OUTPUT_TOKEN_LIMIT; + setSource(sources, 'maxOutputTokens', { + kind: 'computed', + detail: 'default fallback', + }); + } + } else { // User explicitly set maxOutputTokens setSource(sources, 'maxOutputTokens', seedOrUnknown('maxOutputTokens')); }