From 3bd460a9cc122fc18390f41bc6f45b51dc04fa4a Mon Sep 17 00:00:00 2001 From: xwj02155382 Date: Fri, 23 Jan 2026 16:27:10 +0800 Subject: [PATCH] refactor: optimize token limit handling and simplify API - Initialize contextWindowSize and maxOutputTokens in contentGeneratorConfig during config resolution - Remove third parameter from tokenLimit() function for cleaner API - Replace all tokenLimit() calls with direct config property access for better performance - Add maxOutputTokens field to ContentGeneratorConfig type - Update dashscope provider to use config.maxOutputTokens - Auto-detect token limits from model during initialization if not user-configured - Update settingsSchema: set contextWindowSize default to undefined and showInDialog to false Benefits: - Token limits calculated once during initialization instead of repeatedly - Cleaner API with fewer parameters - Better performance by caching computed values - User configuration takes precedence over auto-detection - All 72 unit tests passing --- packages/cli/src/acp-integration/acpAgent.ts | 10 +++-- packages/cli/src/config/settingsSchema.ts | 6 +-- .../src/ui/components/ContextUsageDisplay.tsx | 9 ++-- packages/core/src/config/config.ts | 11 +++-- packages/core/src/core/contentGenerator.ts | 44 ++++++++++++++++++- .../provider/dashscope.ts | 11 +++-- packages/core/src/core/tokenLimits.ts | 17 ++----- .../src/services/chatCompressionService.ts | 7 +-- .../vscode-ide-companion/src/webview/App.tsx | 7 ++- 9 files changed, 86 insertions(+), 36 deletions(-) diff --git a/packages/cli/src/acp-integration/acpAgent.ts b/packages/cli/src/acp-integration/acpAgent.ts index 6c40bffee..a333d75d2 100644 --- a/packages/cli/src/acp-integration/acpAgent.ts +++ b/packages/cli/src/acp-integration/acpAgent.ts @@ -18,7 +18,6 @@ import { type Config, type ConversationRecord, type DeviceAuthorizationData, - tokenLimit, } from '@qwen-code/qwen-code-core'; import type { ApprovalModeValue } from './schema.js'; import * as acp from './acp.js'; @@ -374,12 +373,17 @@ class GeminiAgent { ).trim(); const availableModels = config.getAvailableModels(); + // Get the contentGeneratorConfig which contains contextWindowSize + // This value is either user-configured or auto-detected during config initialization + const contentGeneratorConfig = config.getContentGeneratorConfig(); + const mappedAvailableModels = availableModels.map((model) => ({ modelId: model.id, name: model.label, description: model.description ?? null, _meta: { - contextLimit: tokenLimit(model.id), + // Use the contextWindowSize from config, which is always set during initialization + contextLimit: contentGeneratorConfig?.contextWindowSize, }, })); @@ -392,7 +396,7 @@ class GeminiAgent { name: currentModelId, description: null, _meta: { - contextLimit: tokenLimit(currentModelId), + contextLimit: contentGeneratorConfig?.contextWindowSize, }, }); } diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index ef54ec34d..00ccc683c 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -649,12 +649,12 @@ const SETTINGS_SCHEMA = { label: 'Context Window Size', category: 'Generation Configuration', requiresRestart: false, - default: -1, + default: undefined, description: - 'Override the automatic context window size detection. Set to -1 to use automatic detection based on the model. Set to a positive number to use a custom context window size.', + "Overrides the default context window size for the selected model. Use this setting when a provider's effective context limit differs from Qwen Code's default. This value defines the model's assumed maximum context capacity, not a per-request token limit.", parentKey: 'generationConfig', childKey: 'contextWindowSize', - showInDialog: true, + showInDialog: false, }, }, }, diff --git a/packages/cli/src/ui/components/ContextUsageDisplay.tsx b/packages/cli/src/ui/components/ContextUsageDisplay.tsx index d7ccc46c9..1cb0605a6 100644 --- a/packages/cli/src/ui/components/ContextUsageDisplay.tsx +++ b/packages/cli/src/ui/components/ContextUsageDisplay.tsx @@ -6,11 +6,11 @@ import { Text } from 'ink'; import { theme } from '../semantic-colors.js'; -import { tokenLimit, type Config } from '@qwen-code/qwen-code-core'; +import { DEFAULT_TOKEN_LIMIT, type Config } from '@qwen-code/qwen-code-core'; export const ContextUsageDisplay = ({ promptTokenCount, - model, + model: _model, terminalWidth, config, }: { @@ -23,8 +23,9 @@ export const ContextUsageDisplay = ({ return null; } - const contentGeneratorConfig = config.getContentGeneratorConfig(); - const contextLimit = tokenLimit(model, 'input', contentGeneratorConfig); + const contextLimit = + config.getContentGeneratorConfig()?.contextWindowSize ?? + DEFAULT_TOKEN_LIMIT; const percentage = promptTokenCount / contextLimit; const percentageUsed = (percentage * 100).toFixed(1); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index 3dc3da97a..585befa0d 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -30,7 +30,6 @@ import { createContentGenerator, resolveContentGeneratorConfigWithSources, } from '../core/contentGenerator.js'; -import { tokenLimit } from '../core/tokenLimits.js'; // Services import { FileDiscoveryService } from '../services/fileDiscoveryService.js'; @@ -1483,11 +1482,15 @@ export class Config { return Number.POSITIVE_INFINITY; } + const contextWindowSize = + this.getContentGeneratorConfig()?.contextWindowSize; + if (!contextWindowSize) { + return this.truncateToolOutputThreshold; + } + return Math.min( // Estimate remaining context window in characters (1 token ~= 4 chars). - 4 * - (tokenLimit(this.getModel()) - - uiTelemetryService.getLastPromptTokenCount()), + 4 * (contextWindowSize - uiTelemetryService.getLastPromptTokenCount()), this.truncateToolOutputThreshold, ); } diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts index fca7b724c..539fb9840 100644 --- a/packages/core/src/core/contentGenerator.ts +++ b/packages/core/src/core/contentGenerator.ts @@ -30,6 +30,7 @@ import { StrictMissingModelIdError, } from '../models/modelConfigErrors.js'; import { PROVIDER_SOURCED_FIELDS } from '../models/modelsConfig.js'; +import { tokenLimit } from './tokenLimits.js'; /** * Interface abstracting the core functionalities for generating content and counting tokens. @@ -92,8 +93,11 @@ export type ContentGeneratorConfig = { // Schema compliance mode for tool definitions schemaCompliance?: 'auto' | 'openapi_30'; // Context window size override. If set to a positive number, it will override - // the automatic detection. Set to -1 to use automatic detection. + // the automatic detection. Leave undefined to use automatic detection. contextWindowSize?: number; + // Maximum output tokens override. If set to a positive number, it will override + // the automatic detection. Leave undefined to use automatic detection. + maxOutputTokens?: number; // Custom HTTP headers to be sent with requests customHeaders?: Record; }; @@ -173,6 +177,44 @@ export function resolveContentGeneratorConfigWithSources( } } + // Initialize contextWindowSize if not set by user + // This ensures contextWindowSize is always available as a model-bound property + if ( + newContentGeneratorConfig.contextWindowSize === undefined && + newContentGeneratorConfig.model + ) { + newContentGeneratorConfig.contextWindowSize = tokenLimit( + newContentGeneratorConfig.model, + 'input', + ); + setSource(sources, 'contextWindowSize', { + kind: 'computed', + detail: 'auto-detected from model', + }); + } else if (newContentGeneratorConfig.contextWindowSize !== undefined) { + // User explicitly set contextWindowSize + setSource(sources, 'contextWindowSize', seedOrUnknown('contextWindowSize')); + } + + // Initialize maxOutputTokens if not set by user + // This ensures maxOutputTokens is always available as a model-bound property + if ( + newContentGeneratorConfig.maxOutputTokens === undefined && + newContentGeneratorConfig.model + ) { + newContentGeneratorConfig.maxOutputTokens = tokenLimit( + newContentGeneratorConfig.model, + 'output', + ); + setSource(sources, 'maxOutputTokens', { + kind: 'computed', + detail: 'auto-detected from model', + }); + } else if (newContentGeneratorConfig.maxOutputTokens !== undefined) { + // User explicitly set maxOutputTokens + setSource(sources, 'maxOutputTokens', seedOrUnknown('maxOutputTokens')); + } + // Validate required fields based on authType. This does not perform any // fallback resolution (resolution is handled by ModelConfigResolver). const validation = validateModelConfig( diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts index 45b0568a0..838ce1aaf 100644 --- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts +++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts @@ -8,7 +8,7 @@ import { DEFAULT_MAX_RETRIES, DEFAULT_DASHSCOPE_BASE_URL, } from '../constants.js'; -import { tokenLimit } from '../../tokenLimits.js'; +// tokenLimit import removed - now using contentGeneratorConfig.maxOutputTokens import type { OpenAICompatibleProvider, DashScopeRequestMetadata, @@ -320,7 +320,7 @@ export class DashScopeOpenAICompatibleProvider */ private applyOutputTokenLimit( request: T, - model: string, + _model: string, ): T { const currentMaxTokens = request.max_tokens; @@ -329,7 +329,12 @@ export class DashScopeOpenAICompatibleProvider return request; // No max_tokens parameter, return unchanged } - const modelLimit = tokenLimit(model, 'output'); + // Get output token limit from config + // This value is either user-configured or auto-detected during config initialization + const modelLimit = this.contentGeneratorConfig?.maxOutputTokens; + if (!modelLimit) { + return request; // No limit configured, return unchanged + } // If max_tokens exceeds the model limit, cap it to the model's limit if (currentMaxTokens > modelLimit) { diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts index a5686043d..bafb38f99 100644 --- a/packages/core/src/core/tokenLimits.ts +++ b/packages/core/src/core/tokenLimits.ts @@ -224,29 +224,18 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [ * or output generation based on the model and token type. It uses the same * normalization logic for consistency across both input and output limits. * - * If a contentGeneratorConfig is provided with a contextWindowSize > 0, that value - * will be used for input token limits instead of the automatic detection. + * This function is primarily used during config initialization to auto-detect + * token limits. After initialization, code should use contentGeneratorConfig.contextWindowSize + * or contentGeneratorConfig.maxOutputTokens directly. * * @param model - The model name to get the token limit for * @param type - The type of token limit ('input' for context window, 'output' for generation) - * @param contentGeneratorConfig - Optional config that may contain a contextWindowSize override * @returns The maximum number of tokens allowed for this model and type */ export function tokenLimit( model: Model, type: TokenLimitType = 'input', - contentGeneratorConfig?: { contextWindowSize?: number }, ): TokenCount { - // If user configured a specific context window size for input, use it - const configuredLimit = contentGeneratorConfig?.contextWindowSize; - if ( - type === 'input' && - configuredLimit !== undefined && - configuredLimit > 0 - ) { - return configuredLimit; - } - const norm = normalize(model); // Choose the appropriate patterns based on token type diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index fce5778dc..3a89ee103 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -9,7 +9,7 @@ import type { Config } from '../config/config.js'; import type { GeminiChat } from '../core/geminiChat.js'; import { type ChatCompressionInfo, CompressionStatus } from '../core/turn.js'; import { uiTelemetryService } from '../telemetry/uiTelemetry.js'; -import { tokenLimit } from '../core/tokenLimits.js'; +import { DEFAULT_TOKEN_LIMIT } from '../core/tokenLimits.js'; import { getCompressionPrompt } from '../core/prompts.js'; import { getResponseText } from '../utils/partUtils.js'; import { logChatCompression } from '../telemetry/loggers.js'; @@ -109,8 +109,9 @@ export class ChatCompressionService { // Don't compress if not forced and we are under the limit. if (!force) { - const contentGeneratorConfig = config.getContentGeneratorConfig(); - const contextLimit = tokenLimit(model, 'input', contentGeneratorConfig); + const contextLimit = + config.getContentGeneratorConfig()?.contextWindowSize ?? + DEFAULT_TOKEN_LIMIT; if (originalTokenCount < threshold * contextLimit) { return { newHistory: null, diff --git a/packages/vscode-ide-companion/src/webview/App.tsx b/packages/vscode-ide-companion/src/webview/App.tsx index 4286cd44e..83b182331 100644 --- a/packages/vscode-ide-companion/src/webview/App.tsx +++ b/packages/vscode-ide-companion/src/webview/App.tsx @@ -179,8 +179,13 @@ export const App: React.FC = () => { ? modelInfo.name : undefined; + // Note: In the webview context, the contextWindowSize is already reflected in + // modelInfo._meta.contextLimit which is computed on the extension side with the proper config. + // We only use tokenLimit as a fallback if metaLimit is not available. const derivedLimit = - modelName && modelName.length > 0 ? tokenLimit(modelName) : undefined; + modelName && modelName.length > 0 + ? tokenLimit(modelName, 'input') + : undefined; const metaLimitRaw = modelInfo?._meta?.['contextLimit']; const metaLimit =