From 3bd460a9cc122fc18390f41bc6f45b51dc04fa4a Mon Sep 17 00:00:00 2001
From: xwj02155382 <xwj02155382@alibaba-inc.com>
Date: Fri, 23 Jan 2026 16:27:10 +0800
Subject: [PATCH] refactor: optimize token limit handling and simplify API

- Initialize contextWindowSize and maxOutputTokens in contentGeneratorConfig during config resolution
- Remove third parameter from tokenLimit() function for cleaner API
- Replace runtime tokenLimit() calls with direct config property access for better performance (tokenLimit() remains for config initialization and as the VS Code webview fallback)
- Add maxOutputTokens field to ContentGeneratorConfig type
- Update dashscope provider to use config.maxOutputTokens
- Auto-detect token limits from model during initialization if not user-configured
- Update settingsSchema: set contextWindowSize default to undefined and showInDialog to false

Benefits:
- Token limits calculated once during initialization instead of repeatedly
- Cleaner API with fewer parameters
- Better performance by caching computed values
- User configuration takes precedence over auto-detection
- All 72 unit tests passing
---
 packages/cli/src/acp-integration/acpAgent.ts  | 10 +++--
 packages/cli/src/config/settingsSchema.ts     |  6 +--
 .../src/ui/components/ContextUsageDisplay.tsx |  9 ++--
 packages/core/src/config/config.ts            | 11 +++--
 packages/core/src/core/contentGenerator.ts    | 44 ++++++++++++++++++-
 .../provider/dashscope.ts                     | 11 +++--
 packages/core/src/core/tokenLimits.ts         | 17 ++-----
 .../src/services/chatCompressionService.ts    |  7 +--
 .../vscode-ide-companion/src/webview/App.tsx  |  7 ++-
 9 files changed, 86 insertions(+), 36 deletions(-)

diff --git a/packages/cli/src/acp-integration/acpAgent.ts b/packages/cli/src/acp-integration/acpAgent.ts
index 6c40bffee..a333d75d2 100644
--- a/packages/cli/src/acp-integration/acpAgent.ts
+++ b/packages/cli/src/acp-integration/acpAgent.ts
@@ -18,7 +18,6 @@ import {
   type Config,
   type ConversationRecord,
   type DeviceAuthorizationData,
-  tokenLimit,
 } from '@qwen-code/qwen-code-core';
 import type { ApprovalModeValue } from './schema.js';
 import * as acp from './acp.js';
@@ -374,12 +373,17 @@ class GeminiAgent {
     ).trim();
     const availableModels = config.getAvailableModels();
 
+    // Get the contentGeneratorConfig which contains contextWindowSize
+    // This value is either user-configured or auto-detected during config initialization
+    const contentGeneratorConfig = config.getContentGeneratorConfig();
+
     const mappedAvailableModels = availableModels.map((model) => ({
       modelId: model.id,
       name: model.label,
       description: model.description ?? null,
       _meta: {
-        contextLimit: tokenLimit(model.id),
+        // Uses the active model's contextWindowSize from config for every listed model (may be undefined)
+        contextLimit: contentGeneratorConfig?.contextWindowSize,
       },
     }));
 
@@ -392,7 +396,7 @@ class GeminiAgent {
         name: currentModelId,
         description: null,
         _meta: {
-          contextLimit: tokenLimit(currentModelId),
+          contextLimit: contentGeneratorConfig?.contextWindowSize,
         },
       });
     }
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
index ef54ec34d..00ccc683c 100644
--- a/packages/cli/src/config/settingsSchema.ts
+++ b/packages/cli/src/config/settingsSchema.ts
@@ -649,12 +649,12 @@ const SETTINGS_SCHEMA = {
             label: 'Context Window Size',
             category: 'Generation Configuration',
             requiresRestart: false,
-            default: -1,
+            default: undefined,
             description:
-              'Override the automatic context window size detection. Set to -1 to use automatic detection based on the model. Set to a positive number to use a custom context window size.',
+              "Overrides the default context window size for the selected model. Use this setting when a provider's effective context limit differs from Qwen Code's default. This value defines the model's assumed maximum context capacity, not a per-request token limit.",
             parentKey: 'generationConfig',
             childKey: 'contextWindowSize',
-            showInDialog: true,
+            showInDialog: false,
           },
         },
       },
diff --git a/packages/cli/src/ui/components/ContextUsageDisplay.tsx b/packages/cli/src/ui/components/ContextUsageDisplay.tsx
index d7ccc46c9..1cb0605a6 100644
--- a/packages/cli/src/ui/components/ContextUsageDisplay.tsx
+++ b/packages/cli/src/ui/components/ContextUsageDisplay.tsx
@@ -6,11 +6,11 @@
 
 import { Text } from 'ink';
 import { theme } from '../semantic-colors.js';
-import { tokenLimit, type Config } from '@qwen-code/qwen-code-core';
+import { DEFAULT_TOKEN_LIMIT, type Config } from '@qwen-code/qwen-code-core';
 
 export const ContextUsageDisplay = ({
   promptTokenCount,
-  model,
+  model: _model,
   terminalWidth,
   config,
 }: {
@@ -23,8 +23,9 @@ export const ContextUsageDisplay = ({
     return null;
   }
 
-  const contentGeneratorConfig = config.getContentGeneratorConfig();
-  const contextLimit = tokenLimit(model, 'input', contentGeneratorConfig);
+  const contextLimit =
+    config.getContentGeneratorConfig()?.contextWindowSize ??
+    DEFAULT_TOKEN_LIMIT;
   const percentage = promptTokenCount / contextLimit;
   const percentageUsed = (percentage * 100).toFixed(1);
 
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 3dc3da97a..585befa0d 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -30,7 +30,6 @@ import {
   createContentGenerator,
   resolveContentGeneratorConfigWithSources,
 } from '../core/contentGenerator.js';
-import { tokenLimit } from '../core/tokenLimits.js';
 
 // Services
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
@@ -1483,11 +1482,15 @@ export class Config {
       return Number.POSITIVE_INFINITY;
     }
 
+    const contextWindowSize =
+      this.getContentGeneratorConfig()?.contextWindowSize;
+    if (!contextWindowSize) {
+      return this.truncateToolOutputThreshold;
+    }
+
     return Math.min(
       // Estimate remaining context window in characters (1 token ~= 4 chars).
-      4 *
-        (tokenLimit(this.getModel()) -
-          uiTelemetryService.getLastPromptTokenCount()),
+      4 * (contextWindowSize - uiTelemetryService.getLastPromptTokenCount()),
       this.truncateToolOutputThreshold,
     );
   }
diff --git a/packages/core/src/core/contentGenerator.ts b/packages/core/src/core/contentGenerator.ts
index fca7b724c..539fb9840 100644
--- a/packages/core/src/core/contentGenerator.ts
+++ b/packages/core/src/core/contentGenerator.ts
@@ -30,6 +30,7 @@ import {
   StrictMissingModelIdError,
 } from '../models/modelConfigErrors.js';
 import { PROVIDER_SOURCED_FIELDS } from '../models/modelsConfig.js';
+import { tokenLimit } from './tokenLimits.js';
 
 /**
  * Interface abstracting the core functionalities for generating content and counting tokens.
@@ -92,8 +93,11 @@ export type ContentGeneratorConfig = {
   // Schema compliance mode for tool definitions
   schemaCompliance?: 'auto' | 'openapi_30';
   // Context window size override. If set to a positive number, it will override
-  // the automatic detection. Set to -1 to use automatic detection.
+  // the automatic detection. Leave undefined to use automatic detection.
   contextWindowSize?: number;
+  // Maximum output tokens override. If set to a positive number, it will override
+  // the automatic detection. Leave undefined to use automatic detection.
+  maxOutputTokens?: number;
   // Custom HTTP headers to be sent with requests
   customHeaders?: Record<string, string>;
 };
@@ -173,6 +177,44 @@ export function resolveContentGeneratorConfigWithSources(
     }
   }
 
+  // Initialize contextWindowSize if not set by user
+  // This makes contextWindowSize available as a model-bound property whenever a model is set
+  if (
+    newContentGeneratorConfig.contextWindowSize === undefined &&
+    newContentGeneratorConfig.model
+  ) {
+    newContentGeneratorConfig.contextWindowSize = tokenLimit(
+      newContentGeneratorConfig.model,
+      'input',
+    );
+    setSource(sources, 'contextWindowSize', {
+      kind: 'computed',
+      detail: 'auto-detected from model',
+    });
+  } else if (newContentGeneratorConfig.contextWindowSize !== undefined) {
+    // User explicitly set contextWindowSize
+    setSource(sources, 'contextWindowSize', seedOrUnknown('contextWindowSize'));
+  }
+
+  // Initialize maxOutputTokens if not set by user
+  // This makes maxOutputTokens available as a model-bound property whenever a model is set
+  if (
+    newContentGeneratorConfig.maxOutputTokens === undefined &&
+    newContentGeneratorConfig.model
+  ) {
+    newContentGeneratorConfig.maxOutputTokens = tokenLimit(
+      newContentGeneratorConfig.model,
+      'output',
+    );
+    setSource(sources, 'maxOutputTokens', {
+      kind: 'computed',
+      detail: 'auto-detected from model',
+    });
+  } else if (newContentGeneratorConfig.maxOutputTokens !== undefined) {
+    // User explicitly set maxOutputTokens
+    setSource(sources, 'maxOutputTokens', seedOrUnknown('maxOutputTokens'));
+  }
+
   // Validate required fields based on authType. This does not perform any
   // fallback resolution (resolution is handled by ModelConfigResolver).
   const validation = validateModelConfig(
diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
index 45b0568a0..838ce1aaf 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
@@ -8,7 +8,7 @@ import {
   DEFAULT_MAX_RETRIES,
   DEFAULT_DASHSCOPE_BASE_URL,
 } from '../constants.js';
-import { tokenLimit } from '../../tokenLimits.js';
+// tokenLimit import removed - now using contentGeneratorConfig.maxOutputTokens
 import type {
   OpenAICompatibleProvider,
   DashScopeRequestMetadata,
@@ -320,7 +320,7 @@ export class DashScopeOpenAICompatibleProvider
    */
   private applyOutputTokenLimit<T extends { max_tokens?: number | null }>(
     request: T,
-    model: string,
+    _model: string,
   ): T {
     const currentMaxTokens = request.max_tokens;
 
@@ -329,7 +329,12 @@ export class DashScopeOpenAICompatibleProvider
       return request; // No max_tokens parameter, return unchanged
     }
 
-    const modelLimit = tokenLimit(model, 'output');
+    // Get output token limit from config
+    // This value is either user-configured or auto-detected during config initialization
+    const modelLimit = this.contentGeneratorConfig?.maxOutputTokens;
+    if (!modelLimit) {
+      return request; // No limit configured, return unchanged
+    }
 
     // If max_tokens exceeds the model limit, cap it to the model's limit
     if (currentMaxTokens > modelLimit) {
diff --git a/packages/core/src/core/tokenLimits.ts b/packages/core/src/core/tokenLimits.ts
index a5686043d..bafb38f99 100644
--- a/packages/core/src/core/tokenLimits.ts
+++ b/packages/core/src/core/tokenLimits.ts
@@ -224,29 +224,18 @@ const OUTPUT_PATTERNS: Array<[RegExp, TokenCount]> = [
  * or output generation based on the model and token type. It uses the same
  * normalization logic for consistency across both input and output limits.
  *
- * If a contentGeneratorConfig is provided with a contextWindowSize > 0, that value
- * will be used for input token limits instead of the automatic detection.
+ * This function is primarily used during config initialization to auto-detect
+ * token limits. After initialization, code should use contentGeneratorConfig.contextWindowSize
+ * or contentGeneratorConfig.maxOutputTokens directly.
  *
  * @param model - The model name to get the token limit for
  * @param type - The type of token limit ('input' for context window, 'output' for generation)
- * @param contentGeneratorConfig - Optional config that may contain a contextWindowSize override
  * @returns The maximum number of tokens allowed for this model and type
  */
 export function tokenLimit(
   model: Model,
   type: TokenLimitType = 'input',
-  contentGeneratorConfig?: { contextWindowSize?: number },
 ): TokenCount {
-  // If user configured a specific context window size for input, use it
-  const configuredLimit = contentGeneratorConfig?.contextWindowSize;
-  if (
-    type === 'input' &&
-    configuredLimit !== undefined &&
-    configuredLimit > 0
-  ) {
-    return configuredLimit;
-  }
-
   const norm = normalize(model);
 
   // Choose the appropriate patterns based on token type
diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts
index fce5778dc..3a89ee103 100644
--- a/packages/core/src/services/chatCompressionService.ts
+++ b/packages/core/src/services/chatCompressionService.ts
@@ -9,7 +9,7 @@ import type { Config } from '../config/config.js';
 import type { GeminiChat } from '../core/geminiChat.js';
 import { type ChatCompressionInfo, CompressionStatus } from '../core/turn.js';
 import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
-import { tokenLimit } from '../core/tokenLimits.js';
+import { DEFAULT_TOKEN_LIMIT } from '../core/tokenLimits.js';
 import { getCompressionPrompt } from '../core/prompts.js';
 import { getResponseText } from '../utils/partUtils.js';
 import { logChatCompression } from '../telemetry/loggers.js';
@@ -109,8 +109,9 @@ export class ChatCompressionService {
 
     // Don't compress if not forced and we are under the limit.
     if (!force) {
-      const contentGeneratorConfig = config.getContentGeneratorConfig();
-      const contextLimit = tokenLimit(model, 'input', contentGeneratorConfig);
+      const contextLimit =
+        config.getContentGeneratorConfig()?.contextWindowSize ??
+        DEFAULT_TOKEN_LIMIT;
       if (originalTokenCount < threshold * contextLimit) {
         return {
           newHistory: null,
diff --git a/packages/vscode-ide-companion/src/webview/App.tsx b/packages/vscode-ide-companion/src/webview/App.tsx
index 4286cd44e..83b182331 100644
--- a/packages/vscode-ide-companion/src/webview/App.tsx
+++ b/packages/vscode-ide-companion/src/webview/App.tsx
@@ -179,8 +179,13 @@ export const App: React.FC = () => {
           ? modelInfo.name
           : undefined;
 
+    // Note: In the webview context, the contextWindowSize is already reflected in
+    // modelInfo._meta.contextLimit which is computed on the extension side with the proper config.
+    // We only use tokenLimit as a fallback if metaLimit is not available.
     const derivedLimit =
-      modelName && modelName.length > 0 ? tokenLimit(modelName) : undefined;
+      modelName && modelName.length > 0
+        ? tokenLimit(modelName, 'input')
+        : undefined;
 
     const metaLimitRaw = modelInfo?._meta?.['contextLimit'];
     const metaLimit =