From 8538f12689b8e2d492b6f2abbc87ffb29cf0192f Mon Sep 17 00:00:00 2001
From: xwj02155382
Date: Mon, 26 Jan 2026 11:43:33 +0800
Subject: [PATCH] fix: update contextWindowSize and maxOutputTokens when
 switching models

- Fix handleModelChange to update contextWindowSize and maxOutputTokens during hot-update
- Fix dashscope.ts to use contentGeneratorConfig.maxOutputTokens instead of tokenLimit()
- Fix acpAgent.ts to use model-specific contextLimit for each model
- Add comprehensive tests for model switching scenarios
- Fix all TypeScript type errors (index signature and ConfigSource types)
- Fix all ESLint errors (remove 'any' types)
---
 packages/cli/src/acp-integration/acpAgent.ts |  13 +-
 packages/core/src/config/config.test.ts      | 200 ++++++++++++++++++
 packages/core/src/config/config.ts           |  10 +
 .../provider/dashscope.ts                    |   8 +-
 4 files changed, 217 insertions(+), 14 deletions(-)

diff --git a/packages/cli/src/acp-integration/acpAgent.ts b/packages/cli/src/acp-integration/acpAgent.ts
index a333d75d2..d972e832f 100644
--- a/packages/cli/src/acp-integration/acpAgent.ts
+++ b/packages/cli/src/acp-integration/acpAgent.ts
@@ -15,6 +15,7 @@ import {
   qwenOAuth2Events,
   MCPServerConfig,
   SessionService,
+  tokenLimit,
   type Config,
   type ConversationRecord,
   type DeviceAuthorizationData,
@@ -373,17 +374,14 @@ class GeminiAgent {
     ).trim();
 
     const availableModels = config.getAvailableModels();
 
-    // Get the contentGeneratorConfig which contains contextWindowSize
-    // This value is either user-configured or auto-detected during config initialization
-    const contentGeneratorConfig = config.getContentGeneratorConfig();
-
     const mappedAvailableModels = availableModels.map((model) => ({
       modelId: model.id,
       name: model.label,
       description: model.description ?? null,
       _meta: {
-        // Use the contextWindowSize from config, which is always set during initialization
-        contextLimit: contentGeneratorConfig?.contextWindowSize,
+        // Each model should have its own context window size based on its capabilities
+        // Use tokenLimit to get the model-specific context window size
+        contextLimit: tokenLimit(model.id, 'input'),
       },
     }));
 
@@ -396,7 +394,8 @@ class GeminiAgent {
         name: currentModelId,
         description: null,
         _meta: {
-          contextLimit: contentGeneratorConfig?.contextWindowSize,
+          // Get context window size specific to the current model
+          contextLimit: tokenLimit(currentModelId, 'input'),
         },
       });
     }
diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts
index 3f97bafb8..a2e5094bb 100644
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -1341,3 +1341,203 @@ describe('BaseLlmClient Lifecycle', () => {
     );
   });
 });
+
+describe('Model Switching and Config Updates', () => {
+  const baseParams: ConfigParameters = {
+    cwd: '/tmp',
+    targetDir: '/path/to/target',
+    debugMode: false,
+    model: 'qwen3-coder-plus',
+    usageStatisticsEnabled: false,
+    telemetry: { enabled: false },
+  };
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('should update contextWindowSize and maxOutputTokens when switching models with hot-update', async () => {
+    const config = new Config(baseParams);
+
+    // Initialize with first model
+    const initialConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen3-coder-plus',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: 1_000_000,
+      ['maxOutputTokens']: 8_192,
+      ['samplingParams']: { temperature: 0.7 },
+      ['disableCacheControl']: false,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: initialConfig,
+      sources: {
+        model: { kind: 'settings' },
+        contextWindowSize: { kind: 'computed', detail: 'auto' },
+        maxOutputTokens: { kind: 'computed', detail: 'auto' },
+      },
+    });
+
+    await config.refreshAuth(AuthType.QWEN_OAUTH);
+
+    // Verify initial config
+    const contentGenConfig = config.getContentGeneratorConfig();
+    expect(contentGenConfig['model']).toBe('qwen3-coder-plus');
+    expect(contentGenConfig['contextWindowSize']).toBe(1_000_000);
+    expect(contentGenConfig['maxOutputTokens']).toBe(8_192);
+
+    // Switch to a different model with different token limits
+    const newConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen-max',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: 128_000,
+      ['maxOutputTokens']: 4_096,
+      ['samplingParams']: { temperature: 0.8 },
+      ['disableCacheControl']: true,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: newConfig,
+      sources: {
+        model: { kind: 'programmatic', detail: 'user' },
+        contextWindowSize: { kind: 'computed', detail: 'auto' },
+        maxOutputTokens: { kind: 'computed', detail: 'auto' },
+        samplingParams: { kind: 'settings' },
+        disableCacheControl: { kind: 'settings' },
+      },
+    });
+
+    // Simulate model switch (this would be called by ModelsConfig.switchModel)
+    await (
+      config as unknown as {
+        handleModelChange: (
+          authType: AuthType,
+          requiresRefresh: boolean,
+        ) => Promise<void>;
+      }
+    ).handleModelChange(AuthType.QWEN_OAUTH, false);
+
+    // Verify all fields are updated
+    const updatedConfig = config.getContentGeneratorConfig();
+    expect(updatedConfig['model']).toBe('qwen-max');
+    expect(updatedConfig['contextWindowSize']).toBe(128_000);
+    expect(updatedConfig['maxOutputTokens']).toBe(4_096);
+    expect(updatedConfig['samplingParams']?.temperature).toBe(0.8);
+    expect(updatedConfig['disableCacheControl']).toBe(true);
+
+    // Verify sources are also updated
+    const sources = config.getContentGeneratorConfigSources();
+    expect(sources['model']?.kind).toBe('programmatic');
+    expect(sources['model']?.detail).toBe('user');
+    expect(sources['contextWindowSize']?.kind).toBe('computed');
+    expect(sources['contextWindowSize']?.detail).toBe('auto');
+    expect(sources['maxOutputTokens']?.kind).toBe('computed');
+    expect(sources['maxOutputTokens']?.detail).toBe('auto');
+    expect(sources['samplingParams']?.kind).toBe('settings');
+    expect(sources['disableCacheControl']?.kind).toBe('settings');
+  });
+
+  it('should trigger full refresh when switching to non-qwen-oauth provider', async () => {
+    const config = new Config(baseParams);
+
+    // Initialize with qwen-oauth
+    const initialConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen3-coder-plus',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: 1_000_000,
+      ['maxOutputTokens']: 8_192,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: initialConfig,
+      sources: {},
+    });
+
+    await config.refreshAuth(AuthType.QWEN_OAUTH);
+
+    // Switch to different auth type (should trigger full refresh)
+    const newConfig: ContentGeneratorConfig = {
+      ['model']: 'gemini-flash',
+      ['authType']: AuthType.USE_GEMINI,
+      ['apiKey']: 'gemini-key',
+      ['contextWindowSize']: 32_000,
+      ['maxOutputTokens']: 2_048,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: newConfig,
+      sources: {},
+    });
+
+    const refreshAuthSpy = vi.spyOn(
+      config as unknown as {
+        refreshAuth: (authType: AuthType) => Promise<void>;
+      },
+      'refreshAuth',
+    );
+
+    // Simulate model switch with different auth type
+    await (
+      config as unknown as {
+        handleModelChange: (
+          authType: AuthType,
+          requiresRefresh: boolean,
+        ) => Promise<void>;
+      }
+    ).handleModelChange(AuthType.USE_GEMINI, true);
+
+    // Verify refreshAuth was called (full refresh path)
+    expect(refreshAuthSpy).toHaveBeenCalledWith(AuthType.USE_GEMINI);
+  });
+
+  it('should handle model switch when contextWindowSize and maxOutputTokens are undefined', async () => {
+    const config = new Config(baseParams);
+
+    // Initialize with config that has undefined token limits
+    const initialConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen3-coder-plus',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: undefined,
+      ['maxOutputTokens']: undefined,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: initialConfig,
+      sources: {},
+    });
+
+    await config.refreshAuth(AuthType.QWEN_OAUTH);
+
+    // Switch to model with defined limits
+    const newConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen-max',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: 128_000,
+      ['maxOutputTokens']: 4_096,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: newConfig,
+      sources: {},
+    });
+
+    await (
+      config as unknown as {
+        handleModelChange: (
+          authType: AuthType,
+          requiresRefresh: boolean,
+        ) => Promise<void>;
+      }
+    ).handleModelChange(AuthType.QWEN_OAUTH, false);
+
+    // Verify limits are now defined
+    const updatedConfig = config.getContentGeneratorConfig();
+    expect(updatedConfig['contextWindowSize']).toBe(128_000);
+    expect(updatedConfig['maxOutputTokens']).toBe(4_096);
+  });
+});
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 585befa0d..0083733ab 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -918,6 +918,8 @@ export class Config {
       this.contentGeneratorConfig.samplingParams = config.samplingParams;
       this.contentGeneratorConfig.disableCacheControl =
         config.disableCacheControl;
+      this.contentGeneratorConfig.contextWindowSize = config.contextWindowSize;
+      this.contentGeneratorConfig.maxOutputTokens = config.maxOutputTokens;
 
       if ('model' in sources) {
         this.contentGeneratorConfigSources['model'] = sources['model'];
@@ -930,6 +932,14 @@ export class Config {
         this.contentGeneratorConfigSources['disableCacheControl'] =
           sources['disableCacheControl'];
       }
+      if ('contextWindowSize' in sources) {
+        this.contentGeneratorConfigSources['contextWindowSize'] =
+          sources['contextWindowSize'];
+      }
+      if ('maxOutputTokens' in sources) {
+        this.contentGeneratorConfigSources['maxOutputTokens'] =
+          sources['maxOutputTokens'];
+      }
       return;
     }
 
diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
index 7beb31038..e6ac26374 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
@@ -8,7 +8,6 @@ import {
   DEFAULT_MAX_RETRIES,
   DEFAULT_DASHSCOPE_BASE_URL,
 } from '../constants.js';
-// tokenLimit import removed - now using contentGeneratorConfig.maxOutputTokens
 import type {
   OpenAICompatibleProvider,
   DashScopeRequestMetadata,
@@ -116,10 +115,7 @@ export class DashScopeOpenAICompatibleProvider
 
     // Apply output token limits based on model capabilities
     // This ensures max_tokens doesn't exceed the model's maximum output limit
-    const requestWithTokenLimits = this.applyOutputTokenLimit(
-      request,
-      request.model,
-    );
+    const requestWithTokenLimits = this.applyOutputTokenLimit(request);
 
     if (this.isVisionModel(request.model)) {
       return {
@@ -320,12 +316,10 @@ export class DashScopeOpenAICompatibleProvider
    * token limit. Only modifies max_tokens when already present in the request.
    *
    * @param request - The chat completion request parameters
-   * @param model - The model name to get the output token limit for
    * @returns The request with max_tokens adjusted to respect the model's limits (if present)
    */
   private applyOutputTokenLimit<T extends OpenAI.Chat.ChatCompletionCreateParams>(
     request: T,
-    _model: string,
   ): T {
     const currentMaxTokens = request.max_tokens;
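
Illustration (not part of the patch): the dashscope change clamps an explicit
max_tokens against a configured output limit rather than a static per-model
tokenLimit() table, which is why a hot model switch takes effect immediately.
The snippet below is a self-contained TypeScript sketch of that behavior with
hypothetical names (ChatRequest and this standalone applyOutputTokenLimit are
stand-ins, not the provider's real types); the values mirror the test data
above (maxOutputTokens 4_096 for 'qwen-max').

    interface ChatRequest {
      model: string;
      max_tokens?: number;
    }

    // Clamp max_tokens only when the caller set it and a limit is known,
    // mirroring the "only modifies max_tokens when already present" rule
    // from the method's doc comment.
    function applyOutputTokenLimit(
      request: ChatRequest,
      configuredMax?: number,
    ): ChatRequest {
      if (request.max_tokens === undefined || configuredMax === undefined) {
        return request;
      }
      return {
        ...request,
        max_tokens: Math.min(request.max_tokens, configuredMax),
      };
    }

    // After a hot switch to 'qwen-max', an oversized request is clamped:
    const clamped = applyOutputTokenLimit(
      { model: 'qwen-max', max_tokens: 8_192 },
      4_096,
    );
    console.log(clamped.max_tokens); // 4096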