From 8538f12689b8e2d492b6f2abbc87ffb29cf0192f Mon Sep 17 00:00:00 2001
From: xwj02155382
Date: Mon, 26 Jan 2026 11:43:33 +0800
Subject: [PATCH] fix: update contextWindowSize and maxOutputTokens when
 switching models

- Fix handleModelChange to update contextWindowSize and maxOutputTokens during hot-update
- Fix dashscope.ts to use contentGeneratorConfig.maxOutputTokens instead of tokenLimit()
- Fix acpAgent.ts to use model-specific contextLimit for each model
- Add comprehensive tests for model switching scenarios
- Fix all TypeScript type errors (index signature and ConfigSource types)
- Fix all ESLint errors (remove 'any' types)
---
 packages/cli/src/acp-integration/acpAgent.ts |  13 +-
 packages/core/src/config/config.test.ts      | 200 ++++++++++++++++++
 packages/core/src/config/config.ts           |  10 +
 .../provider/dashscope.ts                    |   8 +-
 4 files changed, 217 insertions(+), 14 deletions(-)

diff --git a/packages/cli/src/acp-integration/acpAgent.ts b/packages/cli/src/acp-integration/acpAgent.ts
index a333d75d2..d972e832f 100644
--- a/packages/cli/src/acp-integration/acpAgent.ts
+++ b/packages/cli/src/acp-integration/acpAgent.ts
@@ -15,6 +15,7 @@ import {
   qwenOAuth2Events,
   MCPServerConfig,
   SessionService,
+  tokenLimit,
   type Config,
   type ConversationRecord,
   type DeviceAuthorizationData,
@@ -373,17 +374,14 @@ class GeminiAgent {
     ).trim();
 
     const availableModels = config.getAvailableModels();
 
-    // Get the contentGeneratorConfig which contains contextWindowSize
-    // This value is either user-configured or auto-detected during config initialization
-    const contentGeneratorConfig = config.getContentGeneratorConfig();
-
     const mappedAvailableModels = availableModels.map((model) => ({
       modelId: model.id,
       name: model.label,
       description: model.description ?? null,
       _meta: {
-        // Use the contextWindowSize from config, which is always set during initialization
-        contextLimit: contentGeneratorConfig?.contextWindowSize,
+        // Each model should have its own context window size based on its capabilities
+        // Use tokenLimit to get the model-specific context window size
+        contextLimit: tokenLimit(model.id, 'input'),
       },
     }));
 
@@ -396,7 +394,8 @@ class GeminiAgent {
         name: currentModelId,
         description: null,
         _meta: {
-          contextLimit: contentGeneratorConfig?.contextWindowSize,
+          // Get context window size specific to the current model
+          contextLimit: tokenLimit(currentModelId, 'input'),
         },
       });
     }
diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts
index 3f97bafb8..a2e5094bb 100644
--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -1341,3 +1341,203 @@ describe('BaseLlmClient Lifecycle', () => {
     );
   });
 });
+
+describe('Model Switching and Config Updates', () => {
+  const baseParams: ConfigParameters = {
+    cwd: '/tmp',
+    targetDir: '/path/to/target',
+    debugMode: false,
+    model: 'qwen3-coder-plus',
+    usageStatisticsEnabled: false,
+    telemetry: { enabled: false },
+  };
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it('should update contextWindowSize and maxOutputTokens when switching models with hot-update', async () => {
+    const config = new Config(baseParams);
+
+    // Initialize with first model
+    const initialConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen3-coder-plus',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: 1_000_000,
+      ['maxOutputTokens']: 8_192,
+      ['samplingParams']: { temperature: 0.7 },
+      ['disableCacheControl']: false,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: initialConfig,
+      sources: {
+        model: { kind: 'settings' },
+        contextWindowSize: { kind: 'computed', detail: 'auto' },
+        maxOutputTokens: { kind: 'computed', detail: 'auto' },
+      },
+    });
+
+    await config.refreshAuth(AuthType.QWEN_OAUTH);
+
+    // Verify initial config
+    const contentGenConfig = config.getContentGeneratorConfig();
+    expect(contentGenConfig['model']).toBe('qwen3-coder-plus');
+    expect(contentGenConfig['contextWindowSize']).toBe(1_000_000);
+    expect(contentGenConfig['maxOutputTokens']).toBe(8_192);
+
+    // Switch to a different model with different token limits
+    const newConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen-max',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: 128_000,
+      ['maxOutputTokens']: 4_096,
+      ['samplingParams']: { temperature: 0.8 },
+      ['disableCacheControl']: true,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: newConfig,
+      sources: {
+        model: { kind: 'programmatic', detail: 'user' },
+        contextWindowSize: { kind: 'computed', detail: 'auto' },
+        maxOutputTokens: { kind: 'computed', detail: 'auto' },
+        samplingParams: { kind: 'settings' },
+        disableCacheControl: { kind: 'settings' },
+      },
+    });
+
+    // Simulate model switch (this would be called by ModelsConfig.switchModel)
+    await (
+      config as unknown as {
+        handleModelChange: (
+          authType: AuthType,
+          requiresRefresh: boolean,
+        ) => Promise<void>;
+      }
+    ).handleModelChange(AuthType.QWEN_OAUTH, false);
+
+    // Verify all fields are updated
+    const updatedConfig = config.getContentGeneratorConfig();
+    expect(updatedConfig['model']).toBe('qwen-max');
+    expect(updatedConfig['contextWindowSize']).toBe(128_000);
+    expect(updatedConfig['maxOutputTokens']).toBe(4_096);
+    expect(updatedConfig['samplingParams']?.temperature).toBe(0.8);
+    expect(updatedConfig['disableCacheControl']).toBe(true);
+
+    // Verify sources are also updated
+    const sources = config.getContentGeneratorConfigSources();
+    expect(sources['model']?.kind).toBe('programmatic');
+    expect(sources['model']?.detail).toBe('user');
+    expect(sources['contextWindowSize']?.kind).toBe('computed');
+    expect(sources['contextWindowSize']?.detail).toBe('auto');
+    expect(sources['maxOutputTokens']?.kind).toBe('computed');
+    expect(sources['maxOutputTokens']?.detail).toBe('auto');
+    expect(sources['samplingParams']?.kind).toBe('settings');
+    expect(sources['disableCacheControl']?.kind).toBe('settings');
+  });
+
+  it('should trigger full refresh when switching to non-qwen-oauth provider', async () => {
+    const config = new Config(baseParams);
+
+    // Initialize with qwen-oauth
+    const initialConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen3-coder-plus',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: 1_000_000,
+      ['maxOutputTokens']: 8_192,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: initialConfig,
+      sources: {},
+    });
+
+    await config.refreshAuth(AuthType.QWEN_OAUTH);
+
+    // Switch to different auth type (should trigger full refresh)
+    const newConfig: ContentGeneratorConfig = {
+      ['model']: 'gemini-flash',
+      ['authType']: AuthType.USE_GEMINI,
+      ['apiKey']: 'gemini-key',
+      ['contextWindowSize']: 32_000,
+      ['maxOutputTokens']: 2_048,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: newConfig,
+      sources: {},
+    });
+
+    const refreshAuthSpy = vi.spyOn(
+      config as unknown as {
+        refreshAuth: (authType: AuthType) => Promise<void>;
+      },
+      'refreshAuth',
+    );
+
+    // Simulate model switch with different auth type
+    await (
+      config as unknown as {
+        handleModelChange: (
+          authType: AuthType,
+          requiresRefresh: boolean,
+        ) => Promise<void>;
+      }
+    ).handleModelChange(AuthType.USE_GEMINI, true);
+
+    // Verify refreshAuth was called (full refresh path)
+    expect(refreshAuthSpy).toHaveBeenCalledWith(AuthType.USE_GEMINI);
+  });
+
+  it('should handle model switch when contextWindowSize and maxOutputTokens are undefined', async () => {
+    const config = new Config(baseParams);
+
+    // Initialize with config that has undefined token limits
+    const initialConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen3-coder-plus',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: undefined,
+      ['maxOutputTokens']: undefined,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: initialConfig,
+      sources: {},
+    });
+
+    await config.refreshAuth(AuthType.QWEN_OAUTH);
+
+    // Switch to model with defined limits
+    const newConfig: ContentGeneratorConfig = {
+      ['model']: 'qwen-max',
+      ['authType']: AuthType.QWEN_OAUTH,
+      ['apiKey']: 'test-key',
+      ['contextWindowSize']: 128_000,
+      ['maxOutputTokens']: 4_096,
+    };
+
+    vi.mocked(resolveContentGeneratorConfigWithSources).mockReturnValue({
+      config: newConfig,
+      sources: {},
+    });
+
+    await (
+      config as unknown as {
+        handleModelChange: (
+          authType: AuthType,
+          requiresRefresh: boolean,
+        ) => Promise<void>;
+      }
+    ).handleModelChange(AuthType.QWEN_OAUTH, false);
+
+    // Verify limits are now defined
+    const updatedConfig = config.getContentGeneratorConfig();
+    expect(updatedConfig['contextWindowSize']).toBe(128_000);
+    expect(updatedConfig['maxOutputTokens']).toBe(4_096);
+  });
+});
diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts
index 585befa0d..0083733ab 100644
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -918,6 +918,8 @@ export class Config {
       this.contentGeneratorConfig.samplingParams = config.samplingParams;
       this.contentGeneratorConfig.disableCacheControl =
         config.disableCacheControl;
+      this.contentGeneratorConfig.contextWindowSize = config.contextWindowSize;
+      this.contentGeneratorConfig.maxOutputTokens = config.maxOutputTokens;
 
       if ('model' in sources) {
         this.contentGeneratorConfigSources['model'] = sources['model'];
@@ -930,6 +932,14 @@ export class Config {
         this.contentGeneratorConfigSources['disableCacheControl'] =
           sources['disableCacheControl'];
       }
+      if ('contextWindowSize' in sources) {
+        this.contentGeneratorConfigSources['contextWindowSize'] =
+          sources['contextWindowSize'];
+      }
+      if ('maxOutputTokens' in sources) {
+        this.contentGeneratorConfigSources['maxOutputTokens'] =
+          sources['maxOutputTokens'];
+      }
       return;
     }
 
diff --git a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
index 7beb31038..e6ac26374 100644
--- a/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
+++ b/packages/core/src/core/openaiContentGenerator/provider/dashscope.ts
@@ -8,7 +8,6 @@ import {
   DEFAULT_MAX_RETRIES,
   DEFAULT_DASHSCOPE_BASE_URL,
 } from '../constants.js';
-// tokenLimit import removed - now using contentGeneratorConfig.maxOutputTokens
 import type {
   OpenAICompatibleProvider,
   DashScopeRequestMetadata,
@@ -116,10 +115,7 @@ export class DashScopeOpenAICompatibleProvider
 
     // Apply output token limits based on model capabilities
     // This ensures max_tokens doesn't exceed the model's maximum output limit
-    const requestWithTokenLimits = this.applyOutputTokenLimit(
-      request,
-      request.model,
-    );
+    const requestWithTokenLimits = this.applyOutputTokenLimit(request);
 
     if (this.isVisionModel(request.model)) {
       return {
@@ -320,12 +316,10 @@ export class DashScopeOpenAICompatibleProvider
    * token limit. Only modifies max_tokens when already present in the request.
    *
    * @param request - The chat completion request parameters
-   * @param model - The model name to get the output token limit for
    * @returns The request with max_tokens adjusted to respect the model's limits (if present)
    */
   private applyOutputTokenLimit<T extends OpenAI.Chat.ChatCompletionCreateParams>(
     request: T,
-    _model: string,
   ): T {
     const currentMaxTokens = request.max_tokens;
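
Illustration (not part of the patch): the dashscope change clamps an explicit
max_tokens against a configured output limit rather than a static per-model
tokenLimit() table, which is why a hot model switch takes effect immediately.
The snippet below is a self-contained TypeScript sketch of that behavior with
hypothetical names (ChatRequest and this standalone applyOutputTokenLimit are
stand-ins, not the provider's real types); the values mirror the test data
above (maxOutputTokens 4_096 for 'qwen-max').

    interface ChatRequest {
      model: string;
      max_tokens?: number;
    }

    // Clamp max_tokens only when the caller set it and a limit is known,
    // mirroring the "only modifies max_tokens when already present" rule
    // from the method's doc comment.
    function applyOutputTokenLimit(
      request: ChatRequest,
      configuredMax?: number,
    ): ChatRequest {
      if (request.max_tokens === undefined || configuredMax === undefined) {
        return request;
      }
      return {
        ...request,
        max_tokens: Math.min(request.max_tokens, configuredMax),
      };
    }

    // After a hot switch to 'qwen-max', an oversized request is clamped:
    const clamped = applyOutputTokenLimit(
      { model: 'qwen-max', max_tokens: 8_192 },
      4_096,
    );
    console.log(clamped.max_tokens); // 4096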