fix(core): support cross-auth fast side queries (#4117)
Some checks are pending
Qwen Code CI / Classify PR (push) Waiting to run
Qwen Code CI / Lint (push) Blocked by required conditions
Qwen Code CI / Test (macos-latest, Node 22.x) (push) Blocked by required conditions
Qwen Code CI / Test (ubuntu-latest, Node 22.x) (push) Blocked by required conditions
Qwen Code CI / Test (windows-latest, Node 22.x) (push) Blocked by required conditions
Qwen Code CI / Post Coverage Comment (push) Blocked by required conditions
Qwen Code CI / CodeQL (push) Blocked by required conditions
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none (push) Waiting to run
E2E Tests / E2E Test - macOS (push) Waiting to run

* fix(core): support cross-auth fast side queries

* refactor(core): hoist resolveForModel selector and refresh side-query docs

Compute the model selector once at the top of `resolveForModel` and pass
it through to `createContentGeneratorForModel` and
`resolveModelAcrossAuthTypes`. This eliminates the redundant selector
resolution that happened up to five times per cross-auth side query
(once per call, plus once inside each downstream helper).

Also update the JSDoc for `SideQueryJsonOptions.model` and
`SideQueryTextOptions.model` to reflect the actual fallback chain
(`getFastModelForSideQuery` → `getFastModel` → `getModel` →
`DEFAULT_QWEN_MODEL`) introduced in this PR.
This commit is contained in:
tanzhenxin 2026-05-14 19:22:12 +08:00 committed by GitHub
parent 85c10c1619
commit cc800d0132
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 961 additions and 89 deletions

View file

@ -375,7 +375,6 @@ export class DynamicCommandLocalizationService {
items: TranslationItem[],
signal: AbortSignal,
): Promise<Map<string, string>> {
const model = config.getFastModel() ?? config.getModel();
const targetLanguageName = getLanguageNameForTranslationTarget(language);
const translations = new Map<string, string>();
@ -391,7 +390,6 @@ export class DynamicCommandLocalizationService {
try {
response = await runSideQuery<Record<string, unknown>>(config, {
purpose: 'dynamic-command-localization',
model,
contents: [{ role: 'user', parts: [{ text: prompt }] }],
schema: {
type: 'object',

View file

@ -395,7 +395,108 @@ describe('modelCommand', () => {
});
});
it('should reject unavailable fast models for the current auth type', async () => {
it('should set fast models configured under another auth type', async () => {
const setValue = vi.fn();
const setFastModel = vi.fn();
mockContext = createMockCommandContext({
invocation: {
raw: '/model --fast deepseek-v4-flash',
name: 'model',
args: '--fast deepseek-v4-flash',
},
services: {
config: {
getContentGeneratorConfig: vi.fn().mockReturnValue({
model: 'claude-opus-4-7',
authType: AuthType.USE_ANTHROPIC,
}),
getAllConfiguredModels: vi.fn().mockReturnValue([
{
id: 'deepseek-v4-flash',
label: 'deepseek-v4-flash',
authType: AuthType.USE_OPENAI,
},
{
id: 'claude-opus-4-7',
label: 'claude-opus-4-7',
authType: AuthType.USE_ANTHROPIC,
},
]),
setFastModel,
},
settings: createMockSettings(setValue),
},
});
const result = await modelCommand.action!(
mockContext,
'--fast deepseek-v4-flash',
);
expect(setValue).toHaveBeenCalledWith(
expect.any(String),
'fastModel',
'deepseek-v4-flash',
);
expect(setFastModel).toHaveBeenCalledWith('deepseek-v4-flash');
expect(result).toEqual({
type: 'message',
messageType: 'info',
content: 'Fast Model: deepseek-v4-flash',
});
});
it('should set authType-qualified fast model selectors', async () => {
const setValue = vi.fn();
const setFastModel = vi.fn();
mockContext = createMockCommandContext({
invocation: {
raw: '/model --fast openai:deepseek-v4-flash',
name: 'model',
args: '--fast openai:deepseek-v4-flash',
},
services: {
config: {
getContentGeneratorConfig: vi.fn().mockReturnValue({
model: 'claude-opus-4-7',
authType: AuthType.USE_ANTHROPIC,
}),
getAvailableModelsForAuthType: vi.fn((authType: AuthType) =>
authType === AuthType.USE_OPENAI
? [
{
id: 'deepseek-v4-flash',
label: 'deepseek-v4-flash',
authType: AuthType.USE_OPENAI,
},
]
: [],
),
setFastModel,
},
settings: createMockSettings(setValue),
},
});
const result = await modelCommand.action!(
mockContext,
'--fast openai:deepseek-v4-flash',
);
expect(setValue).toHaveBeenCalledWith(
expect.any(String),
'fastModel',
'openai:deepseek-v4-flash',
);
expect(setFastModel).toHaveBeenCalledWith('openai:deepseek-v4-flash');
expect(result).toEqual({
type: 'message',
messageType: 'info',
content: 'Fast Model: openai:deepseek-v4-flash',
});
});
it('should reject unavailable fast models across all auth types', async () => {
const setValue = vi.fn();
const setFastModel = vi.fn();
mockContext = createMockCommandContext({
@ -410,9 +511,13 @@ describe('modelCommand', () => {
model: 'qwen-plus',
authType: AuthType.USE_OPENAI,
}),
getAvailableModelsForAuthType: vi
.fn()
.mockReturnValue([{ id: 'qwen-turbo', label: 'Qwen Turbo' }]),
getAllConfiguredModels: vi.fn().mockReturnValue([
{
id: 'qwen-turbo',
label: 'Qwen Turbo',
authType: AuthType.USE_OPENAI,
},
]),
setFastModel,
},
settings: createMockSettings(setValue),
@ -430,8 +535,8 @@ describe('modelCommand', () => {
type: 'message',
messageType: 'error',
content:
"Fast model 'missing-model' is not available for auth type 'openai'.\n" +
"Available models for 'openai': qwen-turbo.\n" +
"Fast model 'missing-model' is not configured for any auth type.\n" +
'Configured models: qwen-turbo.\n' +
'Configure models in settings.modelProviders or run /model to select an available model.',
});
});

View file

@ -17,6 +17,7 @@ import {
AuthType,
type AvailableModel,
type Config,
resolveModelId,
} from '@qwen-code/qwen-code-core';
import type { LoadedSettings } from '../../config/settings.js';
import { parseAcpModelOption } from '../../utils/acpModelUtils.js';
@ -77,6 +78,25 @@ function formatUnavailableModelMessage(
);
}
/**
 * Builds the error text reported when a requested fast model is not found
 * under any auth type.
 *
 * @param modelName - Model name to quote in the first line of the message.
 * @param availableModels - Models to list as configured alternatives. Ids are
 *   de-duplicated, keeping first-occurrence order.
 * @returns Three newline-separated lines: the failure, the list of configured
 *   model ids (or a note that none are configured), and a remediation hint.
 */
function formatUnavailableFastModelMessage(
  modelName: string,
  availableModels: AvailableModel[],
): string {
  // A Set keeps insertion order, so duplicates collapse onto their first
  // occurrence.
  const uniqueIds = [...new Set(availableModels.map(({ id }) => id))];

  let configuredSummary = 'No models are configured.';
  if (uniqueIds.length > 0) {
    configuredSummary = `Configured models: ${uniqueIds.join(', ')}.`;
  }

  return [
    `Fast model '${modelName}' is not configured for any auth type.`,
    configuredSummary,
    'Configure models in settings.modelProviders or run /model to select an available model.',
  ].join('\n');
}
// Get an array of the available model IDs as strings
function getAvailableModelIds(context: CommandContext) {
const { services } = context;
@ -174,17 +194,36 @@ export const modelCommand: SlashCommand = {
};
}
const availableModels = config.getAvailableModelsForAuthType(authType);
if (!availableModels.some((model) => model.id === modelName)) {
const selector = (() => {
try {
return resolveModelId(modelName);
} catch {
return undefined;
}
})();
if (!selector) {
return {
type: 'message',
messageType: 'error',
content: formatUnavailableModelMessage(
'Fast model',
modelName,
authType,
availableModels,
),
content: formatUnavailableFastModelMessage(modelName, []),
};
}
const availableModels = selector.authType
? config.getAvailableModelsForAuthType(selector.authType)
: config.getAllConfiguredModels();
if (!availableModels.some((model) => model.id === selector.modelId)) {
return {
type: 'message',
messageType: 'error',
content: selector.authType
? formatUnavailableModelMessage(
'Fast model',
selector.modelId,
selector.authType,
availableModels,
)
: formatUnavailableFastModelMessage(modelName, availableModels),
};
}

View file

@ -213,7 +213,9 @@ export const renameCommand: SlashCommand = {
// the main model here because `--auto` is a deliberate opt-in to the
// sentence-case fast-model flow, and surprising a user with a main-
// model call would defeat the purpose.
if (!config.getFastModel()) {
const fastModel =
config.getFastModelForSideQuery?.() ?? config.getFastModel();
if (!fastModel) {
return {
type: 'message',
messageType: 'error',

View file

@ -288,6 +288,108 @@ describe('<ModelDialog />', () => {
expect(props.onClose).toHaveBeenCalledTimes(1);
});
it('stores authType-qualified selectors in fast model mode', async () => {
const setFastModel = vi.fn();
const { props, mockSettings } = renderComponent({ isFastModelMode: true }, {
getAuthType: vi.fn(() => AuthType.USE_ANTHROPIC),
getModel: vi.fn(() => 'claude-opus-4-7'),
getAllConfiguredModels: vi.fn(() => [
{
id: 'deepseek-v4-flash',
label: 'deepseek-v4-flash',
authType: AuthType.USE_OPENAI,
},
{
id: 'claude-opus-4-7',
label: 'claude-opus-4-7',
authType: AuthType.USE_ANTHROPIC,
},
]),
getContentGeneratorConfig: vi.fn(() => ({
authType: AuthType.USE_ANTHROPIC,
model: 'claude-opus-4-7',
})),
setFastModel,
} as unknown as Partial<Config>);
const childOnSelect = mockedSelect.mock.calls[0][0].onSelect;
await childOnSelect(`${AuthType.USE_OPENAI}::deepseek-v4-flash`);
expect(mockSettings.setValue).toHaveBeenCalledWith(
SettingScope.User,
'fastModel',
'openai:deepseek-v4-flash',
);
expect(setFastModel).toHaveBeenCalledWith('openai:deepseek-v4-flash');
expect(props.onClose).toHaveBeenCalledTimes(1);
});
it('highlights the cross-auth row for a bare fast-model setting', () => {
// `/model --fast deepseek-v4-flash` validates across all providers and
// persists the bare model id. When the dialog re-opens, it must locate
// the right row even though the setting carries no authType prefix —
// otherwise the highlight falls back to the current auth's first row
// and Enter would silently overwrite the setting.
const mockSettings = {
isTrusted: true,
user: { settings: {} },
workspace: { settings: {} },
merged: { fastModel: 'deepseek-v4-flash' },
setValue: vi.fn(),
} as unknown as LoadedSettings;
const allModels = [
{
id: 'claude-opus-4-7',
label: 'claude-opus-4-7',
description: '',
authType: AuthType.USE_ANTHROPIC,
},
{
id: 'deepseek-v4-flash',
label: 'deepseek-v4-flash',
description: '',
authType: AuthType.USE_OPENAI,
},
];
render(
<SettingsContext.Provider value={mockSettings}>
<ConfigContext.Provider
value={
{
getModel: vi.fn(() => 'claude-opus-4-7'),
getAuthType: vi.fn(() => AuthType.USE_ANTHROPIC),
getAllConfiguredModels: vi.fn(() => allModels),
getContentGeneratorConfig: vi.fn(() => ({
authType: AuthType.USE_ANTHROPIC,
model: 'claude-opus-4-7',
})),
getModelsConfig: vi.fn(() => ({
getGenerationConfig: vi.fn(() => ({ baseUrl: undefined })),
})),
getActiveRuntimeModelSnapshot: vi.fn(() => undefined),
getUsageStatisticsEnabled: vi.fn(() => false),
getSessionId: vi.fn(() => 'session'),
getDebugMode: vi.fn(() => false),
getUseModelRouter: vi.fn(() => false),
getProxy: vi.fn(() => undefined),
} as unknown as Config
}
>
<ModelDialog onClose={vi.fn()} isFastModelMode={true} />
</ConfigContext.Provider>
</SettingsContext.Provider>,
);
const items = mockedSelect.mock.calls[0][0].items;
const deepseekIndex = items.findIndex((item) =>
String(item.value).includes('deepseek-v4-flash'),
);
expect(deepseekIndex).toBeGreaterThanOrEqual(0);
expect(mockedSelect.mock.calls[0][0].initialIndex).toBe(deepseekIndex);
});
it('blocks switching to qwen-oauth from another authType (discontinued)', async () => {
const switchModel = vi.fn().mockResolvedValue(undefined);
const getAuthType = vi.fn(() => AuthType.USE_OPENAI);

View file

@ -12,6 +12,7 @@ import {
ModelSlashCommandEvent,
logModelSlashCommand,
MAINLINE_CODER_MODEL,
resolveModelId,
type AvailableModel as CoreAvailableModel,
type ContentGeneratorConfig,
type InputModalities,
@ -303,9 +304,17 @@ export function ModelDialog({
// In fast model mode, default to the currently configured fast model
const fastModelSetting = settings?.merged?.fastModel as string | undefined;
const parsedFastModelSetting = useMemo(() => {
if (!isFastModelMode) return undefined;
try {
return resolveModelId(fastModelSetting);
} catch {
return undefined;
}
}, [fastModelSetting, isFastModelMode]);
const preferredModelId =
isFastModelMode && fastModelSetting
? fastModelSetting
isFastModelMode && parsedFastModelSetting
? parsedFastModelSetting.modelId
: config?.getModel() || MAINLINE_CODER_MODEL;
// Check if current model is a runtime model
// Runtime snapshot ID is already in $runtime|${authType}|${modelId} format
@ -315,11 +324,35 @@ export function ModelDialog({
const currentBaseUrl = config
?.getModelsConfig()
.getGenerationConfig()?.baseUrl;
// When `/model --fast <bare-id>` validated the model across all providers,
// the setting persists as a bare model ID (no authType prefix) so that
// runtime cross-auth lookups still work. Highlight the row that owns it
// regardless of which provider that turns out to be — otherwise the
// dialog would default to the current auth's first row and Enter would
// silently overwrite the user's fast-model setting.
const preferredFastModelEntry =
isFastModelMode && parsedFastModelSetting
? parsedFastModelSetting.authType
? availableModelEntries.find(
({ authType: t2, model }) =>
t2 === parsedFastModelSetting.authType &&
model.id === parsedFastModelSetting.modelId,
)
: availableModelEntries.find(
({ model }) => model.id === parsedFastModelSetting.modelId,
)
: undefined;
const preferredKey = activeRuntimeSnapshot
? activeRuntimeSnapshot.id
: authType
? buildModelSelectionKey(authType, preferredModelId, currentBaseUrl)
: '';
: preferredFastModelEntry
? buildModelSelectionKey(
preferredFastModelEntry.authType,
preferredFastModelEntry.model.id,
preferredFastModelEntry.model.baseUrl,
)
: authType
? buildModelSelectionKey(authType, preferredModelId, currentBaseUrl)
: '';
useKeypress(
(key) => {
@ -358,27 +391,28 @@ export function ModelDialog({
async (selected: string) => {
setErrorMessage(null);
// Fast model mode: save the model ID only (baseUrl is intentionally
// discarded — getFastModel resolves via the first registry match).
// Fast model mode: save authType:modelId so duplicate model ids across
// providers remain unambiguous. baseUrl is intentionally discarded.
if (isFastModelMode) {
let modelId: string;
let fastModel: string;
if (selected.includes('::')) {
const parsed = parseModelSelectionKey(selected);
modelId = parsed.modelId;
fastModel = `${parsed.authType}:${parsed.modelId}`;
} else if (selected.startsWith('$runtime|')) {
const parts = selected.split('|');
modelId = parts[2] ?? selected;
fastModel =
parts[1] && parts[2] ? `${parts[1]}:${parts[2]}` : selected;
} else {
modelId = selected;
fastModel = selected;
}
const scope = getPersistScopeForModelSelection(settings);
settings.setValue(scope, 'fastModel', modelId);
settings.setValue(scope, 'fastModel', fastModel);
// Sync the runtime Config so forked agents pick up the change immediately.
config?.setFastModel(modelId);
config?.setFastModel(fastModel);
uiState?.historyManager.addItem(
{
type: 'success',
text: `${t('Fast Model')}: ${modelId}`,
text: `${t('Fast Model')}: ${fastModel}`,
},
Date.now(),
);

View file

@ -760,6 +760,190 @@ describe('Server Config (config.ts)', () => {
});
describe('model switching with different credentials (OpenAI)', () => {
it('keeps getFastModel current-auth-only for direct runtime callers', () => {
const config = new Config({
...baseParams,
authType: AuthType.USE_ANTHROPIC,
model: 'claude-opus-4-7',
fastModel: 'deepseek-v4-flash',
modelProvidersConfig: {
[AuthType.USE_OPENAI]: [
{
id: 'deepseek-v4-flash',
name: 'deepseek-v4-flash',
baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
envKey: 'DASHSCOPE_API_KEY',
},
],
[AuthType.USE_ANTHROPIC]: [
{
id: 'claude-opus-4-7',
name: 'claude-opus-4-7',
baseUrl: 'https://idealab.alibaba-inc.com/api/anthropic',
envKey: 'IDEALAB_OPUS_API_KEY',
},
],
},
});
expect(config.getFastModel()).toBeUndefined();
expect(config.getFastModelForSideQuery()).toBe('deepseek-v4-flash');
});
it('returns an authType-qualified fast model selector for side queries', () => {
const config = new Config({
...baseParams,
authType: AuthType.USE_ANTHROPIC,
model: 'shared-model',
fastModel: 'openai:shared-model',
modelProvidersConfig: {
[AuthType.USE_OPENAI]: [
{
id: 'shared-model',
name: 'OpenAI shared model',
baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
envKey: 'DASHSCOPE_API_KEY',
},
],
[AuthType.USE_ANTHROPIC]: [
{
id: 'shared-model',
name: 'Anthropic shared model',
baseUrl: 'https://idealab.alibaba-inc.com/api/anthropic',
envKey: 'IDEALAB_OPUS_API_KEY',
},
],
},
});
expect(config.getFastModel()).toBeUndefined();
expect(config.getFastModelForSideQuery()).toBe('openai:shared-model');
});
it('returns a bare fast model for getFastModel when authType-qualified selector matches the current auth type', () => {
const config = new Config({
...baseParams,
authType: AuthType.USE_OPENAI,
model: 'gpt-4',
fastModel: 'openai:deepseek-v4-flash',
modelProvidersConfig: {
[AuthType.USE_OPENAI]: [
{
id: 'deepseek-v4-flash',
name: 'deepseek-v4-flash',
baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
envKey: 'DASHSCOPE_API_KEY',
},
],
},
});
expect(config.getFastModel()).toBe('deepseek-v4-flash');
expect(config.getFastModelForSideQuery()).toBe(
'openai:deepseek-v4-flash',
);
});
it('accepts runtime fast models for authType-qualified selectors', () => {
const config = new Config({
...baseParams,
authType: AuthType.USE_OPENAI,
model: 'runtime-fast-model',
fastModel: 'openai:runtime-fast-model',
generationConfig: {
apiKey: 'sk-runtime-key',
baseUrl: 'https://runtime.example.com/v1',
},
generationConfigSources: {
model: { kind: 'programmatic', detail: 'test' },
apiKey: { kind: 'programmatic', detail: 'test' },
baseUrl: { kind: 'programmatic', detail: 'test' },
},
modelProvidersConfig: {
[AuthType.USE_OPENAI]: [
{
id: 'registry-model',
name: 'Registry Model',
baseUrl: 'https://api.openai.com/v1',
envKey: 'OPENAI_API_KEY',
},
],
},
});
config.getModelsConfig().detectAndCaptureRuntimeModel();
expect(config.getFastModel()).toBe('runtime-fast-model');
expect(config.getFastModelForSideQuery()).toBe(
'openai:runtime-fast-model',
);
});
it('returns undefined when the fast model is not configured for any auth type', () => {
const config = new Config({
...baseParams,
authType: AuthType.USE_ANTHROPIC,
model: 'claude-opus-4-7',
fastModel: 'missing-fast-model',
modelProvidersConfig: {
[AuthType.USE_ANTHROPIC]: [
{
id: 'claude-opus-4-7',
name: 'claude-opus-4-7',
baseUrl: 'https://idealab.alibaba-inc.com/api/anthropic',
envKey: 'IDEALAB_OPUS_API_KEY',
},
],
},
});
expect(config.getFastModel()).toBeUndefined();
expect(config.getFastModelForSideQuery()).toBeUndefined();
});
// Edge case: the configured selector has an auth-type prefix but nothing
// after the colon. An OpenAI model is registered, yet both accessors are
// expected to report "no fast model" (undefined) rather than throw.
it('returns undefined when the fast model selector is malformed', () => {
const config = new Config({
...baseParams,
authType: AuthType.USE_ANTHROPIC,
model: 'claude-opus-4-7',
fastModel: 'openai:',
modelProvidersConfig: {
[AuthType.USE_OPENAI]: [
{
id: 'deepseek-v4-flash',
name: 'deepseek-v4-flash',
baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
envKey: 'DASHSCOPE_API_KEY',
},
],
},
});
expect(config.getFastModel()).toBeUndefined();
expect(config.getFastModelForSideQuery()).toBeUndefined();
});
// Edge case: the fast model setting is the literal string 'fast' instead of
// a model id. NOTE(review): 'fast' looks like the alias callers pass to mean
// "the configured fast model" — confirm against resolveModelId. Both
// accessors are expected to report undefined for this configuration.
it('returns undefined when fastModel points back to the fast selector', () => {
const config = new Config({
...baseParams,
authType: AuthType.USE_ANTHROPIC,
model: 'claude-opus-4-7',
fastModel: 'fast',
modelProvidersConfig: {
[AuthType.USE_ANTHROPIC]: [
{
id: 'claude-opus-4-7',
name: 'claude-opus-4-7',
baseUrl: 'https://idealab.alibaba-inc.com/api/anthropic',
envKey: 'IDEALAB_OPUS_API_KEY',
},
],
},
});
expect(config.getFastModel()).toBeUndefined();
expect(config.getFastModelForSideQuery()).toBeUndefined();
});
it('should refresh auth when switching to model with different envKey', async () => {
// This test verifies the fix for switching between modelProvider models
// with different envKeys (e.g., deepseek-chat with DEEPSEEK_API_KEY)

View file

@ -154,6 +154,7 @@ import {
type AvailableModel,
type RuntimeModelSnapshot,
} from '../models/index.js';
import { resolveModelId } from '../utils/modelId.js';
import type { ClaudeMarketplaceConfig } from '../extension/claude-converter.js';
// Re-export types
@ -1832,20 +1833,57 @@ export class Config {
}
/**
* Returns the fast model if one is configured and valid for the current auth type,
* otherwise returns undefined. Background agents (memory extraction, dream, /btw)
* use this as a cheaper alternative to the main session model.
* Returns the fast model if one is configured and valid for the current auth
* type, otherwise returns undefined. Direct runtime paths use this as a
* cheaper alternative to the main session model, so it intentionally stays
* current-auth-only.
*/
getFastModel(): string | undefined {
if (!this.fastModel) return undefined;
const authType = this.contentGeneratorConfig?.authType;
const authType =
this.contentGeneratorConfig?.authType ??
this.modelsConfig.getCurrentAuthType();
if (!authType) return undefined;
const available = this.getAvailableModelsForAuthType(authType);
return available.some((m) => m.id === this.fastModel)
? this.fastModel
const selector = this.resolveFastModelSelector();
if (!selector) return undefined;
if (selector.authType && selector.authType !== authType) return undefined;
const available = this.getAllConfiguredModels([authType]);
return available.some((m) => m.id === selector.modelId)
? selector.modelId
: undefined;
}
/**
 * Resolves the fast model for side-query callers.
 *
 * In contrast to {@link getFastModel}, the result may name a provider other
 * than the main session's, because BaseLlmClient can route a single request
 * through a different provider.
 *
 * @returns `authType:modelId` when the configured selector carries an auth
 *   type and that model is configured under it; the bare model id when the
 *   selector has no auth type and the id is configured under any auth type;
 *   `undefined` when no selector can be resolved or the model is not found.
 */
getFastModelForSideQuery(): string | undefined {
  const selector = this.resolveFastModelSelector();
  if (!selector) return undefined;

  const { authType, modelId } = selector;

  // A qualified selector is only checked against its own provider's models;
  // a bare one is checked against every configured model.
  const candidates = authType
    ? this.getAllConfiguredModels([authType])
    : this.getAllConfiguredModels();

  const isConfigured = candidates.some((entry) => entry.id === modelId);
  if (!isConfigured) return undefined;

  return authType ? `${authType}:${modelId}` : modelId;
}
/**
 * Parses the configured fast model string into a selector.
 *
 * @returns The value produced by `resolveModelId`, or `undefined` when no
 *   fast model is set or `resolveModelId` throws while parsing it.
 */
private resolveFastModelSelector() {
  const configured = this.fastModel;
  if (!configured) {
    return undefined;
  }

  try {
    return resolveModelId(configured);
  } catch {
    // A selector that cannot be parsed is treated as "no fast model".
    return undefined;
  }
}
/**
* Update the fast model at runtime (e.g., when the user runs `/model --fast <model>`).
* Pass undefined or an empty string to clear the fast model override.

View file

@ -501,6 +501,8 @@ describe('BaseLlmClient', () => {
.mockReturnValue({ authType: AuthType.QWEN_OAUTH }),
getEmbeddingModel: vi.fn().mockReturnValue('test-embedding-model'),
getModel: vi.fn().mockReturnValue('main-model'),
getFastModel: vi.fn().mockReturnValue(undefined),
getFastModelForSideQuery: vi.fn().mockReturnValue(undefined),
getModelsConfig: vi.fn().mockReturnValue({ getResolvedModel }),
} as unknown as Mocked<Config>;
});
@ -635,6 +637,95 @@ describe('BaseLlmClient', () => {
);
});
it('generateJson accepts authType-qualified selectors and sends the bare model id', async () => {
getResolvedModel.mockImplementation((authType: string, model: string) => {
if (authType === AuthType.USE_OPENAI && model === 'shared-model') {
return {
id: 'shared-model',
authType: AuthType.USE_OPENAI,
envKey: 'OPENAI_API_KEY',
};
}
return undefined;
});
fastGenerateContent.mockResolvedValue(
createMockResponseWithFunctionCall({ ok: true }),
);
vi.mocked(getFunctionCalls).mockReturnValue([
{ name: 'respond_in_schema', args: { ok: true } },
]);
const c = new BaseLlmClient(mockContentGenerator, crossProviderConfig);
await c.generateJson({
contents: [{ role: 'user', parts: [{ text: 'go' }] }],
schema: { type: 'object' },
model: 'openai:shared-model',
abortSignal: new AbortController().signal,
promptId: 'test',
});
expect(getResolvedModel).toHaveBeenCalledWith(
AuthType.USE_OPENAI,
'shared-model',
);
expect(mockBuildAgentContentGeneratorConfig).toHaveBeenCalledWith(
crossProviderConfig,
'shared-model',
expect.objectContaining({ authType: AuthType.USE_OPENAI }),
);
expect(fastGenerateContent).toHaveBeenCalledWith(
expect.objectContaining({ model: 'shared-model' }),
'test',
);
});
it('generateJson resolves fast selectors through the configured fast model', async () => {
crossProviderConfig.getFastModelForSideQuery.mockReturnValue(
'openai:shared-model',
);
getResolvedModel.mockImplementation((authType: string, model: string) => {
if (authType === AuthType.USE_OPENAI && model === 'shared-model') {
return {
id: 'shared-model',
authType: AuthType.USE_OPENAI,
envKey: 'OPENAI_API_KEY',
};
}
return undefined;
});
fastGenerateContent.mockResolvedValue(
createMockResponseWithFunctionCall({ ok: true }),
);
vi.mocked(getFunctionCalls).mockReturnValue([
{ name: 'respond_in_schema', args: { ok: true } },
]);
const c = new BaseLlmClient(mockContentGenerator, crossProviderConfig);
await c.generateJson({
contents: [{ role: 'user', parts: [{ text: 'go' }] }],
schema: { type: 'object' },
model: 'fast',
abortSignal: new AbortController().signal,
promptId: 'test',
});
expect(getResolvedModel).toHaveBeenCalledWith(
AuthType.USE_OPENAI,
'shared-model',
);
expect(mockBuildAgentContentGeneratorConfig).toHaveBeenCalledWith(
crossProviderConfig,
'shared-model',
expect.objectContaining({ authType: AuthType.USE_OPENAI }),
);
expect(fastGenerateContent).toHaveBeenCalledWith(
expect.objectContaining({ model: 'shared-model' }),
'test',
);
});
it('generateText routes through the per-model generator and forwards retry authType', async () => {
getResolvedModel.mockReturnValue({
authType: AuthType.USE_ANTHROPIC,

View file

@ -19,6 +19,7 @@ import type { ContentGenerator } from './contentGenerator.js';
import { AuthType, createContentGenerator } from './contentGenerator.js';
import type { ResolvedModelConfig } from '../models/types.js';
import { buildAgentContentGeneratorConfig } from '../models/content-generator-config.js';
import { resolveModelId, type ResolvedModelId } from '../utils/modelId.js';
import { reportError } from '../utils/errorReporting.js';
import { getErrorMessage } from '../utils/errors.js';
import { retryWithBackoff, isUnattendedMode } from '../utils/retry.js';
@ -41,6 +42,7 @@ const debugLogger = createDebugLogger('BASE_LLM_CLIENT');
export interface ResolvedGeneratorForModel {
contentGenerator: ContentGenerator;
retryAuthType: string | undefined;
model: string;
}
/**
@ -173,14 +175,17 @@ export class BaseLlmClient {
},
];
const { contentGenerator, retryAuthType } =
await this.resolveForModel(model);
const {
contentGenerator,
retryAuthType,
model: requestModel,
} = await this.resolveForModel(model);
try {
const apiCall = () =>
contentGenerator.generateContent(
{
model,
model: requestModel,
config: {
...requestConfig,
tools,
@ -265,14 +270,17 @@ export class BaseLlmClient {
...(systemInstruction && { systemInstruction }),
};
const { contentGenerator, retryAuthType } =
await this.resolveForModel(model);
const {
contentGenerator,
retryAuthType,
model: requestModel,
} = await this.resolveForModel(model);
try {
const apiCall = () =>
contentGenerator.generateContent(
{
model,
model: requestModel,
config: requestConfig,
contents,
},
@ -363,23 +371,35 @@ export class BaseLlmClient {
* or generator creation fails (e.g. tests without full auth setup).
*/
async resolveForModel(model: string): Promise<ResolvedGeneratorForModel> {
const selector = this.resolveModelSelector(model);
const requestModel = selector?.modelId ?? this.config.getModel() ?? model;
const mainModel = this.config.getModel() ?? model;
const mainAuthType = this.config.getContentGeneratorConfig()?.authType;
if (model === mainModel) {
if (
requestModel === mainModel &&
(!selector?.authType || selector.authType === mainAuthType)
) {
return {
contentGenerator: this.contentGenerator,
retryAuthType: mainAuthType,
model: requestModel,
};
}
const contentGenerator = await this.createContentGeneratorForModel(model);
const contentGenerator = await this.createContentGeneratorForModel(
model,
selector,
);
const resolvedModel = this.resolveModelAcrossAuthTypes(model, selector);
const retryAuthType =
this.resolveModelAcrossAuthTypes(model)?.authType ??
mainAuthType ??
AuthType.USE_OPENAI;
resolvedModel?.authType ?? mainAuthType ?? AuthType.USE_OPENAI;
return { contentGenerator, retryAuthType };
return {
contentGenerator,
retryAuthType,
model: resolvedModel?.id ?? requestModel,
};
}
/**
@ -397,9 +417,16 @@ export class BaseLlmClient {
*/
private resolveModelAcrossAuthTypes(
model: string,
selector: ResolvedModelId | undefined,
): ResolvedModelConfig | undefined {
const modelsConfig = this.config.getModelsConfig?.();
if (!modelsConfig) return undefined;
if (!selector) return undefined;
const modelId = selector.modelId;
if (selector.authType) {
return modelsConfig.getResolvedModel(selector.authType, modelId);
}
const allAuthTypes: AuthType[] = [
AuthType.QWEN_OAUTH,
@ -411,13 +438,13 @@ export class BaseLlmClient {
const mainAuthType = this.config.getContentGeneratorConfig()?.authType;
if (mainAuthType) {
const resolved = modelsConfig.getResolvedModel(mainAuthType, model);
const resolved = modelsConfig.getResolvedModel(mainAuthType, modelId);
if (resolved) return resolved;
}
for (const authType of allAuthTypes) {
if (authType === mainAuthType) continue;
const resolved = modelsConfig.getResolvedModel(authType, model);
const resolved = modelsConfig.getResolvedModel(authType, modelId);
if (resolved) return resolved;
}
@ -426,13 +453,17 @@ export class BaseLlmClient {
private async createContentGeneratorForModel(
model: string,
selector: ResolvedModelId | undefined,
): Promise<ContentGenerator> {
const cached = this.perModelGeneratorCache.get(model);
const cacheKey = selector
? `${selector.authType ?? ''}:${selector.modelId}`
: model;
const cached = this.perModelGeneratorCache.get(cacheKey);
if (cached) return cached;
const generatorPromise = (async () => {
try {
const resolvedModel = this.resolveModelAcrossAuthTypes(model);
const resolvedModel = this.resolveModelAcrossAuthTypes(model, selector);
if (!resolvedModel) {
debugLogger.warn(
@ -441,9 +472,10 @@ export class BaseLlmClient {
return this.contentGenerator;
}
const targetModel = resolvedModel.id ?? selector?.modelId ?? model;
const targetConfig = buildAgentContentGeneratorConfig(
this.config,
model,
targetModel,
{
authType: resolvedModel.authType,
apiKey: resolvedModel.envKey
@ -459,12 +491,22 @@ export class BaseLlmClient {
`Failed to create content generator for model "${model}", falling back to main generator.`,
err instanceof Error ? err.message : String(err),
);
this.perModelGeneratorCache.delete(model);
this.perModelGeneratorCache.delete(cacheKey);
return this.contentGenerator;
}
})();
this.perModelGeneratorCache.set(model, generatorPromise);
this.perModelGeneratorCache.set(cacheKey, generatorPromise);
return generatorPromise;
}
/**
 * Turns a caller-supplied model string into a selector by delegating to
 * `resolveModelId`, passing the main session model, the main auth type and
 * the configured fast model as context.
 *
 * @param model - Model string as received by the request entry points.
 * @returns Whatever `resolveModelId` yields for `model` with that context.
 */
private resolveModelSelector(model: string): ResolvedModelId | undefined {
  const currentModel = this.config.getModel();
  const currentAuthType = this.config.getContentGeneratorConfig()?.authType;
  // Prefer the side-query accessor and fall back to the current-auth one.
  // The optional calls tolerate Config objects that lack either method.
  const fastModel =
    this.config.getFastModelForSideQuery?.() ?? this.config.getFastModel?.();

  return resolveModelId(model, { currentModel, currentAuthType, fastModel });
}
}

View file

@ -1576,16 +1576,18 @@ export class GeminiClient {
// main model's config. The retry authType is resolved alongside so that
// provider-specific checks (e.g. QWEN_OAUTH quota detection) reference
// the target model's provider.
const { contentGenerator, retryAuthType } = await this.config
.getBaseLlmClient()
.resolveForModel(model);
const {
contentGenerator,
retryAuthType,
model: requestModel,
} = await this.config.getBaseLlmClient().resolveForModel(model);
const apiCall = () => {
currentAttemptModel = model;
currentAttemptModel = requestModel;
return contentGenerator.generateContent(
{
model,
model: requestModel,
config: requestConfig,
contents,
},

View file

@ -286,6 +286,7 @@ export * from './utils/gitUtils.js';
export * from './utils/ignorePatterns.js';
export * from './utils/jsonl-utils.js';
export * from './utils/memoryDiscovery.js';
export * from './utils/modelId.js';
export { ConditionalRulesRegistry } from './utils/rulesDiscovery.js';
export type { RuleFile } from './utils/rulesDiscovery.js';
export { OpenAILogger, openaiLogger } from './utils/openaiLogger.js';

View file

@ -129,7 +129,7 @@ describe('selectRelevantAutoMemoryDocumentsByModel', () => {
);
});
it('passes the fast model to runSideQuery when configured', async () => {
it('lets runSideQuery choose the default side-query model when fast model is configured', async () => {
vi.mocked(mockConfig.getFastModel).mockReturnValue('fast-flash-model');
vi.mocked(runSideQuery).mockResolvedValue({
selected_memories: ['reference.md'],
@ -146,13 +146,15 @@ describe('selectRelevantAutoMemoryDocumentsByModel', () => {
mockConfig,
expect.objectContaining({
purpose: 'auto-memory-recall',
model: 'fast-flash-model',
config: { temperature: 0 },
}),
);
expect(
'model' in (vi.mocked(runSideQuery).mock.calls[0]![1] as object),
).toBe(false);
});
it('passes undefined model when no fast model is configured', async () => {
it('lets runSideQuery fall back to its default when no fast model is configured', async () => {
vi.mocked(mockConfig.getFastModel).mockReturnValue(undefined);
vi.mocked(runSideQuery).mockResolvedValue({
selected_memories: ['reference.md'],
@ -169,10 +171,12 @@ describe('selectRelevantAutoMemoryDocumentsByModel', () => {
mockConfig,
expect.objectContaining({
purpose: 'auto-memory-recall',
model: undefined,
config: { temperature: 0 },
}),
);
expect(
'model' in (vi.mocked(runSideQuery).mock.calls[0]![1] as object),
).toBe(false);
});
it('throws when selector returns unknown relative paths', async () => {

View file

@ -95,9 +95,8 @@ export async function selectRelevantAutoMemoryDocumentsByModel(
? AbortSignal.any([AbortSignal.timeout(1_000), callerAbortSignal])
: AbortSignal.timeout(1_000),
// Use the fast model for this background side-query to reduce latency and
// cost. Falls back to the main session model if no fast model is configured.
model: config.getFastModel(),
// Uses runSideQuery's default side-query model policy: fast model first,
// then main session model when no fast model is configured.
systemInstruction: SELECT_MEMORIES_SYSTEM_PROMPT,
config: {
temperature: 0,

View file

@ -928,11 +928,12 @@ export class ChatRecordingService {
// Headless/one-shot CLI flows (`qwen -p "…"`, cron, CI scripts) run a
// single prompt and throw the session away. Spending fast-model tokens
// on a title no one will ever resume is pure waste; skip entirely.
// Checked before `getFastModel()` because it's strictly cheaper (a bool
// field read vs. a method that looks up available models for the auth
// type).
// Checked before `getFastModelForSideQuery()` because it's strictly
// cheaper (a bool field read vs. a method that looks up available models).
if (!this.config.isInteractive()) return;
if (!this.config.getFastModel()) return;
const fastModel =
this.config.getFastModelForSideQuery?.() ?? this.config.getFastModel();
if (!fastModel) return;
this.autoTitleAttempts++;
const controller = new AbortController();

View file

@ -61,7 +61,10 @@ export async function generateSessionRecap(
const recentHistory = takeRecentDialog(dialog, RECENT_MESSAGE_WINDOW);
if (recentHistory.length === 0) return null;
const model = config.getFastModel() ?? config.getModel();
const model =
config.getFastModelForSideQuery?.() ??
config.getFastModel() ??
config.getModel();
const result = await runSideQuery(config, {
purpose: 'session-recap',
@ -75,7 +78,6 @@ export async function generateSessionRecap(
temperature: 0.3,
},
abortSignal,
model,
// Recap is best-effort cosmetic — don't burn the default 7 retries.
maxAttempts: 1,
});

View file

@ -70,8 +70,8 @@ const TRAILING_PAIRED_BRACKETS_RE =
* command) can surface actionable messages instead of a generic "could not
* generate".
*
* - `no_fast_model`: config.getFastModel() returned undefined. User needs to
* configure one via `/model --fast <name>`.
* - `no_fast_model`: config.getFastModelForSideQuery() returned undefined.
* User needs to configure one via `/model --fast <name>`.
* - `no_client`: BaseLlmClient or GeminiClient not yet initialized. Rare,
* usually means the session hasn't authenticated yet.
* - `empty_history`: the conversation has fewer than 2 turns of usable text.
@ -107,7 +107,7 @@ export async function tryGenerateSessionTitle(
abortSignal: AbortSignal,
): Promise<SessionTitleOutcome> {
try {
const model = config.getFastModel();
const model = config.getFastModelForSideQuery?.() ?? config.getFastModel();
if (!model) return { ok: false, reason: 'no_fast_model' };
const geminiClient = config.getGeminiClient();
@ -130,7 +130,6 @@ export async function tryGenerateSessionTitle(
const result = await runSideQuery<{ title?: string }>(config, {
purpose: 'session-title',
model,
systemInstruction: TITLE_SYSTEM_PROMPT,
schema: TITLE_SCHEMA as unknown as Record<string, unknown>,
contents: [

View file

@ -89,7 +89,8 @@ export interface GenerateToolUseSummaryParams {
*/
lastAssistantText?: string;
/**
* Fast model to use. If omitted, falls back to `config.getFastModel()`;
* Fast model to use. If omitted, falls back to
* `config.getFastModelForSideQuery()`;
* if that also returns undefined, the call is skipped (returns null).
* Unlike `sessionRecap`, this does not fall back to the main model —
* summary generation is a nice-to-have and must not incur main-model cost.
@ -112,7 +113,10 @@ export async function generateToolUseSummary(
return null;
}
const model = params.model ?? config.getFastModel();
const model =
params.model ??
config.getFastModelForSideQuery?.() ??
config.getFastModel();
if (!model) {
debugLogger.debug('No fast model configured — skipping summary generation');
return null;
@ -151,7 +155,7 @@ export async function generateToolUseSummary(
temperature: 0.3,
},
abortSignal: signal,
model,
...(params.model !== undefined ? { model: params.model } : {}),
// Tool-use labels are best-effort cosmetic; firing once per turn means
// 7 retries on a transient outage would spike traffic for no benefit.
maxAttempts: 1,

View file

@ -0,0 +1,98 @@
/**
* @license
* Copyright 2025 Qwen
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect, it } from 'vitest';
import { AuthType } from '../core/contentGenerator.js';
import { resolveModelId } from './modelId.js';
// Specification for `resolveModelId`: omitted/inherit selectors, the `fast`
// alias, bare and authType-prefixed model IDs, and malformed input.
describe('resolveModelId', () => {
  // --- omitted / inherit -------------------------------------------------
  it('returns undefined for omitted models without a current model', () => {
    const resolved = resolveModelId(undefined);
    expect(resolved).toBeUndefined();
  });

  it('resolves omitted models to the current model when provided', () => {
    const resolved = resolveModelId(undefined, {
      currentModel: 'main-model',
      currentAuthType: AuthType.USE_ANTHROPIC,
    });
    expect(resolved).toEqual({
      authType: AuthType.USE_ANTHROPIC,
      modelId: 'main-model',
    });
  });

  it('resolves explicit inherit to the current model', () => {
    const resolved = resolveModelId('inherit', {
      currentModel: 'main-model',
      currentAuthType: AuthType.USE_OPENAI,
    });
    expect(resolved).toEqual({
      authType: AuthType.USE_OPENAI,
      modelId: 'main-model',
    });
  });

  // --- fast alias ---------------------------------------------------------
  it('returns undefined for fast when no fast model is available', () => {
    const resolved = resolveModelId('fast');
    expect(resolved).toBeUndefined();
  });

  it('resolves fast to the configured fast model', () => {
    const resolved = resolveModelId('fast', { fastModel: 'fast-model' });
    expect(resolved).toEqual({ modelId: 'fast-model' });
  });

  it('resolves fast to authType-prefixed configured fast models', () => {
    const resolved = resolveModelId('fast', { fastModel: 'openai:fast-model' });
    expect(resolved).toEqual({
      authType: AuthType.USE_OPENAI,
      modelId: 'fast-model',
    });
  });

  it('returns undefined for recursive fast selectors', () => {
    // A fast model that is itself "fast" must not loop.
    const resolved = resolveModelId('fast', { fastModel: 'fast' });
    expect(resolved).toBeUndefined();
  });

  // --- concrete model selectors -------------------------------------------
  it('parses bare model IDs to concrete model IDs', () => {
    const resolved = resolveModelId('glm-5');
    expect(resolved).toEqual({ modelId: 'glm-5' });
  });

  it('parses authType-prefixed model IDs', () => {
    const resolved = resolveModelId('openai:glm-5');
    expect(resolved).toEqual({
      authType: AuthType.USE_OPENAI,
      modelId: 'glm-5',
    });
  });

  it('trims authType-prefixed model IDs', () => {
    const resolved = resolveModelId(' openai : glm-5 ');
    expect(resolved).toEqual({
      authType: AuthType.USE_OPENAI,
      modelId: 'glm-5',
    });
  });

  it('treats unknown prefix as bare model ID (colon in model ID)', () => {
    const resolved = resolveModelId('invalid:glm-5');
    expect(resolved).toEqual({ modelId: 'invalid:glm-5' });
  });

  it('treats model IDs with colons as bare model IDs', () => {
    const resolved = resolveModelId('gpt-4o:online');
    expect(resolved).toEqual({ modelId: 'gpt-4o:online' });
  });

  // --- malformed input ----------------------------------------------------
  it('rejects missing model IDs after valid authType prefixes', () => {
    const resolveDanglingPrefix = () => resolveModelId('openai:');
    expect(resolveDanglingPrefix).toThrow(
      'Model selector must include a model ID after the authType',
    );
  });
});

View file

@ -0,0 +1,122 @@
/**
* @license
* Copyright 2025 Qwen
* SPDX-License-Identifier: Apache-2.0
*/
import { AuthType } from '../core/contentGenerator.js';
/**
 * Concrete result of resolving a model selector.
 */
export interface ResolvedModelId {
/**
 * Auth type to pair with `modelId`. Left unset when neither the selector nor
 * the resolution context supplied one.
 */
authType?: AuthType;
/** Concrete model identifier. */
modelId: string;
}
/**
 * Ambient values consulted when a selector does not itself name a concrete
 * model (that is, the omitted/`inherit` and `fast` forms).
 */
export interface ModelIdResolutionContext {
/**
 * Model returned for an omitted or `inherit` selector. When unset or empty,
 * such selectors resolve to `undefined`.
 */
currentModel?: string;
/**
 * Auth type attached to the result of an omitted or `inherit` selector, when
 * set.
 */
currentAuthType?: AuthType;
/**
 * Selector string that the `fast` selector resolves through. When unset,
 * empty, or itself `fast`, the `fast` selector resolves to `undefined`.
 */
fastModel?: string;
}
/**
 * Parsed form of a model selector string, discriminated by `kind`:
 * - `inherit`: the selector was omitted, blank, or the literal `inherit`
 * - `fast`: the selector was the literal `fast`
 * - `model`: the selector named a model ID, optionally with an authType
 */
type ModelIdSelector =
| {
kind: 'inherit';
}
| {
kind: 'fast';
}
| {
kind: 'model';
authType?: AuthType;
modelId: string;
};
// Values of `AuthType`, used to decide whether the text before the first ':'
// in a selector names an authType or is simply part of the model ID.
const AUTH_TYPES = new Set<AuthType>(Object.values(AuthType));
/**
 * Turn a model selector string into the concrete model a caller should use.
 *
 * Selector forms (surrounding whitespace is ignored):
 * - omitted, blank, or `inherit` -> `context.currentModel`, tagged with
 *   `context.currentAuthType` when one is set
 * - `fast` -> whatever `context.fastModel` resolves to
 * - `modelId` -> that model ID; no authType is set on the result, so the
 *   choice of authType is left to the caller
 * - `authType:modelId` -> the explicit authType and model ID
 *
 * @param model - Selector to resolve; may be omitted.
 * @param context - Current-model and fast-model information; defaults to an
 *   empty context.
 * @returns The resolved model, or `undefined` when the selector refers to a
 *   current or fast model that the context does not provide (or when the
 *   fast model is itself `fast`).
 * @throws Error when a recognised authType prefix is not followed by a
 *   model ID.
 */
export function resolveModelId(
  model: string | undefined,
  context: ModelIdResolutionContext = {},
): ResolvedModelId | undefined {
  const selector = parseModelIdSelector(model);
  return resolveModelIdSelector(selector, context);
}
/**
 * Parse a raw selector string into its structured form.
 *
 * Leading and trailing whitespace is ignored, as is whitespace around the
 * first ':' separator.
 *
 * @param model - Raw selector; `undefined` or blank means "inherit".
 * @returns The parsed selector.
 * @throws Error when the text before the first ':' is a known authType but
 *   nothing follows the ':'.
 */
function parseModelIdSelector(model: string | undefined): ModelIdSelector {
  const selector = model?.trim() ?? '';

  // Keywords (and the absence of a selector) take priority over model IDs.
  switch (selector) {
    case '':
    case 'inherit':
      return { kind: 'inherit' };
    case 'fast':
      return { kind: 'fast' };
  }

  const separatorAt = selector.indexOf(':');
  const prefix =
    separatorAt === -1 ? undefined : selector.slice(0, separatorAt).trim();

  // Without a ':' or with a prefix that is not a known AuthType, the whole
  // string is the model ID: model IDs may legitimately contain colons
  // (e.g. gpt-4o:online).
  if (prefix === undefined || !AUTH_TYPES.has(prefix as AuthType)) {
    return { kind: 'model', modelId: selector };
  }

  const modelId = selector.slice(separatorAt + 1).trim();
  if (modelId.length === 0) {
    throw new Error(
      'Model selector must include a model ID after the authType',
    );
  }

  return { kind: 'model', authType: prefix as AuthType, modelId };
}
/**
 * Resolve a parsed selector to a concrete model using the given context.
 *
 * @param selector - Parsed selector to resolve.
 * @param context - Current-model and fast-model information.
 * @returns The resolved model, or `undefined` when the context lacks the
 *   model the selector refers to, or when the fast model is itself `fast`.
 */
function resolveModelIdSelector(
  selector: ModelIdSelector,
  context: ModelIdResolutionContext,
): ResolvedModelId | undefined {
  switch (selector.kind) {
    case 'model':
      // Explicit model: pass it through, keeping the authType only if given.
      return selector.authType
        ? { authType: selector.authType, modelId: selector.modelId }
        : { modelId: selector.modelId };

    case 'inherit': {
      const { currentModel, currentAuthType } = context;
      if (!currentModel) {
        return undefined;
      }
      return currentAuthType
        ? { authType: currentAuthType, modelId: currentModel }
        : { modelId: currentModel };
    }
  }

  // Remaining kind is 'fast': the configured fast model is itself a selector.
  const { fastModel } = context;
  if (!fastModel) {
    return undefined;
  }

  const fastTarget = parseModelIdSelector(fastModel);
  if (fastTarget.kind === 'fast') {
    // "fast" pointing at "fast" can never settle on a concrete model.
    return undefined;
  }

  // Clear fastModel for the nested call so resolution cannot loop back here.
  return resolveModelIdSelector(fastTarget, {
    ...context,
    fastModel: undefined,
  });
}

View file

@ -20,9 +20,11 @@ export interface SideQueryJsonOptions<TResponse> {
abortSignal: AbortSignal;
/**
* Override the model used for this query. Defaults to
* `config.getFastModel?.() ?? config.getModel()` side queries run on the
* fast model when one is configured. Pass an explicit value to pin to the
* main model (e.g. long-form summarization in web-fetch).
* `config.getFastModelForSideQuery?.() ?? config.getFastModel?.() ?? config.getModel() ?? DEFAULT_QWEN_MODEL` —
* side queries run on the fast model when one is configured, including
* fast models registered under a different authType than the main session.
* Pass an explicit value to pin to the main model (e.g. long-form
* summarization in web-fetch).
*/
model?: string;
systemInstruction?: string | Part | Part[] | Content;
@ -61,9 +63,11 @@ export interface SideQueryTextOptions {
abortSignal: AbortSignal;
/**
* Override the model used for this query. Defaults to
* `config.getFastModel?.() ?? config.getModel()` side queries run on the
* fast model when one is configured. Pass an explicit value to pin to the
* main model (e.g. long-form summarization in web-fetch).
* `config.getFastModelForSideQuery?.() ?? config.getFastModel?.() ?? config.getModel() ?? DEFAULT_QWEN_MODEL` —
* side queries run on the fast model when one is configured, including
* fast models registered under a different authType than the main session.
* Pass an explicit value to pin to the main model (e.g. long-form
* summarization in web-fetch).
*/
model?: string;
systemInstruction?: string | Part | Part[] | Content;
@ -101,6 +105,7 @@ function buildDefaultPromptId(purpose?: string): string {
function resolveDefaultModel(config: Config, override?: string): string {
return (
override ??
config.getFastModelForSideQuery?.() ??
config.getFastModel?.() ??
config.getModel() ??
DEFAULT_QWEN_MODEL