mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-19 07:54:38 +00:00
fix(core): support cross-auth fast side queries (#4117)
Some checks are pending
Qwen Code CI / Classify PR (push) Waiting to run
Qwen Code CI / Lint (push) Blocked by required conditions
Qwen Code CI / Test (macos-latest, Node 22.x) (push) Blocked by required conditions
Qwen Code CI / Test (ubuntu-latest, Node 22.x) (push) Blocked by required conditions
Qwen Code CI / Test (windows-latest, Node 22.x) (push) Blocked by required conditions
Qwen Code CI / Post Coverage Comment (push) Blocked by required conditions
Qwen Code CI / CodeQL (push) Blocked by required conditions
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none (push) Waiting to run
E2E Tests / E2E Test - macOS (push) Waiting to run
Some checks are pending
Qwen Code CI / Classify PR (push) Waiting to run
Qwen Code CI / Lint (push) Blocked by required conditions
Qwen Code CI / Test (macos-latest, Node 22.x) (push) Blocked by required conditions
Qwen Code CI / Test (ubuntu-latest, Node 22.x) (push) Blocked by required conditions
Qwen Code CI / Test (windows-latest, Node 22.x) (push) Blocked by required conditions
Qwen Code CI / Post Coverage Comment (push) Blocked by required conditions
Qwen Code CI / CodeQL (push) Blocked by required conditions
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none (push) Waiting to run
E2E Tests / E2E Test - macOS (push) Waiting to run
* fix(core): support cross-auth fast side queries * refactor(core): hoist resolveForModel selector and refresh side-query docs Compute the model selector once at the top of `resolveForModel` and pass it through to `createContentGeneratorForModel` and `resolveModelAcrossAuthTypes`. This eliminates the redundant selector resolution that happened up to five times per cross-auth side query (once per call, plus once inside each downstream helper). Also update the JSDoc for `SideQueryJsonOptions.model` and `SideQueryTextOptions.model` to reflect the actual fallback chain (`getFastModelForSideQuery` → `getFastModel` → `getModel` → `DEFAULT_QWEN_MODEL`) introduced in this PR.
This commit is contained in:
parent
85c10c1619
commit
cc800d0132
21 changed files with 961 additions and 89 deletions
|
|
@ -375,7 +375,6 @@ export class DynamicCommandLocalizationService {
|
|||
items: TranslationItem[],
|
||||
signal: AbortSignal,
|
||||
): Promise<Map<string, string>> {
|
||||
const model = config.getFastModel() ?? config.getModel();
|
||||
const targetLanguageName = getLanguageNameForTranslationTarget(language);
|
||||
const translations = new Map<string, string>();
|
||||
|
||||
|
|
@ -391,7 +390,6 @@ export class DynamicCommandLocalizationService {
|
|||
try {
|
||||
response = await runSideQuery<Record<string, unknown>>(config, {
|
||||
purpose: 'dynamic-command-localization',
|
||||
model,
|
||||
contents: [{ role: 'user', parts: [{ text: prompt }] }],
|
||||
schema: {
|
||||
type: 'object',
|
||||
|
|
|
|||
|
|
@ -395,7 +395,108 @@ describe('modelCommand', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('should reject unavailable fast models for the current auth type', async () => {
|
||||
it('should set fast models configured under another auth type', async () => {
|
||||
const setValue = vi.fn();
|
||||
const setFastModel = vi.fn();
|
||||
mockContext = createMockCommandContext({
|
||||
invocation: {
|
||||
raw: '/model --fast deepseek-v4-flash',
|
||||
name: 'model',
|
||||
args: '--fast deepseek-v4-flash',
|
||||
},
|
||||
services: {
|
||||
config: {
|
||||
getContentGeneratorConfig: vi.fn().mockReturnValue({
|
||||
model: 'claude-opus-4-7',
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
}),
|
||||
getAllConfiguredModels: vi.fn().mockReturnValue([
|
||||
{
|
||||
id: 'deepseek-v4-flash',
|
||||
label: 'deepseek-v4-flash',
|
||||
authType: AuthType.USE_OPENAI,
|
||||
},
|
||||
{
|
||||
id: 'claude-opus-4-7',
|
||||
label: 'claude-opus-4-7',
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
},
|
||||
]),
|
||||
setFastModel,
|
||||
},
|
||||
settings: createMockSettings(setValue),
|
||||
},
|
||||
});
|
||||
|
||||
const result = await modelCommand.action!(
|
||||
mockContext,
|
||||
'--fast deepseek-v4-flash',
|
||||
);
|
||||
|
||||
expect(setValue).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
'fastModel',
|
||||
'deepseek-v4-flash',
|
||||
);
|
||||
expect(setFastModel).toHaveBeenCalledWith('deepseek-v4-flash');
|
||||
expect(result).toEqual({
|
||||
type: 'message',
|
||||
messageType: 'info',
|
||||
content: 'Fast Model: deepseek-v4-flash',
|
||||
});
|
||||
});
|
||||
|
||||
it('should set authType-qualified fast model selectors', async () => {
|
||||
const setValue = vi.fn();
|
||||
const setFastModel = vi.fn();
|
||||
mockContext = createMockCommandContext({
|
||||
invocation: {
|
||||
raw: '/model --fast openai:deepseek-v4-flash',
|
||||
name: 'model',
|
||||
args: '--fast openai:deepseek-v4-flash',
|
||||
},
|
||||
services: {
|
||||
config: {
|
||||
getContentGeneratorConfig: vi.fn().mockReturnValue({
|
||||
model: 'claude-opus-4-7',
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
}),
|
||||
getAvailableModelsForAuthType: vi.fn((authType: AuthType) =>
|
||||
authType === AuthType.USE_OPENAI
|
||||
? [
|
||||
{
|
||||
id: 'deepseek-v4-flash',
|
||||
label: 'deepseek-v4-flash',
|
||||
authType: AuthType.USE_OPENAI,
|
||||
},
|
||||
]
|
||||
: [],
|
||||
),
|
||||
setFastModel,
|
||||
},
|
||||
settings: createMockSettings(setValue),
|
||||
},
|
||||
});
|
||||
|
||||
const result = await modelCommand.action!(
|
||||
mockContext,
|
||||
'--fast openai:deepseek-v4-flash',
|
||||
);
|
||||
|
||||
expect(setValue).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
'fastModel',
|
||||
'openai:deepseek-v4-flash',
|
||||
);
|
||||
expect(setFastModel).toHaveBeenCalledWith('openai:deepseek-v4-flash');
|
||||
expect(result).toEqual({
|
||||
type: 'message',
|
||||
messageType: 'info',
|
||||
content: 'Fast Model: openai:deepseek-v4-flash',
|
||||
});
|
||||
});
|
||||
|
||||
it('should reject unavailable fast models across all auth types', async () => {
|
||||
const setValue = vi.fn();
|
||||
const setFastModel = vi.fn();
|
||||
mockContext = createMockCommandContext({
|
||||
|
|
@ -410,9 +511,13 @@ describe('modelCommand', () => {
|
|||
model: 'qwen-plus',
|
||||
authType: AuthType.USE_OPENAI,
|
||||
}),
|
||||
getAvailableModelsForAuthType: vi
|
||||
.fn()
|
||||
.mockReturnValue([{ id: 'qwen-turbo', label: 'Qwen Turbo' }]),
|
||||
getAllConfiguredModels: vi.fn().mockReturnValue([
|
||||
{
|
||||
id: 'qwen-turbo',
|
||||
label: 'Qwen Turbo',
|
||||
authType: AuthType.USE_OPENAI,
|
||||
},
|
||||
]),
|
||||
setFastModel,
|
||||
},
|
||||
settings: createMockSettings(setValue),
|
||||
|
|
@ -430,8 +535,8 @@ describe('modelCommand', () => {
|
|||
type: 'message',
|
||||
messageType: 'error',
|
||||
content:
|
||||
"Fast model 'missing-model' is not available for auth type 'openai'.\n" +
|
||||
"Available models for 'openai': qwen-turbo.\n" +
|
||||
"Fast model 'missing-model' is not configured for any auth type.\n" +
|
||||
'Configured models: qwen-turbo.\n' +
|
||||
'Configure models in settings.modelProviders or run /model to select an available model.',
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import {
|
|||
AuthType,
|
||||
type AvailableModel,
|
||||
type Config,
|
||||
resolveModelId,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
import type { LoadedSettings } from '../../config/settings.js';
|
||||
import { parseAcpModelOption } from '../../utils/acpModelUtils.js';
|
||||
|
|
@ -77,6 +78,25 @@ function formatUnavailableModelMessage(
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Builds the error text shown when a fast-model name cannot be matched against
 * the models configured for any auth type.
 *
 * @param modelName - The fast-model name as supplied by the user; echoed
 *   verbatim in the first line of the message.
 * @param availableModels - Models to offer as alternatives. Only their ids are
 *   used, and duplicate ids are collapsed so each one is listed once.
 * @returns A three-line message: the rejection, the list of configured model
 *   ids (or a note that none are configured), and a hint on how to configure
 *   or select a model.
 */
function formatUnavailableFastModelMessage(
  modelName: string,
  availableModels: AvailableModel[],
): string {
  // A Set keeps first-seen order while dropping repeated ids.
  const availableModelIds = Array.from(
    new Set(availableModels.map((model) => model.id)),
  );
  const availableModelsLine =
    availableModelIds.length === 0
      ? 'No models are configured.'
      : `Configured models: ${availableModelIds.join(', ')}.`;

  return (
    `Fast model '${modelName}' is not configured for any auth type.\n` +
    `${availableModelsLine}\n` +
    'Configure models in settings.modelProviders or run /model to select an available model.'
  );
}
|
||||
|
||||
// Get an array of the available model IDs as strings
|
||||
function getAvailableModelIds(context: CommandContext) {
|
||||
const { services } = context;
|
||||
|
|
@ -174,17 +194,36 @@ export const modelCommand: SlashCommand = {
|
|||
};
|
||||
}
|
||||
|
||||
const availableModels = config.getAvailableModelsForAuthType(authType);
|
||||
if (!availableModels.some((model) => model.id === modelName)) {
|
||||
const selector = (() => {
|
||||
try {
|
||||
return resolveModelId(modelName);
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
})();
|
||||
if (!selector) {
|
||||
return {
|
||||
type: 'message',
|
||||
messageType: 'error',
|
||||
content: formatUnavailableModelMessage(
|
||||
'Fast model',
|
||||
modelName,
|
||||
authType,
|
||||
availableModels,
|
||||
),
|
||||
content: formatUnavailableFastModelMessage(modelName, []),
|
||||
};
|
||||
}
|
||||
|
||||
const availableModels = selector.authType
|
||||
? config.getAvailableModelsForAuthType(selector.authType)
|
||||
: config.getAllConfiguredModels();
|
||||
if (!availableModels.some((model) => model.id === selector.modelId)) {
|
||||
return {
|
||||
type: 'message',
|
||||
messageType: 'error',
|
||||
content: selector.authType
|
||||
? formatUnavailableModelMessage(
|
||||
'Fast model',
|
||||
selector.modelId,
|
||||
selector.authType,
|
||||
availableModels,
|
||||
)
|
||||
: formatUnavailableFastModelMessage(modelName, availableModels),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -213,7 +213,9 @@ export const renameCommand: SlashCommand = {
|
|||
// the main model here because `--auto` is a deliberate opt-in to the
|
||||
// sentence-case fast-model flow, and surprising a user with a main-
|
||||
// model call would defeat the purpose.
|
||||
if (!config.getFastModel()) {
|
||||
const fastModel =
|
||||
config.getFastModelForSideQuery?.() ?? config.getFastModel();
|
||||
if (!fastModel) {
|
||||
return {
|
||||
type: 'message',
|
||||
messageType: 'error',
|
||||
|
|
|
|||
|
|
@ -288,6 +288,108 @@ describe('<ModelDialog />', () => {
|
|||
expect(props.onClose).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('stores authType-qualified selectors in fast model mode', async () => {
|
||||
const setFastModel = vi.fn();
|
||||
const { props, mockSettings } = renderComponent({ isFastModelMode: true }, {
|
||||
getAuthType: vi.fn(() => AuthType.USE_ANTHROPIC),
|
||||
getModel: vi.fn(() => 'claude-opus-4-7'),
|
||||
getAllConfiguredModels: vi.fn(() => [
|
||||
{
|
||||
id: 'deepseek-v4-flash',
|
||||
label: 'deepseek-v4-flash',
|
||||
authType: AuthType.USE_OPENAI,
|
||||
},
|
||||
{
|
||||
id: 'claude-opus-4-7',
|
||||
label: 'claude-opus-4-7',
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
},
|
||||
]),
|
||||
getContentGeneratorConfig: vi.fn(() => ({
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
model: 'claude-opus-4-7',
|
||||
})),
|
||||
setFastModel,
|
||||
} as unknown as Partial<Config>);
|
||||
|
||||
const childOnSelect = mockedSelect.mock.calls[0][0].onSelect;
|
||||
await childOnSelect(`${AuthType.USE_OPENAI}::deepseek-v4-flash`);
|
||||
|
||||
expect(mockSettings.setValue).toHaveBeenCalledWith(
|
||||
SettingScope.User,
|
||||
'fastModel',
|
||||
'openai:deepseek-v4-flash',
|
||||
);
|
||||
expect(setFastModel).toHaveBeenCalledWith('openai:deepseek-v4-flash');
|
||||
expect(props.onClose).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('highlights the cross-auth row for a bare fast-model setting', () => {
|
||||
// `/model --fast deepseek-v4-flash` validates across all providers and
|
||||
// persists the bare model id. When the dialog re-opens, it must locate
|
||||
// the right row even though the setting carries no authType prefix —
|
||||
// otherwise the highlight falls back to the current auth's first row
|
||||
// and Enter would silently overwrite the setting.
|
||||
const mockSettings = {
|
||||
isTrusted: true,
|
||||
user: { settings: {} },
|
||||
workspace: { settings: {} },
|
||||
merged: { fastModel: 'deepseek-v4-flash' },
|
||||
setValue: vi.fn(),
|
||||
} as unknown as LoadedSettings;
|
||||
|
||||
const allModels = [
|
||||
{
|
||||
id: 'claude-opus-4-7',
|
||||
label: 'claude-opus-4-7',
|
||||
description: '',
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
},
|
||||
{
|
||||
id: 'deepseek-v4-flash',
|
||||
label: 'deepseek-v4-flash',
|
||||
description: '',
|
||||
authType: AuthType.USE_OPENAI,
|
||||
},
|
||||
];
|
||||
|
||||
render(
|
||||
<SettingsContext.Provider value={mockSettings}>
|
||||
<ConfigContext.Provider
|
||||
value={
|
||||
{
|
||||
getModel: vi.fn(() => 'claude-opus-4-7'),
|
||||
getAuthType: vi.fn(() => AuthType.USE_ANTHROPIC),
|
||||
getAllConfiguredModels: vi.fn(() => allModels),
|
||||
getContentGeneratorConfig: vi.fn(() => ({
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
model: 'claude-opus-4-7',
|
||||
})),
|
||||
getModelsConfig: vi.fn(() => ({
|
||||
getGenerationConfig: vi.fn(() => ({ baseUrl: undefined })),
|
||||
})),
|
||||
getActiveRuntimeModelSnapshot: vi.fn(() => undefined),
|
||||
getUsageStatisticsEnabled: vi.fn(() => false),
|
||||
getSessionId: vi.fn(() => 'session'),
|
||||
getDebugMode: vi.fn(() => false),
|
||||
getUseModelRouter: vi.fn(() => false),
|
||||
getProxy: vi.fn(() => undefined),
|
||||
} as unknown as Config
|
||||
}
|
||||
>
|
||||
<ModelDialog onClose={vi.fn()} isFastModelMode={true} />
|
||||
</ConfigContext.Provider>
|
||||
</SettingsContext.Provider>,
|
||||
);
|
||||
|
||||
const items = mockedSelect.mock.calls[0][0].items;
|
||||
const deepseekIndex = items.findIndex((item) =>
|
||||
String(item.value).includes('deepseek-v4-flash'),
|
||||
);
|
||||
expect(deepseekIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(mockedSelect.mock.calls[0][0].initialIndex).toBe(deepseekIndex);
|
||||
});
|
||||
|
||||
it('blocks switching to qwen-oauth from another authType (discontinued)', async () => {
|
||||
const switchModel = vi.fn().mockResolvedValue(undefined);
|
||||
const getAuthType = vi.fn(() => AuthType.USE_OPENAI);
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import {
|
|||
ModelSlashCommandEvent,
|
||||
logModelSlashCommand,
|
||||
MAINLINE_CODER_MODEL,
|
||||
resolveModelId,
|
||||
type AvailableModel as CoreAvailableModel,
|
||||
type ContentGeneratorConfig,
|
||||
type InputModalities,
|
||||
|
|
@ -303,9 +304,17 @@ export function ModelDialog({
|
|||
|
||||
// In fast model mode, default to the currently configured fast model
|
||||
const fastModelSetting = settings?.merged?.fastModel as string | undefined;
|
||||
const parsedFastModelSetting = useMemo(() => {
|
||||
if (!isFastModelMode) return undefined;
|
||||
try {
|
||||
return resolveModelId(fastModelSetting);
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}, [fastModelSetting, isFastModelMode]);
|
||||
const preferredModelId =
|
||||
isFastModelMode && fastModelSetting
|
||||
? fastModelSetting
|
||||
isFastModelMode && parsedFastModelSetting
|
||||
? parsedFastModelSetting.modelId
|
||||
: config?.getModel() || MAINLINE_CODER_MODEL;
|
||||
// Check if current model is a runtime model
|
||||
// Runtime snapshot ID is already in $runtime|${authType}|${modelId} format
|
||||
|
|
@ -315,11 +324,35 @@ export function ModelDialog({
|
|||
const currentBaseUrl = config
|
||||
?.getModelsConfig()
|
||||
.getGenerationConfig()?.baseUrl;
|
||||
// When `/model --fast <bare-id>` validated the model across all providers,
|
||||
// the setting persists as a bare model ID (no authType prefix) so that
|
||||
// runtime cross-auth lookups still work. Highlight the row that owns it
|
||||
// regardless of which provider that turns out to be — otherwise the
|
||||
// dialog would default to the current auth's first row and Enter would
|
||||
// silently overwrite the user's fast-model setting.
|
||||
const preferredFastModelEntry =
|
||||
isFastModelMode && parsedFastModelSetting
|
||||
? parsedFastModelSetting.authType
|
||||
? availableModelEntries.find(
|
||||
({ authType: t2, model }) =>
|
||||
t2 === parsedFastModelSetting.authType &&
|
||||
model.id === parsedFastModelSetting.modelId,
|
||||
)
|
||||
: availableModelEntries.find(
|
||||
({ model }) => model.id === parsedFastModelSetting.modelId,
|
||||
)
|
||||
: undefined;
|
||||
const preferredKey = activeRuntimeSnapshot
|
||||
? activeRuntimeSnapshot.id
|
||||
: authType
|
||||
? buildModelSelectionKey(authType, preferredModelId, currentBaseUrl)
|
||||
: '';
|
||||
: preferredFastModelEntry
|
||||
? buildModelSelectionKey(
|
||||
preferredFastModelEntry.authType,
|
||||
preferredFastModelEntry.model.id,
|
||||
preferredFastModelEntry.model.baseUrl,
|
||||
)
|
||||
: authType
|
||||
? buildModelSelectionKey(authType, preferredModelId, currentBaseUrl)
|
||||
: '';
|
||||
|
||||
useKeypress(
|
||||
(key) => {
|
||||
|
|
@ -358,27 +391,28 @@ export function ModelDialog({
|
|||
async (selected: string) => {
|
||||
setErrorMessage(null);
|
||||
|
||||
// Fast model mode: save the model ID only (baseUrl is intentionally
|
||||
// discarded — getFastModel resolves via the first registry match).
|
||||
// Fast model mode: save authType:modelId so duplicate model ids across
|
||||
// providers remain unambiguous. baseUrl is intentionally discarded.
|
||||
if (isFastModelMode) {
|
||||
let modelId: string;
|
||||
let fastModel: string;
|
||||
if (selected.includes('::')) {
|
||||
const parsed = parseModelSelectionKey(selected);
|
||||
modelId = parsed.modelId;
|
||||
fastModel = `${parsed.authType}:${parsed.modelId}`;
|
||||
} else if (selected.startsWith('$runtime|')) {
|
||||
const parts = selected.split('|');
|
||||
modelId = parts[2] ?? selected;
|
||||
fastModel =
|
||||
parts[1] && parts[2] ? `${parts[1]}:${parts[2]}` : selected;
|
||||
} else {
|
||||
modelId = selected;
|
||||
fastModel = selected;
|
||||
}
|
||||
const scope = getPersistScopeForModelSelection(settings);
|
||||
settings.setValue(scope, 'fastModel', modelId);
|
||||
settings.setValue(scope, 'fastModel', fastModel);
|
||||
// Sync the runtime Config so forked agents pick up the change immediately.
|
||||
config?.setFastModel(modelId);
|
||||
config?.setFastModel(fastModel);
|
||||
uiState?.historyManager.addItem(
|
||||
{
|
||||
type: 'success',
|
||||
text: `${t('Fast Model')}: ${modelId}`,
|
||||
text: `${t('Fast Model')}: ${fastModel}`,
|
||||
},
|
||||
Date.now(),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -760,6 +760,190 @@ describe('Server Config (config.ts)', () => {
|
|||
});
|
||||
|
||||
describe('model switching with different credentials (OpenAI)', () => {
|
||||
it('keeps getFastModel current-auth-only for direct runtime callers', () => {
|
||||
const config = new Config({
|
||||
...baseParams,
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
model: 'claude-opus-4-7',
|
||||
fastModel: 'deepseek-v4-flash',
|
||||
modelProvidersConfig: {
|
||||
[AuthType.USE_OPENAI]: [
|
||||
{
|
||||
id: 'deepseek-v4-flash',
|
||||
name: 'deepseek-v4-flash',
|
||||
baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
|
||||
envKey: 'DASHSCOPE_API_KEY',
|
||||
},
|
||||
],
|
||||
[AuthType.USE_ANTHROPIC]: [
|
||||
{
|
||||
id: 'claude-opus-4-7',
|
||||
name: 'claude-opus-4-7',
|
||||
baseUrl: 'https://idealab.alibaba-inc.com/api/anthropic',
|
||||
envKey: 'IDEALAB_OPUS_API_KEY',
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
expect(config.getFastModel()).toBeUndefined();
|
||||
expect(config.getFastModelForSideQuery()).toBe('deepseek-v4-flash');
|
||||
});
|
||||
|
||||
it('returns an authType-qualified fast model selector for side queries', () => {
|
||||
const config = new Config({
|
||||
...baseParams,
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
model: 'shared-model',
|
||||
fastModel: 'openai:shared-model',
|
||||
modelProvidersConfig: {
|
||||
[AuthType.USE_OPENAI]: [
|
||||
{
|
||||
id: 'shared-model',
|
||||
name: 'OpenAI shared model',
|
||||
baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
|
||||
envKey: 'DASHSCOPE_API_KEY',
|
||||
},
|
||||
],
|
||||
[AuthType.USE_ANTHROPIC]: [
|
||||
{
|
||||
id: 'shared-model',
|
||||
name: 'Anthropic shared model',
|
||||
baseUrl: 'https://idealab.alibaba-inc.com/api/anthropic',
|
||||
envKey: 'IDEALAB_OPUS_API_KEY',
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
expect(config.getFastModel()).toBeUndefined();
|
||||
expect(config.getFastModelForSideQuery()).toBe('openai:shared-model');
|
||||
});
|
||||
|
||||
it('returns a bare fast model for getFastModel when authType-qualified selector matches the current auth type', () => {
|
||||
const config = new Config({
|
||||
...baseParams,
|
||||
authType: AuthType.USE_OPENAI,
|
||||
model: 'gpt-4',
|
||||
fastModel: 'openai:deepseek-v4-flash',
|
||||
modelProvidersConfig: {
|
||||
[AuthType.USE_OPENAI]: [
|
||||
{
|
||||
id: 'deepseek-v4-flash',
|
||||
name: 'deepseek-v4-flash',
|
||||
baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
|
||||
envKey: 'DASHSCOPE_API_KEY',
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
expect(config.getFastModel()).toBe('deepseek-v4-flash');
|
||||
expect(config.getFastModelForSideQuery()).toBe(
|
||||
'openai:deepseek-v4-flash',
|
||||
);
|
||||
});
|
||||
|
||||
it('accepts runtime fast models for authType-qualified selectors', () => {
|
||||
const config = new Config({
|
||||
...baseParams,
|
||||
authType: AuthType.USE_OPENAI,
|
||||
model: 'runtime-fast-model',
|
||||
fastModel: 'openai:runtime-fast-model',
|
||||
generationConfig: {
|
||||
apiKey: 'sk-runtime-key',
|
||||
baseUrl: 'https://runtime.example.com/v1',
|
||||
},
|
||||
generationConfigSources: {
|
||||
model: { kind: 'programmatic', detail: 'test' },
|
||||
apiKey: { kind: 'programmatic', detail: 'test' },
|
||||
baseUrl: { kind: 'programmatic', detail: 'test' },
|
||||
},
|
||||
modelProvidersConfig: {
|
||||
[AuthType.USE_OPENAI]: [
|
||||
{
|
||||
id: 'registry-model',
|
||||
name: 'Registry Model',
|
||||
baseUrl: 'https://api.openai.com/v1',
|
||||
envKey: 'OPENAI_API_KEY',
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
config.getModelsConfig().detectAndCaptureRuntimeModel();
|
||||
|
||||
expect(config.getFastModel()).toBe('runtime-fast-model');
|
||||
expect(config.getFastModelForSideQuery()).toBe(
|
||||
'openai:runtime-fast-model',
|
||||
);
|
||||
});
|
||||
|
||||
it('returns undefined when the fast model is not configured for any auth type', () => {
|
||||
const config = new Config({
|
||||
...baseParams,
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
model: 'claude-opus-4-7',
|
||||
fastModel: 'missing-fast-model',
|
||||
modelProvidersConfig: {
|
||||
[AuthType.USE_ANTHROPIC]: [
|
||||
{
|
||||
id: 'claude-opus-4-7',
|
||||
name: 'claude-opus-4-7',
|
||||
baseUrl: 'https://idealab.alibaba-inc.com/api/anthropic',
|
||||
envKey: 'IDEALAB_OPUS_API_KEY',
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
expect(config.getFastModel()).toBeUndefined();
|
||||
expect(config.getFastModelForSideQuery()).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined when the fast model selector is malformed', () => {
|
||||
const config = new Config({
|
||||
...baseParams,
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
model: 'claude-opus-4-7',
|
||||
fastModel: 'openai:',
|
||||
modelProvidersConfig: {
|
||||
[AuthType.USE_OPENAI]: [
|
||||
{
|
||||
id: 'deepseek-v4-flash',
|
||||
name: 'deepseek-v4-flash',
|
||||
baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
|
||||
envKey: 'DASHSCOPE_API_KEY',
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
expect(config.getFastModel()).toBeUndefined();
|
||||
expect(config.getFastModelForSideQuery()).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined when fastModel points back to the fast selector', () => {
|
||||
const config = new Config({
|
||||
...baseParams,
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
model: 'claude-opus-4-7',
|
||||
fastModel: 'fast',
|
||||
modelProvidersConfig: {
|
||||
[AuthType.USE_ANTHROPIC]: [
|
||||
{
|
||||
id: 'claude-opus-4-7',
|
||||
name: 'claude-opus-4-7',
|
||||
baseUrl: 'https://idealab.alibaba-inc.com/api/anthropic',
|
||||
envKey: 'IDEALAB_OPUS_API_KEY',
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
expect(config.getFastModel()).toBeUndefined();
|
||||
expect(config.getFastModelForSideQuery()).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should refresh auth when switching to model with different envKey', async () => {
|
||||
// This test verifies the fix for switching between modelProvider models
|
||||
// with different envKeys (e.g., deepseek-chat with DEEPSEEK_API_KEY)
|
||||
|
|
|
|||
|
|
@ -154,6 +154,7 @@ import {
|
|||
type AvailableModel,
|
||||
type RuntimeModelSnapshot,
|
||||
} from '../models/index.js';
|
||||
import { resolveModelId } from '../utils/modelId.js';
|
||||
import type { ClaudeMarketplaceConfig } from '../extension/claude-converter.js';
|
||||
|
||||
// Re-export types
|
||||
|
|
@ -1832,20 +1833,57 @@ export class Config {
|
|||
}
|
||||
|
||||
/**
 * Returns the fast model if one is configured and valid for the current auth
 * type, otherwise returns undefined. Direct runtime paths use this as a
 * cheaper alternative to the main session model, so it intentionally stays
 * current-auth-only.
 *
 * The configured value is parsed with `resolveFastModelSelector()`. When the
 * selector carries an auth type, it must equal the current auth type or the
 * result is undefined. The returned value is always the bare model id, never
 * the authType-qualified form.
 *
 * @returns The bare fast-model id, or undefined when no fast model is set, no
 *   auth type can be determined, the selector cannot be parsed, the selector
 *   targets another auth type, or the id is not among the models returned by
 *   `getAllConfiguredModels([authType])`.
 */
getFastModel(): string | undefined {
  if (!this.fastModel) return undefined;

  // Prefer the auth type of the active content generator; fall back to the
  // models config when no generator config is available.
  const authType =
    this.contentGeneratorConfig?.authType ??
    this.modelsConfig.getCurrentAuthType();
  if (!authType) return undefined;

  const selector = this.resolveFastModelSelector();
  if (!selector) return undefined;
  // A selector qualified with a different auth type is not usable here.
  if (selector.authType && selector.authType !== authType) return undefined;

  const available = this.getAllConfiguredModels([authType]);
  return available.some((m) => m.id === selector.modelId)
    ? selector.modelId
    : undefined;
}
|
||||
|
||||
/**
 * Returns the fast model for side-query paths. Unlike {@link getFastModel},
 * this can return an authType-qualified selector because BaseLlmClient can
 * route a single request through a provider different from the main session.
 *
 * @returns `authType:modelId` when the configured selector names an auth type
 *   and that model is configured under it; the bare model id when the selector
 *   has no auth type and the id is configured under any auth type; undefined
 *   when no fast model is set, the selector cannot be parsed, or the model is
 *   not found.
 */
getFastModelForSideQuery(): string | undefined {
  const selector = this.resolveFastModelSelector();
  if (!selector) return undefined;

  if (selector.authType) {
    // Qualified selector: validate only against the named auth type and keep
    // the qualifier in the result so the caller can route to that provider.
    const available = this.getAllConfiguredModels([selector.authType]);
    return available.some((m) => m.id === selector.modelId)
      ? `${selector.authType}:${selector.modelId}`
      : undefined;
  }

  // Bare selector: any auth type that has this model configured is acceptable.
  const available = this.getAllConfiguredModels();
  return available.some((m) => m.id === selector.modelId)
    ? selector.modelId
    : undefined;
}
|
||||
|
||||
/**
 * Parses the configured fast-model value into a selector via `resolveModelId`.
 *
 * Parsing failures are deliberately swallowed: an unparseable setting is
 * treated the same as "no fast model configured" so callers can simply fall
 * back instead of handling an exception.
 *
 * @returns The parsed selector, or undefined when no fast model is set or
 *   `resolveModelId` throws for the configured value.
 */
private resolveFastModelSelector() {
  if (!this.fastModel) return undefined;
  try {
    return resolveModelId(this.fastModel);
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
|
||||
* Update the fast model at runtime (e.g., when the user runs `/model --fast <model>`).
|
||||
* Pass undefined or an empty string to clear the fast model override.
|
||||
|
|
|
|||
|
|
@ -501,6 +501,8 @@ describe('BaseLlmClient', () => {
|
|||
.mockReturnValue({ authType: AuthType.QWEN_OAUTH }),
|
||||
getEmbeddingModel: vi.fn().mockReturnValue('test-embedding-model'),
|
||||
getModel: vi.fn().mockReturnValue('main-model'),
|
||||
getFastModel: vi.fn().mockReturnValue(undefined),
|
||||
getFastModelForSideQuery: vi.fn().mockReturnValue(undefined),
|
||||
getModelsConfig: vi.fn().mockReturnValue({ getResolvedModel }),
|
||||
} as unknown as Mocked<Config>;
|
||||
});
|
||||
|
|
@ -635,6 +637,95 @@ describe('BaseLlmClient', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('generateJson accepts authType-qualified selectors and sends the bare model id', async () => {
|
||||
getResolvedModel.mockImplementation((authType: string, model: string) => {
|
||||
if (authType === AuthType.USE_OPENAI && model === 'shared-model') {
|
||||
return {
|
||||
id: 'shared-model',
|
||||
authType: AuthType.USE_OPENAI,
|
||||
envKey: 'OPENAI_API_KEY',
|
||||
};
|
||||
}
|
||||
return undefined;
|
||||
});
|
||||
fastGenerateContent.mockResolvedValue(
|
||||
createMockResponseWithFunctionCall({ ok: true }),
|
||||
);
|
||||
vi.mocked(getFunctionCalls).mockReturnValue([
|
||||
{ name: 'respond_in_schema', args: { ok: true } },
|
||||
]);
|
||||
|
||||
const c = new BaseLlmClient(mockContentGenerator, crossProviderConfig);
|
||||
|
||||
await c.generateJson({
|
||||
contents: [{ role: 'user', parts: [{ text: 'go' }] }],
|
||||
schema: { type: 'object' },
|
||||
model: 'openai:shared-model',
|
||||
abortSignal: new AbortController().signal,
|
||||
promptId: 'test',
|
||||
});
|
||||
|
||||
expect(getResolvedModel).toHaveBeenCalledWith(
|
||||
AuthType.USE_OPENAI,
|
||||
'shared-model',
|
||||
);
|
||||
expect(mockBuildAgentContentGeneratorConfig).toHaveBeenCalledWith(
|
||||
crossProviderConfig,
|
||||
'shared-model',
|
||||
expect.objectContaining({ authType: AuthType.USE_OPENAI }),
|
||||
);
|
||||
expect(fastGenerateContent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ model: 'shared-model' }),
|
||||
'test',
|
||||
);
|
||||
});
|
||||
|
||||
it('generateJson resolves fast selectors through the configured fast model', async () => {
|
||||
crossProviderConfig.getFastModelForSideQuery.mockReturnValue(
|
||||
'openai:shared-model',
|
||||
);
|
||||
getResolvedModel.mockImplementation((authType: string, model: string) => {
|
||||
if (authType === AuthType.USE_OPENAI && model === 'shared-model') {
|
||||
return {
|
||||
id: 'shared-model',
|
||||
authType: AuthType.USE_OPENAI,
|
||||
envKey: 'OPENAI_API_KEY',
|
||||
};
|
||||
}
|
||||
return undefined;
|
||||
});
|
||||
fastGenerateContent.mockResolvedValue(
|
||||
createMockResponseWithFunctionCall({ ok: true }),
|
||||
);
|
||||
vi.mocked(getFunctionCalls).mockReturnValue([
|
||||
{ name: 'respond_in_schema', args: { ok: true } },
|
||||
]);
|
||||
|
||||
const c = new BaseLlmClient(mockContentGenerator, crossProviderConfig);
|
||||
|
||||
await c.generateJson({
|
||||
contents: [{ role: 'user', parts: [{ text: 'go' }] }],
|
||||
schema: { type: 'object' },
|
||||
model: 'fast',
|
||||
abortSignal: new AbortController().signal,
|
||||
promptId: 'test',
|
||||
});
|
||||
|
||||
expect(getResolvedModel).toHaveBeenCalledWith(
|
||||
AuthType.USE_OPENAI,
|
||||
'shared-model',
|
||||
);
|
||||
expect(mockBuildAgentContentGeneratorConfig).toHaveBeenCalledWith(
|
||||
crossProviderConfig,
|
||||
'shared-model',
|
||||
expect.objectContaining({ authType: AuthType.USE_OPENAI }),
|
||||
);
|
||||
expect(fastGenerateContent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ model: 'shared-model' }),
|
||||
'test',
|
||||
);
|
||||
});
|
||||
|
||||
it('generateText routes through the per-model generator and forwards retry authType', async () => {
|
||||
getResolvedModel.mockReturnValue({
|
||||
authType: AuthType.USE_ANTHROPIC,
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import type { ContentGenerator } from './contentGenerator.js';
|
|||
import { AuthType, createContentGenerator } from './contentGenerator.js';
|
||||
import type { ResolvedModelConfig } from '../models/types.js';
|
||||
import { buildAgentContentGeneratorConfig } from '../models/content-generator-config.js';
|
||||
import { resolveModelId, type ResolvedModelId } from '../utils/modelId.js';
|
||||
import { reportError } from '../utils/errorReporting.js';
|
||||
import { getErrorMessage } from '../utils/errors.js';
|
||||
import { retryWithBackoff, isUnattendedMode } from '../utils/retry.js';
|
||||
|
|
@ -41,6 +42,7 @@ const debugLogger = createDebugLogger('BASE_LLM_CLIENT');
|
|||
export interface ResolvedGeneratorForModel {
  /** Content generator that should serve requests for the resolved model. */
  contentGenerator: ContentGenerator;
  /**
   * Auth type resolved alongside the generator for use by retry handling.
   * May be undefined when no auth type could be determined.
   */
  retryAuthType: string | undefined;
  /** Model ID that callers place in the outgoing request's `model` field. */
  model: string;
}
|
||||
|
||||
/**
|
||||
|
|
@ -173,14 +175,17 @@ export class BaseLlmClient {
|
|||
},
|
||||
];
|
||||
|
||||
const { contentGenerator, retryAuthType } =
|
||||
await this.resolveForModel(model);
|
||||
const {
|
||||
contentGenerator,
|
||||
retryAuthType,
|
||||
model: requestModel,
|
||||
} = await this.resolveForModel(model);
|
||||
|
||||
try {
|
||||
const apiCall = () =>
|
||||
contentGenerator.generateContent(
|
||||
{
|
||||
model,
|
||||
model: requestModel,
|
||||
config: {
|
||||
...requestConfig,
|
||||
tools,
|
||||
|
|
@ -265,14 +270,17 @@ export class BaseLlmClient {
|
|||
...(systemInstruction && { systemInstruction }),
|
||||
};
|
||||
|
||||
const { contentGenerator, retryAuthType } =
|
||||
await this.resolveForModel(model);
|
||||
const {
|
||||
contentGenerator,
|
||||
retryAuthType,
|
||||
model: requestModel,
|
||||
} = await this.resolveForModel(model);
|
||||
|
||||
try {
|
||||
const apiCall = () =>
|
||||
contentGenerator.generateContent(
|
||||
{
|
||||
model,
|
||||
model: requestModel,
|
||||
config: requestConfig,
|
||||
contents,
|
||||
},
|
||||
|
|
@ -363,23 +371,35 @@ export class BaseLlmClient {
|
|||
* or generator creation fails (e.g. tests without full auth setup).
|
||||
*/
|
||||
async resolveForModel(model: string): Promise<ResolvedGeneratorForModel> {
|
||||
const selector = this.resolveModelSelector(model);
|
||||
const requestModel = selector?.modelId ?? this.config.getModel() ?? model;
|
||||
const mainModel = this.config.getModel() ?? model;
|
||||
const mainAuthType = this.config.getContentGeneratorConfig()?.authType;
|
||||
|
||||
if (model === mainModel) {
|
||||
if (
|
||||
requestModel === mainModel &&
|
||||
(!selector?.authType || selector.authType === mainAuthType)
|
||||
) {
|
||||
return {
|
||||
contentGenerator: this.contentGenerator,
|
||||
retryAuthType: mainAuthType,
|
||||
model: requestModel,
|
||||
};
|
||||
}
|
||||
|
||||
const contentGenerator = await this.createContentGeneratorForModel(model);
|
||||
const contentGenerator = await this.createContentGeneratorForModel(
|
||||
model,
|
||||
selector,
|
||||
);
|
||||
const resolvedModel = this.resolveModelAcrossAuthTypes(model, selector);
|
||||
const retryAuthType =
|
||||
this.resolveModelAcrossAuthTypes(model)?.authType ??
|
||||
mainAuthType ??
|
||||
AuthType.USE_OPENAI;
|
||||
resolvedModel?.authType ?? mainAuthType ?? AuthType.USE_OPENAI;
|
||||
|
||||
return { contentGenerator, retryAuthType };
|
||||
return {
|
||||
contentGenerator,
|
||||
retryAuthType,
|
||||
model: resolvedModel?.id ?? requestModel,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -397,9 +417,16 @@ export class BaseLlmClient {
|
|||
*/
|
||||
private resolveModelAcrossAuthTypes(
|
||||
model: string,
|
||||
selector: ResolvedModelId | undefined,
|
||||
): ResolvedModelConfig | undefined {
|
||||
const modelsConfig = this.config.getModelsConfig?.();
|
||||
if (!modelsConfig) return undefined;
|
||||
if (!selector) return undefined;
|
||||
const modelId = selector.modelId;
|
||||
|
||||
if (selector.authType) {
|
||||
return modelsConfig.getResolvedModel(selector.authType, modelId);
|
||||
}
|
||||
|
||||
const allAuthTypes: AuthType[] = [
|
||||
AuthType.QWEN_OAUTH,
|
||||
|
|
@ -411,13 +438,13 @@ export class BaseLlmClient {
|
|||
|
||||
const mainAuthType = this.config.getContentGeneratorConfig()?.authType;
|
||||
if (mainAuthType) {
|
||||
const resolved = modelsConfig.getResolvedModel(mainAuthType, model);
|
||||
const resolved = modelsConfig.getResolvedModel(mainAuthType, modelId);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
|
||||
for (const authType of allAuthTypes) {
|
||||
if (authType === mainAuthType) continue;
|
||||
const resolved = modelsConfig.getResolvedModel(authType, model);
|
||||
const resolved = modelsConfig.getResolvedModel(authType, modelId);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
|
||||
|
|
@ -426,13 +453,17 @@ export class BaseLlmClient {
|
|||
|
||||
private async createContentGeneratorForModel(
|
||||
model: string,
|
||||
selector: ResolvedModelId | undefined,
|
||||
): Promise<ContentGenerator> {
|
||||
const cached = this.perModelGeneratorCache.get(model);
|
||||
const cacheKey = selector
|
||||
? `${selector.authType ?? ''}:${selector.modelId}`
|
||||
: model;
|
||||
const cached = this.perModelGeneratorCache.get(cacheKey);
|
||||
if (cached) return cached;
|
||||
|
||||
const generatorPromise = (async () => {
|
||||
try {
|
||||
const resolvedModel = this.resolveModelAcrossAuthTypes(model);
|
||||
const resolvedModel = this.resolveModelAcrossAuthTypes(model, selector);
|
||||
|
||||
if (!resolvedModel) {
|
||||
debugLogger.warn(
|
||||
|
|
@ -441,9 +472,10 @@ export class BaseLlmClient {
|
|||
return this.contentGenerator;
|
||||
}
|
||||
|
||||
const targetModel = resolvedModel.id ?? selector?.modelId ?? model;
|
||||
const targetConfig = buildAgentContentGeneratorConfig(
|
||||
this.config,
|
||||
model,
|
||||
targetModel,
|
||||
{
|
||||
authType: resolvedModel.authType,
|
||||
apiKey: resolvedModel.envKey
|
||||
|
|
@ -459,12 +491,22 @@ export class BaseLlmClient {
|
|||
`Failed to create content generator for model "${model}", falling back to main generator.`,
|
||||
err instanceof Error ? err.message : String(err),
|
||||
);
|
||||
this.perModelGeneratorCache.delete(model);
|
||||
this.perModelGeneratorCache.delete(cacheKey);
|
||||
return this.contentGenerator;
|
||||
}
|
||||
})();
|
||||
|
||||
this.perModelGeneratorCache.set(model, generatorPromise);
|
||||
this.perModelGeneratorCache.set(cacheKey, generatorPromise);
|
||||
return generatorPromise;
|
||||
}
|
||||
|
||||
/**
 * Translate a model selector string into a concrete `ResolvedModelId` using
 * this client's config as resolution context.
 *
 * The context carries the current model, the current auth type, and the
 * fast model (the side-query fast model is preferred; the plain fast model
 * is the fallback).
 */
private resolveModelSelector(model: string): ResolvedModelId | undefined {
  const currentModel = this.config.getModel();
  const currentAuthType = this.config.getContentGeneratorConfig()?.authType;
  const fastModel =
    this.config.getFastModelForSideQuery?.() ?? this.config.getFastModel?.();

  return resolveModelId(model, { currentModel, currentAuthType, fastModel });
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1576,16 +1576,18 @@ export class GeminiClient {
|
|||
// main model's config. The retry authType is resolved alongside so that
|
||||
// provider-specific checks (e.g. QWEN_OAUTH quota detection) reference
|
||||
// the target model's provider.
|
||||
const { contentGenerator, retryAuthType } = await this.config
|
||||
.getBaseLlmClient()
|
||||
.resolveForModel(model);
|
||||
const {
|
||||
contentGenerator,
|
||||
retryAuthType,
|
||||
model: requestModel,
|
||||
} = await this.config.getBaseLlmClient().resolveForModel(model);
|
||||
|
||||
const apiCall = () => {
|
||||
currentAttemptModel = model;
|
||||
currentAttemptModel = requestModel;
|
||||
|
||||
return contentGenerator.generateContent(
|
||||
{
|
||||
model,
|
||||
model: requestModel,
|
||||
config: requestConfig,
|
||||
contents,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -286,6 +286,7 @@ export * from './utils/gitUtils.js';
|
|||
export * from './utils/ignorePatterns.js';
|
||||
export * from './utils/jsonl-utils.js';
|
||||
export * from './utils/memoryDiscovery.js';
|
||||
export * from './utils/modelId.js';
|
||||
export { ConditionalRulesRegistry } from './utils/rulesDiscovery.js';
|
||||
export type { RuleFile } from './utils/rulesDiscovery.js';
|
||||
export { OpenAILogger, openaiLogger } from './utils/openaiLogger.js';
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ describe('selectRelevantAutoMemoryDocumentsByModel', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('passes the fast model to runSideQuery when configured', async () => {
|
||||
it('lets runSideQuery choose the default side-query model when fast model is configured', async () => {
|
||||
vi.mocked(mockConfig.getFastModel).mockReturnValue('fast-flash-model');
|
||||
vi.mocked(runSideQuery).mockResolvedValue({
|
||||
selected_memories: ['reference.md'],
|
||||
|
|
@ -146,13 +146,15 @@ describe('selectRelevantAutoMemoryDocumentsByModel', () => {
|
|||
mockConfig,
|
||||
expect.objectContaining({
|
||||
purpose: 'auto-memory-recall',
|
||||
model: 'fast-flash-model',
|
||||
config: { temperature: 0 },
|
||||
}),
|
||||
);
|
||||
expect(
|
||||
'model' in (vi.mocked(runSideQuery).mock.calls[0]![1] as object),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it('passes undefined model when no fast model is configured', async () => {
|
||||
it('lets runSideQuery fall back to its default when no fast model is configured', async () => {
|
||||
vi.mocked(mockConfig.getFastModel).mockReturnValue(undefined);
|
||||
vi.mocked(runSideQuery).mockResolvedValue({
|
||||
selected_memories: ['reference.md'],
|
||||
|
|
@ -169,10 +171,12 @@ describe('selectRelevantAutoMemoryDocumentsByModel', () => {
|
|||
mockConfig,
|
||||
expect.objectContaining({
|
||||
purpose: 'auto-memory-recall',
|
||||
model: undefined,
|
||||
config: { temperature: 0 },
|
||||
}),
|
||||
);
|
||||
expect(
|
||||
'model' in (vi.mocked(runSideQuery).mock.calls[0]![1] as object),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it('throws when selector returns unknown relative paths', async () => {
|
||||
|
|
|
|||
|
|
@ -95,9 +95,8 @@ export async function selectRelevantAutoMemoryDocumentsByModel(
|
|||
? AbortSignal.any([AbortSignal.timeout(1_000), callerAbortSignal])
|
||||
: AbortSignal.timeout(1_000),
|
||||
|
||||
// Use the fast model for this background side-query to reduce latency and
|
||||
// cost. Falls back to the main session model if no fast model is configured.
|
||||
model: config.getFastModel(),
|
||||
// Uses runSideQuery's default side-query model policy: fast model first,
|
||||
// then main session model when no fast model is configured.
|
||||
systemInstruction: SELECT_MEMORIES_SYSTEM_PROMPT,
|
||||
config: {
|
||||
temperature: 0,
|
||||
|
|
|
|||
|
|
@ -928,11 +928,12 @@ export class ChatRecordingService {
|
|||
// Headless/one-shot CLI flows (`qwen -p "…"`, cron, CI scripts) run a
|
||||
// single prompt and throw the session away. Spending fast-model tokens
|
||||
// on a title no one will ever resume is pure waste; skip entirely.
|
||||
// Checked before `getFastModel()` because it's strictly cheaper (a bool
|
||||
// field read vs. a method that looks up available models for the auth
|
||||
// type).
|
||||
// Checked before `getFastModelForSideQuery()` because it's strictly
|
||||
// cheaper (a bool field read vs. a method that looks up available models).
|
||||
if (!this.config.isInteractive()) return;
|
||||
if (!this.config.getFastModel()) return;
|
||||
const fastModel =
|
||||
this.config.getFastModelForSideQuery?.() ?? this.config.getFastModel();
|
||||
if (!fastModel) return;
|
||||
|
||||
this.autoTitleAttempts++;
|
||||
const controller = new AbortController();
|
||||
|
|
|
|||
|
|
@ -61,7 +61,10 @@ export async function generateSessionRecap(
|
|||
const recentHistory = takeRecentDialog(dialog, RECENT_MESSAGE_WINDOW);
|
||||
if (recentHistory.length === 0) return null;
|
||||
|
||||
const model = config.getFastModel() ?? config.getModel();
|
||||
const model =
|
||||
config.getFastModelForSideQuery?.() ??
|
||||
config.getFastModel() ??
|
||||
config.getModel();
|
||||
|
||||
const result = await runSideQuery(config, {
|
||||
purpose: 'session-recap',
|
||||
|
|
@ -75,7 +78,6 @@ export async function generateSessionRecap(
|
|||
temperature: 0.3,
|
||||
},
|
||||
abortSignal,
|
||||
model,
|
||||
// Recap is best-effort cosmetic — don't burn the default 7 retries.
|
||||
maxAttempts: 1,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -70,8 +70,8 @@ const TRAILING_PAIRED_BRACKETS_RE =
|
|||
* command) can surface actionable messages instead of a generic "could not
|
||||
* generate".
|
||||
*
|
||||
* - `no_fast_model`: config.getFastModel() returned undefined. User needs to
|
||||
* configure one via `/model --fast <name>`.
|
||||
* - `no_fast_model`: config.getFastModelForSideQuery() returned undefined.
|
||||
* User needs to configure one via `/model --fast <name>`.
|
||||
* - `no_client`: BaseLlmClient or GeminiClient not yet initialized. Rare,
|
||||
* usually means the session hasn't authenticated yet.
|
||||
* - `empty_history`: the conversation has fewer than 2 turns of usable text.
|
||||
|
|
@ -107,7 +107,7 @@ export async function tryGenerateSessionTitle(
|
|||
abortSignal: AbortSignal,
|
||||
): Promise<SessionTitleOutcome> {
|
||||
try {
|
||||
const model = config.getFastModel();
|
||||
const model = config.getFastModelForSideQuery?.() ?? config.getFastModel();
|
||||
if (!model) return { ok: false, reason: 'no_fast_model' };
|
||||
|
||||
const geminiClient = config.getGeminiClient();
|
||||
|
|
@ -130,7 +130,6 @@ export async function tryGenerateSessionTitle(
|
|||
|
||||
const result = await runSideQuery<{ title?: string }>(config, {
|
||||
purpose: 'session-title',
|
||||
model,
|
||||
systemInstruction: TITLE_SYSTEM_PROMPT,
|
||||
schema: TITLE_SCHEMA as unknown as Record<string, unknown>,
|
||||
contents: [
|
||||
|
|
|
|||
|
|
@ -89,7 +89,8 @@ export interface GenerateToolUseSummaryParams {
|
|||
*/
|
||||
lastAssistantText?: string;
|
||||
/**
|
||||
* Fast model to use. If omitted, falls back to `config.getFastModel()`;
|
||||
* Fast model to use. If omitted, falls back to
|
||||
* `config.getFastModelForSideQuery()`;
|
||||
* if that also returns undefined, the call is skipped (returns null).
|
||||
* Unlike `sessionRecap`, this does not fall back to the main model —
|
||||
* summary generation is a nice-to-have and must not incur main-model cost.
|
||||
|
|
@ -112,7 +113,10 @@ export async function generateToolUseSummary(
|
|||
return null;
|
||||
}
|
||||
|
||||
const model = params.model ?? config.getFastModel();
|
||||
const model =
|
||||
params.model ??
|
||||
config.getFastModelForSideQuery?.() ??
|
||||
config.getFastModel();
|
||||
if (!model) {
|
||||
debugLogger.debug('No fast model configured — skipping summary generation');
|
||||
return null;
|
||||
|
|
@ -151,7 +155,7 @@ export async function generateToolUseSummary(
|
|||
temperature: 0.3,
|
||||
},
|
||||
abortSignal: signal,
|
||||
model,
|
||||
...(params.model !== undefined ? { model: params.model } : {}),
|
||||
// Tool-use labels are best-effort cosmetic; firing once per turn means
|
||||
// 7 retries on a transient outage would spike traffic for no benefit.
|
||||
maxAttempts: 1,
|
||||
|
|
|
|||
98
packages/core/src/utils/modelId.test.ts
Normal file
98
packages/core/src/utils/modelId.test.ts
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { AuthType } from '../core/contentGenerator.js';
|
||||
import { resolveModelId } from './modelId.js';
|
||||
|
||||
// Behavioral contract for resolveModelId: keyword selectors ("inherit",
// "fast"), bare model IDs, and authType-prefixed model IDs.
describe('resolveModelId', () => {
  it('returns undefined for omitted models without a current model', () => {
    const resolved = resolveModelId(undefined);

    expect(resolved).toBeUndefined();
  });

  it('resolves omitted models to the current model when provided', () => {
    const context = {
      currentModel: 'main-model',
      currentAuthType: AuthType.USE_ANTHROPIC,
    };

    expect(resolveModelId(undefined, context)).toEqual({
      authType: AuthType.USE_ANTHROPIC,
      modelId: 'main-model',
    });
  });

  it('resolves explicit inherit to the current model', () => {
    const context = {
      currentModel: 'main-model',
      currentAuthType: AuthType.USE_OPENAI,
    };

    expect(resolveModelId('inherit', context)).toEqual({
      authType: AuthType.USE_OPENAI,
      modelId: 'main-model',
    });
  });

  it('returns undefined for fast when no fast model is available', () => {
    const resolved = resolveModelId('fast');

    expect(resolved).toBeUndefined();
  });

  it('resolves fast to the configured fast model', () => {
    const resolved = resolveModelId('fast', { fastModel: 'fast-model' });

    expect(resolved).toEqual({ modelId: 'fast-model' });
  });

  it('resolves fast to authType-prefixed configured fast models', () => {
    const resolved = resolveModelId('fast', { fastModel: 'openai:fast-model' });

    expect(resolved).toEqual({
      authType: AuthType.USE_OPENAI,
      modelId: 'fast-model',
    });
  });

  it('returns undefined for recursive fast selectors', () => {
    // A fast model that points back at "fast" can never be resolved.
    const resolved = resolveModelId('fast', { fastModel: 'fast' });

    expect(resolved).toBeUndefined();
  });

  it('parses bare model IDs to concrete model IDs', () => {
    const resolved = resolveModelId('glm-5');

    expect(resolved).toEqual({ modelId: 'glm-5' });
  });

  it('parses authType-prefixed model IDs', () => {
    const resolved = resolveModelId('openai:glm-5');

    expect(resolved).toEqual({
      authType: AuthType.USE_OPENAI,
      modelId: 'glm-5',
    });
  });

  it('trims authType-prefixed model IDs', () => {
    const resolved = resolveModelId(' openai : glm-5 ');

    expect(resolved).toEqual({
      authType: AuthType.USE_OPENAI,
      modelId: 'glm-5',
    });
  });

  it('treats unknown prefix as bare model ID (colon in model ID)', () => {
    const resolved = resolveModelId('invalid:glm-5');

    expect(resolved).toEqual({ modelId: 'invalid:glm-5' });
  });

  it('treats model IDs with colons as bare model IDs', () => {
    const resolved = resolveModelId('gpt-4o:online');

    expect(resolved).toEqual({ modelId: 'gpt-4o:online' });
  });

  it('rejects missing model IDs after valid authType prefixes', () => {
    const resolveEmptySuffix = () => resolveModelId('openai:');

    expect(resolveEmptySuffix).toThrow(
      'Model selector must include a model ID after the authType',
    );
  });
});
|
||||
122
packages/core/src/utils/modelId.ts
Normal file
122
packages/core/src/utils/modelId.ts
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { AuthType } from '../core/contentGenerator.js';
|
||||
|
||||
/** Outcome of resolving a model selector to something concrete. */
export interface ResolvedModelId {
  /**
   * Auth type attached to the model. Set when the selector carried a known
   * `authType:` prefix, or when the current model was inherited together
   * with a current auth type; omitted otherwise.
   */
  authType?: AuthType;
  /** Concrete model ID, without any auth-type prefix that was recognized. */
  modelId: string;
}
|
||||
|
||||
/** Ambient values consulted when a selector is a keyword, not a model ID. */
export interface ModelIdResolutionContext {
  /** Model returned for an omitted or `inherit` selector. */
  currentModel?: string;
  /** Auth type attached to the result when `currentModel` is inherited. */
  currentAuthType?: AuthType;
  /**
   * Selector string substituted for `fast`. It is parsed like any other
   * selector, so it may itself be a bare model ID or `authType:modelId`.
   */
  fastModel?: string;
}
|
||||
|
||||
/**
 * Parsed form of a selector string, before any context is applied.
 * `inherit` and `fast` are keywords; `model` names a concrete model ID with
 * an optional explicit auth type.
 */
type ModelIdSelector =
  | { kind: 'inherit' }
  | { kind: 'fast' }
  | { kind: 'model'; authType?: AuthType; modelId: string };
|
||||
|
||||
// Every AuthType enum value, used to decide whether the text before a colon
// in a selector is an auth-type prefix or just part of the model ID.
const AUTH_TYPES = new Set<AuthType>(Object.values(AuthType));
|
||||
|
||||
/**
 * Resolve a model selector string to a concrete model ID.
 *
 * Accepted selector forms:
 * - omitted / blank / `inherit` -> the context's current model (with the
 *   current auth type when one is provided); `undefined` if the context has
 *   no current model
 * - `fast`                      -> whatever the context's `fastModel`
 *   selector resolves to; `undefined` if it is missing or is itself `fast`
 * - `modelId`                   -> that model ID, with no auth type attached
 *   (the caller decides which auth type to try)
 * - `authType:modelId`          -> that model ID under the explicit auth type
 *
 * @param model - Selector string; may be undefined.
 * @param context - Current model, current auth type and fast model used to
 *   resolve keyword selectors. Defaults to an empty context.
 * @returns The resolved model, or `undefined` when a keyword selector has
 *   nothing to resolve to.
 * @throws Error when a known auth-type prefix is not followed by a model ID.
 */
export function resolveModelId(
  model: string | undefined,
  context: ModelIdResolutionContext = {},
): ResolvedModelId | undefined {
  const selector = parseModelIdSelector(model);
  return resolveModelIdSelector(selector, context);
}
|
||||
|
||||
/**
 * Classify a raw selector string without consulting any context.
 *
 * Blank input and `inherit` map to the inherit keyword, `fast` to the fast
 * keyword, and anything else to a concrete model selector. A colon only
 * splits the string when the text before it is a known auth type.
 *
 * @throws Error when a known auth-type prefix has nothing after the colon.
 */
function parseModelIdSelector(model: string | undefined): ModelIdSelector {
  const raw = model?.trim() ?? '';

  switch (raw) {
    case '':
    case 'inherit':
      return { kind: 'inherit' };
    case 'fast':
      return { kind: 'fast' };
    default:
      break;
  }

  const separator = raw.indexOf(':');
  if (separator < 0) {
    return { kind: 'model', modelId: raw };
  }

  // Model IDs can legitimately contain colons (e.g. gpt-4o:online), so an
  // unrecognized prefix means the whole string is a bare model ID.
  const prefix = raw.slice(0, separator).trim();
  if (!AUTH_TYPES.has(prefix as AuthType)) {
    return { kind: 'model', modelId: raw };
  }

  const suffix = raw.slice(separator + 1).trim();
  if (suffix.length === 0) {
    throw new Error(
      'Model selector must include a model ID after the authType',
    );
  }

  return { kind: 'model', authType: prefix as AuthType, modelId: suffix };
}
|
||||
|
||||
/**
 * Turn a parsed selector into a concrete model using the given context.
 *
 * - `fast` is replaced by the parsed `context.fastModel` and resolved again
 *   with `fastModel` cleared; a missing fast model, or one that is itself
 *   `fast`, yields `undefined`.
 * - `inherit` yields the context's current model (plus current auth type if
 *   set), or `undefined` when there is no current model.
 * - `model` is returned as-is, with `authType` only when it was explicit.
 */
function resolveModelIdSelector(
  selector: ModelIdSelector,
  context: ModelIdResolutionContext,
): ResolvedModelId | undefined {
  if (selector.kind === 'fast') {
    const configuredFast = context.fastModel;
    if (!configuredFast) {
      return undefined;
    }

    const target = parseModelIdSelector(configuredFast);
    if (target.kind === 'fast') {
      // "fast" pointing at "fast" would never terminate.
      return undefined;
    }

    return resolveModelIdSelector(target, { ...context, fastModel: undefined });
  }

  if (selector.kind === 'inherit') {
    const { currentModel, currentAuthType } = context;
    if (!currentModel) {
      return undefined;
    }
    return currentAuthType
      ? { authType: currentAuthType, modelId: currentModel }
      : { modelId: currentModel };
  }

  // Remaining kind is 'model': pass through, attaching authType only if set.
  return selector.authType
    ? { authType: selector.authType, modelId: selector.modelId }
    : { modelId: selector.modelId };
}
|
||||
|
|
@ -20,9 +20,11 @@ export interface SideQueryJsonOptions<TResponse> {
|
|||
abortSignal: AbortSignal;
|
||||
/**
|
||||
* Override the model used for this query. Defaults to
|
||||
* `config.getFastModel?.() ?? config.getModel()` — side queries run on the
|
||||
* fast model when one is configured. Pass an explicit value to pin to the
|
||||
* main model (e.g. long-form summarization in web-fetch).
|
||||
* `config.getFastModelForSideQuery?.() ?? config.getFastModel?.() ?? config.getModel() ?? DEFAULT_QWEN_MODEL`
|
||||
* — side queries run on the fast model when one is configured, including
|
||||
* fast models registered under a different authType than the main session.
|
||||
* Pass an explicit value to pin to the main model (e.g. long-form
|
||||
* summarization in web-fetch).
|
||||
*/
|
||||
model?: string;
|
||||
systemInstruction?: string | Part | Part[] | Content;
|
||||
|
|
@ -61,9 +63,11 @@ export interface SideQueryTextOptions {
|
|||
abortSignal: AbortSignal;
|
||||
/**
|
||||
* Override the model used for this query. Defaults to
|
||||
* `config.getFastModel?.() ?? config.getModel()` — side queries run on the
|
||||
* fast model when one is configured. Pass an explicit value to pin to the
|
||||
* main model (e.g. long-form summarization in web-fetch).
|
||||
* `config.getFastModelForSideQuery?.() ?? config.getFastModel?.() ?? config.getModel() ?? DEFAULT_QWEN_MODEL`
|
||||
* — side queries run on the fast model when one is configured, including
|
||||
* fast models registered under a different authType than the main session.
|
||||
* Pass an explicit value to pin to the main model (e.g. long-form
|
||||
* summarization in web-fetch).
|
||||
*/
|
||||
model?: string;
|
||||
systemInstruction?: string | Part | Part[] | Content;
|
||||
|
|
@ -101,6 +105,7 @@ function buildDefaultPromptId(purpose?: string): string {
|
|||
function resolveDefaultModel(config: Config, override?: string): string {
|
||||
return (
|
||||
override ??
|
||||
config.getFastModelForSideQuery?.() ??
|
||||
config.getFastModel?.() ??
|
||||
config.getModel() ??
|
||||
DEFAULT_QWEN_MODEL
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue