mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-19 07:54:38 +00:00
fix(core): use per-model settings for fast model side queries (#3815)
Fixes #3765. Side queries (session recap, title generation, tool-use summary) running on the fast model previously inherited the main model's ContentGeneratorConfig, leaking extra_body / samplingParams / reasoning between models. GeminiClient.generateContent() now resolves the target model's own config via buildAgentContentGeneratorConfig() when the requested model differs from the main model, and creates a dedicated ContentGenerator (cached, cleared on resetChat). Cross-authType resolution lets the fast model live under a different provider than the main model. Uses the target model's authType for retry logic so provider-specific checks (e.g. QWEN_OAUTH quota detection) reference the correct provider. Falls back to the main generator if the model is not in the registry.
This commit is contained in:
parent
5d0adbb5bd
commit
c7facf5013
2 changed files with 544 additions and 2 deletions
|
|
@ -19,18 +19,27 @@ import { GeminiClient, SendMessageType } from './client.js';
|
|||
import { findCompressSplitPoint } from '../services/chatCompressionService.js';
|
||||
import {
|
||||
AuthType,
|
||||
createContentGenerator,
|
||||
type ContentGenerator,
|
||||
type ContentGeneratorConfig,
|
||||
} from './contentGenerator.js';
|
||||
import { buildAgentContentGeneratorConfig } from '../models/content-generator-config.js';
|
||||
import { type GeminiChat } from './geminiChat.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import { ApprovalMode } from '../config/config.js';
|
||||
import type { ModelsConfig } from '../models/modelsConfig.js';
|
||||
import { retryWithBackoff } from '../utils/retry.js';
|
||||
import {
|
||||
CompressionStatus,
|
||||
GeminiEventType,
|
||||
Turn,
|
||||
type ChatCompressionInfo,
|
||||
} from './turn.js';
|
||||
|
||||
vi.mock('../utils/retry.js', () => ({
|
||||
retryWithBackoff: vi.fn(async (fn) => await fn()),
|
||||
isUnattendedMode: vi.fn(() => false),
|
||||
}));
|
||||
import { getCoreSystemPrompt, getCustomSystemPrompt } from './prompts.js';
|
||||
import { DEFAULT_QWEN_FLASH_MODEL } from '../config/models.js';
|
||||
import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
|
||||
|
|
@ -90,6 +99,25 @@ vi.mock('./turn', async (importOriginal) => {
|
|||
|
||||
vi.mock('../config/config.js');
|
||||
vi.mock('./prompts');
|
||||
vi.mock('../models/content-generator-config.js', async (importOriginal) => {
|
||||
const actual =
|
||||
await importOriginal<
|
||||
typeof import('../models/content-generator-config.js')
|
||||
>();
|
||||
return {
|
||||
...actual,
|
||||
buildAgentContentGeneratorConfig: vi
|
||||
.fn()
|
||||
.mockImplementation(actual.buildAgentContentGeneratorConfig),
|
||||
};
|
||||
});
|
||||
vi.mock('./contentGenerator.js', async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import('./contentGenerator.js')>();
|
||||
return {
|
||||
...actual,
|
||||
createContentGenerator: vi.fn(),
|
||||
};
|
||||
});
|
||||
vi.mock('../utils/getFolderStructure', () => ({
|
||||
getFolderStructure: vi.fn().mockResolvedValue('Mock Folder Structure'),
|
||||
}));
|
||||
|
|
@ -151,6 +179,7 @@ vi.mock('../telemetry/uiTelemetry.js', () => ({
|
|||
vi.mock('../telemetry/loggers.js', () => ({
|
||||
logChatCompression: vi.fn(),
|
||||
logNextSpeakerCheck: vi.fn(),
|
||||
logApiRequest: vi.fn(),
|
||||
}));
|
||||
|
||||
// Mock RequestTokenizer to use simple character-based estimation
|
||||
|
|
@ -277,6 +306,12 @@ describe('Gemini Client (client.ts)', () => {
|
|||
vi.resetAllMocks();
|
||||
vi.mocked(uiTelemetryService.setLastPromptTokenCount).mockClear();
|
||||
|
||||
// Default: createContentGenerator rejects (simulates test env without auth).
|
||||
// Individual tests can override with mockResolvedValue for success path.
|
||||
vi.mocked(createContentGenerator).mockRejectedValue(
|
||||
new Error('no auth in test env'),
|
||||
);
|
||||
|
||||
mockMemoryManager = {
|
||||
scheduleExtract: vi.fn().mockResolvedValue({
|
||||
touchedTopics: [],
|
||||
|
|
@ -389,6 +424,9 @@ describe('Gemini Client (client.ts)', () => {
|
|||
getArenaAgentClient: vi.fn().mockReturnValue(null),
|
||||
getManagedAutoMemoryEnabled: vi.fn().mockReturnValue(true),
|
||||
getMemoryManager: vi.fn().mockReturnValue(mockMemoryManager),
|
||||
getModelsConfig: vi.fn().mockReturnValue({
|
||||
getResolvedModel: vi.fn().mockReturnValue(undefined),
|
||||
}),
|
||||
getDisableAllHooks: vi.fn().mockReturnValue(true),
|
||||
getArenaManager: vi.fn().mockReturnValue(null),
|
||||
getMessageBus: vi.fn().mockReturnValue(undefined),
|
||||
|
|
@ -3150,4 +3188,368 @@ Other open files:
|
|||
// Note: there is currently no "fallback mode" model routing; the model used
|
||||
// is always the one explicitly requested by the caller.
|
||||
});
|
||||
|
||||
describe('generateContent with fast model', () => {
|
||||
it('should resolve per-model config and fall back when createContentGenerator fails', async () => {
|
||||
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
|
||||
const abortSignal = new AbortController().signal;
|
||||
|
||||
// Set up a resolved model for the fast model, but createContentGenerator
|
||||
// will fail in the test env (no auth), so it falls back to the main
|
||||
// content generator. Verify the resolution was attempted.
|
||||
const mockResolvedModel = {
|
||||
id: 'fast-model',
|
||||
authType: 'openai' as const,
|
||||
name: 'Fast Model',
|
||||
baseUrl: 'https://fast-api.example.com',
|
||||
generationConfig: {
|
||||
extra_body: { enable_thinking: false },
|
||||
samplingParams: { temperature: 0.1 },
|
||||
},
|
||||
capabilities: {},
|
||||
};
|
||||
|
||||
const getResolvedModel = vi.fn().mockReturnValue(mockResolvedModel);
|
||||
vi.mocked(mockConfig.getModelsConfig).mockReturnValue({
|
||||
getResolvedModel,
|
||||
} as unknown as ModelsConfig);
|
||||
|
||||
await client.generateContent(
|
||||
contents,
|
||||
{ temperature: 0.5 },
|
||||
abortSignal,
|
||||
'fast-model',
|
||||
);
|
||||
|
||||
// Verify that getResolvedModel was called with the fast model ID
|
||||
expect(getResolvedModel).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
'fast-model',
|
||||
);
|
||||
|
||||
// The main content generator is used as fallback (since creating a new
|
||||
// one fails in test env without auth). In production, a dedicated
|
||||
// content generator with the fast model's settings would be created.
|
||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: 'fast-model',
|
||||
}),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
|
||||
it('should use a dedicated content generator for the fast model on success', async () => {
|
||||
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
|
||||
const abortSignal = new AbortController().signal;
|
||||
|
||||
// Create a mock dedicated content generator
|
||||
const mockFastContentGenerator = {
|
||||
generateContent: vi.fn().mockResolvedValue({
|
||||
text: 'fast response',
|
||||
}),
|
||||
} as unknown as ContentGenerator;
|
||||
|
||||
// Set up a resolved model for the fast model
|
||||
const mockResolvedModel = {
|
||||
id: 'fast-model',
|
||||
authType: 'openai' as const,
|
||||
name: 'Fast Model',
|
||||
baseUrl: 'https://fast-api.example.com',
|
||||
envKey: 'FAST_API_KEY',
|
||||
generationConfig: {
|
||||
extra_body: { enable_thinking: false },
|
||||
samplingParams: { temperature: 0.1 },
|
||||
},
|
||||
capabilities: {},
|
||||
};
|
||||
|
||||
const getResolvedModel = vi.fn().mockReturnValue(mockResolvedModel);
|
||||
vi.mocked(mockConfig.getModelsConfig).mockReturnValue({
|
||||
getResolvedModel,
|
||||
} as unknown as ModelsConfig);
|
||||
|
||||
// Override createContentGenerator to return our test double (success path)
|
||||
vi.mocked(createContentGenerator).mockResolvedValue(
|
||||
mockFastContentGenerator,
|
||||
);
|
||||
|
||||
await client.generateContent(
|
||||
contents,
|
||||
{ temperature: 0.5 },
|
||||
abortSignal,
|
||||
'fast-model',
|
||||
);
|
||||
|
||||
// Verify buildAgentContentGeneratorConfig was called with correct args
|
||||
expect(buildAgentContentGeneratorConfig).toHaveBeenCalledWith(
|
||||
mockConfig,
|
||||
'fast-model',
|
||||
expect.objectContaining({
|
||||
baseUrl: 'https://fast-api.example.com',
|
||||
}),
|
||||
);
|
||||
|
||||
// The dedicated fast content generator should be used
|
||||
expect(mockFastContentGenerator.generateContent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: 'fast-model',
|
||||
}),
|
||||
expect.any(String),
|
||||
);
|
||||
|
||||
// The original main content generator should NOT have been called
|
||||
expect(mockContentGenerator.generateContent).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should use the main content generator when the requested model matches the main model', async () => {
|
||||
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
|
||||
const abortSignal = new AbortController().signal;
|
||||
|
||||
const getResolvedModel = vi.fn();
|
||||
vi.mocked(mockConfig.getModelsConfig).mockReturnValue({
|
||||
getResolvedModel,
|
||||
} as unknown as ModelsConfig);
|
||||
|
||||
await client.generateContent(
|
||||
contents,
|
||||
{},
|
||||
abortSignal,
|
||||
'test-model', // same as getModel() return value
|
||||
);
|
||||
|
||||
// getResolvedModel should NOT be called when model matches main
|
||||
expect(getResolvedModel).not.toHaveBeenCalled();
|
||||
|
||||
// The main content generator should be used directly
|
||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: 'test-model',
|
||||
}),
|
||||
expect.any(String),
|
||||
);
|
||||
});
|
||||
|
||||
it('should fall back to main generator when model is not in registry', async () => {
|
||||
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
|
||||
const abortSignal = new AbortController().signal;
|
||||
|
||||
// getResolvedModel returns undefined — model not found in registry
|
||||
const getResolvedModel = vi.fn().mockReturnValue(undefined);
|
||||
vi.mocked(mockConfig.getModelsConfig).mockReturnValue({
|
||||
getResolvedModel,
|
||||
} as unknown as ModelsConfig);
|
||||
|
||||
// Should not throw — falls back to main generator
|
||||
await expect(
|
||||
client.generateContent(
|
||||
contents,
|
||||
{ temperature: 0.5 },
|
||||
abortSignal,
|
||||
'unknown-model',
|
||||
),
|
||||
).resolves.toBeDefined();
|
||||
|
||||
// getResolvedModel was called to look up the model
|
||||
expect(getResolvedModel).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
'unknown-model',
|
||||
);
|
||||
|
||||
// The main content generator is used as fallback
|
||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
model: 'unknown-model',
|
||||
}),
|
||||
expect.any(String),
|
||||
);
|
||||
|
||||
// buildAgentContentGeneratorConfig must NOT be called when the model is
|
||||
// not in the registry — the fallback path skips config construction.
|
||||
expect(buildAgentContentGeneratorConfig).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should use fast model authType for retry, not main model authType', async () => {
|
||||
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
|
||||
const abortSignal = new AbortController().signal;
|
||||
|
||||
const mockResolvedModel = {
|
||||
id: 'fast-model',
|
||||
authType: 'openai' as const,
|
||||
name: 'Fast Model',
|
||||
baseUrl: 'https://fast-api.example.com',
|
||||
generationConfig: {},
|
||||
capabilities: {},
|
||||
};
|
||||
|
||||
const getResolvedModel = vi.fn().mockReturnValue(mockResolvedModel);
|
||||
vi.mocked(mockConfig.getModelsConfig).mockReturnValue({
|
||||
getResolvedModel,
|
||||
} as unknown as ModelsConfig);
|
||||
|
||||
// Main config uses a different authType
|
||||
vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
|
||||
authType: AuthType.QWEN_OAUTH,
|
||||
apiKey: 'test-key',
|
||||
apiModel: 'test-model',
|
||||
} as unknown as ContentGeneratorConfig);
|
||||
|
||||
// Success path for createContentGenerator
|
||||
vi.mocked(createContentGenerator).mockResolvedValue(mockContentGenerator);
|
||||
|
||||
await client.generateContent(
|
||||
contents,
|
||||
{ temperature: 0.5 },
|
||||
abortSignal,
|
||||
'fast-model',
|
||||
);
|
||||
|
||||
// VERIFY: retryWithBackoff was called with the fast model's authType ('openai'),
|
||||
// not the main model's authType ('QWEN_OAUTH').
|
||||
expect(retryWithBackoff).toHaveBeenCalledWith(
|
||||
expect.any(Function),
|
||||
expect.objectContaining({
|
||||
authType: 'openai',
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('should cache per-model content generators', async () => {
|
||||
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
|
||||
const abortController = new AbortController();
|
||||
const mockResolvedModel = {
|
||||
id: 'fast-model',
|
||||
authType: 'openai' as const,
|
||||
name: 'Fast Model',
|
||||
baseUrl: 'https://fast-api.example.com',
|
||||
generationConfig: {},
|
||||
capabilities: {},
|
||||
};
|
||||
|
||||
vi.mocked(mockConfig.getModelsConfig).mockReturnValue({
|
||||
getResolvedModel: vi.fn().mockReturnValue(mockResolvedModel),
|
||||
} as unknown as ModelsConfig);
|
||||
|
||||
vi.mocked(createContentGenerator).mockResolvedValue(mockContentGenerator);
|
||||
|
||||
// First call
|
||||
await client.generateContent(
|
||||
contents,
|
||||
{},
|
||||
abortController.signal,
|
||||
'fast-model',
|
||||
);
|
||||
expect(createContentGenerator).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Second call - should use cache
|
||||
await client.generateContent(
|
||||
contents,
|
||||
{},
|
||||
abortController.signal,
|
||||
'fast-model',
|
||||
);
|
||||
expect(createContentGenerator).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('should resolve model across authTypes when main authType misses', async () => {
|
||||
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
|
||||
const abortSignal = new AbortController().signal;
|
||||
|
||||
const mockResolvedModel = {
|
||||
id: 'fast-model',
|
||||
authType: 'openai' as const,
|
||||
name: 'Fast Model',
|
||||
baseUrl: 'https://fast-api.example.com',
|
||||
generationConfig: {},
|
||||
capabilities: {},
|
||||
envKey: undefined,
|
||||
};
|
||||
|
||||
// resolveModelAcrossAuthTypes calls getResolvedModel multiple times:
|
||||
// 1. main authType (QWEN_OAUTH) → undefined (miss)
|
||||
// 2. secondary authType (USE_OPENAI) → mockResolvedModel (hit)
|
||||
// 3. buildAgentContentGeneratorConfig calls getResolvedModel again
|
||||
// with the resolved authType → mockResolvedModel (hit)
|
||||
const getResolvedModel = vi
|
||||
.fn()
|
||||
.mockReturnValueOnce(undefined)
|
||||
.mockReturnValue(mockResolvedModel);
|
||||
|
||||
vi.mocked(mockConfig.getModelsConfig).mockReturnValue({
|
||||
getResolvedModel,
|
||||
} as unknown as ModelsConfig);
|
||||
|
||||
// Main config uses QWEN_OAUTH — fast model registered under USE_OPENAI
|
||||
vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
|
||||
authType: AuthType.QWEN_OAUTH,
|
||||
apiKey: 'test-key',
|
||||
apiModel: 'test-model',
|
||||
} as unknown as ContentGeneratorConfig);
|
||||
|
||||
// Mock createContentGenerator to succeed so the cross-authType
|
||||
// resolution path completes without falling back
|
||||
vi.mocked(createContentGenerator).mockResolvedValue(mockContentGenerator);
|
||||
|
||||
await client.generateContent(
|
||||
contents,
|
||||
{ temperature: 0.5 },
|
||||
abortSignal,
|
||||
'fast-model',
|
||||
);
|
||||
|
||||
// First call uses main authType (QWEN_OAUTH) — misses
|
||||
expect(getResolvedModel).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
AuthType.QWEN_OAUTH,
|
||||
'fast-model',
|
||||
);
|
||||
// Second call falls through to secondary authType — hits
|
||||
expect(getResolvedModel).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
AuthType.USE_OPENAI,
|
||||
'fast-model',
|
||||
);
|
||||
// Generator was created using the resolved model's config
|
||||
expect(createContentGenerator).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should clear per-model generator cache on resetChat', async () => {
|
||||
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
|
||||
const abortController = new AbortController();
|
||||
const mockResolvedModel = {
|
||||
id: 'fast-model',
|
||||
authType: 'openai' as const,
|
||||
name: 'Fast Model',
|
||||
baseUrl: 'https://fast-api.example.com',
|
||||
generationConfig: {},
|
||||
capabilities: {},
|
||||
};
|
||||
|
||||
vi.mocked(mockConfig.getModelsConfig).mockReturnValue({
|
||||
getResolvedModel: vi.fn().mockReturnValue(mockResolvedModel),
|
||||
} as unknown as ModelsConfig);
|
||||
|
||||
vi.mocked(createContentGenerator).mockResolvedValue(mockContentGenerator);
|
||||
|
||||
// First call — populates cache
|
||||
await client.generateContent(
|
||||
contents,
|
||||
{},
|
||||
abortController.signal,
|
||||
'fast-model',
|
||||
);
|
||||
expect(createContentGenerator).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Reset chat should clear the cache
|
||||
await client.resetChat();
|
||||
|
||||
// Second call after reset — cache should be cleared, generator recreated
|
||||
await client.generateContent(
|
||||
contents,
|
||||
{},
|
||||
abortController.signal,
|
||||
'fast-model',
|
||||
);
|
||||
expect(createContentGenerator).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ const debugLogger = createDebugLogger('CLIENT');
|
|||
|
||||
// Core modules
|
||||
import type { ContentGenerator } from './contentGenerator.js';
|
||||
import type { ResolvedModelConfig } from '../models/types.js';
|
||||
import { AuthType, createContentGenerator } from './contentGenerator.js';
|
||||
import { GeminiChat } from './geminiChat.js';
|
||||
import {
|
||||
getArenaSystemReminder,
|
||||
|
|
@ -46,6 +48,9 @@ import {
|
|||
} from '../services/chatCompressionService.js';
|
||||
import { LoopDetectionService } from '../services/loopDetectionService.js';
|
||||
|
||||
// Models
|
||||
import { buildAgentContentGeneratorConfig } from '../models/content-generator-config.js';
|
||||
|
||||
// Tools
|
||||
import type { RelevantAutoMemoryPromptResult } from '../memory/manager.js';
|
||||
import { ToolNames } from '../tools/tool-names.js';
|
||||
|
|
@ -135,6 +140,14 @@ export class GeminiClient {
|
|||
private lastSentIdeContext: IdeContext | undefined;
|
||||
private forceFullIdeContext = true;
|
||||
|
||||
/**
|
||||
* Cache of per-model ContentGenerators keyed by model ID.
|
||||
* Avoids rebuilding the generator (SDK instantiation, config resolution)
|
||||
* on every side query (recap, title, tool summary).
|
||||
* Cleared on session reset (resetChat) to pick up config changes.
|
||||
*/
|
||||
private perModelGeneratorCache = new Map<string, Promise<ContentGenerator>>();
|
||||
|
||||
/**
|
||||
* At any point in this conversation, was compression triggered without
|
||||
* being forced and did it fail?
|
||||
|
|
@ -271,6 +284,7 @@ export class GeminiClient {
|
|||
// pointing at content the model can no longer retrieve.
|
||||
debugLogger.debug('[FILE_READ_CACHE] clear after resetChat');
|
||||
this.config.getFileReadCache().clear();
|
||||
this.perModelGeneratorCache.clear();
|
||||
await this.startChat();
|
||||
}
|
||||
|
||||
|
|
@ -1136,10 +1150,31 @@ export class GeminiClient {
|
|||
systemInstruction: finalSystemInstruction,
|
||||
};
|
||||
|
||||
// When the requested model differs from the main model (e.g. fast model
|
||||
// side queries for session recap / title / summary), resolve the target
|
||||
// model's own ContentGeneratorConfig so that per-model settings like
|
||||
// extra_body, samplingParams, and reasoning are not inherited from the
|
||||
// main model's config.
|
||||
const mainModel = this.config.getModel() ?? model;
|
||||
const isPerModel = model !== mainModel;
|
||||
|
||||
// Resolve the authType for retry logic. When using a per-model content
|
||||
// generator (e.g. fast model side queries), the retry authType must match
|
||||
// the target model's provider, not the main session's provider. This
|
||||
// ensures QWEN_OAUTH quota detection checks against the right provider.
|
||||
const retryAuthType = isPerModel
|
||||
? (this.createRetryAuthTypeForModel(model) ??
|
||||
this.config.getContentGeneratorConfig()?.authType ??
|
||||
AuthType.USE_OPENAI)
|
||||
: this.config.getContentGeneratorConfig()?.authType;
|
||||
|
||||
const contentGenerator = isPerModel
|
||||
? await this.createContentGeneratorForModel(model)
|
||||
: this.getContentGeneratorOrFail();
|
||||
const apiCall = () => {
|
||||
currentAttemptModel = model;
|
||||
|
||||
return this.getContentGeneratorOrFail().generateContent(
|
||||
return contentGenerator.generateContent(
|
||||
{
|
||||
model,
|
||||
config: requestConfig,
|
||||
|
|
@ -1149,7 +1184,7 @@ export class GeminiClient {
|
|||
);
|
||||
};
|
||||
const result = await retryWithBackoff(apiCall, {
|
||||
authType: this.config.getContentGeneratorConfig()?.authType,
|
||||
authType: retryAuthType,
|
||||
persistentMode: isUnattendedMode(),
|
||||
signal: abortSignal,
|
||||
heartbeatFn: (info) => {
|
||||
|
|
@ -1179,6 +1214,111 @@ export class GeminiClient {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a model across all authTypes. Handles the case where the target
|
||||
* model is registered under a different authType than the main model
|
||||
* (e.g. main=QWEN_OAUTH, fast=USE_ANTHROPIC).
|
||||
*
|
||||
* TODO: Move cross-authType resolution to ModelRegistry for a cleaner
|
||||
* data-layer solution. Follow-up PR.
|
||||
*/
|
||||
|
||||
private resolveModelAcrossAuthTypes(
|
||||
model: string,
|
||||
): ResolvedModelConfig | undefined {
|
||||
const modelsConfig = this.config.getModelsConfig();
|
||||
const allAuthTypes: AuthType[] = [
|
||||
AuthType.QWEN_OAUTH,
|
||||
AuthType.USE_OPENAI,
|
||||
AuthType.USE_VERTEX_AI,
|
||||
AuthType.USE_ANTHROPIC,
|
||||
AuthType.USE_GEMINI,
|
||||
];
|
||||
|
||||
// Try the main authType first for early exit
|
||||
const mainAuthType = this.config.getContentGeneratorConfig()?.authType;
|
||||
if (mainAuthType) {
|
||||
const resolved = modelsConfig.getResolvedModel(mainAuthType, model);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
|
||||
for (const authType of allAuthTypes) {
|
||||
if (authType === mainAuthType) continue;
|
||||
const resolved = modelsConfig.getResolvedModel(authType, model);
|
||||
if (resolved) return resolved;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the authType for a given model without creating a full generator.
|
||||
* Used by retry logic to ensure provider-specific checks (e.g. QWEN_OAUTH
|
||||
* quota detection) reference the correct provider.
|
||||
*/
|
||||
private createRetryAuthTypeForModel(model: string): string | undefined {
|
||||
return this.resolveModelAcrossAuthTypes(model)?.authType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a ContentGenerator for a specific model (e.g. the fast model) with
|
||||
* its own per-model settings from modelProviders. This prevents the main
|
||||
* model's extra_body / samplingParams / reasoning from leaking into side
|
||||
* queries that target a different model.
|
||||
*
|
||||
* Falls back to the main content generator when the target model is not in
|
||||
* the registry or when creating a dedicated generator fails (e.g. in test
|
||||
* environments without full auth setup).
|
||||
*
|
||||
* Results are cached by model ID to avoid rebuilding the generator
|
||||
* (SDK instantiation, config resolution) on every side query.
|
||||
*/
|
||||
private async createContentGeneratorForModel(
|
||||
model: string,
|
||||
): Promise<ContentGenerator> {
|
||||
// Check cache first (Promise coalescing to prevent redundant SDK instantiations)
|
||||
const cached = this.perModelGeneratorCache.get(model);
|
||||
if (cached) return cached;
|
||||
|
||||
const generatorPromise = (async () => {
|
||||
try {
|
||||
const resolvedModel = this.resolveModelAcrossAuthTypes(model);
|
||||
|
||||
if (!resolvedModel) {
|
||||
debugLogger.warn(
|
||||
`Model "${model}" not found in registry across all authTypes, falling back to main generator.`,
|
||||
);
|
||||
return this.getContentGeneratorOrFail();
|
||||
}
|
||||
|
||||
const targetConfig = buildAgentContentGeneratorConfig(
|
||||
this.config,
|
||||
model,
|
||||
{
|
||||
authType: resolvedModel.authType,
|
||||
apiKey: resolvedModel.envKey
|
||||
? (process.env[resolvedModel.envKey] ?? undefined)
|
||||
: undefined,
|
||||
baseUrl: resolvedModel.baseUrl,
|
||||
},
|
||||
);
|
||||
|
||||
return await createContentGenerator(targetConfig, this.config);
|
||||
} catch (err: unknown) {
|
||||
debugLogger.warn(
|
||||
`Failed to create content generator for model "${model}", falling back to main generator.`,
|
||||
err instanceof Error ? err.message : String(err),
|
||||
);
|
||||
// On failure, delete from cache so subsequent attempts can retry.
|
||||
this.perModelGeneratorCache.delete(model);
|
||||
return this.getContentGeneratorOrFail();
|
||||
}
|
||||
})();
|
||||
|
||||
this.perModelGeneratorCache.set(model, generatorPromise);
|
||||
return generatorPromise;
|
||||
}
|
||||
|
||||
async tryCompressChat(
|
||||
prompt_id: string,
|
||||
force: boolean = false,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue