diff --git a/packages/cli/src/ui/AppContainer.tsx b/packages/cli/src/ui/AppContainer.tsx
index 511c6ac61..a7bdd39ad 100644
--- a/packages/cli/src/ui/AppContainer.tsx
+++ b/packages/cli/src/ui/AppContainer.tsx
@@ -1113,6 +1113,24 @@ export const AppContainer = (props: AppContainerProps) => {
   const followupSuggestionsEnabled =
     settings.merged.ui?.enableFollowupSuggestions === true;
 
+  // Resolve fastModel, validating that it belongs to the current authType.
+  // If the configured fastModel is from a different provider, the API call
+  // would fail silently (the DashScope/Qwen client rejects unknown model IDs),
+  // so fall back to the main model instead.
+  const resolveFastModel = useCallback((): string | undefined => {
+    const fastModel = settings.merged.fastModel;
+    if (!fastModel) return undefined;
+    const currentAuthType = config.getContentGeneratorConfig()?.authType;
+    if (!currentAuthType) return undefined;
+    const availableModels = config
+      .getModelsConfig()
+      .getAvailableModelsForAuthType(currentAuthType);
+    const belongsToCurrentAuth = availableModels.some(
+      (m) => m.id === fastModel,
+    );
+    return belongsToCurrentAuth ? fastModel : undefined;
+  }, [settings.merged.fastModel, config]);
+
   useEffect(() => {
     // Clear suggestion when feature is disabled at runtime
     if (!followupSuggestionsEnabled) {
@@ -1164,9 +1182,10 @@ export const AppContainer = (props: AppContainerProps) => {
       const fullHistory = geminiClient.getChat().getHistory(true);
       const conversationHistory =
         fullHistory.length > 40 ? fullHistory.slice(-40) : fullHistory;
+      const fastModel = resolveFastModel();
       generatePromptSuggestion(config, conversationHistory, ac.signal, {
         enableCacheSharing: settings.merged.ui?.enableCacheSharing === true,
-        model: settings.merged.fastModel || undefined,
+        model: fastModel,
       })
         .then((result) => {
           if (ac.signal.aborted) return;
@@ -1175,7 +1194,7 @@
           // Start speculation if enabled (runs in background)
           if (settings.merged.ui?.enableSpeculation) {
             startSpeculation(config, result.suggestion, ac.signal, {
-              model: settings.merged.fastModel || undefined,
+              model: fastModel,
             })
               .then((state) => {
                 speculationRef.current = state;
diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.test.ts b/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
index 6969a51ef..549d39f2a 100644
--- a/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
+++ b/packages/core/src/core/openaiContentGenerator/pipeline.test.ts
@@ -171,7 +171,7 @@ describe('ContentGenerationPipeline', () => {
       );
     });
 
-    it('should ignore request.model override and always use configured model', async () => {
+    it('should use request.model when provided', async () => {
      // Arrange
       const request: GenerateContentParameters = {
         model: 'override-model',
@@ -205,7 +205,54 @@ describe('ContentGenerationPipeline', () => {
       // Act
       const result = await pipeline.execute(request, userPromptId);
 
-      // Assert
+      // Assert — request.model takes precedence over contentGeneratorConfig.model
+      expect(result).toBe(mockGeminiResponse);
+      expect(
+        (mockConverter as unknown as { setModel: Mock }).setModel,
+      ).toHaveBeenCalledWith('override-model');
+      expect(mockClient.chat.completions.create).toHaveBeenCalledWith(
+        expect.objectContaining({
+          model: 'override-model',
+        }),
+        expect.any(Object),
+      );
+    });
+
+    it('should fall back to configured model when request.model is empty', async () => {
+      // Arrange — an empty model string is falsy, so it should fall back to contentGeneratorConfig.model
+      const request: GenerateContentParameters = {
+        model: '',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+      };
+      const userPromptId = 'test-prompt-id';
+
+      const mockMessages = [
+        { role: 'user', content: 'Hello' },
+      ] as OpenAI.Chat.ChatCompletionMessageParam[];
+      const mockOpenAIResponse = {
+        id: 'response-id',
+        choices: [
+          { message: { content: 'Hello response' }, finish_reason: 'stop' },
+        ],
+        created: Date.now(),
+        model: 'test-model',
+      } as OpenAI.Chat.ChatCompletion;
+      const mockGeminiResponse = new GenerateContentResponse();
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue(
+        mockMessages,
+      );
+      (mockConverter.convertOpenAIResponseToGemini as Mock).mockReturnValue(
+        mockGeminiResponse,
+      );
+      (mockClient.chat.completions.create as Mock).mockResolvedValue(
+        mockOpenAIResponse,
+      );
+
+      // Act
+      const result = await pipeline.execute(request, userPromptId);
+
+      // Assert — falls back to contentGeneratorConfig.model
       expect(result).toBe(mockGeminiResponse);
       expect(
         (mockConverter as unknown as { setModel: Mock }).setModel,
@@ -287,6 +334,174 @@ describe('ContentGenerationPipeline', () => {
       );
     });
 
+    it('should skip empty tools array in request', async () => {
+      // Arrange — tools: [] should NOT be included in the API request
+      const request: GenerateContentParameters = {
+        model: 'test-model',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+        config: { tools: [] },
+      };
+      const userPromptId = 'test-prompt-id';
+
+      const mockMessages = [
+        { role: 'user', content: 'Hello' },
+      ] as OpenAI.Chat.ChatCompletionMessageParam[];
+      const mockOpenAIResponse = {
+        id: 'response-id',
+        choices: [{ message: { content: 'Response' }, finish_reason: 'stop' }],
+      } as OpenAI.Chat.ChatCompletion;
+      const mockGeminiResponse = new GenerateContentResponse();
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue(
+        mockMessages,
+      );
+      (mockConverter.convertOpenAIResponseToGemini as Mock).mockReturnValue(
+        mockGeminiResponse,
+      );
+      (mockClient.chat.completions.create as Mock).mockResolvedValue(
+        mockOpenAIResponse,
+      );
+
+      // Act
+      await pipeline.execute(request, userPromptId);
+
+      // Assert — tools should NOT be in the request
+      expect(mockConverter.convertGeminiToolsToOpenAI).not.toHaveBeenCalled();
+      const apiCall = (mockClient.chat.completions.create as Mock).mock
+        .calls[0][0];
+      expect(apiCall.tools).toBeUndefined();
+    });
+
+    it('should override enable_thinking when thinkingConfig disables it', async () => {
+      // Arrange — the provider injects enable_thinking: true via extra_body,
+      // but the request explicitly disables thinking
+      (mockProvider.buildRequest as Mock).mockImplementation((req) => ({
+        ...req,
+        enable_thinking: true, // Simulates extra_body injection
+      }));
+
+      const request: GenerateContentParameters = {
+        model: 'test-model',
+        contents: [{ parts: [{ text: 'Suggest next' }], role: 'user' }],
+        config: { thinkingConfig: { includeThoughts: false } },
+      };
+      const userPromptId = 'forked_query';
+
+      const mockMessages = [
+        { role: 'user', content: 'Suggest next' },
+      ] as OpenAI.Chat.ChatCompletionMessageParam[];
+      const mockOpenAIResponse = {
+        id: 'response-id',
+        choices: [
+          {
+            message: { content: '{"suggestion":"run tests"}' },
+            finish_reason: 'stop',
+          },
+        ],
+      } as OpenAI.Chat.ChatCompletion;
+      const mockGeminiResponse = new GenerateContentResponse();
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue(
+        mockMessages,
+      );
+      (mockConverter.convertOpenAIResponseToGemini as Mock).mockReturnValue(
+        mockGeminiResponse,
+      );
+      (mockClient.chat.completions.create as Mock).mockResolvedValue(
+        mockOpenAIResponse,
+      );
+
+      // Act
+      await pipeline.execute(request, userPromptId);
+
+      // Assert — enable_thinking should be overridden to false
+      const apiCall = (mockClient.chat.completions.create as Mock).mock
+        .calls[0][0];
+      expect(apiCall.enable_thinking).toBe(false);
+    });
+
+    it('should strip reasoning key from extra_body when thinking is disabled', async () => {
+      // Arrange — the provider injects reasoning via extra_body
+      (mockProvider.buildRequest as Mock).mockImplementation((req) => ({
+        ...req,
+        reasoning: { effort: 'high' },
+      }));
+
+      const request: GenerateContentParameters = {
+        model: 'test-model',
+        contents: [{ parts: [{ text: 'Suggest next' }], role: 'user' }],
+        config: { thinkingConfig: { includeThoughts: false } },
+      };
+
+      const mockMessages = [
+        { role: 'user', content: 'Suggest next' },
+      ] as OpenAI.Chat.ChatCompletionMessageParam[];
+      const mockOpenAIResponse = {
+        id: 'response-id',
+        choices: [{ message: { content: 'run tests' }, finish_reason: 'stop' }],
+      } as OpenAI.Chat.ChatCompletion;
+      const mockGeminiResponse = new GenerateContentResponse();
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue(
+        mockMessages,
+      );
+      (mockConverter.convertOpenAIResponseToGemini as Mock).mockReturnValue(
+        mockGeminiResponse,
+      );
+      (mockClient.chat.completions.create as Mock).mockResolvedValue(
+        mockOpenAIResponse,
+      );
+
+      // Act
+      await pipeline.execute(request, 'forked_query');
+
+      // Assert — reasoning should be stripped
+      const apiCall = (mockClient.chat.completions.create as Mock).mock
+        .calls[0][0];
+      expect(apiCall.reasoning).toBeUndefined();
+    });
+
+    it('should preserve enable_thinking when thinking is not explicitly disabled', async () => {
+      // Arrange — a normal request (not a forked query); enable_thinking should be preserved
+      (mockProvider.buildRequest as Mock).mockImplementation((req) => ({
+        ...req,
+        enable_thinking: true,
+      }));
+
+      const request: GenerateContentParameters = {
+        model: 'test-model',
+        contents: [{ parts: [{ text: 'Hello' }], role: 'user' }],
+        // No thinkingConfig — normal request
+      };
+
+      const mockMessages = [
+        { role: 'user', content: 'Hello' },
+      ] as OpenAI.Chat.ChatCompletionMessageParam[];
+      const mockOpenAIResponse = {
+        id: 'response-id',
+        choices: [{ message: { content: 'Hi there' }, finish_reason: 'stop' }],
+      } as OpenAI.Chat.ChatCompletion;
+      const mockGeminiResponse = new GenerateContentResponse();
+
+      (mockConverter.convertGeminiRequestToOpenAI as Mock).mockReturnValue(
+        mockMessages,
+      );
+      (mockConverter.convertOpenAIResponseToGemini as Mock).mockReturnValue(
+        mockGeminiResponse,
+      );
+      (mockClient.chat.completions.create as Mock).mockResolvedValue(
+        mockOpenAIResponse,
+      );
+
+      // Act
+      await pipeline.execute(request, 'main');
+
+      // Assert — enable_thinking should be PRESERVED (not disabled)
+      const apiCall = (mockClient.chat.completions.create as Mock).mock
+        .calls[0][0];
+      expect(apiCall.enable_thinking).toBe(true);
+    });
+
     it('should handle errors and log them', async () => {
       // Arrange
       const request: GenerateContentParameters = {
diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.ts b/packages/core/src/core/openaiContentGenerator/pipeline.ts
index 4e2d42bd8..1af663a51 100644
--- a/packages/core/src/core/openaiContentGenerator/pipeline.ts
+++ b/packages/core/src/core/openaiContentGenerator/pipeline.ts
@@ -54,10 +54,9 @@ export class ContentGenerationPipeline {
     request: GenerateContentParameters,
     userPromptId: string,
   ): Promise<GenerateContentResponse> {
-    // For OpenAI-compatible providers, the configured model is the single source of truth.
-    // We intentionally ignore request.model because upstream callers may pass a model string
-    // that is not valid/available for the OpenAI-compatible backend.
-    const effectiveModel = this.contentGeneratorConfig.model;
+    // Use request.model when explicitly provided (e.g., fastModel for suggestion
+    // generation), falling back to the configured model as the default.
+    const effectiveModel = request.model || this.contentGeneratorConfig.model;
     this.converter.setModel(effectiveModel);
     this.converter.setModalities(this.contentGeneratorConfig.modalities ?? {});
     return this.executeWithErrorHandling(
@@ -85,7 +84,7 @@
     request: GenerateContentParameters,
     userPromptId: string,
   ): Promise<AsyncGenerator<GenerateContentResponse>> {
-    const effectiveModel = this.contentGeneratorConfig.model;
+    const effectiveModel = request.model || this.contentGeneratorConfig.model;
     this.converter.setModel(effectiveModel);
     this.converter.setModalities(this.contentGeneratorConfig.modalities ?? {});
     return this.executeWithErrorHandling(
@@ -331,15 +330,37 @@
       baseRequest.stream_options = { include_usage: true };
     }
 
-    // Add tools if present
-    if (request.config?.tools) {
+    // Add tools if present and non-empty.
+    // Some providers reject tools: [] (an empty array), so skip when there are no tools.
+    if (request.config?.tools && request.config.tools.length > 0) {
       baseRequest.tools = await this.converter.convertGeminiToolsToOpenAI(
         request.config.tools,
       );
     }
 
     // Let provider enhance the request (e.g., add metadata, cache control)
-    return this.config.provider.buildRequest(baseRequest, userPromptId);
+    const providerRequest = this.config.provider.buildRequest(
+      baseRequest,
+      userPromptId,
+    );
+
+    // When thinking is explicitly disabled (e.g., forked queries for suggestions),
+    // override thinking-related keys that may have been injected by extra_body.
+    // extra_body is spread last in provider.buildRequest, so it overrides
+    // buildReasoningConfig's decision — we must post-process here.
+    if (request.config?.thinkingConfig?.includeThoughts === false) {
+      const typed = providerRequest as unknown as Record<string, unknown>;
+      if ('enable_thinking' in typed) {
+        typed['enable_thinking'] = false;
+      }
+      // Also strip the reasoning config — extra_body could inject it, overriding
+      // buildReasoningConfig's decision to return {} for disabled thinking.
+      if ('reasoning' in typed) {
+        delete typed['reasoning'];
+      }
+    }
+
+    return providerRequest;
   }
 
   private buildGenerateContentConfig(
diff --git a/packages/core/src/followup/forkedQuery.ts b/packages/core/src/followup/forkedQuery.ts
index 0d4dc2186..3daac42f5 100644
--- a/packages/core/src/followup/forkedQuery.ts
+++ b/packages/core/src/followup/forkedQuery.ts
@@ -237,9 +237,11 @@ export async function runForkedQuery(
   for await (const event of stream) {
     if (event.type !== StreamEventType.CHUNK) continue;
     const response = event.value;
-    // Extract text from candidates
+    // Extract text from candidates, skipping thought/reasoning parts.
+    // Some providers may return thinking content even with enable_thinking: false.
     const text = response.candidates?.[0]?.content?.parts
-      ?.map((p) => p.text ?? '')
+      ?.filter((p) => !(p as Record<string, unknown>)['thought'])
+      .map((p) => p.text ?? '')
       .join('');
     if (text) {
       fullText += text;
diff --git a/packages/core/src/followup/suggestionGenerator.ts b/packages/core/src/followup/suggestionGenerator.ts
index 50f872be7..f099b0f69 100644
--- a/packages/core/src/followup/suggestionGenerator.ts
+++ b/packages/core/src/followup/suggestionGenerator.ts
@@ -17,6 +17,9 @@ import {
   EVENT_API_RESPONSE,
 } from '../telemetry/uiTelemetry.js';
 import { ApiResponseEvent } from '../telemetry/types.js';
+import { createDebugLogger } from '../utils/debugLogger.js';
+
+const debugLogger = createDebugLogger('FOLLOWUP');
 
 /**
  * Prompt for suggestion generation.
@@ -104,6 +107,9 @@
     // Try cache-aware forked query if enabled and params available
     const cacheSafe = options?.enableCacheSharing ? getCacheSafeParams() : null;
     const modelOverride = options?.model;
+    debugLogger.debug(
+      `Generating suggestion: cacheSharing=${!!cacheSafe}, model=${modelOverride || '(default)'}`,
+    );
     const raw = cacheSafe
       ? await generateViaForkedQuery(config, abortSignal, modelOverride)
       : await generateViaBaseLlm(
@@ -116,19 +122,25 @@
     const suggestion = typeof raw === 'string' ? raw.trim() : null;
 
     if (!suggestion) {
+      debugLogger.debug('Suggestion generation returned empty result');
       return { suggestion: null, filterReason: 'empty' };
     }
 
     const filterReason = getFilterReason(suggestion);
     if (filterReason) {
+      debugLogger.debug(
+        `Suggestion filtered: reason=${filterReason}, text="${suggestion}"`,
+      );
       return { suggestion: null, filterReason };
     }
 
+    debugLogger.debug(`Suggestion accepted: "${suggestion}"`);
     return { suggestion };
-  } catch {
+  } catch (error) {
     if (abortSignal.aborted) {
       return { suggestion: null };
     }
+    debugLogger.warn('Suggestion generation failed:', error);
     return { suggestion: null, filterReason: 'error' };
   }
 }
@@ -218,7 +230,8 @@ async function generateViaBaseLlm(
   }
 
   const text = response.candidates?.[0]?.content?.parts
-    ?.map((p) => p.text ?? '')
+    ?.filter((p) => !(p as Record<string, unknown>)['thought'])
+    .map((p) => p.text ?? '')
     .join('')
     .trim();
   if (text) {
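
Note (not part of the patch): the two core behaviors this change adds, forcing thinking off after extra_body is spread and filtering thought parts out of streamed text, can be modeled in isolation. The following is a minimal TypeScript sketch rather than the actual pipeline code: ChatRequest, applyThinkingOverride, and extractVisibleText are hypothetical names chosen for illustration, and the real implementation operates on OpenAI SDK request types and Gemini response candidates instead of plain objects.

// Hypothetical stand-in for the OpenAI-compatible request payload; the real
// code uses the OpenAI SDK's chat-completion parameter type.
type ChatRequest = Record<string, unknown>;

// Models the post-processing step added to buildRequest in pipeline.ts:
// providers spread extra_body last, so thinking-related keys can survive the
// pipeline's opt-out and must be forced off after the provider builds the request.
function applyThinkingOverride(
  providerRequest: ChatRequest,
  includeThoughts: boolean | undefined,
): ChatRequest {
  // Only act when thinking is explicitly disabled; undefined means
  // "provider default" and is left untouched.
  if (includeThoughts !== false) return providerRequest;
  const result: ChatRequest = { ...providerRequest };
  if ('enable_thinking' in result) result['enable_thinking'] = false;
  if ('reasoning' in result) delete result['reasoning'];
  return result;
}

// Models the text-extraction change in forkedQuery.ts/suggestionGenerator.ts:
// drop parts flagged as thoughts before concatenating their text, since some
// providers return thinking content even with enable_thinking: false.
interface Part {
  text?: string;
  thought?: boolean;
}

function extractVisibleText(parts: Part[] | undefined): string {
  return (parts ?? [])
    .filter((p) => !p.thought)
    .map((p) => p.text ?? '')
    .join('');
}

// Example: a provider that injects thinking keys via extra_body.
const injected: ChatRequest = {
  model: 'test-model',
  enable_thinking: true,
  reasoning: { effort: 'high' },
};
console.log(applyThinkingOverride(injected, false));
// -> { model: 'test-model', enable_thinking: false }

console.log(
  extractVisibleText([{ text: 'pondering', thought: true }, { text: 'run tests' }]),
);
// -> 'run tests'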