diff --git a/packages/core/src/core/openaiContentGenerator/converter.ts b/packages/core/src/core/openaiContentGenerator/converter.ts index 07a8f1831..184ba5493 100644 --- a/packages/core/src/core/openaiContentGenerator/converter.ts +++ b/packages/core/src/core/openaiContentGenerator/converter.ts @@ -696,6 +696,17 @@ export class OpenAIContentConverter { parts.push({ text: choice.message.content }); } + // Handle reasoning content + const message = choice.message as typeof choice.message & { + reasoning_content?: string; + }; + if (message.reasoning_content) { + parts.push({ + text: message.reasoning_content, + thought: true, + } as unknown as Part); + } + // Handle tool calls if (choice.message.tool_calls) { for (const toolCall of choice.message.tool_calls) { @@ -752,6 +763,8 @@ export class OpenAIContentConverter { usage.prompt_tokens_details?.cached_tokens ?? extendedUsage.cached_tokens ?? 0; + const reasoningTokens = + usage.completion_tokens_details?.reasoning_tokens || 0; // If we only have total tokens but no breakdown, estimate the split // Typically input is ~70% and output is ~30% for most conversations @@ -769,6 +782,7 @@ export class OpenAIContentConverter { candidatesTokenCount: finalCompletionTokens, totalTokenCount: totalTokens, cachedContentTokenCount: cachedTokens, + thoughtsTokenCount: reasoningTokens, }; } @@ -800,6 +814,17 @@ export class OpenAIContentConverter { } } + // Handle reasoning content + const delta = choice.delta as typeof choice.delta & { + reasoning_content?: string; + }; + if (delta.reasoning_content) { + parts.push({ + text: delta.reasoning_content, + thought: true, + }); + } + // Handle tool calls using the streaming parser if (choice.delta?.tool_calls) { for (const toolCall of choice.delta.tool_calls) { diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.ts b/packages/core/src/core/openaiContentGenerator/pipeline.ts index 88ac38f6a..0eab0c2d2 100644 --- a/packages/core/src/core/openaiContentGenerator/pipeline.ts 
+++ b/packages/core/src/core/openaiContentGenerator/pipeline.ts @@ -242,6 +242,30 @@ export class ContentGenerationPipeline { baseRequest.stream_options = { include_usage: true }; } + // Add thinking options if present + if ( + request.config?.thinkingConfig && + request.config.thinkingConfig.includeThoughts + ) { + ( + baseRequest as OpenAI.Chat.ChatCompletionCreateParams & { + extra_body?: Record<string, unknown>; + } + ).extra_body = { enable_thinking: true }; + ( + baseRequest as OpenAI.Chat.ChatCompletionCreateParams & { + enable_thinking?: boolean; + } + ).enable_thinking = true; + if (request.config.thinkingConfig.thinkingBudget) { + ( + baseRequest as OpenAI.Chat.ChatCompletionCreateParams & { + thinking_budget?: number; + } + ).thinking_budget = request.config.thinkingConfig.thinkingBudget; + } + } + // Add tools if present + if (request.config?.tools) { + baseRequest.tools = await this.converter.convertGeminiToolsToOpenAI(