diff --git a/src/main/java/ee/carlrobert/codegpt/toolwindow/chat/ui/ChatMessageResponseBody.java b/src/main/java/ee/carlrobert/codegpt/toolwindow/chat/ui/ChatMessageResponseBody.java index af37f246..030cda63 100644 --- a/src/main/java/ee/carlrobert/codegpt/toolwindow/chat/ui/ChatMessageResponseBody.java +++ b/src/main/java/ee/carlrobert/codegpt/toolwindow/chat/ui/ChatMessageResponseBody.java @@ -130,12 +130,21 @@ public class ChatMessageResponseBody extends JPanel { public ChatMessageResponseBody withResponse(@NotNull String response) { try { - for (var item : new CompleteMessageParser().parse(response)) { + var parser = new CompleteMessageParser(); + var segments = parser.parse(response); + if (parser.getExtractedThought() != null && !parser.getExtractedThought().isBlank()) { + processThinkingOutput(parser.getExtractedThought()); + } + for (var item : segments) { processResponse(item, false); currentlyProcessedTextPane = null; currentlyProcessedEditorPanel = null; currentlyProcessedMermaidPanel = null; } + var thoughtProcessPanel = getExistingThoughtProcessPanel(); + if (thoughtProcessPanel != null && !thoughtProcessPanel.isFinished()) { + thoughtProcessPanel.setFinished(); + } } catch (Exception e) { LOG.error("Something went wrong while processing input", e); } diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/AgentFactory.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/AgentFactory.kt index a65b23bb..96d8505f 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/agent/AgentFactory.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/AgentFactory.kt @@ -13,21 +13,33 @@ import ai.koog.agents.core.environment.ReceivedToolResult import ai.koog.agents.core.environment.result import ai.koog.agents.core.feature.handler.tool.ToolCallCompletedContext import ai.koog.agents.core.feature.handler.tool.ToolCallStartingContext +import ai.koog.agents.core.tools.ToolDescriptor import ai.koog.agents.core.tools.ToolRegistry import ai.koog.agents.ext.tool.ExitTool import ai.koog.agents.ext.tool.shell.ShellCommandConfirmation import ai.koog.agents.features.eventHandler.feature.handleEvents import ai.koog.agents.features.tokenizer.feature.MessageTokenizer import ai.koog.agents.features.tokenizer.feature.tokenizer +import ai.koog.prompt.dsl.Prompt import ai.koog.prompt.dsl.prompt +import ai.koog.prompt.executor.clients.anthropic.AnthropicParams +import ai.koog.prompt.executor.clients.anthropic.models.AnthropicThinking import ai.koog.prompt.executor.clients.LLMClient +import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams +import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort +import ai.koog.prompt.executor.clients.openai.models.ReasoningConfig +import ai.koog.prompt.executor.clients.openai.models.ReasoningSummary import ai.koog.prompt.executor.model.PromptExecutor +import ai.koog.prompt.llm.LLMCapability import ai.koog.prompt.llm.LLMProvider +import ai.koog.prompt.llm.LLModel import ai.koog.prompt.message.Message +import ai.koog.prompt.params.LLMParams import ai.koog.prompt.tokenizer.Tokenizer import com.intellij.openapi.components.service import com.intellij.openapi.project.Project import ee.carlrobert.codegpt.EncodingManager +import ee.carlrobert.codegpt.agent.clients.CustomOpenAILLMClient import ee.carlrobert.codegpt.agent.clients.RetryingPromptExecutor import ee.carlrobert.codegpt.agent.credits.extractCreditsSnapshot import ee.carlrobert.codegpt.agent.tools.* @@ -46,6 +58,8 @@ import kotlin.time.Duration.Companion.seconds object AgentFactory { private const val MAX_AGENT_ITERATIONS = 250 + private const val ANTHROPIC_MIN_THINKING_BUDGET = 512 + private const val ANTHROPIC_DEFAULT_THINKING_BUDGET = 2_048 fun createAgent( agentType: AgentType, @@ -170,10 +184,6 @@ object AgentFactory { featureType: FeatureType = FeatureType.AGENT ): PromptExecutor { val llmClient = LLMClientFactory.createClient(provider, featureType) - return createRetryingExecutor(llmClient, events) - } - - private fun createRetryingExecutor(client: LLMClient, events: AgentEvents?): PromptExecutor { val policy = RetryingPromptExecutor.RetryPolicy( maxAttempts = 5, initialDelay = 1.seconds, @@ -181,7 +191,104 @@ object AgentFactory { backoffMultiplier = 2.0, jitterFactor = 0.1 ) - return RetryingPromptExecutor.fromClient(client, policy, events) + return createRetryingExecutor(llmClient, policy, events) + } + + internal fun createRetryingExecutor( + client: LLMClient, + policy: RetryingPromptExecutor.RetryPolicy, + events: AgentEvents? + ): PromptExecutor { + val executor = RetryingPromptExecutor.fromClient(client, policy, events) + return object : PromptExecutor { + override fun executeStreaming( + prompt: Prompt, + model: LLModel, + tools: List + ) = executor.executeStreaming(prompt.withReasoningParams(model), model, tools) + + override suspend fun execute( + prompt: Prompt, + model: LLModel, + tools: List + ) = executor.execute(prompt.withReasoningParams(model), model, tools) + + override suspend fun moderate(prompt: Prompt, model: LLModel) = + executor.moderate(prompt, model) + + override suspend fun models() = executor.models() + + override fun close() = executor.close() + } + } + + private fun Prompt.withReasoningParams(model: LLModel): Prompt { + val params = when (model.provider) { + LLMProvider.OpenAI -> params.withOpenAIReasoning() + CustomOpenAILLMClient.CustomOpenAI -> { + if (model.supports(LLMCapability.OpenAIEndpoint.Responses)) { + params.withOpenAIReasoning() + } else { + params + } + } + LLMProvider.Anthropic -> params.withAnthropicReasoning() + else -> params + } + return withParams(params) + } + + private fun LLMParams.withOpenAIReasoning(): LLMParams { + val base = when (this) { + is OpenAIResponsesParams -> this + else -> OpenAIResponsesParams( + temperature = temperature, + maxTokens = maxTokens, + numberOfChoices = numberOfChoices, + speculation = speculation, + schema = schema, + toolChoice = toolChoice, + user = user, + additionalProperties = additionalProperties + ) + } + return base.copy( + reasoning = base.reasoning ?: ReasoningConfig( + effort = ReasoningEffort.MEDIUM, + summary = ReasoningSummary.AUTO + ) + ) + } + + private fun LLMParams.withAnthropicReasoning(): LLMParams { + val base = when (this) { + is AnthropicParams -> this + else -> AnthropicParams( + temperature = temperature, + maxTokens = maxTokens, + numberOfChoices = numberOfChoices, + speculation = speculation, + schema = schema, + toolChoice = toolChoice, + user = user, + additionalProperties = additionalProperties + ) + } + + if (base.thinking != null) return base + + val thinkingBudget = resolveAnthropicThinkingBudget(base.maxTokens) ?: return base + return base.copy(thinking = AnthropicThinking.Enabled(budgetTokens = thinkingBudget)) + } + + private fun resolveAnthropicThinkingBudget(maxTokens: Int?): Int? { + val limit = maxTokens ?: ANTHROPIC_DEFAULT_THINKING_BUDGET + if (limit <= ANTHROPIC_MIN_THINKING_BUDGET) { + return null + } + return (limit / 2) + .coerceAtLeast(ANTHROPIC_MIN_THINKING_BUDGET) + .coerceAtMost(ANTHROPIC_DEFAULT_THINKING_BUDGET) } private fun createGeneralPurposeAgent( diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/ProxyAIAgent.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/ProxyAIAgent.kt index 1fff7523..dc56a7dc 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/agent/ProxyAIAgent.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/ProxyAIAgent.kt @@ -23,7 +23,6 @@ import com.intellij.openapi.components.service import com.intellij.openapi.project.Project import ee.carlrobert.codegpt.EncodingManager import ee.carlrobert.codegpt.agent.clients.shouldStream -import ee.carlrobert.codegpt.agent.clients.shouldStreamCustomOpenAI import ee.carlrobert.codegpt.agent.strategy.CODE_AGENT_COMPRESSION import ee.carlrobert.codegpt.agent.strategy.HistoryCompressionConfig import ee.carlrobert.codegpt.agent.strategy.SingleRunStrategyProvider @@ -35,6 +34,7 @@ import ee.carlrobert.codegpt.settings.hooks.HookManager import ee.carlrobert.codegpt.settings.models.ModelSettings import ee.carlrobert.codegpt.settings.service.FeatureType import ee.carlrobert.codegpt.settings.service.ServiceType +import ee.carlrobert.codegpt.settings.service.custom.CustomServicesSettings import ee.carlrobert.codegpt.settings.skills.SkillDiscoveryService import ee.carlrobert.codegpt.toolwindow.agent.ui.approval.BashPayload import ee.carlrobert.codegpt.toolwindow.agent.ui.approval.ToolApprovalRequest @@ -89,7 +89,7 @@ object ProxyAIAgent { val modelSelection = service().getModelSelectionForFeature(FeatureType.AGENT) val skills = project.service().listSkills() - val stream = shouldStreamAgentToolLoop(provider) + val stream = shouldStreamAgentToolLoop(project, provider) val projectInstructions = loadProjectInstructions(project.basePath) val executor = AgentFactory.createExecutor(provider, events) val pendingMessageQueue = pendingMessages.getOrPut(sessionId) { ArrayDeque() } @@ -163,11 +163,20 @@ object ProxyAIAgent { val toolCallToUiId: MutableMap = HashMap() val anonymousToolIds: ArrayDeque = ArrayDeque() val frameAdapter = ReasoningFrameTextAdapter() + var streamedReasoningForCurrentNode = false onLLMStreamingFrameReceived { ctx -> if (!stream) return@onLLMStreamingFrameReceived - frameAdapter.consume(ctx.streamFrame).forEach { chunk -> + val frameType = ctx.streamFrame::class.simpleName + ?: ctx.streamFrame::class.qualifiedName + ?: "unknown" + val chunks = frameAdapter.consume(ctx.streamFrame) + if (frameType.contains("Reasoning") && chunks.isNotEmpty()) { + streamedReasoningForCurrentNode = true + } + + chunks.forEach { chunk -> if (chunk.isNotEmpty()) { events.onTextReceived(chunk) } @@ -175,9 +184,22 @@ object ProxyAIAgent { } onNodeExecutionCompleted { ctx -> - if (stream) return@onNodeExecutionCompleted + val output = (ctx.output as? List<*>) ?: emptyList() + if (stream) { + if (!streamedReasoningForCurrentNode) { + output.forEach { msg -> + (msg as? Message.Reasoning)?.let { + if (it.content.isNotBlank()) { + events.onTextReceived("${it.content}") + } + } + } + } + streamedReasoningForCurrentNode = false + return@onNodeExecutionCompleted + } - (ctx.output as? List<*>)?.forEach { msg -> + output.forEach { msg -> (msg as? Message.Assistant)?.let { events.onTextReceived(it.content) } @@ -268,10 +290,19 @@ object ProxyAIAgent { } private fun shouldStreamAgentToolLoop( + project: Project, provider: ServiceType, ): Boolean { return when (provider) { - ServiceType.CUSTOM_OPENAI -> shouldStreamCustomOpenAI(FeatureType.AGENT) + ServiceType.CUSTOM_OPENAI -> { + val selectedModel = + service().getModelSelectionForFeature(FeatureType.AGENT) + val selectedServiceId = selectedModel.serviceId + val selectedService = service().state.services + .firstOrNull { it.id == selectedServiceId } + selectedService?.chatCompletionSettings?.shouldStream() == true + } + ServiceType.GOOGLE -> false else -> true } diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIChatCompletion.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIChatCompletion.kt index 59bff574..3705ef0e 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIChatCompletion.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIChatCompletion.kt @@ -7,7 +7,8 @@ import kotlinx.serialization.json.JsonElement @Serializable class CustomOpenAIChatCompletionRequest( - val messages: List = emptyList(), + val messages: List? = null, + val input: JsonElement? = null, val prompt: String? = null, override val model: String? = null, override val stream: Boolean? = null, diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAILLMClient.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAILLMClient.kt index 10ed114e..aa1f1929 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAILLMClient.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAILLMClient.kt @@ -1,93 +1,68 @@ package ee.carlrobert.codegpt.agent.clients -import ai.koog.prompt.dsl.ModerationResult +import ai.koog.agents.core.tools.ToolDescriptor import ai.koog.prompt.dsl.Prompt import ai.koog.prompt.executor.clients.ConnectionTimeoutConfig import ai.koog.prompt.executor.clients.LLMClient -import ai.koog.prompt.executor.clients.LLMClientException -import ai.koog.prompt.executor.clients.openai.base.AbstractOpenAILLMClient -import ai.koog.prompt.executor.clients.openai.base.OpenAIBaseSettings -import ai.koog.prompt.executor.clients.openai.base.OpenAICompatibleToolDescriptorSchemaGenerator +import ai.koog.prompt.executor.clients.openai.OpenAIClientSettings +import ai.koog.prompt.executor.clients.openai.OpenAILLMClient +import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams import ai.koog.prompt.executor.clients.openai.base.models.* +import ai.koog.prompt.llm.LLMCapability import ai.koog.prompt.llm.LLMProvider import ai.koog.prompt.llm.LLModel import ai.koog.prompt.message.LLMChoice import ai.koog.prompt.message.Message -import ai.koog.prompt.message.ResponseMetaInfo import ai.koog.prompt.params.LLMParams import ai.koog.prompt.streaming.StreamFrame -import ai.koog.prompt.streaming.buildStreamFrameFlow import com.fasterxml.jackson.core.type.TypeReference import com.fasterxml.jackson.databind.ObjectMapper import ee.carlrobert.codegpt.codecompletions.InfillPromptTemplate import ee.carlrobert.codegpt.codecompletions.InfillRequest -import ee.carlrobert.codegpt.agent.normalizeToolArgumentsJson import ee.carlrobert.codegpt.settings.Placeholder -import ee.carlrobert.codegpt.settings.service.custom.CustomServiceCodeCompletionSettingsState import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState +import ee.carlrobert.codegpt.settings.service.custom.CustomServiceCodeCompletionSettingsState import ee.carlrobert.codegpt.settings.service.custom.CustomServicePlaceholders -import ee.carlrobert.codegpt.completions.factory.ResponsesApiUtil import ee.carlrobert.codegpt.util.JsonMapper -import io.github.oshai.kotlinlogging.KotlinLogging import io.ktor.client.* import io.ktor.client.plugins.* +import io.ktor.client.plugins.api.createClientPlugin import io.ktor.client.request.* -import io.ktor.http.HttpHeaders -import org.apache.commons.text.StringEscapeUtils +import io.ktor.http.ContentType +import io.ktor.http.content.TextContent import kotlinx.coroutines.flow.Flow import kotlinx.serialization.ExperimentalSerializationApi import kotlinx.serialization.KSerializer import kotlinx.serialization.builtins.ListSerializer import kotlinx.serialization.descriptors.elementNames import kotlinx.serialization.json.* +import org.apache.commons.text.StringEscapeUtils import java.net.URI -import java.util.UUID -import kotlin.io.encoding.ExperimentalEncodingApi import kotlin.time.Clock /** - * Configuration settings for connecting to the CustomOpenAI API. + * Implementation of [LLMClient] for OpenAI-compatible custom providers. * - * @property baseUrl The base URL of the CustomOpenAI API. Default is "https://CustomOpenAI.ai/api/v1". - * @property timeoutConfig Configuration for connection timeouts including request, connection, and socket timeouts. - */ -class CustomOpenAIClientSettings( - baseUrl: String, - chatCompletionsPath: String, - timeoutConfig: ConnectionTimeoutConfig -) : OpenAIBaseSettings(baseUrl, chatCompletionsPath, timeoutConfig) - -/** - * Implementation of [LLMClient] for CustomOpenAI API. - * CustomOpenAI is an API that routes requests to multiple LLM providers. - * - * @param apiKey The API key for the CustomOpenAI API - * @param settings The base URL and timeouts for the CustomOpenAI API, defaults to "https://CustomOpenAI.ai" and 900s - * @param clock Clock instance used for tracking response metadata timestamps. + * Chat-completions requests keep the custom body/placeholder behavior. + * Responses requests follow Koog's [OpenAILLMClient] path and only add custom params. */ class CustomOpenAILLMClient( private val apiKey: String, - private val settings: CustomOpenAIClientSettings, + settings: OpenAIClientSettings, private val chatState: CustomServiceChatCompletionSettingsState? = null, private val codeCompletionState: CustomServiceCodeCompletionSettingsState? = null, private val baseClient: HttpClient = HttpClient(), clock: Clock = Clock.System -) : AbstractOpenAILLMClient( - apiKey, - settings, - baseClient, - clock, - staticLogger, - OpenAICompatibleToolDescriptorSchemaGenerator() -), - CodeCompletionCapable { +) : OpenAILLMClient( + apiKey = apiKey, + settings = settings, + baseClient = baseClient, + clock = clock, +), CodeCompletionCapable { + data object CustomOpenAI : LLMProvider("custom-openai", "Custom OpenAI") - private val isResponsesApi: Boolean = ResponsesApiUtil.isResponsesApiUrl(chatState?.url) - companion object { - private val staticLogger = KotlinLogging.logger { } - init { registerOpenAIJsonSchemaGenerators(CustomOpenAI) } @@ -103,6 +78,7 @@ class CustomOpenAILLMClient( timeoutConfig = timeoutConfig ) val clientWithCustomHeaders = baseClient.config { + install(FlattenCustomOpenAIAdditionalPropertiesPlugin) defaultRequest { state.headers.forEach { (key, value) -> val normalizedKey = key.trim() @@ -147,12 +123,17 @@ class CustomOpenAILLMClient( private fun createClientSettings( url: String, timeoutConfig: ConnectionTimeoutConfig - ): CustomOpenAIClientSettings { + ): OpenAIClientSettings { val uri = URI.create(url) val authority = uri.authority ?: uri.host - return CustomOpenAIClientSettings( + val path = buildString { + append(uri.path) + uri.query?.takeIf { it.isNotBlank() }?.let { append("?").append(it) } + } + return OpenAIClientSettings( baseUrl = "${uri.scheme}://${authority}", - chatCompletionsPath = uri.path, + chatCompletionsPath = path, + responsesAPIPath = path, timeoutConfig = timeoutConfig ) } @@ -160,10 +141,67 @@ class CustomOpenAILLMClient( override fun llmProvider(): LLMProvider = CustomOpenAI - override suspend fun getCodeCompletion(infillRequest: InfillRequest): String { - val state = requireNotNull(codeCompletionState) { + private fun LLModel.hasResponsesEndpointCapability(): Boolean = + this.capabilities?.any { it == LLMCapability.OpenAIEndpoint.Responses } == true + + private fun requireChatState(): CustomServiceChatCompletionSettingsState { + return requireNotNull(chatState) { + "Custom OpenAI chat request requested on a code-completion-only client" + } + } + + private fun requireCodeCompletionState(): CustomServiceCodeCompletionSettingsState { + return requireNotNull(codeCompletionState) { "Custom OpenAI code completion requested on a chat-only client" } + } + + override suspend fun execute( + prompt: Prompt, + model: LLModel, + tools: List + ): List { + val state = requireChatState() + val finalPrompt = prompt.prepareForModel(model, state) + return super.execute(finalPrompt, model, tools) + } + + override fun executeStreaming( + prompt: Prompt, + model: LLModel, + tools: List + ): Flow { + val state = requireChatState() + val finalPrompt = prompt.prepareForModel(model, state) + return super.executeStreaming(finalPrompt, model, tools) + } + + override suspend fun executeMultipleChoices( + prompt: Prompt, + model: LLModel, + tools: List + ): List { + val state = requireChatState() + return super.executeMultipleChoices(prompt.prepareForModel(model, state), model, tools) + } + + private fun Prompt.prepareForModel( + model: LLModel, + state: CustomServiceChatCompletionSettingsState + ): Prompt { + return when { + model.hasResponsesEndpointCapability() -> + withParams(params.toCustomOpenAIResponsesParams(state)) + + params is OpenAIResponsesParams -> + withParams(params.toCustomOpenAIParams(state)) + + else -> this + } + } + + override suspend fun getCodeCompletion(infillRequest: InfillRequest): String { + val state = requireCodeCompletionState() val url = requireNotNull(state.url) val payload = postCompletionJson( client = baseClient, @@ -190,14 +228,7 @@ class CustomOpenAILLMClient( params: LLMParams, stream: Boolean ): String { - val state = requireNotNull(chatState) { - "Custom OpenAI chat request requested on a code-completion-only client" - } - - if (isResponsesApi) { - return serializeResponsesApiRequest(state, messages, model, tools, toolChoice) - } - + val state = requireChatState() val customParams: CustomOpenAIParams = params.toCustomOpenAIParams(state) val streamRequest = state.shouldStream() val additionalProperties = buildCustomOpenAIAdditionalProperties( @@ -238,44 +269,6 @@ class CustomOpenAILLMClient( return json.encodeToString(CustomOpenAIChatCompletionRequestSerializer, request) } - /** - * Builds the Responses API request body directly from the template body configuration. - * The template body uses "input" instead of "messages" and "max_output_tokens" instead - * of "max_tokens", so we process it as-is to produce the correct format. - */ - private fun serializeResponsesApiRequest( - state: CustomServiceChatCompletionSettingsState, - messages: List, - model: LLModel, - tools: List?, - toolChoice: OpenAIToolChoice? - ): String { - val streamRequest = state.shouldStream() - val inputJson = messages.toResponsesApiItemsJson() - val prompt = renderCustomOpenAIPrompt(messages, json) - - return buildJsonObject { - state.body.forEach { (key, value) -> - put( - key, transformCustomOpenAIBodyValue( - key = key, - value = value, - streamRequest = streamRequest, - messagesJson = inputJson, - prompt = prompt, - credential = apiKey, - json = json - ) - ) - } - put("model", JsonPrimitive(model.id)) - if (!tools.isNullOrEmpty()) { - put("tools", JsonArray(tools.map { it.toResponsesApiToolJson() })) - } - toolChoice?.toResponsesApiToolChoiceJson()?.let { put("tool_choice", it) } - }.toString() - } - private fun buildCodeCompletionRequestBody( state: CustomServiceCodeCompletionSettingsState, infillRequest: InfillRequest @@ -285,16 +278,16 @@ class CustomOpenAILLMClient( mapOf( "role" to "system", "content" to ( - "You are a code completion assistant. Complete the code between the given prefix and suffix. " + - "Return only the missing code that should be inserted, without any formatting, explanations, or markdown." - ) + "You are a code completion assistant. Complete the code between the given prefix and suffix. " + + "Return only the missing code that should be inserted, without any formatting, explanations, or markdown." + ) ), mapOf( "role" to "user", "content" to ( - "\n${infillRequest.prefix}\n\n\n" + - "\n${infillRequest.suffix}\n\n\nComplete:" - ) + "\n${infillRequest.prefix}\n\n\n" + + "\n${infillRequest.suffix}\n\n\nComplete:" + ) ) ) val transformedBody = state.body.entries.mapNotNull { (key, value) -> @@ -416,283 +409,6 @@ class CustomOpenAILLMClient( ?: replaced } } - - override fun processProviderChatResponse(response: CustomOpenAIChatCompletionResponse): List { - require(response.choices.isNotEmpty()) { "Empty choices in response" } - return response.choices.map { - it.message.toResponses(it.finishReason, createMetaInfo(response.usage)) - } - } - - override fun decodeStreamingResponse(data: String): CustomOpenAIChatCompletionStreamResponse { - val payload = normalizeSsePayload(data) - ?: return CustomOpenAIChatCompletionStreamResponse( - choices = emptyList(), - created = 0, - id = "", - model = "" - ) - if (!isResponsesApi) { - return json.decodeFromString(payload) - } - return adaptResponsesApiStreamEvent(payload) - } - - override fun decodeResponse(data: String): CustomOpenAIChatCompletionResponse { - if (!isResponsesApi) { - return json.decodeFromString(data) - } - return adaptResponsesApiResponse(data) - } - - /** - * Adapts a Responses API SSE event into a [CustomOpenAIChatCompletionStreamResponse]. - * Maps Responses API event types to the chat completions delta format that - * [processStreamingResponse] already knows how to handle. - */ - private fun adaptResponsesApiStreamEvent(data: String): CustomOpenAIChatCompletionStreamResponse { - val event = json.parseToJsonElement(data).jsonObject - val type = event["type"]?.jsonPrimitive?.contentOrNull ?: "" - - val choice = when (type) { - "response.output_text.delta" -> { - val delta = event["delta"]?.jsonPrimitive?.contentOrNull ?: "" - CustomOpenAIStreamChoice( - delta = CustomOpenAIStreamDelta(content = delta) - ) - } - - "response.function_call_arguments.delta", - "response.output_item.added" -> null - - "response.completed" -> { - val responseObject = event["response"]?.jsonObject - val toolCalls = responseObject?.get("output") - ?.jsonArray - ?.mapIndexedNotNull { index, item -> - val itemObject = item.jsonObject - if (itemObject["type"]?.jsonPrimitive?.contentOrNull != "function_call") { - return@mapIndexedNotNull null - } - - val rawArguments = when (val arguments = itemObject["arguments"]) { - is JsonPrimitive -> arguments.contentOrNull - is JsonObject -> arguments.toString() - else -> null - } - CustomOpenAIToolCall( - id = itemObject["call_id"]?.jsonPrimitive?.contentOrNull, - index = index, - function = CustomOpenAIFunction( - name = itemObject["name"]?.jsonPrimitive?.contentOrNull, - arguments = normalizeToolArgumentsJson(rawArguments) ?: rawArguments.orEmpty() - ) - ) - } - .orEmpty() - CustomOpenAIStreamChoice( - finishReason = "stop", - delta = CustomOpenAIStreamDelta( - toolCalls = toolCalls.takeIf { it.isNotEmpty() } - ) - ) - } - - else -> null - } - - return CustomOpenAIChatCompletionStreamResponse( - choices = listOfNotNull(choice), - created = 0, - id = "", - model = "" - ) - } - - /** - * Adapts a non-streaming Responses API response into a [CustomOpenAIChatCompletionResponse]. - * Builds a synthetic chat-completions-format JSON and deserializes it, avoiding direct - * construction of Koog internal types. - */ - private fun adaptResponsesApiResponse(data: String): CustomOpenAIChatCompletionResponse { - val response = json.parseToJsonElement(data).jsonObject - val output = response["output"]?.jsonArray ?: JsonArray(emptyList()) - - val textContent = StringBuilder() - val toolCallsJson = mutableListOf() - - for (item in output) { - val itemObj = item.jsonObject - when (itemObj["type"]?.jsonPrimitive?.contentOrNull) { - "message" -> { - itemObj["content"]?.jsonArray?.forEach { contentPart -> - val partObj = contentPart.jsonObject - if (partObj["type"]?.jsonPrimitive?.contentOrNull == "output_text") { - textContent.append(partObj["text"]?.jsonPrimitive?.contentOrNull ?: "") - } - } - } - - "function_call" -> { - val rawArguments = when (val arguments = itemObj["arguments"]) { - is JsonPrimitive -> arguments.contentOrNull - is JsonObject -> arguments.toString() - else -> null - } - toolCallsJson.add(buildJsonObject { - put("id", itemObj["call_id"] ?: JsonPrimitive("")) - put("type", JsonPrimitive("function")) - putJsonObject("function") { - put("name", itemObj["name"] ?: JsonPrimitive("")) - put( - "arguments", - JsonPrimitive(normalizeToolArgumentsJson(rawArguments) ?: rawArguments ?: "{}") - ) - } - }) - } - } - } - - val syntheticJson = buildJsonObject { - put("id", response["id"] ?: JsonPrimitive("")) - put("created", JsonPrimitive(0)) - put("model", response["model"] ?: JsonPrimitive("")) - put("object", JsonPrimitive("chat.completion")) - putJsonArray("choices") { - addJsonObject { - put("finish_reason", JsonPrimitive("stop")) - putJsonObject("message") { - put("role", JsonPrimitive("assistant")) - if (textContent.isNotEmpty()) { - put("content", JsonPrimitive(textContent.toString())) - } - if (toolCallsJson.isNotEmpty()) { - put("tool_calls", JsonArray(toolCallsJson)) - } - } - } - } - response["usage"]?.let { put("usage", parseResponsesApiUsageJson(it)) } - } - - return json.decodeFromString(syntheticJson.toString()) - } - - /** - * Converts Responses API usage JSON (input_tokens/output_tokens) to - * OpenAI-compatible usage JSON (prompt_tokens/completion_tokens/total_tokens). - */ - private fun parseResponsesApiUsageJson(usageElement: JsonElement): JsonElement { - val usageObj = usageElement.jsonObject - val inputTokens = usageObj["input_tokens"]?.jsonPrimitive?.intOrNull ?: 0 - val outputTokens = usageObj["output_tokens"]?.jsonPrimitive?.intOrNull ?: 0 - return buildJsonObject { - put("prompt_tokens", JsonPrimitive(inputTokens)) - put("completion_tokens", JsonPrimitive(outputTokens)) - put("total_tokens", JsonPrimitive(inputTokens + outputTokens)) - } - } - - override fun processStreamingResponse( - response: Flow - ): Flow = buildStreamFrameFlow { - var finishReason: String? = null - var metaInfo: ResponseMetaInfo? = null - - response.collect { chunk -> - chunk.choices.firstOrNull()?.let { choice -> - choice.delta.content?.let { emitTextDelta(it) } - - choice.delta.toolCalls?.forEach { openAIToolCall -> - val index = openAIToolCall.index ?: 0 - val id = openAIToolCall.id.orEmpty() - val functionName = openAIToolCall.function.name.orEmpty() - val functionArgs = openAIToolCall.function.arguments.orEmpty() - emitToolCallDelta(id, functionName, functionArgs, index) - } - - choice.finishReason?.let { finishReason = it } - } - - chunk.usage?.let { metaInfo = createMetaInfo(it) } - } - - emitEnd(finishReason, metaInfo) - } - - override suspend fun moderate(prompt: Prompt, model: LLModel): ModerationResult { - throw UnsupportedOperationException("Moderation not supported.") - } - - @OptIn(ExperimentalEncodingApi::class) - private fun OpenAIMessage.toResponses( - finishReason: String?, - metaInfo: ResponseMetaInfo - ): List { - val contentText = content?.text() - - if (this is OpenAIMessage.Assistant) { - val assistantToolCalls = toolCalls - if (!assistantToolCalls.isNullOrEmpty()) { - return buildList { - contentText?.let { - add( - Message.Assistant( - content = it, - finishReason = finishReason, - metaInfo = metaInfo - ) - ) - } - - assistantToolCalls.forEach { toolCall -> - val arguments = normalizeToolArgumentsJson(toolCall.function.arguments) ?: "{}" - add( - Message.Tool.Call( - id = toolCall.id, - tool = toolCall.function.name, - content = arguments, - metaInfo = metaInfo - ) - ) - } - } - } - - val reasoning = reasoningContent - if (reasoning != null && contentText != null) { - return listOf( - Message.Reasoning( - content = reasoning, - metaInfo = metaInfo - ), - Message.Assistant( - content = contentText, - finishReason = finishReason, - metaInfo = metaInfo - ) - ) - } - } - - if (contentText != null) { - return listOf( - Message.Assistant( - content = contentText, - finishReason = finishReason, - metaInfo = metaInfo - ) - ) - } - - val exception = LLMClientException( - clientName, - "Unexpected response: no tool calls and no content" - ) - logger.error(exception) { exception.message } - throw exception - } } internal fun buildCustomOpenAIAdditionalProperties( @@ -722,38 +438,22 @@ internal fun transformCustomOpenAIBodyValue( messages: List, credential: String, json: Json -): JsonElement { - return transformCustomOpenAIBodyValue( - key = key, - value = value, - streamRequest = streamRequest, - messagesJson = json.encodeToJsonElement( - ListSerializer(OpenAIMessage.serializer()), - messages - ), - prompt = renderCustomOpenAIPrompt(messages, json), - credential = credential, - json = json - ) -} - -private fun transformCustomOpenAIBodyValue( - key: String?, - value: Any?, - streamRequest: Boolean, - messagesJson: JsonElement, - prompt: String, - credential: String, - json: Json ): JsonElement { return when (value) { null -> JsonNull is JsonElement -> value is String -> when { !streamRequest && key == "stream" -> JsonPrimitive(false) - CustomServicePlaceholders.isMessages(value) -> messagesJson + CustomServicePlaceholders.isMessages(value) -> json.parseToJsonElement( + json.encodeToString(ListSerializer(OpenAIMessage.serializer()), messages) + ) - CustomServicePlaceholders.isPrompt(value) -> JsonPrimitive(prompt) + CustomServicePlaceholders.isPrompt(value) -> JsonPrimitive( + renderCustomOpenAIPrompt( + messages, + json + ) + ) value.contains($$"$CUSTOM_SERVICE_API_KEY") -> { JsonPrimitive(value.replace($$"$CUSTOM_SERVICE_API_KEY", credential)) @@ -772,8 +472,7 @@ private fun transformCustomOpenAIBodyValue( key = nestedKey.toString(), value = nestedValue, streamRequest = streamRequest, - messagesJson = messagesJson, - prompt = prompt, + messages = messages, credential = credential, json = json ) @@ -786,8 +485,7 @@ private fun transformCustomOpenAIBodyValue( key = null, value = item, streamRequest = streamRequest, - messagesJson = messagesJson, - prompt = prompt, + messages = messages, credential = credential, json = json ) @@ -800,8 +498,7 @@ private fun transformCustomOpenAIBodyValue( key = null, value = item, streamRequest = streamRequest, - messagesJson = messagesJson, - prompt = prompt, + messages = messages, credential = credential, json = json ) @@ -819,172 +516,48 @@ internal fun renderCustomOpenAIPrompt(messages: List, json: Json) } } -private fun List.toResponsesApiItemsJson(): JsonArray { - return JsonArray( - buildList { - for (message in this@toResponsesApiItemsJson) { - addAll(message.toResponsesApiItemsJson()) - } - } - ) +private val customOpenAIRequestTransformJson = Json { + ignoreUnknownKeys = true + isLenient = true + explicitNulls = false } -private fun OpenAIMessage.toResponsesApiItemsJson(): List { - return when (this) { - is OpenAIMessage.System, is OpenAIMessage.Developer -> listOf( - buildResponsesApiInputMessageItemJson( - role = "developer", - content = content.toResponsesApiInputContentJson() - ) +private val FlattenCustomOpenAIAdditionalPropertiesPlugin = createClientPlugin( + "FlattenCustomOpenAIAdditionalProperties" +) { + transformRequestBody { _, content, _ -> + val requestBody = content as? String ?: return@transformRequestBody null + val flattened = flattenSerializedAdditionalProperties( + requestBody, + customOpenAIRequestTransformJson ) - - is OpenAIMessage.User -> listOf( - buildResponsesApiInputMessageItemJson( - role = "user", - content = content.toResponsesApiInputContentJson() - ) - ) - - is OpenAIMessage.Assistant -> buildList { - reasoningContent - ?.takeIf { it.isNotBlank() } - ?.let { reasoning -> - add( - buildJsonObject { - put("type", JsonPrimitive("reasoning")) - put("id", JsonPrimitive(UUID.randomUUID().toString())) - putJsonArray("summary") { - addJsonObject { - put("type", JsonPrimitive("summary_text")) - put("text", JsonPrimitive(reasoning)) - } - } - } - ) - } - - content?.text() - ?.takeIf { it.isNotBlank() } - ?.let { assistantText -> - add( - buildJsonObject { - put("type", JsonPrimitive("message")) - put("role", JsonPrimitive("assistant")) - putJsonArray("content") { - addJsonObject { - put("type", JsonPrimitive("output_text")) - put("text", JsonPrimitive(assistantText)) - put("annotations", JsonArray(emptyList())) - } - } - } - ) - } - - toolCalls.orEmpty().forEach { toolCall -> - val arguments = normalizeToolArgumentsJson(toolCall.function.arguments) ?: "{}" - add( - buildJsonObject { - put("type", JsonPrimitive("function_call")) - put("arguments", JsonPrimitive(arguments)) - put("call_id", JsonPrimitive(toolCall.id)) - put("name", JsonPrimitive(toolCall.function.name)) - } - ) - } - } - - is OpenAIMessage.Tool -> listOf( - buildJsonObject { - put("type", JsonPrimitive("function_call_output")) - put("call_id", JsonPrimitive(toolCallId)) - put("output", JsonPrimitive(content?.text().orEmpty())) - } - ) - } -} - -private fun buildResponsesApiInputMessageItemJson( - role: String, - content: JsonArray -): JsonObject { - return buildJsonObject { - put("type", JsonPrimitive("message")) - put("role", JsonPrimitive(role)) - put("content", content) - } -} - -private fun Content?.toResponsesApiInputContentJson(): JsonArray { - if (this == null) { - return JsonArray(emptyList()) - } - - return when (this) { - is Content.Parts -> JsonArray(value.mapNotNull { it.toResponsesApiInputContentJson() }) - else -> JsonArray( - listOf( - buildJsonObject { - put("type", JsonPrimitive("input_text")) - put("text", JsonPrimitive(text())) - } - ) - ) - } -} - -private fun OpenAIContentPart.toResponsesApiInputContentJson(): JsonObject? { - return when (this) { - is OpenAIContentPart.Text -> buildJsonObject { - put("type", JsonPrimitive("input_text")) - put("text", JsonPrimitive(text)) - } - - is OpenAIContentPart.Image -> buildJsonObject { - put("type", JsonPrimitive("input_image")) - imageUrl.detail?.let { put("detail", JsonPrimitive(it)) } - put("imageUrl", JsonPrimitive(imageUrl.url)) - } - - is OpenAIContentPart.File -> buildJsonObject { - put("type", JsonPrimitive("input_file")) - file.fileData?.let { put("fileData", JsonPrimitive(it)) } - file.fileId?.let { put("fileId", JsonPrimitive(it)) } - file.filename?.let { put("filename", JsonPrimitive(it)) } - } - - else -> null - } -} - -internal fun OpenAITool.toResponsesApiToolJson(): JsonObject { - return buildJsonObject { - put("type", JsonPrimitive("function")) - put("name", JsonPrimitive(function.name)) - put( - "parameters", - function.parameters ?: buildJsonObject { - put("type", JsonPrimitive("object")) - putJsonObject("properties") {} - putJsonArray("required") {} - } - ) - function.strict?.let { put("strict", JsonPrimitive(it)) } - function.description?.takeIf { it.isNotBlank() }?.let { - put("description", JsonPrimitive(it)) + if (flattened == requestBody) { + null + } else { + TextContent(flattened, ContentType.Application.Json) } } } -internal fun OpenAIToolChoice.toResponsesApiToolChoiceJson(): JsonElement { - return when (this) { - is OpenAIToolChoice.Function -> buildJsonObject { - put("type", JsonPrimitive("function")) - put("name", JsonPrimitive(function.name)) - } +internal fun flattenSerializedAdditionalProperties( + requestBody: String, + json: Json +): String { + val payload = runCatching { json.parseToJsonElement(requestBody) }.getOrNull()?.jsonObject + ?: return requestBody + val additionalProperties = payload["additional_properties"] as? JsonObject + ?: return requestBody - is OpenAIToolChoice.Mode -> JsonPrimitive(value) + val flattened = buildJsonObject { + payload.entries + .filterNot { (key, _) -> key == "additional_properties" } + .forEach { (key, value) -> put(key, value) } + additionalProperties.entries + .filterNot { (key, _) -> payload.containsKey(key) } + .forEach { (key, value) -> put(key, value) } } + + return flattened.toString() } internal object CustomOpenAIChatCompletionRequestSerializer : diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIParams.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIParams.kt index 76f38866..0df5894b 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIParams.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIParams.kt @@ -1,5 +1,9 @@ package ee.carlrobert.codegpt.agent.clients +import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams +import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort +import ai.koog.prompt.executor.clients.openai.models.ReasoningConfig +import ai.koog.prompt.executor.clients.openai.models.ReasoningSummary import ai.koog.prompt.params.LLMParams import ai.koog.prompt.params.LLMParams.ToolChoice import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState @@ -19,7 +23,6 @@ internal fun LLMParams.toCustomOpenAIParams(state: CustomServiceChatCompletionSe additionalProperties = mergedAdditionalProperties, temperature = body.findValue("temperature").asDouble() ?: temperature, maxTokens = body.findValue("maxTokens", "max_tokens").asInt() ?: maxTokens, - speculation = body.findValue("speculation").asString() ?: speculation, toolChoice = body.findValue("toolChoice", "tool_choice").asToolChoice() ?: toolChoice, frequencyPenalty = body.findValue("frequencyPenalty", "frequency_penalty").asDouble() ?: current?.frequencyPenalty, @@ -33,6 +36,37 @@ internal fun LLMParams.toCustomOpenAIParams(state: CustomServiceChatCompletionSe ) } +internal fun LLMParams.toCustomOpenAIResponsesParams( + state: CustomServiceChatCompletionSettingsState +): OpenAIResponsesParams { + val body = state.body + val currentReasoning = (this as? OpenAIResponsesParams)?.reasoning + val bodyReasoning = body.findValue("reasoning").asReasoningConfig() + + val mergedAdditionalProperties = buildMap { + body + .filterKeys { it !in CUSTOM_OPENAI_RESPONSES_RESERVED_BODY_KEYS } + .forEach { (key, value) -> put(key, value.toJsonElement()) } + }.takeIf { it.isNotEmpty() } + + return OpenAIResponsesParams( + additionalProperties = mergedAdditionalProperties, + temperature = body.findValue("temperature").asDouble() ?: temperature, + maxTokens = body.findValue("maxOutputTokens", "max_output_tokens").asInt() ?: maxTokens, + numberOfChoices = numberOfChoices, + reasoning = when { + bodyReasoning == null -> currentReasoning + currentReasoning == null -> bodyReasoning + else -> ReasoningConfig( + effort = bodyReasoning.effort ?: currentReasoning.effort, + summary = bodyReasoning.summary ?: currentReasoning.summary + ) + }, + schema = schema, + toolChoice = body.findValue("toolChoice", "tool_choice").asToolChoice() ?: toolChoice, + ) +} + internal fun CustomServiceChatCompletionSettingsState.shouldStream(): Boolean { return body.findValue("stream").asBoolean() == true } @@ -53,6 +87,18 @@ internal val CUSTOM_OPENAI_RESERVED_BODY_KEYS = setOf( "topP", "top_p", ) +internal val CUSTOM_OPENAI_RESPONSES_RESERVED_BODY_KEYS = setOf( + "input", + "maxOutputTokens", "max_output_tokens", + "messages", + "model", + "reasoning", + "stream", + "temperature", + "toolChoice", "tool_choice", + "tools", +) + private fun Map.findValue(vararg keys: String): Any? { keys.forEach { key -> if (containsKey(key)) { @@ -95,23 +141,6 @@ private fun Any?.asBoolean(): Boolean? = when (this) { else -> null } -private fun Any?.asJsonElementMap(): Map? { - if (this !is Map<*, *>) return null - - return buildMap { - this@asJsonElementMap.forEach { (key, value) -> - key?.toString() - ?.takeIf { it.isNotBlank() } - ?.let { put(it, value.toJsonElement()) } - } - } -} - -private fun Any?.asString(): String? = when (this) { - is String -> this - else -> null -} - private fun Any?.asStopList(): List? = asStringList(allowEmpty = false) private fun Any?.asStringList(allowEmpty: Boolean = false): List? = when (this) { @@ -155,6 +184,47 @@ private fun Any?.asToolChoice(): ToolChoice? = when (this) { else -> null } +private fun Any?.asReasoningConfig(): ReasoningConfig? { + val value = this as? Map<*, *> ?: return null + val effort = value["effort"].asReasoningEffort() + val summary = value["summary"].asReasoningSummary() + + if (effort == null && summary == null) { + return null + } + + return ReasoningConfig( + effort = effort, + summary = summary + ) +} + +private fun Any?.asReasoningEffort(): ReasoningEffort? = when (this) { + is ReasoningEffort -> this + is String -> when (trim().lowercase()) { + "none" -> ReasoningEffort.NONE + "minimal" -> ReasoningEffort.MINIMAL + "low" -> ReasoningEffort.LOW + "medium" -> ReasoningEffort.MEDIUM + "high" -> ReasoningEffort.HIGH + else -> null + } + + else -> null +} + +private fun Any?.asReasoningSummary(): ReasoningSummary? = when (this) { + is ReasoningSummary -> this + is String -> when (trim().lowercase()) { + "auto" -> ReasoningSummary.AUTO + "concise" -> ReasoningSummary.CONCISE + "detailed" -> ReasoningSummary.DETAILED + else -> null + } + + else -> null +} + private fun Any?.toJsonElement(): JsonElement = when (this) { null -> JsonNull is JsonElement -> this diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/ProxyAILLMClient.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/ProxyAILLMClient.kt index 1e5837c1..4b27250e 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/ProxyAILLMClient.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/ProxyAILLMClient.kt @@ -180,11 +180,8 @@ public class ProxyAILLMClient( } } - override fun decodeStreamingResponse(data: String): ProxyAIChatCompletionStreamResponse { - val payload = normalizeSsePayload(data) - ?: return ProxyAIChatCompletionStreamResponse() - return json.decodeFromString(payload) - } + override fun decodeStreamingResponse(data: String): ProxyAIChatCompletionStreamResponse = + json.decodeFromString(data) override fun decodeResponse(data: String): ProxyAIChatCompletionResponse = json.decodeFromString(data) diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/RetryingPromptExecutor.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/RetryingPromptExecutor.kt index 0c8f305c..8cb4a608 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/RetryingPromptExecutor.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/RetryingPromptExecutor.kt @@ -5,23 +5,13 @@ import ai.koog.http.client.KoogHttpClientException import ai.koog.prompt.dsl.ModerationResult import ai.koog.prompt.dsl.Prompt import ai.koog.prompt.executor.clients.LLMClient -import ai.koog.prompt.executor.clients.anthropic.AnthropicParams -import ai.koog.prompt.executor.clients.anthropic.models.AnthropicThinking -import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams -import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort -import ai.koog.prompt.executor.clients.openai.models.ReasoningConfig -import ai.koog.prompt.executor.clients.openai.models.ReasoningSummary import ai.koog.prompt.executor.llms.SingleLLMPromptExecutor import ai.koog.prompt.executor.model.PromptExecutor -import ai.koog.prompt.llm.LLMProvider import ai.koog.prompt.llm.LLModel import ai.koog.prompt.message.Message -import ai.koog.prompt.params.LLMParams import ai.koog.prompt.streaming.StreamFrame import ee.carlrobert.codegpt.agent.AgentEvents import io.github.oshai.kotlinlogging.KotlinLogging -import java.io.IOException -import java.net.SocketTimeoutException import kotlinx.coroutines.CancellationException import kotlinx.coroutines.TimeoutCancellationException import kotlinx.coroutines.delay @@ -29,6 +19,8 @@ import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.catch import kotlinx.coroutines.flow.flow import kotlinx.coroutines.flow.onEach +import java.io.IOException +import java.net.SocketTimeoutException import kotlin.math.pow import kotlin.random.Random import kotlin.time.Duration @@ -44,8 +36,6 @@ class RetryingPromptExecutor( ) : PromptExecutor { companion object { - private const val ANTHROPIC_MIN_THINKING_BUDGET = 1_024 - private const val ANTHROPIC_DEFAULT_THINKING_BUDGET = 2_048 private val logger = KotlinLogging.logger { } private val RETRYABLE_HTTP_STATUS_CODES = setOf(408, 409, 425, 429, 500, 502, 503, 504) @@ -88,7 +78,7 @@ class RetryingPromptExecutor( private fun Throwable.hasTimeoutMessage(): Boolean { val message = message ?: return false return message.contains("timed out", ignoreCase = true) - || message.contains("timeout", ignoreCase = true) + || message.contains("timeout", ignoreCase = true) } } @@ -100,13 +90,12 @@ class RetryingPromptExecutor( model: LLModel, tools: List ): Flow { - val refinedPrompt = prompt.withReasoningParams(model) var hasReceivedData: Boolean fun createStream(attemptNum: Int): Flow { hasReceivedData = false - return delegate.executeStreaming(refinedPrompt, model, tools) + return delegate.executeStreaming(prompt, model, tools) .onEach { hasReceivedData = true } .catch { error -> val retryable = isRetryableFailure(error) @@ -187,14 +176,13 @@ class RetryingPromptExecutor( model: LLModel, tools: List ): List { - val promptWithReasoning = prompt.withReasoningParams(model) var attempt = 1 var delay = retryPolicy.initialDelay var lastError: Throwable? = null while (attempt <= retryPolicy.maxAttempts) { try { - return delegate.execute(promptWithReasoning, model, tools) + return delegate.execute(prompt, model, tools) } catch (t: Throwable) { lastError = t val retryable = isRetryableFailure(t) @@ -218,66 +206,4 @@ class RetryingPromptExecutor( override fun close() { (delegate as? AutoCloseable)?.close() } - - private fun Prompt.withReasoningParams(model: LLModel): Prompt { - val params = when (model.provider) { - LLMProvider.OpenAI -> params.withOpenAIReasoning() - LLMProvider.Anthropic -> params.withAnthropicReasoning() - else -> params - } - return withParams(params) - } - - private fun LLMParams.withOpenAIReasoning(): LLMParams { - val base = when (this) { - is OpenAIResponsesParams -> this - else -> OpenAIResponsesParams( - temperature = temperature, - maxTokens = maxTokens, - numberOfChoices = numberOfChoices, - speculation = speculation, - schema = schema, - toolChoice = toolChoice, - user = user, - additionalProperties = additionalProperties - ) - } - - val reasoning = base.reasoning ?: ReasoningConfig( - effort = ReasoningEffort.MEDIUM, - summary = ReasoningSummary.AUTO - ) - return base.copy(reasoning = reasoning) - } - - private fun LLMParams.withAnthropicReasoning(): LLMParams { - val base = when (this) { - is AnthropicParams -> this - else -> AnthropicParams( - temperature = temperature, - maxTokens = maxTokens, - numberOfChoices = numberOfChoices, - speculation = speculation, - schema = schema, - toolChoice = toolChoice, - user = user, - additionalProperties = additionalProperties - ) - } - - if (base.thinking != null) return base - - val thinkingBudget = resolveAnthropicThinkingBudget(base.maxTokens) ?: return base - return base.copy(thinking = AnthropicThinking.Enabled(budgetTokens = thinkingBudget)) - } - - private fun resolveAnthropicThinkingBudget(maxTokens: Int?): Int? { - val limit = maxTokens ?: ANTHROPIC_DEFAULT_THINKING_BUDGET - if (limit <= ANTHROPIC_MIN_THINKING_BUDGET) { - return null - } - return (limit / 2) - .coerceAtLeast(ANTHROPIC_MIN_THINKING_BUDGET) - .coerceAtMost(ANTHROPIC_DEFAULT_THINKING_BUDGET) - } } diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizer.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizer.kt deleted file mode 100644 index 4ffd1216..00000000 --- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizer.kt +++ /dev/null @@ -1,31 +0,0 @@ -package ee.carlrobert.codegpt.agent.clients - -internal fun normalizeSsePayload(rawData: String): String? { - val normalized = rawData - .replace("\r\n", "\n") - .replace('\r', '\n') - .trim() - if (normalized.isEmpty()) { - return null - } - - val dataLines = normalized.lineSequence() - .mapNotNull { line -> - val markerIndex = line.indexOf("data:") - if (markerIndex >= 0) { - line.substring(markerIndex + "data:".length).trimStart() - } else { - null - } - } - .filter { it.isNotEmpty() } - .toList() - - val payload = if (dataLines.isNotEmpty()) { - dataLines.joinToString("\n") - } else { - normalized - } - - return payload.takeUnless { it.isBlank() || it == "[DONE]" } -} diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/strategy/SingleRunStrategyProvider.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/strategy/SingleRunStrategyProvider.kt index 0d50488a..476b7375 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/agent/strategy/SingleRunStrategyProvider.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/strategy/SingleRunStrategyProvider.kt @@ -200,7 +200,7 @@ private suspend fun AIAgentLLMWriteSession.requestResponses( } val appendableResponses = appendableResponses(responses, model.provider) appendableResponses.forEach(appendResponse) - return if (stream) responses else appendableResponses + return responses } private suspend fun AIAgentLLMWriteSession.requestAndPublish( diff --git a/src/main/kotlin/ee/carlrobert/codegpt/completions/AgentCompletionRunner.kt b/src/main/kotlin/ee/carlrobert/codegpt/completions/AgentCompletionRunner.kt index 0c6902cb..2e8ce1bf 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/completions/AgentCompletionRunner.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/completions/AgentCompletionRunner.kt @@ -8,19 +8,21 @@ import ai.koog.agents.features.eventHandler.feature.handleEvents import ai.koog.agents.features.tokenizer.feature.MessageTokenizer import ai.koog.prompt.dsl.prompt import ai.koog.prompt.tokenizer.Tokenizer +import com.intellij.openapi.components.service import com.intellij.openapi.project.Project import ee.carlrobert.codegpt.EncodingManager import ee.carlrobert.codegpt.agent.AgentEvents import ee.carlrobert.codegpt.agent.MessageWithContext import ee.carlrobert.codegpt.agent.clients.shouldStream -import ee.carlrobert.codegpt.agent.clients.shouldStreamCustomOpenAI import ee.carlrobert.codegpt.agent.strategy.CODE_AGENT_COMPRESSION import ee.carlrobert.codegpt.agent.strategy.HistoryCompressionConfig import ee.carlrobert.codegpt.agent.strategy.SingleRunStrategyProvider import ee.carlrobert.codegpt.mcp.McpTool import ee.carlrobert.codegpt.mcp.McpToolAliasResolver import ee.carlrobert.codegpt.mcp.McpToolCallHandler +import ee.carlrobert.codegpt.settings.models.ModelSettings import ee.carlrobert.codegpt.settings.service.ServiceType +import ee.carlrobert.codegpt.settings.service.custom.CustomServicesSettings import ee.carlrobert.codegpt.util.ReasoningFrameTextAdapter import kotlinx.coroutines.* import java.util.* @@ -49,7 +51,7 @@ internal object AgentCompletionRunner : CompletionRunner { val jobRef = AtomicReference() val messageBuilder = StringBuilder() val project = request.callParameters.project!! - val stream = shouldStreamAgentToolLoop(request) + val stream = shouldStreamAgentToolLoop(project, request) val toolCallHandler = project.let { McpToolCallHandler.getInstance(it) } val toolRegistry = createChatToolRegistry( callParameters = request.callParameters, @@ -174,13 +176,23 @@ internal object AgentCompletionRunner : CompletionRunner { } internal fun shouldStreamAgentToolLoop( + project: Project, request: CompletionRunnerRequest.Chat, ): Boolean { val provider = request.serviceType return when (provider) { - ServiceType.CUSTOM_OPENAI -> shouldStreamCustomOpenAI( - request.callParameters.featureType - ) + ServiceType.CUSTOM_OPENAI -> { + val selectedServiceId = service() + .getModelSelectionForFeature(request.callParameters.featureType) + .serviceId + service() + .state.services + .firstOrNull { it.id == selectedServiceId } + ?.chatCompletionSettings + ?.shouldStream() + ?: false + } + ServiceType.GOOGLE -> false else -> true } diff --git a/src/main/kotlin/ee/carlrobert/codegpt/completions/CompletionRequestService.kt b/src/main/kotlin/ee/carlrobert/codegpt/completions/CompletionRequestService.kt index c8149cd3..53385251 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/completions/CompletionRequestService.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/completions/CompletionRequestService.kt @@ -3,19 +3,24 @@ package ee.carlrobert.codegpt.completions import ai.koog.agents.core.tools.ToolDescriptor import ai.koog.prompt.dsl.Prompt import ai.koog.prompt.dsl.prompt -import ai.koog.prompt.executor.model.PromptExecutor +import ai.koog.prompt.llm.LLMCapability import ai.koog.prompt.llm.LLModel import ee.carlrobert.codegpt.agent.AgentFactory import ee.carlrobert.codegpt.agent.clients.CustomOpenAILLMClient import ee.carlrobert.codegpt.agent.clients.HttpClientProvider -import ee.carlrobert.codegpt.agent.clients.RetryingPromptExecutor +import ee.carlrobert.codegpt.completions.factory.ResponsesApiUtil import ee.carlrobert.codegpt.settings.models.ModelSelection import ee.carlrobert.codegpt.settings.service.FeatureType import ee.carlrobert.codegpt.settings.service.ServiceType import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState +import kotlinx.coroutines.CancellationException +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.SupervisorJob +import kotlinx.coroutines.cancel +import kotlinx.coroutines.launch import kotlinx.coroutines.runBlocking import javax.swing.JPanel -import kotlin.time.Duration.Companion.seconds object CompletionRequestService { @@ -84,34 +89,53 @@ object CompletionRequestService { modelId: String?, eventListener: CompletionStreamEventListener ): CancellableRequest { - val client = CustomOpenAILLMClient.fromSettingsState( - apiKey.orEmpty(), - settings, - HttpClientProvider.createHttpClient() - ) - val retryPolicy = RetryingPromptExecutor.RetryPolicy( - maxAttempts = 2, - initialDelay = 1.seconds, - maxDelay = 4.seconds, - backoffMultiplier = 2.0, - jitterFactor = 0.1 - ) - val request = CompletionRunnerRequest.Streaming( - executor = RetryingPromptExecutor.fromClient(client, retryPolicy, null), - model = LLModel( - id = modelId?.takeIf { it.isNotBlank() } ?: "gpt-4.1-mini", - provider = CustomOpenAILLMClient.CustomOpenAI, - capabilities = emptyList(), - contextLength = 128_000, - maxOutputTokens = 4_096 + val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO) + val model = LLModel( + id = modelId?.takeIf { it.isNotBlank() } ?: "gpt-4.1-mini", + provider = CustomOpenAILLMClient.CustomOpenAI, + capabilities = listOf( + if (ResponsesApiUtil.isResponsesApiUrl(settings.url)) { + LLMCapability.OpenAIEndpoint.Responses + } else { + LLMCapability.OpenAIEndpoint.Completions + } ), - prompt = prompt("custom-service-test-connection") { - user("Test connection") - }, - eventListener = eventListener, - mode = StreamingMode.SINGLE_RESPONSE, - cancellationResultBuilder = { StringBuilder() } + contextLength = 128_000, + maxOutputTokens = 4_096 ) - return CompletionRunnerFactory.create(request).run(request) + val testPrompt = prompt("custom-service-test-connection") { + user("Test connection") + } + + val job = scope.launch { + val client = CustomOpenAILLMClient.fromSettingsState( + apiKey.orEmpty(), + settings, + HttpClientProvider.createHttpClient() + ) + val messageBuilder = StringBuilder() + eventListener.onOpen() + try { + val responses = client.execute(testPrompt, model, emptyList()) + val text = CompletionTextExtractor.extract(responses) + if (text.isNotBlank()) { + messageBuilder.append(text) + eventListener.onMessage(text) + } + eventListener.onComplete(StringBuilder(messageBuilder)) + } catch (_: CancellationException) { + eventListener.onCancelled(StringBuilder(messageBuilder)) + } catch (exception: Throwable) { + eventListener.onError( + CompletionError(exception.message ?: "Failed to complete request"), + exception + ) + } finally { + runCatching { client.close() } + scope.cancel() + } + } + + return CancellableRequest { job.cancel() } } } diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/models/ModelProvider.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/models/ModelProvider.kt index 8ca68250..3ee72c99 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/settings/models/ModelProvider.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/models/ModelProvider.kt @@ -24,6 +24,7 @@ import ee.carlrobert.codegpt.settings.service.ServiceType import ee.carlrobert.codegpt.settings.service.custom.CustomServiceSettingsState import ee.carlrobert.codegpt.settings.service.custom.CustomServicesSettings import ee.carlrobert.codegpt.settings.service.ollama.OllamaSettings +import java.net.URI import javax.swing.Icon interface ModelProvider { @@ -703,6 +704,16 @@ private class CustomOpenAIModelProvider : ModelProvider { FeatureType.LOOKUP, ) + private fun isResponsesApiUrl(url: String?): Boolean { + if (url.isNullOrBlank()) return false + return try { + val path = URI(url.trim()).path?.trimEnd('/') ?: return false + path.endsWith("/responses") + } catch (_: Exception) { + false + } + } + private fun buildModels( modelExtractor: (CustomServiceSettingsState) -> String? ): List { @@ -713,13 +724,30 @@ private class CustomOpenAIModelProvider : ModelProvider { val modelName = modelExtractor(svc)?.takeIf { it.isNotBlank() } ?: return@mapNotNull null val displayName = formatCustomModelDisplayName(serviceName, modelName) + val additionalOpenAICapability = + if (isResponsesApiUrl(svc.chatCompletionSettings.url)) { + LLMCapability.OpenAIEndpoint.Responses + } else { + LLMCapability.OpenAIEndpoint.Completions + } ModelSelection( provider = serviceType, llmModel = virtualModel( CustomOpenAILLMClient.CustomOpenAI, modelName, - OPENAI_CAPABILITIES + listOf( + LLMCapability.Temperature, + LLMCapability.Schema.JSON.Basic, + LLMCapability.Schema.JSON.Standard, + LLMCapability.Speculation, + LLMCapability.Tools, + LLMCapability.ToolChoice, + LLMCapability.Vision.Image, + LLMCapability.Document, + LLMCapability.Completion, + LLMCapability.MultipleChoices, + ) + additionalOpenAICapability ), displayName = displayName, serviceId = serviceId, diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/form/CustomServiceForm.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/form/CustomServiceForm.kt index f809f3aa..9e7c6f43 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/form/CustomServiceForm.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/form/CustomServiceForm.kt @@ -164,18 +164,14 @@ class CustomServiceForm( headers = template.chatCompletionTemplate.headers body = template.chatCompletionTemplate.body } - if (template.codeCompletionTemplate != null) { + template.codeCompletionTemplate?.let { codeCompletionsForm.run { - url = template.codeCompletionTemplate.url - headers = template.codeCompletionTemplate.headers - body = template.codeCompletionTemplate.body + url = it.url + headers = it.headers + body = it.body parseResponseAsChatCompletions = - template.codeCompletionTemplate.parseResponseAsChatCompletions + it.parseResponseAsChatCompletions } - tabbedPane.setEnabledAt(1, true) - } else { - tabbedPane.selectedIndex = 0 - tabbedPane.setEnabledAt(1, false) } } } diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceChatCompletionTemplate.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceChatCompletionTemplate.kt index 2e639a78..e0f38dae 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceChatCompletionTemplate.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceChatCompletionTemplate.kt @@ -5,127 +5,109 @@ enum class CustomServiceChatCompletionTemplate( val headers: MutableMap, val body: MutableMap ) { - ANYSCALE( - "https://api.endpoints.anyscale.com/v1/chat/completions", - getDefaultHeadersWithAuthentication(), - getDefaultBodyParams( - mapOf( - "model" to "mistralai/Mixtral-8x7B-Instruct-v0.1", - "max_tokens" to 8192 - ) - ) - ), AZURE( - "https://{your-resource-name}.openai.azure.com/openai/deployments/{deployment-id}/chat/completions?api-version=2023-05-15", - getDefaultHeaders("api-key", "\$CUSTOM_SERVICE_API_KEY"), - getDefaultBodyParams(emptyMap()) + "https://{your-resource-name}.openai.azure.com/openai/deployments/{deployment-id}/chat/completions?api-version=2024-10-21", + getDefaultHeaders("api-key", $$"$CUSTOM_SERVICE_API_KEY"), + mutableMapOf( + "stream" to true, + ) ), DEEP_INFRA( "https://api.deepinfra.com/v1/openai/chat/completions", getDefaultHeadersWithAuthentication(), - getDefaultBodyParams( - mapOf( - "model" to "meta-llama/Llama-2-70b-chat-hf", - "max_tokens" to 8192 - ) + mutableMapOf( + "stream" to true, + "model" to "deepseek-ai/DeepSeek-V3.2", + "max_tokens" to 32_000 ) ), FIREWORKS( "https://api.fireworks.ai/inference/v1/chat/completions", getDefaultHeadersWithAuthentication(), - getDefaultBodyParams( - mapOf( - "model" to "accounts/fireworks/models/deepseek-r1-basic", - "max_tokens" to 8192 - ) + mutableMapOf( + "stream" to true, + "model" to "accounts/fireworks/models/deepseek-v3p1", + "max_tokens" to 32_000 ) ), GROQ( "https://api.groq.com/openai/v1/chat/completions", getDefaultHeadersWithAuthentication(), - getDefaultBodyParams( - mapOf( - "model" to "codellama-34b", - "max_tokens" to 8192 - ) + mutableMapOf( + "stream" to true, + "model" to "openai/gpt-oss-20b", + "max_tokens" to 32_000 ) ), OPENAI( "https://api.openai.com/v1/chat/completions", - getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"), - getDefaultBodyParams( - mapOf( - "model" to "gpt-5", - "max_tokens" to 8192 - ) - ) - ), - PERPLEXITY( - "https://api.perplexity.ai/chat/completions", - getDefaultHeadersWithAuthentication(), - getDefaultBodyParams( - mapOf( - "model" to "codellama", - "max_tokens" to 8192 - ) - ) - ), - TOGETHER( - "https://api.together.xyz/v1/chat/completions", - getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"), - getDefaultBodyParams( - mapOf( - "model" to "deepseek-ai/deepseek-coder-33b-instruct", - "max_tokens" to 8192 - ) - ) - ), - OLLAMA( - "http://localhost:11434/v1/chat/completions", - getDefaultHeaders(), - getDefaultBodyParams(mapOf("model" to "codellama")) - ), - LLAMA_CPP( - "http://localhost:8080/v1/chat/completions", - getDefaultHeaders(), - getDefaultBodyParams(emptyMap()) - ), - MISTRAL_AI( - "https://api.mistral.ai/v1/chat/completions", - getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"), - getDefaultBodyParams( - mapOf( - "model" to "open-mistral-7b", - "max_tokens" to 8192 - ) - ) - ), - OPEN_ROUTER( - "https://openrouter.ai/api/v1/chat/completions", - getDefaultHeaders( - mapOf( - "Authorization" to "Bearer \$CUSTOM_SERVICE_API_KEY", - "HTTP-Referer" to "https://tryproxy.io", - "X-Title" to "ProxyAI" - ) - ), - getDefaultBodyParams( - mapOf( - "model" to "meta-llama/llama-3.1-8b-instruct:free", - "max_tokens" to 8192 - ) + getDefaultHeaders("Authorization", $$"Bearer $CUSTOM_SERVICE_API_KEY"), + mutableMapOf( + "stream" to true, + "model" to "gpt-4.1", + "max_tokens" to 8192 ) ), OPENAI_RESPONSES( "https://api.openai.com/v1/responses", getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"), - getResponsesApiDefaultBodyParams( - mapOf( - "model" to "gpt-4.1", - "max_output_tokens" to 8192 - ) + mutableMapOf( + "stream" to true, + "model" to "gpt-5.3-codex", + "max_output_tokens" to 32_000 ) - ); + ), + TOGETHER( + "https://api.together.xyz/v1/chat/completions", + getDefaultHeaders("Authorization", $$"Bearer $CUSTOM_SERVICE_API_KEY"), + mutableMapOf( + "stream" to true, + "model" to "zai-org/GLM-5", + "max_tokens" to 32_000 + ) + ), + OLLAMA( + "http://localhost:11434/v1/chat/completions", + getDefaultHeaders(), + mutableMapOf( + "stream" to true, + "model" to "gpt-oss:20b", + "max_tokens" to 32_000 + ) + ), + LLAMA_CPP( + "http://localhost:8080/v1/chat/completions", + getDefaultHeaders(), + mutableMapOf( + "stream" to true, + "model" to "gpt-oss:20b", + "max_tokens" to 32_000 + ) + ), + MISTRAL_AI( + "https://api.mistral.ai/v1/chat/completions", + getDefaultHeaders("Authorization", $$"Bearer $CUSTOM_SERVICE_API_KEY"), + mutableMapOf( + "stream" to true, + "model" to "mistral-large-2512", + "max_tokens" to 32_000 + ) + ), + OPENROUTER( + "https://openrouter.ai/api/v1/chat/completions", + getDefaultHeaders( + mapOf( + "Authorization" to $$"Bearer $CUSTOM_SERVICE_API_KEY", + "HTTP-Referer" to "https://tryproxy.io", + "X-OpenRouter-Title" to "ProxyAI" + ) + ), + mutableMapOf( + "stream" to true, + "model" to "moonshotai/kimi-k2.5", + "max_tokens" to 8192 + ) + ), } private fun getDefaultHeadersWithAuthentication(): MutableMap { @@ -148,23 +130,3 @@ private fun getDefaultHeaders(additionalHeaders: Map): MutableMa defaultHeaders.putAll(additionalHeaders) return defaultHeaders } - -private fun getDefaultBodyParams(additionalParams: Map): MutableMap { - val defaultParams = mutableMapOf( - "stream" to true, - "messages" to "\$OPENAI_MESSAGES", - "temperature" to 0.1 - ) - defaultParams.putAll(additionalParams) - return defaultParams -} - -private fun getResponsesApiDefaultBodyParams(additionalParams: Map): MutableMap { - val defaultParams = mutableMapOf( - "stream" to true, - "input" to "\$OPENAI_MESSAGES", - "temperature" to 0.1 - ) - defaultParams.putAll(additionalParams) - return defaultParams -} diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceCodeCompletionTemplate.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceCodeCompletionTemplate.kt index 327e4090..7a10ba00 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceCodeCompletionTemplate.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceCodeCompletionTemplate.kt @@ -7,24 +7,24 @@ enum class CustomServiceCodeCompletionTemplate( val parseResponseAsChatCompletions: Boolean = false ) { ANYSCALE( - "https://api.endpoints.anyscale.com/v1/completions", + "https://{your-service-endpoint}/v1/completions", getDefaultHeadersWithAuthentication(), - getDefaultBodyParams(mapOf("model" to "codellama/CodeLlama-70b-Instruct-hf")) + getDefaultBodyParams(mapOf("model" to "{your-model-id}")) ), AZURE( - "https://{your-resource-name}.openai.azure.com/openai/deployments/{deployment-id}/completions?api-version=2023-05-15", + "https://{your-resource-name}.openai.azure.com/openai/deployments/{deployment-id}/completions?api-version=2024-10-21", getDefaultHeaders("api-key", "\$CUSTOM_SERVICE_API_KEY"), getDefaultBodyParams(emptyMap()) ), DEEP_INFRA( - "https://api.deepinfra.com/v1/inference/codellama/CodeLlama-70b-Instruct-hf", + "https://api.deepinfra.com/v1/openai/completions", getDefaultHeadersWithAuthentication(), - mutableMapOf("input" to "\$FIM_PROMPT") + getDefaultBodyParams(mapOf("model" to "deepseek-ai/DeepSeek-V3.2")) ), FIREWORKS( "https://api.fireworks.ai/inference/v1/completions", getDefaultHeadersWithAuthentication(), - getDefaultBodyParams(mapOf("model" to "accounts/fireworks/models/qwen2p5-coder-32b-instruct")) + getDefaultBodyParams(mapOf("model" to "accounts/fireworks/models/kimi-k2-instruct-0905")) ), OPENAI( "https://api.openai.com/v1/completions", @@ -45,7 +45,7 @@ enum class CustomServiceCodeCompletionTemplate( "stream" to true, "prompt" to "\$PREFIX", "suffix" to "\$SUFFIX", - "model" to "codestral-latest", + "model" to "codestral-2508", "temperature" to 0.7, "max_tokens" to 1024 ), @@ -54,7 +54,7 @@ enum class CustomServiceCodeCompletionTemplate( TOGETHER( "https://api.together.xyz/v1/completions", getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"), - getDefaultBodyParams(mapOf("model" to "codellama/CodeLlama-70b-hf")) + getDefaultBodyParams(mapOf("model" to "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8")) ) } @@ -84,4 +84,4 @@ private fun getDefaultBodyParams(additionalParams: Map): MutableMap ) defaultParams.putAll(additionalParams) return defaultParams -} \ No newline at end of file +} diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceTemplate.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceTemplate.kt index b49f3a75..18a035e1 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceTemplate.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceTemplate.kt @@ -6,12 +6,6 @@ enum class CustomServiceTemplate( val chatCompletionTemplate: CustomServiceChatCompletionTemplate, val codeCompletionTemplate: CustomServiceCodeCompletionTemplate? = null ) { - ANYSCALE( - "Anyscale", - "https://docs.endpoints.anyscale.com/", - CustomServiceChatCompletionTemplate.ANYSCALE, - CustomServiceCodeCompletionTemplate.ANYSCALE, - ), AZURE( "Azure OpenAI", "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference", @@ -26,25 +20,26 @@ enum class CustomServiceTemplate( ), FIREWORKS( "Fireworks", - "https://readme.fireworks.ai/reference/createchatcompletion", + "https://docs.fireworks.ai/api-reference/post-chatcompletions", CustomServiceChatCompletionTemplate.FIREWORKS, CustomServiceCodeCompletionTemplate.FIREWORKS ), GROQ( "Groq", - "https://docs.api.groq.com/md/openai.oas.html", + "https://console.groq.com/docs/openai", CustomServiceChatCompletionTemplate.GROQ ), OPENAI( - "OpenAI", - "https://platform.openai.com/docs/api-reference/chat", + "OpenAI (Chat Completions API)", + "https://platform.openai.com/docs/api-reference/chat/create", CustomServiceChatCompletionTemplate.OPENAI, CustomServiceCodeCompletionTemplate.OPENAI ), - PERPLEXITY( - "Perplexity AI", - "https://docs.perplexity.ai/reference/post_chat_completions", - CustomServiceChatCompletionTemplate.PERPLEXITY + OPENAI_RESPONSES( + "OpenAI (Responses API)", + "https://platform.openai.com/docs/api-reference/responses/create", + CustomServiceChatCompletionTemplate.OPENAI_RESPONSES, + CustomServiceCodeCompletionTemplate.OPENAI ), TOGETHER( "Together AI", @@ -54,32 +49,27 @@ enum class CustomServiceTemplate( ), OLLAMA( "Ollama", - "https://github.com/ollama/ollama/blob/main/docs/openai.md", + "https://docs.ollama.com/api/openai-compatibility", CustomServiceChatCompletionTemplate.OLLAMA ), LLAMA_CPP( - "LLaMA C/C++", - "https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md", + "llama.cpp Server", + "https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md", CustomServiceChatCompletionTemplate.LLAMA_CPP ), MISTRAL_AI( "Mistral AI", - "https://docs.mistral.ai/getting-started/quickstart", + "https://docs.mistral.ai/capabilities/completion/usage", CustomServiceChatCompletionTemplate.MISTRAL_AI, CustomServiceCodeCompletionTemplate.MISTRAL_AI ), OPEN_ROUTER( "OpenRouter", - "https://openrouter.ai/docs#quick-start", - CustomServiceChatCompletionTemplate.OPEN_ROUTER - ), - OPENAI_RESPONSES( - "OpenAI (Responses API)", - "https://platform.openai.com/docs/api-reference/responses", - CustomServiceChatCompletionTemplate.OPENAI_RESPONSES + "https://openrouter.ai/docs/quickstart", + CustomServiceChatCompletionTemplate.OPENROUTER ); override fun toString(): String { return providerName } -} \ No newline at end of file +} diff --git a/src/main/kotlin/ee/carlrobert/codegpt/toolwindow/chat/parser/CompleteMessageParser.kt b/src/main/kotlin/ee/carlrobert/codegpt/toolwindow/chat/parser/CompleteMessageParser.kt index 1368725d..d68a038f 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/toolwindow/chat/parser/CompleteMessageParser.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/toolwindow/chat/parser/CompleteMessageParser.kt @@ -17,11 +17,13 @@ class CompleteMessageParser : MessageParser { Pattern.compile("<<<<<<< SEARCH\\n(.*?)(?:\\n=======\\n(.*?))?$", Pattern.DOTALL) private const val THINK_OPEN_TAG = "" - private const val THINK_CLOSE_TAG = "\n\n" + private const val THINK_CLOSE_TAG = "" private const val CODE_HEADER_GROUP_INDEX = 2 private const val CODE_CONTENT_GROUP_INDEX = 3 private const val SEARCH_CONTENT_GROUP_INDEX = 1 private const val REPLACE_CONTENT_GROUP_INDEX = 2 + private val THINKING_BLOCK_PATTERN = + Regex("""^(.*?)\s*""", setOf(RegexOption.DOT_MATCHES_ALL)) private val TOLERANT_SEARCH_START = Regex("""^\s*<{3,}(\s*SEARCH.*)?$""", RegexOption.IGNORE_CASE) @@ -54,15 +56,10 @@ class CompleteMessageParser : MessageParser { private fun extractThoughtIfPresent(input: String): String { extractedThought = null - if (!input.startsWith(THINK_OPEN_TAG)) { - return input - } - - val closeTagIndex = input.indexOf(THINK_CLOSE_TAG) - return if (closeTagIndex != -1) { - val thoughtStartIndex = THINK_OPEN_TAG.length - extractedThought = input.substring(thoughtStartIndex, closeTagIndex).trim() - input.substring(closeTagIndex + THINK_CLOSE_TAG.length) + val match = THINKING_BLOCK_PATTERN.find(input) + return if (match != null) { + extractedThought = match.groupValues[1].trim() + input.substring(match.range.last + 1) } else { input } diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentProviderIntegrationTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentProviderIntegrationTest.kt index a1b3cc37..17f3ee60 100644 --- a/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentProviderIntegrationTest.kt +++ b/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentProviderIntegrationTest.kt @@ -466,7 +466,7 @@ class AgentProviderIntegrationTest : IntegrationTest() { "item", jsonMap( e("type", "function_call"), - e("id", "fc_1"), + e("id", callId), e("call_id", callId), e("name", toolName), e("arguments", "") @@ -477,12 +477,28 @@ class AgentProviderIntegrationTest : IntegrationTest() { ), jsonMapResponse( e("type", "response.function_call_arguments.delta"), - e("item_id", "fc_1"), + e("item_id", callId), e("output_index", 0), e("delta", arguments), e("call_id", callId), e("sequence_number", 2) ), + jsonMapResponse( + e("type", "response.output_item.done"), + e( + "item", + jsonMap( + e("type", "function_call"), + e("id", callId), + e("call_id", callId), + e("name", toolName), + e("arguments", arguments), + e("status", "completed") + ) + ), + e("output_index", 0), + e("sequence_number", 3) + ), jsonMapResponse( e("type", "response.completed"), e( @@ -497,7 +513,7 @@ class AgentProviderIntegrationTest : IntegrationTest() { jsonArray( jsonMap( e("type", "function_call"), - e("id", "fc_1"), + e("id", callId), e("call_id", callId), e("name", toolName), e("arguments", arguments), @@ -510,7 +526,7 @@ class AgentProviderIntegrationTest : IntegrationTest() { e("text", jsonMap()) ) ), - e("sequence_number", 3) + e("sequence_number", 4) ) ) } @@ -826,6 +842,8 @@ class AgentProviderIntegrationTest : IntegrationTest() { jsonArray( jsonMap( e("finishReason", "stop"), + e("finish_reason", "stop"), + e("index", 0), e( "message", jsonMap( diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentServiceIntegrationTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentServiceIntegrationTest.kt index d022fc41..b73cda95 100644 --- a/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentServiceIntegrationTest.kt +++ b/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentServiceIntegrationTest.kt @@ -3,9 +3,11 @@ package ee.carlrobert.codegpt.agent import ai.koog.agents.core.agent.AIAgent import ai.koog.agents.core.agent.AIAgentService import ai.koog.agents.core.agent.config.AIAgentConfig +import ai.koog.agents.core.tools.ToolDescriptor import ai.koog.agents.core.tools.ToolRegistry import ai.koog.agents.snapshot.feature.AgentCheckpointData import ai.koog.agents.snapshot.providers.file.JVMFilePersistenceStorageProvider +import ai.koog.prompt.dsl.ModerationResult import ai.koog.prompt.dsl.Prompt import ai.koog.prompt.dsl.prompt import ai.koog.prompt.executor.model.PromptExecutor @@ -327,7 +329,7 @@ private object NoopPromptExecutor : PromptExecutor { override suspend fun execute( prompt: Prompt, model: LLModel, - tools: List + tools: List ): List { throw UnsupportedOperationException("NoopPromptExecutor should not be used in tests") } @@ -335,7 +337,7 @@ private object NoopPromptExecutor : PromptExecutor { override fun executeStreaming( prompt: Prompt, model: LLModel, - tools: List + tools: List ): Flow { throw UnsupportedOperationException("NoopPromptExecutor should not be used in tests") } @@ -343,7 +345,7 @@ private object NoopPromptExecutor : PromptExecutor { override suspend fun moderate( prompt: Prompt, model: LLModel - ): ai.koog.prompt.dsl.ModerationResult { + ): ModerationResult { throw UnsupportedOperationException("NoopPromptExecutor should not be used in tests") } diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesApiSerializationTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesApiSerializationTest.kt deleted file mode 100644 index 164886fe..00000000 --- a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesApiSerializationTest.kt +++ /dev/null @@ -1,219 +0,0 @@ -package ee.carlrobert.codegpt.agent.clients - -import ai.koog.prompt.executor.clients.openai.base.models.Content -import ai.koog.prompt.executor.clients.openai.base.models.OpenAIFunction -import ai.koog.prompt.executor.clients.openai.base.models.OpenAIMessage -import ai.koog.prompt.executor.clients.openai.base.models.OpenAIToolCall -import ai.koog.prompt.executor.clients.openai.base.models.OpenAITool -import ai.koog.prompt.executor.clients.openai.base.models.OpenAIToolChoice -import ai.koog.prompt.executor.clients.openai.base.models.OpenAIToolFunction -import ai.koog.prompt.llm.LLModel -import ai.koog.prompt.params.LLMParams -import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState -import kotlinx.serialization.json.Json -import kotlinx.serialization.json.buildJsonArray -import kotlinx.serialization.json.buildJsonObject -import kotlinx.serialization.json.boolean -import kotlinx.serialization.json.jsonArray -import kotlinx.serialization.json.jsonObject -import kotlinx.serialization.json.jsonPrimitive -import kotlinx.serialization.json.put -import org.assertj.core.api.Assertions.assertThat -import org.junit.Test - -class CustomOpenAIResponsesApiSerializationTest { - - private val json = Json { - ignoreUnknownKeys = true - encodeDefaults = true - explicitNulls = false - } - - @Test - fun `responses api request should encode tools with top level name`() { - val state = CustomServiceChatCompletionSettingsState().apply { - url = "https://example.com/v1/responses" - body.clear() - body["stream"] = true - body["input"] = "\$OPENAI_MESSAGES" - } - val client = CustomOpenAILLMClient.fromSettingsState("test-key", state) - val serializeMethod = client.javaClass.getDeclaredMethod( - "serializeProviderChatRequest", - List::class.java, - LLModel::class.java, - List::class.java, - OpenAIToolChoice::class.java, - LLMParams::class.java, - Boolean::class.javaPrimitiveType - ) - serializeMethod.isAccessible = true - - val payload = serializeMethod.invoke( - client, - listOf(OpenAIMessage.User(content = Content.Text("hello"))), - LLModel( - id = "gpt-test", - provider = CustomOpenAILLMClient.CustomOpenAI, - capabilities = emptyList(), - contextLength = 128_000, - maxOutputTokens = 4_096 - ), - listOf( - OpenAITool( - OpenAIToolFunction( - name = "Diagnostics", - description = "Read IDE diagnostics", - parameters = buildJsonObject { - put("type", "object") - put("properties", buildJsonObject {}) - put("required", buildJsonArray {}) - }, - strict = true - ) - ) - ), - OpenAIToolChoice.Function(OpenAIToolChoice.FunctionName("Diagnostics")), - CustomOpenAIParams(), - true - ) as String - - val request = json.parseToJsonElement(payload).jsonObject - val tool = request.getValue("tools").jsonArray.single().jsonObject - val toolChoice = request.getValue("tool_choice").jsonObject - - assertThat(tool.getValue("type").jsonPrimitive.content).isEqualTo("function") - assertThat(tool.getValue("name").jsonPrimitive.content).isEqualTo("Diagnostics") - assertThat(tool.getValue("description").jsonPrimitive.content).isEqualTo("Read IDE diagnostics") - assertThat(tool.getValue("strict").jsonPrimitive.boolean).isTrue() - assertThat(tool).doesNotContainKey("function") - assertThat(toolChoice.getValue("type").jsonPrimitive.content).isEqualTo("function") - assertThat(toolChoice.getValue("name").jsonPrimitive.content).isEqualTo("Diagnostics") - } - - @Test - fun `responses api request should encode mode tool choice as lowercase string`() { - val state = CustomServiceChatCompletionSettingsState().apply { - url = "https://example.com/v1/responses" - body.clear() - body["stream"] = true - body["input"] = "\$OPENAI_MESSAGES" - } - val client = CustomOpenAILLMClient.fromSettingsState("test-key", state) - val serializeMethod = client.javaClass.getDeclaredMethod( - "serializeProviderChatRequest", - List::class.java, - LLModel::class.java, - List::class.java, - OpenAIToolChoice::class.java, - LLMParams::class.java, - Boolean::class.javaPrimitiveType - ) - serializeMethod.isAccessible = true - - val payload = serializeMethod.invoke( - client, - listOf(OpenAIMessage.User(content = Content.Text("hello"))), - LLModel( - id = "gpt-test", - provider = CustomOpenAILLMClient.CustomOpenAI, - capabilities = emptyList(), - contextLength = 128_000, - maxOutputTokens = 4_096 - ), - emptyList(), - openAIToolChoiceMode("required"), - CustomOpenAIParams(), - true - ) as String - - val request = json.parseToJsonElement(payload).jsonObject - - assertThat(request.getValue("tool_choice").jsonPrimitive.content).isEqualTo("required") - } - - @Test - fun `responses api request should encode agent tool history as input items`() { - val state = CustomServiceChatCompletionSettingsState().apply { - url = "https://example.com/v1/responses" - body.clear() - body["stream"] = true - body["input"] = "\$OPENAI_MESSAGES" - } - val client = CustomOpenAILLMClient.fromSettingsState("test-key", state) - val serializeMethod = client.javaClass.getDeclaredMethod( - "serializeProviderChatRequest", - List::class.java, - LLModel::class.java, - List::class.java, - OpenAIToolChoice::class.java, - LLMParams::class.java, - Boolean::class.javaPrimitiveType - ) - serializeMethod.isAccessible = true - - val payload = serializeMethod.invoke( - client, - listOf( - OpenAIMessage.System(content = Content.Text("System instructions")), - OpenAIMessage.User(content = Content.Text("Read the fixture")), - OpenAIMessage.Assistant( - content = Content.Text("Calling Read"), - toolCalls = listOf( - OpenAIToolCall( - "call_read", - OpenAIFunction("Read", "\"{\\\"file_path\\\":\\\"/tmp/fixture.txt\\\"}\"") - ) - ) - ), - OpenAIMessage.Tool( - content = Content.Text("1\tfixture contents"), - toolCallId = "call_read" - ) - ), - LLModel( - id = "gpt-test", - provider = CustomOpenAILLMClient.CustomOpenAI, - capabilities = emptyList(), - contextLength = 128_000, - maxOutputTokens = 4_096 - ), - emptyList(), - null, - CustomOpenAIParams(), - true - ) as String - - val request = json.parseToJsonElement(payload).jsonObject - val input = request.getValue("input").jsonArray - - assertThat(input.map { it.jsonObject.getValue("type").jsonPrimitive.content }).containsExactly( - "message", - "message", - "message", - "function_call", - "function_call_output" - ) - assertThat(input[0].jsonObject.getValue("role").jsonPrimitive.content).isEqualTo("developer") - assertThat(input[1].jsonObject.getValue("role").jsonPrimitive.content).isEqualTo("user") - assertThat(input[2].jsonObject.getValue("role").jsonPrimitive.content).isEqualTo("assistant") - assertThat( - input[2].jsonObject.getValue("content").jsonArray.single().jsonObject.getValue("text").jsonPrimitive.content - ).isEqualTo("Calling Read") - assertThat(input[3].jsonObject.getValue("name").jsonPrimitive.content).isEqualTo("Read") - assertThat(input[3].jsonObject.getValue("call_id").jsonPrimitive.content).isEqualTo("call_read") - assertThat(input[3].jsonObject.getValue("arguments").jsonPrimitive.content) - .isEqualTo("""{"file_path":"/tmp/fixture.txt"}""") - assertThat(input[4].jsonObject.getValue("call_id").jsonPrimitive.content).isEqualTo("call_read") - assertThat(input[4].jsonObject.getValue("output").jsonPrimitive.content) - .isEqualTo("1\tfixture contents") - } - - private fun openAIToolChoiceMode(value: String): OpenAIToolChoice { - val modeClass = Class.forName( - "ai.koog.prompt.executor.clients.openai.base.models.OpenAIToolChoice\$Mode" - ) - val boxMethod = modeClass.getDeclaredMethod("box-impl", String::class.java) - return boxMethod.invoke(null, value) as OpenAIToolChoice - } -} diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesSerializationIntegrationTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesSerializationIntegrationTest.kt new file mode 100644 index 00000000..6045395c --- /dev/null +++ b/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesSerializationIntegrationTest.kt @@ -0,0 +1,113 @@ +package ee.carlrobert.codegpt.agent.clients + +import ai.koog.prompt.dsl.prompt +import ai.koog.prompt.llm.LLMCapability +import ai.koog.prompt.llm.LLModel +import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState +import kotlinx.coroutines.runBlocking +import org.assertj.core.api.Assertions.assertThat +import testsupport.IntegrationTest +import testsupport.http.ResponseEntity +import testsupport.http.exchange.BasicHttpExchange +import testsupport.json.JSONUtil.e +import testsupport.json.JSONUtil.jsonArray +import testsupport.json.JSONUtil.jsonMap +import testsupport.json.JSONUtil.jsonMapResponse + +class CustomOpenAIResponsesSerializationIntegrationTest : IntegrationTest() { + + fun testResponsesRequestsFlattenAdditionalBodyParameters() { + runBlocking { + val settings = CustomServiceChatCompletionSettingsState().apply { + url = System.getProperty("customOpenAI.baseUrl") + "/v1/responses" + headers.clear() + body.clear() + body["model"] = "custom-responses-model" + body["input"] = "\$OPENAI_MESSAGES" + body["custom_config"] = mapOf("tier" to "gold") + body["extra_flag"] = "enabled" + } + val client = CustomOpenAILLMClient.fromSettingsState("TEST_API_KEY", settings) + val model = LLModel( + id = "custom-responses-model", + provider = CustomOpenAILLMClient.CustomOpenAI, + capabilities = listOf(LLMCapability.OpenAIEndpoint.Responses), + contextLength = 128_000, + maxOutputTokens = 4_096 + ) + val prompt = prompt("custom-openai-responses-serialization") { + user("Test flattened params") + } + + expectCustomOpenAI(BasicHttpExchange { request -> + assertThat(request.uri.path).isEqualTo("/v1/responses") + assertThat(request.method).isEqualTo("POST") + assertThat(request.body) + .containsEntry("extra_flag", "enabled") + .containsKey("custom_config") + .doesNotContainKey("additional_properties") + assertThat(request.body["custom_config"]) + .isEqualTo(mapOf("tier" to "gold")) + ResponseEntity( + openAiResponsesResponse( + model = "custom-responses-model", + text = "Hello with flattened params" + ) + ) + }) + + val response = client.execute(prompt, model, emptyList()) + + assertThat(response) + .singleElement() + .extracting("content") + .isEqualTo("Hello with flattened params") + } + } + + private fun openAiResponsesResponse( + model: String, + text: String + ): String { + return jsonMapResponse( + e("id", "resp-openai-test"), + e("object", "response"), + e("created_at", 1), + e("model", model), + e( + "output", + jsonArray( + jsonMap( + e("type", "message"), + e("id", "msg_1"), + e("role", "assistant"), + e("status", "completed"), + e( + "content", + jsonArray( + jsonMap( + e("type", "output_text"), + e("text", text), + e("annotations", jsonArray()) + ) + ) + ) + ) + ) + ), + e("parallel_tool_calls", true), + e("status", "completed"), + e("text", jsonMap()), + e( + "usage", + jsonMap( + e("input_tokens", 1), + e("input_tokens_details", jsonMap("cached_tokens", 0)), + e("output_tokens", 1), + e("output_tokens_details", jsonMap("reasoning_tokens", 0)), + e("total_tokens", 2) + ) + ) + ) + } +} diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizerTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizerTest.kt deleted file mode 100644 index 2ce230c5..00000000 --- a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizerTest.kt +++ /dev/null @@ -1,94 +0,0 @@ -package ee.carlrobert.codegpt.agent.clients - -import ai.koog.prompt.message.Message -import ai.koog.prompt.streaming.StreamFrame -import ai.koog.prompt.streaming.toMessageResponses -import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState -import kotlinx.coroutines.flow.Flow -import kotlinx.coroutines.flow.asFlow -import kotlinx.coroutines.flow.toList -import kotlinx.coroutines.runBlocking -import org.assertj.core.api.Assertions.assertThat -import org.junit.Test - -class StreamingPayloadNormalizerTest { - - @Test - fun `should unwrap sse data payload`() { - assertThat(normalizeSsePayload("data: {\"id\":\"chunk-1\"}")) - .isEqualTo("{\"id\":\"chunk-1\"}") - } - - @Test - fun `should ignore done sentinel`() { - assertThat(normalizeSsePayload("data: [DONE]")).isNull() - } - - @Test - fun `should unwrap multiline sse event frames`() { - assertThat( - normalizeSsePayload("event: response.created\ndata: {\"type\":\"response.created\"}\n") - ).isEqualTo("{\"type\":\"response.created\"}") - } - - @Test - fun `should unwrap compact sse frames where event and data share a line`() { - assertThat( - normalizeSsePayload("event: response.created data: {\"type\":\"response.created\"}") - ).isEqualTo("{\"type\":\"response.created\"}") - } - - @Test - fun `custom openai client should decode sse wrapped chat completion chunks`() { - val state = CustomServiceChatCompletionSettingsState().apply { - url = "https://example.com/v1/chat/completions" - body["stream"] = true - } - val client = CustomOpenAILLMClient.fromSettingsState("test-key", state) - val decodeMethod = client.javaClass.getDeclaredMethod("decodeStreamingResponse", String::class.java) - decodeMethod.isAccessible = true - - val response = decodeMethod.invoke( - client, - "data: {\"choices\":[],\"created\":0,\"id\":\"chunk-1\",\"model\":\"test-model\"}" - ) as CustomOpenAIChatCompletionStreamResponse - - assertThat(response.id).isEqualTo("chunk-1") - assertThat(response.model).isEqualTo("test-model") - assertThat(response.choices).isEmpty() - } - - @Test - fun `responses api streaming tool call should collapse into one named tool call`() { - val state = CustomServiceChatCompletionSettingsState().apply { - url = "https://example.com/v1/responses" - body.clear() - body["stream"] = true - body["input"] = "\$OPENAI_MESSAGES" - } - val client = CustomOpenAILLMClient.fromSettingsState("test-key", state) - val decodeMethod = client.javaClass.getDeclaredMethod("decodeStreamingResponse", String::class.java) - decodeMethod.isAccessible = true - val processMethod = client.javaClass.getDeclaredMethod("processStreamingResponse", Flow::class.java) - processMethod.isAccessible = true - - val chunks = listOf( - """{"type":"response.output_item.added","item":{"type":"function_call","id":"fc_1","call_id":"call_diag","name":"Diagnostics","arguments":""},"output_index":0,"sequence_number":1}""", - """{"type":"response.function_call_arguments.delta","item_id":"fc_1","output_index":0,"delta":"{\"file_path\":\"/tmp/mainwindow.cpp\",\"filter\":\"all\"}","call_id":"call_diag","sequence_number":2}""", - """{"type":"response.completed","response":{"id":"resp-tool","object":"response","created_at":1,"model":"test-model","output":[{"type":"function_call","id":"fc_1","call_id":"call_diag","name":"Diagnostics","arguments":"{\"file_path\":\"/tmp/mainwindow.cpp\",\"filter\":\"all\"}","status":"completed"}],"parallel_tool_calls":true,"status":"completed","text":{}},"sequence_number":3}""" - ).map { payload -> - decodeMethod.invoke(client, payload) as CustomOpenAIChatCompletionStreamResponse - } - - val responses = runBlocking { - @Suppress("UNCHECKED_CAST") - val frames = processMethod.invoke(client, chunks.asFlow()) as Flow - frames.toList().toMessageResponses() - } - val toolCall = responses.filterIsInstance().single() - - assertThat(toolCall.id).isEqualTo("call_diag") - assertThat(toolCall.tool).isEqualTo("Diagnostics") - assertThat(toolCall.content).isEqualTo("""{"file_path":"/tmp/mainwindow.cpp","filter":"all"}""") - } -}