diff --git a/src/main/java/ee/carlrobert/codegpt/toolwindow/chat/ui/ChatMessageResponseBody.java b/src/main/java/ee/carlrobert/codegpt/toolwindow/chat/ui/ChatMessageResponseBody.java
index af37f246..030cda63 100644
--- a/src/main/java/ee/carlrobert/codegpt/toolwindow/chat/ui/ChatMessageResponseBody.java
+++ b/src/main/java/ee/carlrobert/codegpt/toolwindow/chat/ui/ChatMessageResponseBody.java
@@ -130,12 +130,21 @@ public class ChatMessageResponseBody extends JPanel {
 
   public ChatMessageResponseBody withResponse(@NotNull String response) {
     try {
-      for (var item : new CompleteMessageParser().parse(response)) {
+      var parser = new CompleteMessageParser();
+      var segments = parser.parse(response);
+      if (parser.getExtractedThought() != null && !parser.getExtractedThought().isBlank()) {
+        processThinkingOutput(parser.getExtractedThought());
+      }
+      for (var item : segments) {
         processResponse(item, false);
         currentlyProcessedTextPane = null;
         currentlyProcessedEditorPanel = null;
         currentlyProcessedMermaidPanel = null;
       }
+      var thoughtProcessPanel = getExistingThoughtProcessPanel();
+      if (thoughtProcessPanel != null && !thoughtProcessPanel.isFinished()) {
+        thoughtProcessPanel.setFinished();
+      }
     } catch (Exception e) {
       LOG.error("Something went wrong while processing input", e);
     }
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/AgentFactory.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/AgentFactory.kt
index a65b23bb..96d8505f 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/agent/AgentFactory.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/AgentFactory.kt
@@ -13,21 +13,33 @@ import ai.koog.agents.core.environment.ReceivedToolResult
 import ai.koog.agents.core.environment.result
 import ai.koog.agents.core.feature.handler.tool.ToolCallCompletedContext
 import ai.koog.agents.core.feature.handler.tool.ToolCallStartingContext
+import ai.koog.agents.core.tools.ToolDescriptor
 import ai.koog.agents.core.tools.ToolRegistry
 import ai.koog.agents.ext.tool.ExitTool
 import ai.koog.agents.ext.tool.shell.ShellCommandConfirmation
 import ai.koog.agents.features.eventHandler.feature.handleEvents
 import ai.koog.agents.features.tokenizer.feature.MessageTokenizer
 import ai.koog.agents.features.tokenizer.feature.tokenizer
+import ai.koog.prompt.dsl.Prompt
 import ai.koog.prompt.dsl.prompt
+import ai.koog.prompt.executor.clients.anthropic.AnthropicParams
+import ai.koog.prompt.executor.clients.anthropic.models.AnthropicThinking
 import ai.koog.prompt.executor.clients.LLMClient
+import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
+import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort
+import ai.koog.prompt.executor.clients.openai.models.ReasoningConfig
+import ai.koog.prompt.executor.clients.openai.models.ReasoningSummary
 import ai.koog.prompt.executor.model.PromptExecutor
+import ai.koog.prompt.llm.LLMCapability
 import ai.koog.prompt.llm.LLMProvider
+import ai.koog.prompt.llm.LLModel
 import ai.koog.prompt.message.Message
+import ai.koog.prompt.params.LLMParams
 import ai.koog.prompt.tokenizer.Tokenizer
 import com.intellij.openapi.components.service
 import com.intellij.openapi.project.Project
 import ee.carlrobert.codegpt.EncodingManager
+import ee.carlrobert.codegpt.agent.clients.CustomOpenAILLMClient
 import ee.carlrobert.codegpt.agent.clients.RetryingPromptExecutor
 import ee.carlrobert.codegpt.agent.credits.extractCreditsSnapshot
 import ee.carlrobert.codegpt.agent.tools.*
@@ -46,6 +58,8 @@ import kotlin.time.Duration.Companion.seconds
 object AgentFactory {
 
     private const val MAX_AGENT_ITERATIONS = 250
+    private const val ANTHROPIC_MIN_THINKING_BUDGET = 512
+    private const val ANTHROPIC_DEFAULT_THINKING_BUDGET = 2_048
 
     fun createAgent(
         agentType: AgentType,
@@ -170,10 +184,6 @@ object AgentFactory {
         featureType: FeatureType = FeatureType.AGENT
     ): PromptExecutor {
         val llmClient = LLMClientFactory.createClient(provider, featureType)
-        return createRetryingExecutor(llmClient, events)
-    }
-
-    private fun createRetryingExecutor(client: LLMClient, events: AgentEvents?): PromptExecutor {
         val policy = RetryingPromptExecutor.RetryPolicy(
             maxAttempts = 5,
             initialDelay = 1.seconds,
@@ -181,7 +191,104 @@ object AgentFactory {
             backoffMultiplier = 2.0,
             jitterFactor = 0.1
         )
-        return RetryingPromptExecutor.fromClient(client, policy, events)
+        return createRetryingExecutor(llmClient, policy, events)
+    }
+
+    internal fun createRetryingExecutor(
+        client: LLMClient,
+        policy: RetryingPromptExecutor.RetryPolicy,
+        events: AgentEvents?
+    ): PromptExecutor {
+        val executor = RetryingPromptExecutor.fromClient(client, policy, events)
+        return object : PromptExecutor {
+            override fun executeStreaming(
+                prompt: Prompt,
+                model: LLModel,
+                tools: List<ToolDescriptor>
+            ) = executor.executeStreaming(prompt.withReasoningParams(model), model, tools)
+
+            override suspend fun execute(
+                prompt: Prompt,
+                model: LLModel,
+                tools: List<ToolDescriptor>
+            ) = executor.execute(prompt.withReasoningParams(model), model, tools)
+
+            override suspend fun moderate(prompt: Prompt, model: LLModel) =
+                executor.moderate(prompt, model)
+
+            override suspend fun models() = executor.models()
+
+            override fun close() = executor.close()
+        }
+    }
+
+    private fun Prompt.withReasoningParams(model: LLModel): Prompt {
+        val params = when (model.provider) {
+            LLMProvider.OpenAI -> params.withOpenAIReasoning()
+            CustomOpenAILLMClient.CustomOpenAI -> {
+                if (model.supports(LLMCapability.OpenAIEndpoint.Responses)) {
+                    params.withOpenAIReasoning()
+                } else {
+                    params
+                }
+            }
+            LLMProvider.Anthropic -> params.withAnthropicReasoning()
+            else -> params
+        }
+        return withParams(params)
+    }
+
+    private fun LLMParams.withOpenAIReasoning(): LLMParams {
+        val base = when (this) {
+            is OpenAIResponsesParams -> this
+            else -> OpenAIResponsesParams(
+                temperature = temperature,
+                maxTokens = maxTokens,
+                numberOfChoices = numberOfChoices,
+                speculation = speculation,
+                schema = schema,
+                toolChoice = toolChoice,
+                user = user,
+                additionalProperties = additionalProperties
+            )
+        }
+        return base.copy(
+            reasoning = base.reasoning ?: ReasoningConfig(
+                effort = ReasoningEffort.MEDIUM,
+                summary = ReasoningSummary.AUTO
+            )
+        )
+    }
+
+    private fun LLMParams.withAnthropicReasoning(): LLMParams {
+        val base = when (this) {
+            is AnthropicParams -> this
+            else -> AnthropicParams(
+                temperature = temperature,
+                maxTokens = maxTokens,
+                numberOfChoices = numberOfChoices,
+                speculation = speculation,
+                schema = schema,
+                toolChoice = toolChoice,
+                user = user,
+                additionalProperties = additionalProperties
+            )
+        }
+
+        if (base.thinking != null) return base
+
+        val thinkingBudget = resolveAnthropicThinkingBudget(base.maxTokens) ?: return base
+        return base.copy(thinking = AnthropicThinking.Enabled(budgetTokens = thinkingBudget))
+    }
+
+    private fun resolveAnthropicThinkingBudget(maxTokens: Int?): Int? {
+        val limit = maxTokens ?: ANTHROPIC_DEFAULT_THINKING_BUDGET
+        if (limit <= ANTHROPIC_MIN_THINKING_BUDGET) {
+            return null
+        }
+        return (limit / 2)
+            .coerceAtLeast(ANTHROPIC_MIN_THINKING_BUDGET)
+            .coerceAtMost(ANTHROPIC_DEFAULT_THINKING_BUDGET)
     }
 
     private fun createGeneralPurposeAgent(
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/ProxyAIAgent.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/ProxyAIAgent.kt
index 1fff7523..dc56a7dc 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/agent/ProxyAIAgent.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/ProxyAIAgent.kt
@@ -23,7 +23,6 @@ import com.intellij.openapi.components.service
 import com.intellij.openapi.project.Project
 import ee.carlrobert.codegpt.EncodingManager
 import ee.carlrobert.codegpt.agent.clients.shouldStream
-import ee.carlrobert.codegpt.agent.clients.shouldStreamCustomOpenAI
 import ee.carlrobert.codegpt.agent.strategy.CODE_AGENT_COMPRESSION
 import ee.carlrobert.codegpt.agent.strategy.HistoryCompressionConfig
 import ee.carlrobert.codegpt.agent.strategy.SingleRunStrategyProvider
@@ -35,6 +34,7 @@ import ee.carlrobert.codegpt.settings.hooks.HookManager
 import ee.carlrobert.codegpt.settings.models.ModelSettings
 import ee.carlrobert.codegpt.settings.service.FeatureType
 import ee.carlrobert.codegpt.settings.service.ServiceType
+import ee.carlrobert.codegpt.settings.service.custom.CustomServicesSettings
 import ee.carlrobert.codegpt.settings.skills.SkillDiscoveryService
 import ee.carlrobert.codegpt.toolwindow.agent.ui.approval.BashPayload
 import ee.carlrobert.codegpt.toolwindow.agent.ui.approval.ToolApprovalRequest
@@ -89,7 +89,7 @@ object ProxyAIAgent {
         val modelSelection =
             service<ModelSettings>().getModelSelectionForFeature(FeatureType.AGENT)
         val skills = project.service<SkillDiscoveryService>().listSkills()
-        val stream = shouldStreamAgentToolLoop(provider)
+        val stream = shouldStreamAgentToolLoop(project, provider)
         val projectInstructions = loadProjectInstructions(project.basePath)
         val executor = AgentFactory.createExecutor(provider, events)
         val pendingMessageQueue = pendingMessages.getOrPut(sessionId) { ArrayDeque() }
@@ -163,11 +163,20 @@ object ProxyAIAgent {
                 val toolCallToUiId: MutableMap<String, String> = HashMap()
                 val anonymousToolIds: ArrayDeque<String> = ArrayDeque()
                 val frameAdapter = ReasoningFrameTextAdapter()
+                var streamedReasoningForCurrentNode = false
 
                 onLLMStreamingFrameReceived { ctx ->
                     if (!stream) return@onLLMStreamingFrameReceived
 
-                    frameAdapter.consume(ctx.streamFrame).forEach { chunk ->
+                    val frameType = ctx.streamFrame::class.simpleName
+                        ?: ctx.streamFrame::class.qualifiedName
+                        ?: "unknown"
+                    val chunks = frameAdapter.consume(ctx.streamFrame)
+                    if (frameType.contains("Reasoning") && chunks.isNotEmpty()) {
+                        streamedReasoningForCurrentNode = true
+                    }
+
+                    chunks.forEach { chunk ->
                         if (chunk.isNotEmpty()) {
                             events.onTextReceived(chunk)
                         }
@@ -175,9 +184,22 @@ object ProxyAIAgent {
                 }
 
                 onNodeExecutionCompleted { ctx ->
-                    if (stream) return@onNodeExecutionCompleted
+                    val output = (ctx.output as? List<*>) ?: emptyList<Any?>()
+                    if (stream) {
+                        if (!streamedReasoningForCurrentNode) {
+                            output.forEach { msg ->
+                                (msg as? Message.Reasoning)?.let {
+                                    if (it.content.isNotBlank()) {
+                                        events.onTextReceived("<think>${it.content}</think>")
+                                    }
+                                }
+                            }
+                        }
+                        streamedReasoningForCurrentNode = false
+                        return@onNodeExecutionCompleted
+                    }
 
-                    (ctx.output as? List<*>)?.forEach { msg ->
+                    output.forEach { msg ->
                         (msg as? Message.Assistant)?.let {
                             events.onTextReceived(it.content)
                         }
@@ -268,10 +290,19 @@ object ProxyAIAgent {
     }
 
     private fun shouldStreamAgentToolLoop(
+        project: Project,
         provider: ServiceType,
     ): Boolean {
         return when (provider) {
-            ServiceType.CUSTOM_OPENAI -> shouldStreamCustomOpenAI(FeatureType.AGENT)
+            ServiceType.CUSTOM_OPENAI -> {
+                val selectedModel =
+                    service<ModelSettings>().getModelSelectionForFeature(FeatureType.AGENT)
+                val selectedServiceId = selectedModel.serviceId
+                val selectedService = service<CustomServicesSettings>().state.services
+                    .firstOrNull { it.id == selectedServiceId }
+                selectedService?.chatCompletionSettings?.shouldStream() == true
+            }
+
             ServiceType.GOOGLE -> false
             else -> true
         }
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIChatCompletion.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIChatCompletion.kt
index 59bff574..3705ef0e 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIChatCompletion.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIChatCompletion.kt
@@ -7,7 +7,8 @@ import kotlinx.serialization.json.JsonElement
 
 @Serializable
 class CustomOpenAIChatCompletionRequest(
-    val messages: List<OpenAIMessage> = emptyList(),
+    val messages: List<OpenAIMessage>? = null,
+    val input: JsonElement? = null,
     val prompt: String? = null,
     override val model: String? = null,
     override val stream: Boolean? = null,
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAILLMClient.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAILLMClient.kt
index 10ed114e..aa1f1929 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAILLMClient.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAILLMClient.kt
@@ -1,93 +1,68 @@
 package ee.carlrobert.codegpt.agent.clients
 
-import ai.koog.prompt.dsl.ModerationResult
+import ai.koog.agents.core.tools.ToolDescriptor
 import ai.koog.prompt.dsl.Prompt
 import ai.koog.prompt.executor.clients.ConnectionTimeoutConfig
 import ai.koog.prompt.executor.clients.LLMClient
-import ai.koog.prompt.executor.clients.LLMClientException
-import ai.koog.prompt.executor.clients.openai.base.AbstractOpenAILLMClient
-import ai.koog.prompt.executor.clients.openai.base.OpenAIBaseSettings
-import ai.koog.prompt.executor.clients.openai.base.OpenAICompatibleToolDescriptorSchemaGenerator
+import ai.koog.prompt.executor.clients.openai.OpenAIClientSettings
+import ai.koog.prompt.executor.clients.openai.OpenAILLMClient
+import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
 import ai.koog.prompt.executor.clients.openai.base.models.*
+import ai.koog.prompt.llm.LLMCapability
 import ai.koog.prompt.llm.LLMProvider
 import ai.koog.prompt.llm.LLModel
 import ai.koog.prompt.message.LLMChoice
 import ai.koog.prompt.message.Message
-import ai.koog.prompt.message.ResponseMetaInfo
 import ai.koog.prompt.params.LLMParams
 import ai.koog.prompt.streaming.StreamFrame
-import ai.koog.prompt.streaming.buildStreamFrameFlow
 import com.fasterxml.jackson.core.type.TypeReference
 import com.fasterxml.jackson.databind.ObjectMapper
 import ee.carlrobert.codegpt.codecompletions.InfillPromptTemplate
 import ee.carlrobert.codegpt.codecompletions.InfillRequest
-import ee.carlrobert.codegpt.agent.normalizeToolArgumentsJson
 import ee.carlrobert.codegpt.settings.Placeholder
-import ee.carlrobert.codegpt.settings.service.custom.CustomServiceCodeCompletionSettingsState
 import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState
+import ee.carlrobert.codegpt.settings.service.custom.CustomServiceCodeCompletionSettingsState
 import ee.carlrobert.codegpt.settings.service.custom.CustomServicePlaceholders
-import ee.carlrobert.codegpt.completions.factory.ResponsesApiUtil
 import ee.carlrobert.codegpt.util.JsonMapper
-import io.github.oshai.kotlinlogging.KotlinLogging
 import io.ktor.client.*
 import io.ktor.client.plugins.*
+import io.ktor.client.plugins.api.createClientPlugin
 import io.ktor.client.request.*
-import io.ktor.http.HttpHeaders
-import org.apache.commons.text.StringEscapeUtils
+import io.ktor.http.ContentType
+import io.ktor.http.content.TextContent
 import kotlinx.coroutines.flow.Flow
 import kotlinx.serialization.ExperimentalSerializationApi
 import kotlinx.serialization.KSerializer
 import kotlinx.serialization.builtins.ListSerializer
 import kotlinx.serialization.descriptors.elementNames
 import kotlinx.serialization.json.*
+import org.apache.commons.text.StringEscapeUtils
 import java.net.URI
-import java.util.UUID
-import kotlin.io.encoding.ExperimentalEncodingApi
 import kotlin.time.Clock
 
 /**
- * Configuration settings for connecting to the CustomOpenAI API.
+ * Implementation of [LLMClient] for OpenAI-compatible custom providers.
  *
- * @property baseUrl The base URL of the CustomOpenAI API. Default is "https://CustomOpenAI.ai/api/v1".
- * @property timeoutConfig Configuration for connection timeouts including request, connection, and socket timeouts.
- */
-class CustomOpenAIClientSettings(
-    baseUrl: String,
-    chatCompletionsPath: String,
-    timeoutConfig: ConnectionTimeoutConfig
-) : OpenAIBaseSettings(baseUrl, chatCompletionsPath, timeoutConfig)
-
-/**
- * Implementation of [LLMClient] for CustomOpenAI API.
- * CustomOpenAI is an API that routes requests to multiple LLM providers.
- *
- * @param apiKey The API key for the CustomOpenAI API
- * @param settings The base URL and timeouts for the CustomOpenAI API, defaults to "https://CustomOpenAI.ai" and 900s
- * @param clock Clock instance used for tracking response metadata timestamps.
+ * Chat-completions requests keep the custom body/placeholder behavior.
+ * Responses requests follow Koog's [OpenAILLMClient] path and only add custom params.
  */
 class CustomOpenAILLMClient(
     private val apiKey: String,
-    private val settings: CustomOpenAIClientSettings,
+    settings: OpenAIClientSettings,
     private val chatState: CustomServiceChatCompletionSettingsState? = null,
     private val codeCompletionState: CustomServiceCodeCompletionSettingsState? = null,
     private val baseClient: HttpClient = HttpClient(),
     clock: Clock = Clock.System
-) : AbstractOpenAILLMClient<CustomOpenAIChatCompletionResponse, CustomOpenAIChatCompletionStreamResponse>(
-    apiKey,
-    settings,
-    baseClient,
-    clock,
-    staticLogger,
-    OpenAICompatibleToolDescriptorSchemaGenerator()
-),
-    CodeCompletionCapable {
+) : OpenAILLMClient(
+    apiKey = apiKey,
+    settings = settings,
+    baseClient = baseClient,
+    clock = clock,
+), CodeCompletionCapable {
+
     data object CustomOpenAI : LLMProvider("custom-openai", "Custom OpenAI")
 
-    private val isResponsesApi: Boolean = ResponsesApiUtil.isResponsesApiUrl(chatState?.url)
-
     companion object {
-        private val staticLogger = KotlinLogging.logger { }
-
         init {
             registerOpenAIJsonSchemaGenerators(CustomOpenAI)
         }
@@ -103,6 +78,7 @@ class CustomOpenAILLMClient(
                 timeoutConfig = timeoutConfig
             )
             val clientWithCustomHeaders = baseClient.config {
+                install(FlattenCustomOpenAIAdditionalPropertiesPlugin)
                 defaultRequest {
                     state.headers.forEach { (key, value) ->
                         val normalizedKey = key.trim()
@@ -147,12 +123,17 @@ class CustomOpenAILLMClient(
         private fun createClientSettings(
             url: String,
             timeoutConfig: ConnectionTimeoutConfig
-        ): CustomOpenAIClientSettings {
+        ): OpenAIClientSettings {
             val uri = URI.create(url)
             val authority = uri.authority ?: uri.host
-            return CustomOpenAIClientSettings(
+            val path = buildString {
+                append(uri.path)
+                uri.query?.takeIf { it.isNotBlank() }?.let { append("?").append(it) }
+            }
+            return OpenAIClientSettings(
                 baseUrl = "${uri.scheme}://${authority}",
-                chatCompletionsPath = uri.path,
+                chatCompletionsPath = path,
+                responsesAPIPath = path,
                 timeoutConfig = timeoutConfig
             )
         }
@@ -160,10 +141,67 @@ class CustomOpenAILLMClient(
 
     override fun llmProvider(): LLMProvider = CustomOpenAI
 
-    override suspend fun getCodeCompletion(infillRequest: InfillRequest): String {
-        val state = requireNotNull(codeCompletionState) {
+    private fun LLModel.hasResponsesEndpointCapability(): Boolean =
+        this.capabilities?.any { it == LLMCapability.OpenAIEndpoint.Responses } == true
+
+    private fun requireChatState(): CustomServiceChatCompletionSettingsState {
+        return requireNotNull(chatState) {
+            "Custom OpenAI chat request requested on a code-completion-only client"
+        }
+    }
+
+    private fun requireCodeCompletionState(): CustomServiceCodeCompletionSettingsState {
+        return requireNotNull(codeCompletionState) {
             "Custom OpenAI code completion requested on a chat-only client"
         }
+    }
+
+    override suspend fun execute(
+        prompt: Prompt,
+        model: LLModel,
+        tools: List<ToolDescriptor>
+    ): List<Message.Response> {
+        val state = requireChatState()
+        val finalPrompt = prompt.prepareForModel(model, state)
+        return super.execute(finalPrompt, model, tools)
+    }
+
+    override fun executeStreaming(
+        prompt: Prompt,
+        model: LLModel,
+        tools: List<ToolDescriptor>
+    ): Flow<StreamFrame> {
+        val state = requireChatState()
+        val finalPrompt = prompt.prepareForModel(model, state)
+        return super.executeStreaming(finalPrompt, model, tools)
+    }
+
+    override suspend fun executeMultipleChoices(
+        prompt: Prompt,
+        model: LLModel,
+        tools: List<ToolDescriptor>
+    ): List<LLMChoice> {
+        val state = requireChatState()
+        return super.executeMultipleChoices(prompt.prepareForModel(model, state), model, tools)
+    }
+
+    private fun Prompt.prepareForModel(
+        model: LLModel,
+        state: CustomServiceChatCompletionSettingsState
+    ): Prompt {
+        return when {
+            model.hasResponsesEndpointCapability() ->
+                withParams(params.toCustomOpenAIResponsesParams(state))
+
+            params is OpenAIResponsesParams ->
+                withParams(params.toCustomOpenAIParams(state))
+
+            else -> this
+        }
+    }
+
+    override suspend fun getCodeCompletion(infillRequest: InfillRequest): String {
+        val state = requireCodeCompletionState()
         val url = requireNotNull(state.url)
         val payload = postCompletionJson(
             client = baseClient,
@@ -190,14 +228,7 @@ class CustomOpenAILLMClient(
         params: LLMParams,
         stream: Boolean
     ): String {
-        val state = requireNotNull(chatState) {
-            "Custom OpenAI chat request requested on a code-completion-only client"
-        }
-
-        if (isResponsesApi) {
-            return serializeResponsesApiRequest(state, messages, model, tools, toolChoice)
-        }
-
+        val state = requireChatState()
         val customParams: CustomOpenAIParams = params.toCustomOpenAIParams(state)
         val streamRequest = state.shouldStream()
         val additionalProperties = buildCustomOpenAIAdditionalProperties(
@@ -238,44 +269,6 @@ class CustomOpenAILLMClient(
         return json.encodeToString(CustomOpenAIChatCompletionRequestSerializer, request)
     }
 
-    /**
-     * Builds the Responses API request body directly from the template body configuration.
-     * The template body uses "input" instead of "messages" and "max_output_tokens" instead
-     * of "max_tokens", so we process it as-is to produce the correct format.
-     */
-    private fun serializeResponsesApiRequest(
-        state: CustomServiceChatCompletionSettingsState,
-        messages: List<OpenAIMessage>,
-        model: LLModel,
-        tools: List<OpenAITool>?,
-        toolChoice: OpenAIToolChoice?
-    ): String {
-        val streamRequest = state.shouldStream()
-        val inputJson = messages.toResponsesApiItemsJson()
-        val prompt = renderCustomOpenAIPrompt(messages, json)
-
-        return buildJsonObject {
-            state.body.forEach { (key, value) ->
-                put(
-                    key, transformCustomOpenAIBodyValue(
-                        key = key,
-                        value = value,
-                        streamRequest = streamRequest,
-                        messagesJson = inputJson,
-                        prompt = prompt,
-                        credential = apiKey,
-                        json = json
-                    )
-                )
-            }
-            put("model", JsonPrimitive(model.id))
-            if (!tools.isNullOrEmpty()) {
-                put("tools", JsonArray(tools.map { it.toResponsesApiToolJson() }))
-            }
-            toolChoice?.toResponsesApiToolChoiceJson()?.let { put("tool_choice", it) }
-        }.toString()
-    }
-
     private fun buildCodeCompletionRequestBody(
         state: CustomServiceCodeCompletionSettingsState,
         infillRequest: InfillRequest
@@ -285,16 +278,16 @@ class CustomOpenAILLMClient(
                 mapOf(
                     "role" to "system",
                     "content" to (
-                        "You are a code completion assistant. Complete the code between the given prefix and suffix. " +
-                            "Return only the missing code that should be inserted, without any formatting, explanations, or markdown."
-                        )
+                            "You are a code completion assistant. Complete the code between the given prefix and suffix. " +
+                                    "Return only the missing code that should be inserted, without any formatting, explanations, or markdown."
+                            )
                 ),
                 mapOf(
                     "role" to "user",
                     "content" to (
-                        "<PREFIX>\n${infillRequest.prefix}\n</PREFIX>\n\n" +
-                            "<SUFFIX>\n${infillRequest.suffix}\n</SUFFIX>\n\nComplete:"
-                        )
+                            "<PREFIX>\n${infillRequest.prefix}\n</PREFIX>\n\n" +
+                                    "<SUFFIX>\n${infillRequest.suffix}\n</SUFFIX>\n\nComplete:"
+                            )
                 )
             )
             val transformedBody = state.body.entries.mapNotNull { (key, value) ->
@@ -416,283 +409,6 @@ class CustomOpenAILLMClient(
                 ?: replaced
         }
     }
-
-    override fun processProviderChatResponse(response: CustomOpenAIChatCompletionResponse): List<LLMChoice> {
-        require(response.choices.isNotEmpty()) { "Empty choices in response" }
-        return response.choices.map {
-            it.message.toResponses(it.finishReason, createMetaInfo(response.usage))
-        }
-    }
-
-    override fun decodeStreamingResponse(data: String): CustomOpenAIChatCompletionStreamResponse {
-        val payload = normalizeSsePayload(data)
-            ?: return CustomOpenAIChatCompletionStreamResponse(
-                choices = emptyList(),
-                created = 0,
-                id = "",
-                model = ""
-            )
-        if (!isResponsesApi) {
-            return json.decodeFromString(payload)
-        }
-        return adaptResponsesApiStreamEvent(payload)
-    }
-
-    override fun decodeResponse(data: String): CustomOpenAIChatCompletionResponse {
-        if (!isResponsesApi) {
-            return json.decodeFromString(data)
-        }
-        return adaptResponsesApiResponse(data)
-    }
-
-    /**
-     * Adapts a Responses API SSE event into a [CustomOpenAIChatCompletionStreamResponse].
-     * Maps Responses API event types to the chat completions delta format that
-     * [processStreamingResponse] already knows how to handle.
-     */
-    private fun adaptResponsesApiStreamEvent(data: String): CustomOpenAIChatCompletionStreamResponse {
-        val event = json.parseToJsonElement(data).jsonObject
-        val type = event["type"]?.jsonPrimitive?.contentOrNull ?: ""
-
-        val choice = when (type) {
-            "response.output_text.delta" -> {
-                val delta = event["delta"]?.jsonPrimitive?.contentOrNull ?: ""
-                CustomOpenAIStreamChoice(
-                    delta = CustomOpenAIStreamDelta(content = delta)
-                )
-            }
-
-            "response.function_call_arguments.delta",
-            "response.output_item.added" -> null
-
-            "response.completed" -> {
-                val responseObject = event["response"]?.jsonObject
-                val toolCalls = responseObject?.get("output")
-                    ?.jsonArray
-                    ?.mapIndexedNotNull { index, item ->
-                        val itemObject = item.jsonObject
-                        if (itemObject["type"]?.jsonPrimitive?.contentOrNull != "function_call") {
-                            return@mapIndexedNotNull null
-                        }
-
-                        val rawArguments = when (val arguments = itemObject["arguments"]) {
-                            is JsonPrimitive -> arguments.contentOrNull
-                            is JsonObject -> arguments.toString()
-                            else -> null
-                        }
-                        CustomOpenAIToolCall(
-                            id = itemObject["call_id"]?.jsonPrimitive?.contentOrNull,
-                            index = index,
-                            function = CustomOpenAIFunction(
-                                name = itemObject["name"]?.jsonPrimitive?.contentOrNull,
-                                arguments = normalizeToolArgumentsJson(rawArguments) ?: rawArguments.orEmpty()
-                            )
-                        )
-                    }
-                    .orEmpty()
-                CustomOpenAIStreamChoice(
-                    finishReason = "stop",
-                    delta = CustomOpenAIStreamDelta(
-                        toolCalls = toolCalls.takeIf { it.isNotEmpty() }
-                    )
-                )
-            }
-
-            else -> null
-        }
-
-        return CustomOpenAIChatCompletionStreamResponse(
-            choices = listOfNotNull(choice),
-            created = 0,
-            id = "",
-            model = ""
-        )
-    }
-
-    /**
-     * Adapts a non-streaming Responses API response into a [CustomOpenAIChatCompletionResponse].
-     * Builds a synthetic chat-completions-format JSON and deserializes it, avoiding direct
-     * construction of Koog internal types.
-     */
-    private fun adaptResponsesApiResponse(data: String): CustomOpenAIChatCompletionResponse {
-        val response = json.parseToJsonElement(data).jsonObject
-        val output = response["output"]?.jsonArray ?: JsonArray(emptyList())
-
-        val textContent = StringBuilder()
-        val toolCallsJson = mutableListOf<JsonElement>()
-
-        for (item in output) {
-            val itemObj = item.jsonObject
-            when (itemObj["type"]?.jsonPrimitive?.contentOrNull) {
-                "message" -> {
-                    itemObj["content"]?.jsonArray?.forEach { contentPart ->
-                        val partObj = contentPart.jsonObject
-                        if (partObj["type"]?.jsonPrimitive?.contentOrNull == "output_text") {
-                            textContent.append(partObj["text"]?.jsonPrimitive?.contentOrNull ?: "")
-                        }
-                    }
-                }
-
-                "function_call" -> {
-                    val rawArguments = when (val arguments = itemObj["arguments"]) {
-                        is JsonPrimitive -> arguments.contentOrNull
-                        is JsonObject -> arguments.toString()
-                        else -> null
-                    }
-                    toolCallsJson.add(buildJsonObject {
-                        put("id", itemObj["call_id"] ?: JsonPrimitive(""))
-                        put("type", JsonPrimitive("function"))
-                        putJsonObject("function") {
-                            put("name", itemObj["name"] ?: JsonPrimitive(""))
-                            put(
-                                "arguments",
-                                JsonPrimitive(normalizeToolArgumentsJson(rawArguments) ?: rawArguments ?: "{}")
-                            )
-                        }
-                    })
-                }
-            }
-        }
-
-        val syntheticJson = buildJsonObject {
-            put("id", response["id"] ?: JsonPrimitive(""))
-            put("created", JsonPrimitive(0))
-            put("model", response["model"] ?: JsonPrimitive(""))
-            put("object", JsonPrimitive("chat.completion"))
-            putJsonArray("choices") {
-                addJsonObject {
-                    put("finish_reason", JsonPrimitive("stop"))
-                    putJsonObject("message") {
-                        put("role", JsonPrimitive("assistant"))
-                        if (textContent.isNotEmpty()) {
-                            put("content", JsonPrimitive(textContent.toString()))
-                        }
-                        if (toolCallsJson.isNotEmpty()) {
-                            put("tool_calls", JsonArray(toolCallsJson))
-                        }
-                    }
-                }
-            }
-            response["usage"]?.let { put("usage", parseResponsesApiUsageJson(it)) }
-        }
-
-        return json.decodeFromString(syntheticJson.toString())
-    }
-
-    /**
-     * Converts Responses API usage JSON (input_tokens/output_tokens) to
-     * OpenAI-compatible usage JSON (prompt_tokens/completion_tokens/total_tokens).
-     */
-    private fun parseResponsesApiUsageJson(usageElement: JsonElement): JsonElement {
-        val usageObj = usageElement.jsonObject
-        val inputTokens = usageObj["input_tokens"]?.jsonPrimitive?.intOrNull ?: 0
-        val outputTokens = usageObj["output_tokens"]?.jsonPrimitive?.intOrNull ?: 0
-        return buildJsonObject {
-            put("prompt_tokens", JsonPrimitive(inputTokens))
-            put("completion_tokens", JsonPrimitive(outputTokens))
-            put("total_tokens", JsonPrimitive(inputTokens + outputTokens))
-        }
-    }
-
-    override fun processStreamingResponse(
-        response: Flow<CustomOpenAIChatCompletionStreamResponse>
-    ): Flow<StreamFrame> = buildStreamFrameFlow {
-        var finishReason: String? = null
-        var metaInfo: ResponseMetaInfo? = null
-
-        response.collect { chunk ->
-            chunk.choices.firstOrNull()?.let { choice ->
-                choice.delta.content?.let { emitTextDelta(it) }
-
-                choice.delta.toolCalls?.forEach { openAIToolCall ->
-                    val index = openAIToolCall.index ?: 0
-                    val id = openAIToolCall.id.orEmpty()
-                    val functionName = openAIToolCall.function.name.orEmpty()
-                    val functionArgs = openAIToolCall.function.arguments.orEmpty()
-                    emitToolCallDelta(id, functionName, functionArgs, index)
-                }
-
-                choice.finishReason?.let { finishReason = it }
-            }
-
-            chunk.usage?.let { metaInfo = createMetaInfo(it) }
-        }
-
-        emitEnd(finishReason, metaInfo)
-    }
-
-    override suspend fun moderate(prompt: Prompt, model: LLModel): ModerationResult {
-        throw UnsupportedOperationException("Moderation not supported.")
-    }
-
-    @OptIn(ExperimentalEncodingApi::class)
-    private fun OpenAIMessage.toResponses(
-        finishReason: String?,
-        metaInfo: ResponseMetaInfo
-    ): List<Message.Response> {
-        val contentText = content?.text()
-
-        if (this is OpenAIMessage.Assistant) {
-            val assistantToolCalls = toolCalls
-            if (!assistantToolCalls.isNullOrEmpty()) {
-                return buildList {
-                    contentText?.let {
-                        add(
-                            Message.Assistant(
-                                content = it,
-                                finishReason = finishReason,
-                                metaInfo = metaInfo
-                            )
-                        )
-                    }
-
-                    assistantToolCalls.forEach { toolCall ->
-                        val arguments = normalizeToolArgumentsJson(toolCall.function.arguments) ?: "{}"
-                        add(
-                            Message.Tool.Call(
-                                id = toolCall.id,
-                                tool = toolCall.function.name,
-                                content = arguments,
-                                metaInfo = metaInfo
-                            )
-                        )
-                    }
-                }
-            }
-
-            val reasoning = reasoningContent
-            if (reasoning != null && contentText != null) {
-                return listOf(
-                    Message.Reasoning(
-                        content = reasoning,
-                        metaInfo = metaInfo
-                    ),
-                    Message.Assistant(
-                        content = contentText,
-                        finishReason = finishReason,
-                        metaInfo = metaInfo
-                    )
-                )
-            }
-        }
-
-        if (contentText != null) {
-            return listOf(
-                Message.Assistant(
-                    content = contentText,
-                    finishReason = finishReason,
-                    metaInfo = metaInfo
-                )
-            )
-        }
-
-        val exception = LLMClientException(
-            clientName,
-            "Unexpected response: no tool calls and no content"
-        )
-        logger.error(exception) { exception.message }
-        throw exception
-    }
 }
 
 internal fun buildCustomOpenAIAdditionalProperties(
@@ -722,38 +438,22 @@ internal fun transformCustomOpenAIBodyValue(
     messages: List<OpenAIMessage>,
     credential: String,
     json: Json
-): JsonElement {
-    return transformCustomOpenAIBodyValue(
-        key = key,
-        value = value,
-        streamRequest = streamRequest,
-        messagesJson = json.encodeToJsonElement(
-            ListSerializer(OpenAIMessage.serializer()),
-            messages
-        ),
-        prompt = renderCustomOpenAIPrompt(messages, json),
-        credential = credential,
-        json = json
-    )
-}
-
-private fun transformCustomOpenAIBodyValue(
-    key: String?,
-    value: Any?,
-    streamRequest: Boolean,
-    messagesJson: JsonElement,
-    prompt: String,
-    credential: String,
-    json: Json
 ): JsonElement {
     return when (value) {
         null -> JsonNull
         is JsonElement -> value
         is String -> when {
             !streamRequest && key == "stream" -> JsonPrimitive(false)
-            CustomServicePlaceholders.isMessages(value) -> messagesJson
+            CustomServicePlaceholders.isMessages(value) -> json.parseToJsonElement(
+                json.encodeToString(ListSerializer(OpenAIMessage.serializer()), messages)
+            )
 
-            CustomServicePlaceholders.isPrompt(value) -> JsonPrimitive(prompt)
+            CustomServicePlaceholders.isPrompt(value) -> JsonPrimitive(
+                renderCustomOpenAIPrompt(
+                    messages,
+                    json
+                )
+            )
 
             value.contains($$"$CUSTOM_SERVICE_API_KEY") -> {
                 JsonPrimitive(value.replace($$"$CUSTOM_SERVICE_API_KEY", credential))
@@ -772,8 +472,7 @@ private fun transformCustomOpenAIBodyValue(
                         key = nestedKey.toString(),
                         value = nestedValue,
                         streamRequest = streamRequest,
-                        messagesJson = messagesJson,
-                        prompt = prompt,
+                        messages = messages,
                         credential = credential,
                         json = json
                     )
@@ -786,8 +485,7 @@ private fun transformCustomOpenAIBodyValue(
                     key = null,
                     value = item,
                     streamRequest = streamRequest,
-                    messagesJson = messagesJson,
-                    prompt = prompt,
+                    messages = messages,
                     credential = credential,
                     json = json
                 )
@@ -800,8 +498,7 @@ private fun transformCustomOpenAIBodyValue(
                     key = null,
                     value = item,
                     streamRequest = streamRequest,
-                    messagesJson = messagesJson,
-                    prompt = prompt,
+                    messages = messages,
                     credential = credential,
                     json = json
                 )
@@ -819,172 +516,48 @@ internal fun renderCustomOpenAIPrompt(messages: List<OpenAIMessage>, json: Json)
     }
 }
 
-private fun List<OpenAIMessage>.toResponsesApiItemsJson(): JsonArray {
-    return JsonArray(
-        buildList {
-            for (message in this@toResponsesApiItemsJson) {
-                addAll(message.toResponsesApiItemsJson())
-            }
-        }
-    )
+private val customOpenAIRequestTransformJson = Json {
+    ignoreUnknownKeys = true
+    isLenient = true
+    explicitNulls = false
 }
 
-private fun OpenAIMessage.toResponsesApiItemsJson(): List<JsonObject> {
-    return when (this) {
-        is OpenAIMessage.System, is OpenAIMessage.Developer -> listOf(
-            buildResponsesApiInputMessageItemJson(
-                role = "developer",
-                content = content.toResponsesApiInputContentJson()
-            )
+private val FlattenCustomOpenAIAdditionalPropertiesPlugin = createClientPlugin(
+    "FlattenCustomOpenAIAdditionalProperties"
+) {
+    transformRequestBody { _, content, _ ->
+        val requestBody = content as? String ?: return@transformRequestBody null
+        val flattened = flattenSerializedAdditionalProperties(
+            requestBody,
+            customOpenAIRequestTransformJson
         )
-
-        is OpenAIMessage.User -> listOf(
-            buildResponsesApiInputMessageItemJson(
-                role = "user",
-                content = content.toResponsesApiInputContentJson()
-            )
-        )
-
-        is OpenAIMessage.Assistant -> buildList {
-            reasoningContent
-                ?.takeIf { it.isNotBlank() }
-                ?.let { reasoning ->
-                    add(
-                        buildJsonObject {
-                            put("type", JsonPrimitive("reasoning"))
-                            put("id", JsonPrimitive(UUID.randomUUID().toString()))
-                            putJsonArray("summary") {
-                                addJsonObject {
-                                    put("type", JsonPrimitive("summary_text"))
-                                    put("text", JsonPrimitive(reasoning))
-                                }
-                            }
-                        }
-                    )
-                }
-
-            content?.text()
-                ?.takeIf { it.isNotBlank() }
-                ?.let { assistantText ->
-                    add(
-                        buildJsonObject {
-                            put("type", JsonPrimitive("message"))
-                            put("role", JsonPrimitive("assistant"))
-                            putJsonArray("content") {
-                                addJsonObject {
-                                    put("type", JsonPrimitive("output_text"))
-                                    put("text", JsonPrimitive(assistantText))
-                                    put("annotations", JsonArray(emptyList()))
-                                }
-                            }
-                        }
-                    )
-                }
-
-            toolCalls.orEmpty().forEach { toolCall ->
-                val arguments = normalizeToolArgumentsJson(toolCall.function.arguments) ?: "{}"
-                add(
-                    buildJsonObject {
-                        put("type", JsonPrimitive("function_call"))
-                        put("arguments", JsonPrimitive(arguments))
-                        put("call_id", JsonPrimitive(toolCall.id))
-                        put("name", JsonPrimitive(toolCall.function.name))
-                    }
-                )
-            }
-        }
-
-        is OpenAIMessage.Tool -> listOf(
-            buildJsonObject {
-                put("type", JsonPrimitive("function_call_output"))
-                put("call_id", JsonPrimitive(toolCallId))
-                put("output", JsonPrimitive(content?.text().orEmpty()))
-            }
-        )
-    }
-}
-
-private fun buildResponsesApiInputMessageItemJson(
-    role: String,
-    content: JsonArray
-): JsonObject {
-    return buildJsonObject {
-        put("type", JsonPrimitive("message"))
-        put("role", JsonPrimitive(role))
-        put("content", content)
-    }
-}
-
-private fun Content?.toResponsesApiInputContentJson(): JsonArray {
-    if (this == null) {
-        return JsonArray(emptyList())
-    }
-
-    return when (this) {
-        is Content.Parts -> JsonArray(value.mapNotNull { it.toResponsesApiInputContentJson() })
-        else -> JsonArray(
-            listOf(
-                buildJsonObject {
-                    put("type", JsonPrimitive("input_text"))
-                    put("text", JsonPrimitive(text()))
-                }
-            )
-        )
-    }
-}
-
-private fun OpenAIContentPart.toResponsesApiInputContentJson(): JsonObject? {
-    return when (this) {
-        is OpenAIContentPart.Text -> buildJsonObject {
-            put("type", JsonPrimitive("input_text"))
-            put("text", JsonPrimitive(text))
-        }
-
-        is OpenAIContentPart.Image -> buildJsonObject {
-            put("type", JsonPrimitive("input_image"))
-            imageUrl.detail?.let { put("detail", JsonPrimitive(it)) }
-            put("imageUrl", JsonPrimitive(imageUrl.url))
-        }
-
-        is OpenAIContentPart.File -> buildJsonObject {
-            put("type", JsonPrimitive("input_file"))
-            file.fileData?.let { put("fileData", JsonPrimitive(it)) }
-            file.fileId?.let { put("fileId", JsonPrimitive(it)) }
-            file.filename?.let { put("filename", JsonPrimitive(it)) }
-        }
-
-        else -> null
-    }
-}
-
-internal fun OpenAITool.toResponsesApiToolJson(): JsonObject {
-    return buildJsonObject {
-        put("type", JsonPrimitive("function"))
-        put("name", JsonPrimitive(function.name))
-        put(
-            "parameters",
-            function.parameters ?: buildJsonObject {
-                put("type", JsonPrimitive("object"))
-                putJsonObject("properties") {}
-                putJsonArray("required") {}
-            }
-        )
-        function.strict?.let { put("strict", JsonPrimitive(it)) }
-        function.description?.takeIf { it.isNotBlank() }?.let {
-            put("description", JsonPrimitive(it))
+        if (flattened == requestBody) {
+            null
+        } else {
+            TextContent(flattened, ContentType.Application.Json)
         }
     }
 }
 
-internal fun OpenAIToolChoice.toResponsesApiToolChoiceJson(): JsonElement {
-    return when (this) {
-        is OpenAIToolChoice.Function -> buildJsonObject {
-            put("type", JsonPrimitive("function"))
-            put("name", JsonPrimitive(function.name))
-        }
+internal fun flattenSerializedAdditionalProperties(
+    requestBody: String,
+    json: Json
+): String {
+    val payload = runCatching { json.parseToJsonElement(requestBody) }.getOrNull()?.jsonObject
+        ?: return requestBody
+    val additionalProperties = payload["additional_properties"] as? JsonObject
+        ?: return requestBody
 
-        is OpenAIToolChoice.Mode -> JsonPrimitive(value)
+    val flattened = buildJsonObject {
+        payload.entries
+            .filterNot { (key, _) -> key == "additional_properties" }
+            .forEach { (key, value) -> put(key, value) }
+        additionalProperties.entries
+            .filterNot { (key, _) -> payload.containsKey(key) }
+            .forEach { (key, value) -> put(key, value) }
     }
+
+    return flattened.toString()
 }
 
 internal object CustomOpenAIChatCompletionRequestSerializer :
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIParams.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIParams.kt
index 76f38866..0df5894b 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIParams.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIParams.kt
@@ -1,5 +1,9 @@
 package ee.carlrobert.codegpt.agent.clients
 
+import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
+import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort
+import ai.koog.prompt.executor.clients.openai.models.ReasoningConfig
+import ai.koog.prompt.executor.clients.openai.models.ReasoningSummary
 import ai.koog.prompt.params.LLMParams
 import ai.koog.prompt.params.LLMParams.ToolChoice
 import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState
@@ -19,7 +23,6 @@ internal fun LLMParams.toCustomOpenAIParams(state: CustomServiceChatCompletionSe
         additionalProperties = mergedAdditionalProperties,
         temperature = body.findValue("temperature").asDouble() ?: temperature,
         maxTokens = body.findValue("maxTokens", "max_tokens").asInt() ?: maxTokens,
-        speculation = body.findValue("speculation").asString() ?: speculation,
         toolChoice = body.findValue("toolChoice", "tool_choice").asToolChoice() ?: toolChoice,
         frequencyPenalty = body.findValue("frequencyPenalty", "frequency_penalty").asDouble()
             ?: current?.frequencyPenalty,
@@ -33,6 +36,37 @@ internal fun LLMParams.toCustomOpenAIParams(state: CustomServiceChatCompletionSe
     )
 }
 
+internal fun LLMParams.toCustomOpenAIResponsesParams(
+    state: CustomServiceChatCompletionSettingsState
+): OpenAIResponsesParams {
+    val body = state.body
+    val currentReasoning = (this as? OpenAIResponsesParams)?.reasoning
+    val bodyReasoning = body.findValue("reasoning").asReasoningConfig()
+
+    val mergedAdditionalProperties = buildMap {
+        body
+            .filterKeys { it !in CUSTOM_OPENAI_RESPONSES_RESERVED_BODY_KEYS }
+            .forEach { (key, value) -> put(key, value.toJsonElement()) }
+    }.takeIf { it.isNotEmpty() }
+
+    return OpenAIResponsesParams(
+        additionalProperties = mergedAdditionalProperties,
+        temperature = body.findValue("temperature").asDouble() ?: temperature,
+        maxTokens = body.findValue("maxOutputTokens", "max_output_tokens").asInt() ?: maxTokens,
+        numberOfChoices = numberOfChoices,
+        reasoning = when {
+            bodyReasoning == null -> currentReasoning
+            currentReasoning == null -> bodyReasoning
+            else -> ReasoningConfig(
+                effort = bodyReasoning.effort ?: currentReasoning.effort,
+                summary = bodyReasoning.summary ?: currentReasoning.summary
+            )
+        },
+        schema = schema,
+        toolChoice = body.findValue("toolChoice", "tool_choice").asToolChoice() ?: toolChoice,
+    )
+}
+
 internal fun CustomServiceChatCompletionSettingsState.shouldStream(): Boolean {
     return body.findValue("stream").asBoolean() == true
 }
@@ -53,6 +87,18 @@ internal val CUSTOM_OPENAI_RESERVED_BODY_KEYS = setOf(
     "topP", "top_p",
 )
 
+internal val CUSTOM_OPENAI_RESPONSES_RESERVED_BODY_KEYS = setOf(
+    "input",
+    "maxOutputTokens", "max_output_tokens",
+    "messages",
+    "model",
+    "reasoning",
+    "stream",
+    "temperature",
+    "toolChoice", "tool_choice",
+    "tools",
+)
+
 private fun Map<String, Any>.findValue(vararg keys: String): Any? {
     keys.forEach { key ->
         if (containsKey(key)) {
@@ -95,23 +141,6 @@ private fun Any?.asBoolean(): Boolean? = when (this) {
     else -> null
 }
 
-private fun Any?.asJsonElementMap(): Map<String, JsonElement>? {
-    if (this !is Map<*, *>) return null
-
-    return buildMap {
-        this@asJsonElementMap.forEach { (key, value) ->
-            key?.toString()
-                ?.takeIf { it.isNotBlank() }
-                ?.let { put(it, value.toJsonElement()) }
-        }
-    }
-}
-
-private fun Any?.asString(): String? = when (this) {
-    is String -> this
-    else -> null
-}
-
 private fun Any?.asStopList(): List<String>? = asStringList(allowEmpty = false)
 
 private fun Any?.asStringList(allowEmpty: Boolean = false): List<String>? = when (this) {
@@ -155,6 +184,47 @@ private fun Any?.asToolChoice(): ToolChoice? = when (this) {
     else -> null
 }
 
+private fun Any?.asReasoningConfig(): ReasoningConfig? {
+    val value = this as? Map<*, *> ?: return null
+    val effort = value["effort"].asReasoningEffort()
+    val summary = value["summary"].asReasoningSummary()
+
+    if (effort == null && summary == null) {
+        return null
+    }
+
+    return ReasoningConfig(
+        effort = effort,
+        summary = summary
+    )
+}
+
+private fun Any?.asReasoningEffort(): ReasoningEffort? = when (this) {
+    is ReasoningEffort -> this
+    is String -> when (trim().lowercase()) {
+        "none" -> ReasoningEffort.NONE
+        "minimal" -> ReasoningEffort.MINIMAL
+        "low" -> ReasoningEffort.LOW
+        "medium" -> ReasoningEffort.MEDIUM
+        "high" -> ReasoningEffort.HIGH
+        else -> null
+    }
+
+    else -> null
+}
+
+private fun Any?.asReasoningSummary(): ReasoningSummary? = when (this) {
+    is ReasoningSummary -> this
+    is String -> when (trim().lowercase()) {
+        "auto" -> ReasoningSummary.AUTO
+        "concise" -> ReasoningSummary.CONCISE
+        "detailed" -> ReasoningSummary.DETAILED
+        else -> null
+    }
+
+    else -> null
+}
+
 private fun Any?.toJsonElement(): JsonElement = when (this) {
     null -> JsonNull
     is JsonElement -> this
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/ProxyAILLMClient.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/ProxyAILLMClient.kt
index 1e5837c1..4b27250e 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/ProxyAILLMClient.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/ProxyAILLMClient.kt
@@ -180,11 +180,8 @@ public class ProxyAILLMClient(
         }
     }
 
-    override fun decodeStreamingResponse(data: String): ProxyAIChatCompletionStreamResponse {
-        val payload = normalizeSsePayload(data)
-            ?: return ProxyAIChatCompletionStreamResponse()
-        return json.decodeFromString(payload)
-    }
+    override fun decodeStreamingResponse(data: String): ProxyAIChatCompletionStreamResponse =
+        json.decodeFromString(data)
 
     override fun decodeResponse(data: String): ProxyAIChatCompletionResponse =
         json.decodeFromString(data)
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/RetryingPromptExecutor.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/RetryingPromptExecutor.kt
index 0c8f305c..8cb4a608 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/RetryingPromptExecutor.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/RetryingPromptExecutor.kt
@@ -5,23 +5,13 @@ import ai.koog.http.client.KoogHttpClientException
 import ai.koog.prompt.dsl.ModerationResult
 import ai.koog.prompt.dsl.Prompt
 import ai.koog.prompt.executor.clients.LLMClient
-import ai.koog.prompt.executor.clients.anthropic.AnthropicParams
-import ai.koog.prompt.executor.clients.anthropic.models.AnthropicThinking
-import ai.koog.prompt.executor.clients.openai.OpenAIResponsesParams
-import ai.koog.prompt.executor.clients.openai.base.models.ReasoningEffort
-import ai.koog.prompt.executor.clients.openai.models.ReasoningConfig
-import ai.koog.prompt.executor.clients.openai.models.ReasoningSummary
 import ai.koog.prompt.executor.llms.SingleLLMPromptExecutor
 import ai.koog.prompt.executor.model.PromptExecutor
-import ai.koog.prompt.llm.LLMProvider
 import ai.koog.prompt.llm.LLModel
 import ai.koog.prompt.message.Message
-import ai.koog.prompt.params.LLMParams
 import ai.koog.prompt.streaming.StreamFrame
 import ee.carlrobert.codegpt.agent.AgentEvents
 import io.github.oshai.kotlinlogging.KotlinLogging
-import java.io.IOException
-import java.net.SocketTimeoutException
 import kotlinx.coroutines.CancellationException
 import kotlinx.coroutines.TimeoutCancellationException
 import kotlinx.coroutines.delay
@@ -29,6 +19,8 @@ import kotlinx.coroutines.flow.Flow
 import kotlinx.coroutines.flow.catch
 import kotlinx.coroutines.flow.flow
 import kotlinx.coroutines.flow.onEach
+import java.io.IOException
+import java.net.SocketTimeoutException
 import kotlin.math.pow
 import kotlin.random.Random
 import kotlin.time.Duration
@@ -44,8 +36,6 @@ class RetryingPromptExecutor(
 ) : PromptExecutor {
 
     companion object {
-        private const val ANTHROPIC_MIN_THINKING_BUDGET = 1_024
-        private const val ANTHROPIC_DEFAULT_THINKING_BUDGET = 2_048
         private val logger = KotlinLogging.logger { }
         private val RETRYABLE_HTTP_STATUS_CODES = setOf(408, 409, 425, 429, 500, 502, 503, 504)
 
@@ -88,7 +78,7 @@ class RetryingPromptExecutor(
         private fun Throwable.hasTimeoutMessage(): Boolean {
             val message = message ?: return false
             return message.contains("timed out", ignoreCase = true)
-                || message.contains("timeout", ignoreCase = true)
+                    || message.contains("timeout", ignoreCase = true)
         }
     }
 
@@ -100,13 +90,12 @@ class RetryingPromptExecutor(
         model: LLModel,
         tools: List<ToolDescriptor>
     ): Flow<StreamFrame> {
-        val refinedPrompt = prompt.withReasoningParams(model)
         var hasReceivedData: Boolean
 
         fun createStream(attemptNum: Int): Flow<StreamFrame> {
             hasReceivedData = false
 
-            return delegate.executeStreaming(refinedPrompt, model, tools)
+            return delegate.executeStreaming(prompt, model, tools)
                 .onEach { hasReceivedData = true }
                 .catch { error ->
                     val retryable = isRetryableFailure(error)
@@ -187,14 +176,13 @@ class RetryingPromptExecutor(
         model: LLModel,
         tools: List<ToolDescriptor>
     ): List<Message.Response> {
-        val promptWithReasoning = prompt.withReasoningParams(model)
         var attempt = 1
         var delay = retryPolicy.initialDelay
         var lastError: Throwable? = null
 
         while (attempt <= retryPolicy.maxAttempts) {
             try {
-                return delegate.execute(promptWithReasoning, model, tools)
+                return delegate.execute(prompt, model, tools)
             } catch (t: Throwable) {
                 lastError = t
                 val retryable = isRetryableFailure(t)
@@ -218,66 +206,4 @@ class RetryingPromptExecutor(
     override fun close() {
         (delegate as? AutoCloseable)?.close()
     }
-
-    private fun Prompt.withReasoningParams(model: LLModel): Prompt {
-        val params = when (model.provider) {
-            LLMProvider.OpenAI -> params.withOpenAIReasoning()
-            LLMProvider.Anthropic -> params.withAnthropicReasoning()
-            else -> params
-        }
-        return withParams(params)
-    }
-
-    private fun LLMParams.withOpenAIReasoning(): LLMParams {
-        val base = when (this) {
-            is OpenAIResponsesParams -> this
-            else -> OpenAIResponsesParams(
-                temperature = temperature,
-                maxTokens = maxTokens,
-                numberOfChoices = numberOfChoices,
-                speculation = speculation,
-                schema = schema,
-                toolChoice = toolChoice,
-                user = user,
-                additionalProperties = additionalProperties
-            )
-        }
-
-        val reasoning = base.reasoning ?: ReasoningConfig(
-            effort = ReasoningEffort.MEDIUM,
-            summary = ReasoningSummary.AUTO
-        )
-        return base.copy(reasoning = reasoning)
-    }
-
-    private fun LLMParams.withAnthropicReasoning(): LLMParams {
-        val base = when (this) {
-            is AnthropicParams -> this
-            else -> AnthropicParams(
-                temperature = temperature,
-                maxTokens = maxTokens,
-                numberOfChoices = numberOfChoices,
-                speculation = speculation,
-                schema = schema,
-                toolChoice = toolChoice,
-                user = user,
-                additionalProperties = additionalProperties
-            )
-        }
-
-        if (base.thinking != null) return base
-
-        val thinkingBudget = resolveAnthropicThinkingBudget(base.maxTokens) ?: return base
-        return base.copy(thinking = AnthropicThinking.Enabled(budgetTokens = thinkingBudget))
-    }
-
-    private fun resolveAnthropicThinkingBudget(maxTokens: Int?): Int? {
-        val limit = maxTokens ?: ANTHROPIC_DEFAULT_THINKING_BUDGET
-        if (limit <= ANTHROPIC_MIN_THINKING_BUDGET) {
-            return null
-        }
-        return (limit / 2)
-            .coerceAtLeast(ANTHROPIC_MIN_THINKING_BUDGET)
-            .coerceAtMost(ANTHROPIC_DEFAULT_THINKING_BUDGET)
-    }
 }
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizer.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizer.kt
deleted file mode 100644
index 4ffd1216..00000000
--- a/src/main/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizer.kt
+++ /dev/null
@@ -1,31 +0,0 @@
-package ee.carlrobert.codegpt.agent.clients
-
-internal fun normalizeSsePayload(rawData: String): String? {
-    val normalized = rawData
-        .replace("\r\n", "\n")
-        .replace('\r', '\n')
-        .trim()
-    if (normalized.isEmpty()) {
-        return null
-    }
-
-    val dataLines = normalized.lineSequence()
-        .mapNotNull { line ->
-            val markerIndex = line.indexOf("data:")
-            if (markerIndex >= 0) {
-                line.substring(markerIndex + "data:".length).trimStart()
-            } else {
-                null
-            }
-        }
-        .filter { it.isNotEmpty() }
-        .toList()
-
-    val payload = if (dataLines.isNotEmpty()) {
-        dataLines.joinToString("\n")
-    } else {
-        normalized
-    }
-
-    return payload.takeUnless { it.isBlank() || it == "[DONE]" }
-}
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/agent/strategy/SingleRunStrategyProvider.kt b/src/main/kotlin/ee/carlrobert/codegpt/agent/strategy/SingleRunStrategyProvider.kt
index 0d50488a..476b7375 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/agent/strategy/SingleRunStrategyProvider.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/agent/strategy/SingleRunStrategyProvider.kt
@@ -200,7 +200,7 @@ private suspend fun AIAgentLLMWriteSession.requestResponses(
     }
     val appendableResponses = appendableResponses(responses, model.provider)
     appendableResponses.forEach(appendResponse)
-    return if (stream) responses else appendableResponses
+    return responses
 }
 
 private suspend fun AIAgentLLMWriteSession.requestAndPublish(
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/completions/AgentCompletionRunner.kt b/src/main/kotlin/ee/carlrobert/codegpt/completions/AgentCompletionRunner.kt
index 0c6902cb..2e8ce1bf 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/completions/AgentCompletionRunner.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/completions/AgentCompletionRunner.kt
@@ -8,19 +8,21 @@ import ai.koog.agents.features.eventHandler.feature.handleEvents
 import ai.koog.agents.features.tokenizer.feature.MessageTokenizer
 import ai.koog.prompt.dsl.prompt
 import ai.koog.prompt.tokenizer.Tokenizer
+import com.intellij.openapi.components.service
 import com.intellij.openapi.project.Project
 import ee.carlrobert.codegpt.EncodingManager
 import ee.carlrobert.codegpt.agent.AgentEvents
 import ee.carlrobert.codegpt.agent.MessageWithContext
 import ee.carlrobert.codegpt.agent.clients.shouldStream
-import ee.carlrobert.codegpt.agent.clients.shouldStreamCustomOpenAI
 import ee.carlrobert.codegpt.agent.strategy.CODE_AGENT_COMPRESSION
 import ee.carlrobert.codegpt.agent.strategy.HistoryCompressionConfig
 import ee.carlrobert.codegpt.agent.strategy.SingleRunStrategyProvider
 import ee.carlrobert.codegpt.mcp.McpTool
 import ee.carlrobert.codegpt.mcp.McpToolAliasResolver
 import ee.carlrobert.codegpt.mcp.McpToolCallHandler
+import ee.carlrobert.codegpt.settings.models.ModelSettings
 import ee.carlrobert.codegpt.settings.service.ServiceType
+import ee.carlrobert.codegpt.settings.service.custom.CustomServicesSettings
 import ee.carlrobert.codegpt.util.ReasoningFrameTextAdapter
 import kotlinx.coroutines.*
 import java.util.*
@@ -49,7 +51,7 @@ internal object AgentCompletionRunner : CompletionRunner {
         val jobRef = AtomicReference<Job?>()
         val messageBuilder = StringBuilder()
         val project = request.callParameters.project!!
-        val stream = shouldStreamAgentToolLoop(request)
+        val stream = shouldStreamAgentToolLoop(project, request)
         val toolCallHandler = project.let { McpToolCallHandler.getInstance(it) }
         val toolRegistry = createChatToolRegistry(
             callParameters = request.callParameters,
@@ -174,13 +176,23 @@ internal object AgentCompletionRunner : CompletionRunner {
     }
 
     internal fun shouldStreamAgentToolLoop(
+        project: Project,
         request: CompletionRunnerRequest.Chat,
     ): Boolean {
         val provider = request.serviceType
         return when (provider) {
-            ServiceType.CUSTOM_OPENAI -> shouldStreamCustomOpenAI(
-                request.callParameters.featureType
-            )
+            ServiceType.CUSTOM_OPENAI -> {
+                val selectedServiceId = service<ModelSettings>()
+                    .getModelSelectionForFeature(request.callParameters.featureType)
+                    .serviceId
+                service<CustomServicesSettings>()
+                    .state.services
+                    .firstOrNull { it.id == selectedServiceId }
+                    ?.chatCompletionSettings
+                    ?.shouldStream()
+                    ?: false
+            }
+
             ServiceType.GOOGLE -> false
             else -> true
         }
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/completions/CompletionRequestService.kt b/src/main/kotlin/ee/carlrobert/codegpt/completions/CompletionRequestService.kt
index c8149cd3..53385251 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/completions/CompletionRequestService.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/completions/CompletionRequestService.kt
@@ -3,19 +3,24 @@ package ee.carlrobert.codegpt.completions
 import ai.koog.agents.core.tools.ToolDescriptor
 import ai.koog.prompt.dsl.Prompt
 import ai.koog.prompt.dsl.prompt
-import ai.koog.prompt.executor.model.PromptExecutor
+import ai.koog.prompt.llm.LLMCapability
 import ai.koog.prompt.llm.LLModel
 import ee.carlrobert.codegpt.agent.AgentFactory
 import ee.carlrobert.codegpt.agent.clients.CustomOpenAILLMClient
 import ee.carlrobert.codegpt.agent.clients.HttpClientProvider
-import ee.carlrobert.codegpt.agent.clients.RetryingPromptExecutor
+import ee.carlrobert.codegpt.completions.factory.ResponsesApiUtil
 import ee.carlrobert.codegpt.settings.models.ModelSelection
 import ee.carlrobert.codegpt.settings.service.FeatureType
 import ee.carlrobert.codegpt.settings.service.ServiceType
 import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState
+import kotlinx.coroutines.CancellationException
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.SupervisorJob
+import kotlinx.coroutines.cancel
+import kotlinx.coroutines.launch
 import kotlinx.coroutines.runBlocking
 import javax.swing.JPanel
-import kotlin.time.Duration.Companion.seconds
 
 object CompletionRequestService {
 
@@ -84,34 +89,53 @@ object CompletionRequestService {
         modelId: String?,
         eventListener: CompletionStreamEventListener
     ): CancellableRequest {
-        val client = CustomOpenAILLMClient.fromSettingsState(
-            apiKey.orEmpty(),
-            settings,
-            HttpClientProvider.createHttpClient()
-        )
-        val retryPolicy = RetryingPromptExecutor.RetryPolicy(
-            maxAttempts = 2,
-            initialDelay = 1.seconds,
-            maxDelay = 4.seconds,
-            backoffMultiplier = 2.0,
-            jitterFactor = 0.1
-        )
-        val request = CompletionRunnerRequest.Streaming(
-            executor = RetryingPromptExecutor.fromClient(client, retryPolicy, null),
-            model = LLModel(
-                id = modelId?.takeIf { it.isNotBlank() } ?: "gpt-4.1-mini",
-                provider = CustomOpenAILLMClient.CustomOpenAI,
-                capabilities = emptyList(),
-                contextLength = 128_000,
-                maxOutputTokens = 4_096
+        val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
+        val model = LLModel(
+            id = modelId?.takeIf { it.isNotBlank() } ?: "gpt-4.1-mini",
+            provider = CustomOpenAILLMClient.CustomOpenAI,
+            capabilities = listOf(
+                if (ResponsesApiUtil.isResponsesApiUrl(settings.url)) {
+                    LLMCapability.OpenAIEndpoint.Responses
+                } else {
+                    LLMCapability.OpenAIEndpoint.Completions
+                }
             ),
-            prompt = prompt("custom-service-test-connection") {
-                user("Test connection")
-            },
-            eventListener = eventListener,
-            mode = StreamingMode.SINGLE_RESPONSE,
-            cancellationResultBuilder = { StringBuilder() }
+            contextLength = 128_000,
+            maxOutputTokens = 4_096
         )
-        return CompletionRunnerFactory.create(request).run(request)
+        val testPrompt = prompt("custom-service-test-connection") {
+            user("Test connection")
+        }
+
+        val job = scope.launch {
+            val client = CustomOpenAILLMClient.fromSettingsState(
+                apiKey.orEmpty(),
+                settings,
+                HttpClientProvider.createHttpClient()
+            )
+            val messageBuilder = StringBuilder()
+            eventListener.onOpen()
+            try {
+                val responses = client.execute(testPrompt, model, emptyList())
+                val text = CompletionTextExtractor.extract(responses)
+                if (text.isNotBlank()) {
+                    messageBuilder.append(text)
+                    eventListener.onMessage(text)
+                }
+                eventListener.onComplete(StringBuilder(messageBuilder))
+            } catch (_: CancellationException) {
+                eventListener.onCancelled(StringBuilder(messageBuilder))
+            } catch (exception: Throwable) {
+                eventListener.onError(
+                    CompletionError(exception.message ?: "Failed to complete request"),
+                    exception
+                )
+            } finally {
+                runCatching { client.close() }
+                scope.cancel()
+            }
+        }
+
+        return CancellableRequest { job.cancel() }
     }
 }
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/models/ModelProvider.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/models/ModelProvider.kt
index 8ca68250..3ee72c99 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/settings/models/ModelProvider.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/models/ModelProvider.kt
@@ -24,6 +24,7 @@ import ee.carlrobert.codegpt.settings.service.ServiceType
 import ee.carlrobert.codegpt.settings.service.custom.CustomServiceSettingsState
 import ee.carlrobert.codegpt.settings.service.custom.CustomServicesSettings
 import ee.carlrobert.codegpt.settings.service.ollama.OllamaSettings
+import java.net.URI
 import javax.swing.Icon
 
 interface ModelProvider {
@@ -703,6 +704,16 @@ private class CustomOpenAIModelProvider : ModelProvider {
         FeatureType.LOOKUP,
     )
 
+    private fun isResponsesApiUrl(url: String?): Boolean {
+        if (url.isNullOrBlank()) return false
+        return try {
+            val path = URI(url.trim()).path?.trimEnd('/') ?: return false
+            path.endsWith("/responses")
+        } catch (_: Exception) {
+            false
+        }
+    }
+
     private fun buildModels(
         modelExtractor: (CustomServiceSettingsState) -> String?
     ): List<ModelSelection> {
@@ -713,13 +724,30 @@ private class CustomOpenAIModelProvider : ModelProvider {
                 val modelName =
                     modelExtractor(svc)?.takeIf { it.isNotBlank() } ?: return@mapNotNull null
                 val displayName = formatCustomModelDisplayName(serviceName, modelName)
+                val additionalOpenAICapability =
+                    if (isResponsesApiUrl(svc.chatCompletionSettings.url)) {
+                        LLMCapability.OpenAIEndpoint.Responses
+                    } else {
+                        LLMCapability.OpenAIEndpoint.Completions
+                    }
 
                 ModelSelection(
                     provider = serviceType,
                     llmModel = virtualModel(
                         CustomOpenAILLMClient.CustomOpenAI,
                         modelName,
-                        OPENAI_CAPABILITIES
+                        listOf(
+                            LLMCapability.Temperature,
+                            LLMCapability.Schema.JSON.Basic,
+                            LLMCapability.Schema.JSON.Standard,
+                            LLMCapability.Speculation,
+                            LLMCapability.Tools,
+                            LLMCapability.ToolChoice,
+                            LLMCapability.Vision.Image,
+                            LLMCapability.Document,
+                            LLMCapability.Completion,
+                            LLMCapability.MultipleChoices,
+                        ) + additionalOpenAICapability
                     ),
                     displayName = displayName,
                     serviceId = serviceId,
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/form/CustomServiceForm.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/form/CustomServiceForm.kt
index f809f3aa..9e7c6f43 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/form/CustomServiceForm.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/form/CustomServiceForm.kt
@@ -164,18 +164,14 @@ class CustomServiceForm(
                     headers = template.chatCompletionTemplate.headers
                     body = template.chatCompletionTemplate.body
                 }
-                if (template.codeCompletionTemplate != null) {
+                template.codeCompletionTemplate?.let {
                     codeCompletionsForm.run {
-                        url = template.codeCompletionTemplate.url
-                        headers = template.codeCompletionTemplate.headers
-                        body = template.codeCompletionTemplate.body
+                        url = it.url
+                        headers = it.headers
+                        body = it.body
                         parseResponseAsChatCompletions =
-                            template.codeCompletionTemplate.parseResponseAsChatCompletions
+                            it.parseResponseAsChatCompletions
                     }
-                    tabbedPane.setEnabledAt(1, true)
-                } else {
-                    tabbedPane.selectedIndex = 0
-                    tabbedPane.setEnabledAt(1, false)
                 }
             }
         }
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceChatCompletionTemplate.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceChatCompletionTemplate.kt
index 2e639a78..e0f38dae 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceChatCompletionTemplate.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceChatCompletionTemplate.kt
@@ -5,127 +5,109 @@ enum class CustomServiceChatCompletionTemplate(
     val headers: MutableMap<String, String>,
     val body: MutableMap<String, Any>
 ) {
-    ANYSCALE(
-        "https://api.endpoints.anyscale.com/v1/chat/completions",
-        getDefaultHeadersWithAuthentication(),
-        getDefaultBodyParams(
-            mapOf(
-                "model" to "mistralai/Mixtral-8x7B-Instruct-v0.1",
-                "max_tokens" to 8192
-            )
-        )
-    ),
     AZURE(
-        "https://{your-resource-name}.openai.azure.com/openai/deployments/{deployment-id}/chat/completions?api-version=2023-05-15",
-        getDefaultHeaders("api-key", "\$CUSTOM_SERVICE_API_KEY"),
-        getDefaultBodyParams(emptyMap())
+        "https://{your-resource-name}.openai.azure.com/openai/deployments/{deployment-id}/chat/completions?api-version=2024-10-21",
+        getDefaultHeaders("api-key", $$"$CUSTOM_SERVICE_API_KEY"),
+        mutableMapOf(
+            "stream" to true,
+        )
     ),
     DEEP_INFRA(
         "https://api.deepinfra.com/v1/openai/chat/completions",
         getDefaultHeadersWithAuthentication(),
-        getDefaultBodyParams(
-            mapOf(
-                "model" to "meta-llama/Llama-2-70b-chat-hf",
-                "max_tokens" to 8192
-            )
+        mutableMapOf(
+            "stream" to true,
+            "model" to "deepseek-ai/DeepSeek-V3.2",
+            "max_tokens" to 32_000
         )
     ),
     FIREWORKS(
         "https://api.fireworks.ai/inference/v1/chat/completions",
         getDefaultHeadersWithAuthentication(),
-        getDefaultBodyParams(
-            mapOf(
-                "model" to "accounts/fireworks/models/deepseek-r1-basic",
-                "max_tokens" to 8192
-            )
+        mutableMapOf(
+            "stream" to true,
+            "model" to "accounts/fireworks/models/deepseek-v3p1",
+            "max_tokens" to 32_000
         )
     ),
     GROQ(
         "https://api.groq.com/openai/v1/chat/completions",
         getDefaultHeadersWithAuthentication(),
-        getDefaultBodyParams(
-            mapOf(
-                "model" to "codellama-34b",
-                "max_tokens" to 8192
-            )
+        mutableMapOf(
+            "stream" to true,
+            "model" to "openai/gpt-oss-20b",
+            "max_tokens" to 32_000
         )
     ),
     OPENAI(
         "https://api.openai.com/v1/chat/completions",
-        getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"),
-        getDefaultBodyParams(
-            mapOf(
-                "model" to "gpt-5",
-                "max_tokens" to 8192
-            )
-        )
-    ),
-    PERPLEXITY(
-        "https://api.perplexity.ai/chat/completions",
-        getDefaultHeadersWithAuthentication(),
-        getDefaultBodyParams(
-            mapOf(
-                "model" to "codellama",
-                "max_tokens" to 8192
-            )
-        )
-    ),
-    TOGETHER(
-        "https://api.together.xyz/v1/chat/completions",
-        getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"),
-        getDefaultBodyParams(
-            mapOf(
-                "model" to "deepseek-ai/deepseek-coder-33b-instruct",
-                "max_tokens" to 8192
-            )
-        )
-    ),
-    OLLAMA(
-        "http://localhost:11434/v1/chat/completions",
-        getDefaultHeaders(),
-        getDefaultBodyParams(mapOf("model" to "codellama"))
-    ),
-    LLAMA_CPP(
-        "http://localhost:8080/v1/chat/completions",
-        getDefaultHeaders(),
-        getDefaultBodyParams(emptyMap())
-    ),
-    MISTRAL_AI(
-        "https://api.mistral.ai/v1/chat/completions",
-        getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"),
-        getDefaultBodyParams(
-            mapOf(
-                "model" to "open-mistral-7b",
-                "max_tokens" to 8192
-            )
-        )
-    ),
-    OPEN_ROUTER(
-        "https://openrouter.ai/api/v1/chat/completions",
-        getDefaultHeaders(
-            mapOf(
-                "Authorization" to "Bearer \$CUSTOM_SERVICE_API_KEY",
-                "HTTP-Referer" to "https://tryproxy.io",
-                "X-Title" to "ProxyAI"
-            )
-        ),
-        getDefaultBodyParams(
-            mapOf(
-                "model" to "meta-llama/llama-3.1-8b-instruct:free",
-                "max_tokens" to 8192
-            )
+        getDefaultHeaders("Authorization", $$"Bearer $CUSTOM_SERVICE_API_KEY"),
+        mutableMapOf(
+            "stream" to true,
+            "model" to "gpt-4.1",
+            "max_tokens" to 8192
         )
     ),
     OPENAI_RESPONSES(
         "https://api.openai.com/v1/responses",
         getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"),
-        getResponsesApiDefaultBodyParams(
-            mapOf(
-                "model" to "gpt-4.1",
-                "max_output_tokens" to 8192
-            )
+        mutableMapOf(
+            "stream" to true,
+            "model" to "gpt-5.3-codex",
+            "max_output_tokens" to 32_000
         )
-    );
+    ),
+    TOGETHER(
+        "https://api.together.xyz/v1/chat/completions",
+        getDefaultHeaders("Authorization", $$"Bearer $CUSTOM_SERVICE_API_KEY"),
+        mutableMapOf(
+            "stream" to true,
+            "model" to "zai-org/GLM-5",
+            "max_tokens" to 32_000
+        )
+    ),
+    OLLAMA(
+        "http://localhost:11434/v1/chat/completions",
+        getDefaultHeaders(),
+        mutableMapOf(
+            "stream" to true,
+            "model" to "gpt-oss:20b",
+            "max_tokens" to 32_000
+        )
+    ),
+    LLAMA_CPP(
+        "http://localhost:8080/v1/chat/completions",
+        getDefaultHeaders(),
+        mutableMapOf(
+            "stream" to true,
+            "model" to "gpt-oss:20b",
+            "max_tokens" to 32_000
+        )
+    ),
+    MISTRAL_AI(
+        "https://api.mistral.ai/v1/chat/completions",
+        getDefaultHeaders("Authorization", $$"Bearer $CUSTOM_SERVICE_API_KEY"),
+        mutableMapOf(
+            "stream" to true,
+            "model" to "mistral-large-2512",
+            "max_tokens" to 32_000
+        )
+    ),
+    OPENROUTER(
+        "https://openrouter.ai/api/v1/chat/completions",
+        getDefaultHeaders(
+            mapOf(
+                "Authorization" to $$"Bearer $CUSTOM_SERVICE_API_KEY",
+                "HTTP-Referer" to "https://tryproxy.io",
+                "X-OpenRouter-Title" to "ProxyAI"
+            )
+        ),
+        mutableMapOf(
+            "stream" to true,
+            "model" to "moonshotai/kimi-k2.5",
+            "max_tokens" to 8192
+        )
+    ),
 }
 
 private fun getDefaultHeadersWithAuthentication(): MutableMap<String, String> {
@@ -148,23 +130,3 @@ private fun getDefaultHeaders(additionalHeaders: Map<String, String>): MutableMa
     defaultHeaders.putAll(additionalHeaders)
     return defaultHeaders
 }
-
-private fun getDefaultBodyParams(additionalParams: Map<String, Any>): MutableMap<String, Any> {
-    val defaultParams = mutableMapOf<String, Any>(
-        "stream" to true,
-        "messages" to "\$OPENAI_MESSAGES",
-        "temperature" to 0.1
-    )
-    defaultParams.putAll(additionalParams)
-    return defaultParams
-}
-
-private fun getResponsesApiDefaultBodyParams(additionalParams: Map<String, Any>): MutableMap<String, Any> {
-    val defaultParams = mutableMapOf<String, Any>(
-        "stream" to true,
-        "input" to "\$OPENAI_MESSAGES",
-        "temperature" to 0.1
-    )
-    defaultParams.putAll(additionalParams)
-    return defaultParams
-}
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceCodeCompletionTemplate.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceCodeCompletionTemplate.kt
index 327e4090..7a10ba00 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceCodeCompletionTemplate.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceCodeCompletionTemplate.kt
@@ -7,24 +7,24 @@ enum class CustomServiceCodeCompletionTemplate(
     val parseResponseAsChatCompletions: Boolean = false
 ) {
     ANYSCALE(
-        "https://api.endpoints.anyscale.com/v1/completions",
+        "https://{your-service-endpoint}/v1/completions",
         getDefaultHeadersWithAuthentication(),
-        getDefaultBodyParams(mapOf("model" to "codellama/CodeLlama-70b-Instruct-hf"))
+        getDefaultBodyParams(mapOf("model" to "{your-model-id}"))
     ),
     AZURE(
-        "https://{your-resource-name}.openai.azure.com/openai/deployments/{deployment-id}/completions?api-version=2023-05-15",
+        "https://{your-resource-name}.openai.azure.com/openai/deployments/{deployment-id}/completions?api-version=2024-10-21",
         getDefaultHeaders("api-key", "\$CUSTOM_SERVICE_API_KEY"),
         getDefaultBodyParams(emptyMap())
     ),
     DEEP_INFRA(
-        "https://api.deepinfra.com/v1/inference/codellama/CodeLlama-70b-Instruct-hf",
+        "https://api.deepinfra.com/v1/openai/completions",
         getDefaultHeadersWithAuthentication(),
-        mutableMapOf("input" to "\$FIM_PROMPT")
+        getDefaultBodyParams(mapOf("model" to "deepseek-ai/DeepSeek-V3.2"))
     ),
     FIREWORKS(
         "https://api.fireworks.ai/inference/v1/completions",
         getDefaultHeadersWithAuthentication(),
-        getDefaultBodyParams(mapOf("model" to "accounts/fireworks/models/qwen2p5-coder-32b-instruct"))
+        getDefaultBodyParams(mapOf("model" to "accounts/fireworks/models/kimi-k2-instruct-0905"))
     ),
     OPENAI(
         "https://api.openai.com/v1/completions",
@@ -45,7 +45,7 @@ enum class CustomServiceCodeCompletionTemplate(
             "stream" to true,
             "prompt" to "\$PREFIX",
             "suffix" to "\$SUFFIX",
-            "model" to "codestral-latest",
+            "model" to "codestral-2508",
             "temperature" to 0.7,
             "max_tokens" to 1024
         ),
@@ -54,7 +54,7 @@ enum class CustomServiceCodeCompletionTemplate(
     TOGETHER(
         "https://api.together.xyz/v1/completions",
         getDefaultHeaders("Authorization", "Bearer \$CUSTOM_SERVICE_API_KEY"),
-        getDefaultBodyParams(mapOf("model" to "codellama/CodeLlama-70b-hf"))
+        getDefaultBodyParams(mapOf("model" to "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"))
     )
 }
 
@@ -84,4 +84,4 @@ private fun getDefaultBodyParams(additionalParams: Map<String, Any>): MutableMap
     )
     defaultParams.putAll(additionalParams)
     return defaultParams
-}
\ No newline at end of file
+}
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceTemplate.kt b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceTemplate.kt
index b49f3a75..18a035e1 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceTemplate.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/settings/service/custom/template/CustomServiceTemplate.kt
@@ -6,12 +6,6 @@ enum class CustomServiceTemplate(
     val chatCompletionTemplate: CustomServiceChatCompletionTemplate,
     val codeCompletionTemplate: CustomServiceCodeCompletionTemplate? = null
 ) {
-    ANYSCALE(
-        "Anyscale",
-        "https://docs.endpoints.anyscale.com/",
-        CustomServiceChatCompletionTemplate.ANYSCALE,
-        CustomServiceCodeCompletionTemplate.ANYSCALE,
-    ),
     AZURE(
         "Azure OpenAI",
         "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference",
@@ -26,25 +20,26 @@ enum class CustomServiceTemplate(
     ),
     FIREWORKS(
         "Fireworks",
-        "https://readme.fireworks.ai/reference/createchatcompletion",
+        "https://docs.fireworks.ai/api-reference/post-chatcompletions",
         CustomServiceChatCompletionTemplate.FIREWORKS,
         CustomServiceCodeCompletionTemplate.FIREWORKS
     ),
     GROQ(
         "Groq",
-        "https://docs.api.groq.com/md/openai.oas.html",
+        "https://console.groq.com/docs/openai",
         CustomServiceChatCompletionTemplate.GROQ
     ),
     OPENAI(
-        "OpenAI",
-        "https://platform.openai.com/docs/api-reference/chat",
+        "OpenAI (Chat Completions API)",
+        "https://platform.openai.com/docs/api-reference/chat/create",
         CustomServiceChatCompletionTemplate.OPENAI,
         CustomServiceCodeCompletionTemplate.OPENAI
     ),
-    PERPLEXITY(
-        "Perplexity AI",
-        "https://docs.perplexity.ai/reference/post_chat_completions",
-        CustomServiceChatCompletionTemplate.PERPLEXITY
+    OPENAI_RESPONSES(
+        "OpenAI (Responses API)",
+        "https://platform.openai.com/docs/api-reference/responses/create",
+        CustomServiceChatCompletionTemplate.OPENAI_RESPONSES,
+        CustomServiceCodeCompletionTemplate.OPENAI
     ),
     TOGETHER(
         "Together AI",
@@ -54,32 +49,27 @@ enum class CustomServiceTemplate(
     ),
     OLLAMA(
         "Ollama",
-        "https://github.com/ollama/ollama/blob/main/docs/openai.md",
+        "https://docs.ollama.com/api/openai-compatibility",
         CustomServiceChatCompletionTemplate.OLLAMA
     ),
     LLAMA_CPP(
-        "LLaMA C/C++",
-        "https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md",
+        "llama.cpp Server",
+        "https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md",
         CustomServiceChatCompletionTemplate.LLAMA_CPP
     ),
     MISTRAL_AI(
         "Mistral AI",
-        "https://docs.mistral.ai/getting-started/quickstart",
+        "https://docs.mistral.ai/capabilities/completion/usage",
         CustomServiceChatCompletionTemplate.MISTRAL_AI,
         CustomServiceCodeCompletionTemplate.MISTRAL_AI
     ),
     OPEN_ROUTER(
         "OpenRouter",
-        "https://openrouter.ai/docs#quick-start",
-        CustomServiceChatCompletionTemplate.OPEN_ROUTER
-    ),
-    OPENAI_RESPONSES(
-        "OpenAI (Responses API)",
-        "https://platform.openai.com/docs/api-reference/responses",
-        CustomServiceChatCompletionTemplate.OPENAI_RESPONSES
+        "https://openrouter.ai/docs/quickstart",
+        CustomServiceChatCompletionTemplate.OPENROUTER
     );
 
     override fun toString(): String {
         return providerName
     }
-}
\ No newline at end of file
+}
diff --git a/src/main/kotlin/ee/carlrobert/codegpt/toolwindow/chat/parser/CompleteMessageParser.kt b/src/main/kotlin/ee/carlrobert/codegpt/toolwindow/chat/parser/CompleteMessageParser.kt
index 1368725d..d68a038f 100644
--- a/src/main/kotlin/ee/carlrobert/codegpt/toolwindow/chat/parser/CompleteMessageParser.kt
+++ b/src/main/kotlin/ee/carlrobert/codegpt/toolwindow/chat/parser/CompleteMessageParser.kt
@@ -17,11 +17,13 @@ class CompleteMessageParser : MessageParser {
             Pattern.compile("<<<<<<< SEARCH\\n(.*?)(?:\\n=======\\n(.*?))?$", Pattern.DOTALL)
 
         private const val THINK_OPEN_TAG = "<think>"
-        private const val THINK_CLOSE_TAG = "</think>\n\n"
+        private const val THINK_CLOSE_TAG = "</think>"
         private const val CODE_HEADER_GROUP_INDEX = 2
         private const val CODE_CONTENT_GROUP_INDEX = 3
         private const val SEARCH_CONTENT_GROUP_INDEX = 1
         private const val REPLACE_CONTENT_GROUP_INDEX = 2
+        private val THINKING_BLOCK_PATTERN =
+            Regex("""^<think>(.*?)</think>\s*""", setOf(RegexOption.DOT_MATCHES_ALL))
 
         private val TOLERANT_SEARCH_START =
             Regex("""^\s*<{3,}(\s*SEARCH.*)?$""", RegexOption.IGNORE_CASE)
@@ -54,15 +56,10 @@ class CompleteMessageParser : MessageParser {
     private fun extractThoughtIfPresent(input: String): String {
         extractedThought = null
 
-        if (!input.startsWith(THINK_OPEN_TAG)) {
-            return input
-        }
-
-        val closeTagIndex = input.indexOf(THINK_CLOSE_TAG)
-        return if (closeTagIndex != -1) {
-            val thoughtStartIndex = THINK_OPEN_TAG.length
-            extractedThought = input.substring(thoughtStartIndex, closeTagIndex).trim()
-            input.substring(closeTagIndex + THINK_CLOSE_TAG.length)
+        val match = THINKING_BLOCK_PATTERN.find(input)
+        return if (match != null) {
+            extractedThought = match.groupValues[1].trim()
+            input.substring(match.range.last + 1)
         } else {
             input
         }
diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentProviderIntegrationTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentProviderIntegrationTest.kt
index a1b3cc37..17f3ee60 100644
--- a/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentProviderIntegrationTest.kt
+++ b/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentProviderIntegrationTest.kt
@@ -466,7 +466,7 @@ class AgentProviderIntegrationTest : IntegrationTest() {
                     "item",
                     jsonMap(
                         e("type", "function_call"),
-                        e("id", "fc_1"),
+                        e("id", callId),
                         e("call_id", callId),
                         e("name", toolName),
                         e("arguments", "")
@@ -477,12 +477,28 @@ class AgentProviderIntegrationTest : IntegrationTest() {
             ),
             jsonMapResponse(
                 e("type", "response.function_call_arguments.delta"),
-                e("item_id", "fc_1"),
+                e("item_id", callId),
                 e("output_index", 0),
                 e("delta", arguments),
                 e("call_id", callId),
                 e("sequence_number", 2)
             ),
+            jsonMapResponse(
+                e("type", "response.output_item.done"),
+                e(
+                    "item",
+                    jsonMap(
+                        e("type", "function_call"),
+                        e("id", callId),
+                        e("call_id", callId),
+                        e("name", toolName),
+                        e("arguments", arguments),
+                        e("status", "completed")
+                    )
+                ),
+                e("output_index", 0),
+                e("sequence_number", 3)
+            ),
             jsonMapResponse(
                 e("type", "response.completed"),
                 e(
@@ -497,7 +513,7 @@ class AgentProviderIntegrationTest : IntegrationTest() {
                             jsonArray(
                                 jsonMap(
                                     e("type", "function_call"),
-                                    e("id", "fc_1"),
+                                    e("id", callId),
                                     e("call_id", callId),
                                     e("name", toolName),
                                     e("arguments", arguments),
@@ -510,7 +526,7 @@ class AgentProviderIntegrationTest : IntegrationTest() {
                         e("text", jsonMap())
                     )
                 ),
-                e("sequence_number", 3)
+                e("sequence_number", 4)
             )
         )
     }
@@ -826,6 +842,8 @@ class AgentProviderIntegrationTest : IntegrationTest() {
                 jsonArray(
                     jsonMap(
                         e("finishReason", "stop"),
+                        e("finish_reason", "stop"),
+                        e("index", 0),
                         e(
                             "message",
                             jsonMap(
diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentServiceIntegrationTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentServiceIntegrationTest.kt
index d022fc41..b73cda95 100644
--- a/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentServiceIntegrationTest.kt
+++ b/src/test/kotlin/ee/carlrobert/codegpt/agent/AgentServiceIntegrationTest.kt
@@ -3,9 +3,11 @@ package ee.carlrobert.codegpt.agent
 import ai.koog.agents.core.agent.AIAgent
 import ai.koog.agents.core.agent.AIAgentService
 import ai.koog.agents.core.agent.config.AIAgentConfig
+import ai.koog.agents.core.tools.ToolDescriptor
 import ai.koog.agents.core.tools.ToolRegistry
 import ai.koog.agents.snapshot.feature.AgentCheckpointData
 import ai.koog.agents.snapshot.providers.file.JVMFilePersistenceStorageProvider
+import ai.koog.prompt.dsl.ModerationResult
 import ai.koog.prompt.dsl.Prompt
 import ai.koog.prompt.dsl.prompt
 import ai.koog.prompt.executor.model.PromptExecutor
@@ -327,7 +329,7 @@ private object NoopPromptExecutor : PromptExecutor {
     override suspend fun execute(
         prompt: Prompt,
         model: LLModel,
-        tools: List<ai.koog.agents.core.tools.ToolDescriptor>
+        tools: List<ToolDescriptor>
     ): List<Message.Response> {
         throw UnsupportedOperationException("NoopPromptExecutor should not be used in tests")
     }
@@ -335,7 +337,7 @@ private object NoopPromptExecutor : PromptExecutor {
     override fun executeStreaming(
         prompt: Prompt,
         model: LLModel,
-        tools: List<ai.koog.agents.core.tools.ToolDescriptor>
+        tools: List<ToolDescriptor>
     ): Flow<StreamFrame> {
         throw UnsupportedOperationException("NoopPromptExecutor should not be used in tests")
     }
@@ -343,7 +345,7 @@ private object NoopPromptExecutor : PromptExecutor {
     override suspend fun moderate(
         prompt: Prompt,
         model: LLModel
-    ): ai.koog.prompt.dsl.ModerationResult {
+    ): ModerationResult {
         throw UnsupportedOperationException("NoopPromptExecutor should not be used in tests")
     }
 
diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesApiSerializationTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesApiSerializationTest.kt
deleted file mode 100644
index 164886fe..00000000
--- a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesApiSerializationTest.kt
+++ /dev/null
@@ -1,219 +0,0 @@
-package ee.carlrobert.codegpt.agent.clients
-
-import ai.koog.prompt.executor.clients.openai.base.models.Content
-import ai.koog.prompt.executor.clients.openai.base.models.OpenAIFunction
-import ai.koog.prompt.executor.clients.openai.base.models.OpenAIMessage
-import ai.koog.prompt.executor.clients.openai.base.models.OpenAIToolCall
-import ai.koog.prompt.executor.clients.openai.base.models.OpenAITool
-import ai.koog.prompt.executor.clients.openai.base.models.OpenAIToolChoice
-import ai.koog.prompt.executor.clients.openai.base.models.OpenAIToolFunction
-import ai.koog.prompt.llm.LLModel
-import ai.koog.prompt.params.LLMParams
-import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState
-import kotlinx.serialization.json.Json
-import kotlinx.serialization.json.buildJsonArray
-import kotlinx.serialization.json.buildJsonObject
-import kotlinx.serialization.json.boolean
-import kotlinx.serialization.json.jsonArray
-import kotlinx.serialization.json.jsonObject
-import kotlinx.serialization.json.jsonPrimitive
-import kotlinx.serialization.json.put
-import org.assertj.core.api.Assertions.assertThat
-import org.junit.Test
-
-class CustomOpenAIResponsesApiSerializationTest {
-
-    private val json = Json {
-        ignoreUnknownKeys = true
-        encodeDefaults = true
-        explicitNulls = false
-    }
-
-    @Test
-    fun `responses api request should encode tools with top level name`() {
-        val state = CustomServiceChatCompletionSettingsState().apply {
-            url = "https://example.com/v1/responses"
-            body.clear()
-            body["stream"] = true
-            body["input"] = "\$OPENAI_MESSAGES"
-        }
-        val client = CustomOpenAILLMClient.fromSettingsState("test-key", state)
-        val serializeMethod = client.javaClass.getDeclaredMethod(
-            "serializeProviderChatRequest",
-            List::class.java,
-            LLModel::class.java,
-            List::class.java,
-            OpenAIToolChoice::class.java,
-            LLMParams::class.java,
-            Boolean::class.javaPrimitiveType
-        )
-        serializeMethod.isAccessible = true
-
-        val payload = serializeMethod.invoke(
-            client,
-            listOf(OpenAIMessage.User(content = Content.Text("hello"))),
-            LLModel(
-                id = "gpt-test",
-                provider = CustomOpenAILLMClient.CustomOpenAI,
-                capabilities = emptyList(),
-                contextLength = 128_000,
-                maxOutputTokens = 4_096
-            ),
-            listOf(
-                OpenAITool(
-                    OpenAIToolFunction(
-                        name = "Diagnostics",
-                        description = "Read IDE diagnostics",
-                        parameters = buildJsonObject {
-                            put("type", "object")
-                            put("properties", buildJsonObject {})
-                            put("required", buildJsonArray {})
-                        },
-                        strict = true
-                    )
-                )
-            ),
-            OpenAIToolChoice.Function(OpenAIToolChoice.FunctionName("Diagnostics")),
-            CustomOpenAIParams(),
-            true
-        ) as String
-
-        val request = json.parseToJsonElement(payload).jsonObject
-        val tool = request.getValue("tools").jsonArray.single().jsonObject
-        val toolChoice = request.getValue("tool_choice").jsonObject
-
-        assertThat(tool.getValue("type").jsonPrimitive.content).isEqualTo("function")
-        assertThat(tool.getValue("name").jsonPrimitive.content).isEqualTo("Diagnostics")
-        assertThat(tool.getValue("description").jsonPrimitive.content).isEqualTo("Read IDE diagnostics")
-        assertThat(tool.getValue("strict").jsonPrimitive.boolean).isTrue()
-        assertThat(tool).doesNotContainKey("function")
-        assertThat(toolChoice.getValue("type").jsonPrimitive.content).isEqualTo("function")
-        assertThat(toolChoice.getValue("name").jsonPrimitive.content).isEqualTo("Diagnostics")
-    }
-
-    @Test
-    fun `responses api request should encode mode tool choice as lowercase string`() {
-        val state = CustomServiceChatCompletionSettingsState().apply {
-            url = "https://example.com/v1/responses"
-            body.clear()
-            body["stream"] = true
-            body["input"] = "\$OPENAI_MESSAGES"
-        }
-        val client = CustomOpenAILLMClient.fromSettingsState("test-key", state)
-        val serializeMethod = client.javaClass.getDeclaredMethod(
-            "serializeProviderChatRequest",
-            List::class.java,
-            LLModel::class.java,
-            List::class.java,
-            OpenAIToolChoice::class.java,
-            LLMParams::class.java,
-            Boolean::class.javaPrimitiveType
-        )
-        serializeMethod.isAccessible = true
-
-        val payload = serializeMethod.invoke(
-            client,
-            listOf(OpenAIMessage.User(content = Content.Text("hello"))),
-            LLModel(
-                id = "gpt-test",
-                provider = CustomOpenAILLMClient.CustomOpenAI,
-                capabilities = emptyList(),
-                contextLength = 128_000,
-                maxOutputTokens = 4_096
-            ),
-            emptyList<OpenAITool>(),
-            openAIToolChoiceMode("required"),
-            CustomOpenAIParams(),
-            true
-        ) as String
-
-        val request = json.parseToJsonElement(payload).jsonObject
-
-        assertThat(request.getValue("tool_choice").jsonPrimitive.content).isEqualTo("required")
-    }
-
-    @Test
-    fun `responses api request should encode agent tool history as input items`() {
-        val state = CustomServiceChatCompletionSettingsState().apply {
-            url = "https://example.com/v1/responses"
-            body.clear()
-            body["stream"] = true
-            body["input"] = "\$OPENAI_MESSAGES"
-        }
-        val client = CustomOpenAILLMClient.fromSettingsState("test-key", state)
-        val serializeMethod = client.javaClass.getDeclaredMethod(
-            "serializeProviderChatRequest",
-            List::class.java,
-            LLModel::class.java,
-            List::class.java,
-            OpenAIToolChoice::class.java,
-            LLMParams::class.java,
-            Boolean::class.javaPrimitiveType
-        )
-        serializeMethod.isAccessible = true
-
-        val payload = serializeMethod.invoke(
-            client,
-            listOf(
-                OpenAIMessage.System(content = Content.Text("System instructions")),
-                OpenAIMessage.User(content = Content.Text("Read the fixture")),
-                OpenAIMessage.Assistant(
-                    content = Content.Text("Calling Read"),
-                    toolCalls = listOf(
-                        OpenAIToolCall(
-                            "call_read",
-                            OpenAIFunction("Read", "\"{\\\"file_path\\\":\\\"/tmp/fixture.txt\\\"}\"")
-                        )
-                    )
-                ),
-                OpenAIMessage.Tool(
-                    content = Content.Text("1\tfixture contents"),
-                    toolCallId = "call_read"
-                )
-            ),
-            LLModel(
-                id = "gpt-test",
-                provider = CustomOpenAILLMClient.CustomOpenAI,
-                capabilities = emptyList(),
-                contextLength = 128_000,
-                maxOutputTokens = 4_096
-            ),
-            emptyList<OpenAITool>(),
-            null,
-            CustomOpenAIParams(),
-            true
-        ) as String
-
-        val request = json.parseToJsonElement(payload).jsonObject
-        val input = request.getValue("input").jsonArray
-
-        assertThat(input.map { it.jsonObject.getValue("type").jsonPrimitive.content }).containsExactly(
-            "message",
-            "message",
-            "message",
-            "function_call",
-            "function_call_output"
-        )
-        assertThat(input[0].jsonObject.getValue("role").jsonPrimitive.content).isEqualTo("developer")
-        assertThat(input[1].jsonObject.getValue("role").jsonPrimitive.content).isEqualTo("user")
-        assertThat(input[2].jsonObject.getValue("role").jsonPrimitive.content).isEqualTo("assistant")
-        assertThat(
-            input[2].jsonObject.getValue("content").jsonArray.single().jsonObject.getValue("text").jsonPrimitive.content
-        ).isEqualTo("Calling Read")
-        assertThat(input[3].jsonObject.getValue("name").jsonPrimitive.content).isEqualTo("Read")
-        assertThat(input[3].jsonObject.getValue("call_id").jsonPrimitive.content).isEqualTo("call_read")
-        assertThat(input[3].jsonObject.getValue("arguments").jsonPrimitive.content)
-            .isEqualTo("""{"file_path":"/tmp/fixture.txt"}""")
-        assertThat(input[4].jsonObject.getValue("call_id").jsonPrimitive.content).isEqualTo("call_read")
-        assertThat(input[4].jsonObject.getValue("output").jsonPrimitive.content)
-            .isEqualTo("1\tfixture contents")
-    }
-
-    private fun openAIToolChoiceMode(value: String): OpenAIToolChoice {
-        val modeClass = Class.forName(
-            "ai.koog.prompt.executor.clients.openai.base.models.OpenAIToolChoice\$Mode"
-        )
-        val boxMethod = modeClass.getDeclaredMethod("box-impl", String::class.java)
-        return boxMethod.invoke(null, value) as OpenAIToolChoice
-    }
-}
diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesSerializationIntegrationTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesSerializationIntegrationTest.kt
new file mode 100644
index 00000000..6045395c
--- /dev/null
+++ b/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/CustomOpenAIResponsesSerializationIntegrationTest.kt
@@ -0,0 +1,113 @@
+package ee.carlrobert.codegpt.agent.clients
+
+import ai.koog.prompt.dsl.prompt
+import ai.koog.prompt.llm.LLMCapability
+import ai.koog.prompt.llm.LLModel
+import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState
+import kotlinx.coroutines.runBlocking
+import org.assertj.core.api.Assertions.assertThat
+import testsupport.IntegrationTest
+import testsupport.http.ResponseEntity
+import testsupport.http.exchange.BasicHttpExchange
+import testsupport.json.JSONUtil.e
+import testsupport.json.JSONUtil.jsonArray
+import testsupport.json.JSONUtil.jsonMap
+import testsupport.json.JSONUtil.jsonMapResponse
+
+class CustomOpenAIResponsesSerializationIntegrationTest : IntegrationTest() {
+
+    fun testResponsesRequestsFlattenAdditionalBodyParameters() {
+        runBlocking {
+            val settings = CustomServiceChatCompletionSettingsState().apply {
+                url = System.getProperty("customOpenAI.baseUrl") + "/v1/responses"
+                headers.clear()
+                body.clear()
+                body["model"] = "custom-responses-model"
+                body["input"] = "\$OPENAI_MESSAGES"
+                body["custom_config"] = mapOf("tier" to "gold")
+                body["extra_flag"] = "enabled"
+            }
+            val client = CustomOpenAILLMClient.fromSettingsState("TEST_API_KEY", settings)
+            val model = LLModel(
+                id = "custom-responses-model",
+                provider = CustomOpenAILLMClient.CustomOpenAI,
+                capabilities = listOf(LLMCapability.OpenAIEndpoint.Responses),
+                contextLength = 128_000,
+                maxOutputTokens = 4_096
+            )
+            val prompt = prompt("custom-openai-responses-serialization") {
+                user("Test flattened params")
+            }
+
+            expectCustomOpenAI(BasicHttpExchange { request ->
+                assertThat(request.uri.path).isEqualTo("/v1/responses")
+                assertThat(request.method).isEqualTo("POST")
+                assertThat(request.body)
+                    .containsEntry("extra_flag", "enabled")
+                    .containsKey("custom_config")
+                    .doesNotContainKey("additional_properties")
+                assertThat(request.body["custom_config"])
+                    .isEqualTo(mapOf("tier" to "gold"))
+                ResponseEntity(
+                    openAiResponsesResponse(
+                        model = "custom-responses-model",
+                        text = "Hello with flattened params"
+                    )
+                )
+            })
+
+            val response = client.execute(prompt, model, emptyList())
+
+            assertThat(response)
+                .singleElement()
+                .extracting("content")
+                .isEqualTo("Hello with flattened params")
+        }
+    }
+
+    private fun openAiResponsesResponse(
+        model: String,
+        text: String
+    ): String {
+        return jsonMapResponse(
+            e("id", "resp-openai-test"),
+            e("object", "response"),
+            e("created_at", 1),
+            e("model", model),
+            e(
+                "output",
+                jsonArray(
+                    jsonMap(
+                        e("type", "message"),
+                        e("id", "msg_1"),
+                        e("role", "assistant"),
+                        e("status", "completed"),
+                        e(
+                            "content",
+                            jsonArray(
+                                jsonMap(
+                                    e("type", "output_text"),
+                                    e("text", text),
+                                    e("annotations", jsonArray())
+                                )
+                            )
+                        )
+                    )
+                )
+            ),
+            e("parallel_tool_calls", true),
+            e("status", "completed"),
+            e("text", jsonMap()),
+            e(
+                "usage",
+                jsonMap(
+                    e("input_tokens", 1),
+                    e("input_tokens_details", jsonMap("cached_tokens", 0)),
+                    e("output_tokens", 1),
+                    e("output_tokens_details", jsonMap("reasoning_tokens", 0)),
+                    e("total_tokens", 2)
+                )
+            )
+        )
+    }
+}
diff --git a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizerTest.kt b/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizerTest.kt
deleted file mode 100644
index 2ce230c5..00000000
--- a/src/test/kotlin/ee/carlrobert/codegpt/agent/clients/StreamingPayloadNormalizerTest.kt
+++ /dev/null
@@ -1,94 +0,0 @@
-package ee.carlrobert.codegpt.agent.clients
-
-import ai.koog.prompt.message.Message
-import ai.koog.prompt.streaming.StreamFrame
-import ai.koog.prompt.streaming.toMessageResponses
-import ee.carlrobert.codegpt.settings.service.custom.CustomServiceChatCompletionSettingsState
-import kotlinx.coroutines.flow.Flow
-import kotlinx.coroutines.flow.asFlow
-import kotlinx.coroutines.flow.toList
-import kotlinx.coroutines.runBlocking
-import org.assertj.core.api.Assertions.assertThat
-import org.junit.Test
-
-class StreamingPayloadNormalizerTest {
-
-    @Test
-    fun `should unwrap sse data payload`() {
-        assertThat(normalizeSsePayload("data: {\"id\":\"chunk-1\"}"))
-            .isEqualTo("{\"id\":\"chunk-1\"}")
-    }
-
-    @Test
-    fun `should ignore done sentinel`() {
-        assertThat(normalizeSsePayload("data: [DONE]")).isNull()
-    }
-
-    @Test
-    fun `should unwrap multiline sse event frames`() {
-        assertThat(
-            normalizeSsePayload("event: response.created\ndata: {\"type\":\"response.created\"}\n")
-        ).isEqualTo("{\"type\":\"response.created\"}")
-    }
-
-    @Test
-    fun `should unwrap compact sse frames where event and data share a line`() {
-        assertThat(
-            normalizeSsePayload("event: response.created data: {\"type\":\"response.created\"}")
-        ).isEqualTo("{\"type\":\"response.created\"}")
-    }
-
-    @Test
-    fun `custom openai client should decode sse wrapped chat completion chunks`() {
-        val state = CustomServiceChatCompletionSettingsState().apply {
-            url = "https://example.com/v1/chat/completions"
-            body["stream"] = true
-        }
-        val client = CustomOpenAILLMClient.fromSettingsState("test-key", state)
-        val decodeMethod = client.javaClass.getDeclaredMethod("decodeStreamingResponse", String::class.java)
-        decodeMethod.isAccessible = true
-
-        val response = decodeMethod.invoke(
-            client,
-            "data: {\"choices\":[],\"created\":0,\"id\":\"chunk-1\",\"model\":\"test-model\"}"
-        ) as CustomOpenAIChatCompletionStreamResponse
-
-        assertThat(response.id).isEqualTo("chunk-1")
-        assertThat(response.model).isEqualTo("test-model")
-        assertThat(response.choices).isEmpty()
-    }
-
-    @Test
-    fun `responses api streaming tool call should collapse into one named tool call`() {
-        val state = CustomServiceChatCompletionSettingsState().apply {
-            url = "https://example.com/v1/responses"
-            body.clear()
-            body["stream"] = true
-            body["input"] = "\$OPENAI_MESSAGES"
-        }
-        val client = CustomOpenAILLMClient.fromSettingsState("test-key", state)
-        val decodeMethod = client.javaClass.getDeclaredMethod("decodeStreamingResponse", String::class.java)
-        decodeMethod.isAccessible = true
-        val processMethod = client.javaClass.getDeclaredMethod("processStreamingResponse", Flow::class.java)
-        processMethod.isAccessible = true
-
-        val chunks = listOf(
-            """{"type":"response.output_item.added","item":{"type":"function_call","id":"fc_1","call_id":"call_diag","name":"Diagnostics","arguments":""},"output_index":0,"sequence_number":1}""",
-            """{"type":"response.function_call_arguments.delta","item_id":"fc_1","output_index":0,"delta":"{\"file_path\":\"/tmp/mainwindow.cpp\",\"filter\":\"all\"}","call_id":"call_diag","sequence_number":2}""",
-            """{"type":"response.completed","response":{"id":"resp-tool","object":"response","created_at":1,"model":"test-model","output":[{"type":"function_call","id":"fc_1","call_id":"call_diag","name":"Diagnostics","arguments":"{\"file_path\":\"/tmp/mainwindow.cpp\",\"filter\":\"all\"}","status":"completed"}],"parallel_tool_calls":true,"status":"completed","text":{}},"sequence_number":3}"""
-        ).map { payload ->
-            decodeMethod.invoke(client, payload) as CustomOpenAIChatCompletionStreamResponse
-        }
-
-        val responses = runBlocking {
-            @Suppress("UNCHECKED_CAST")
-            val frames = processMethod.invoke(client, chunks.asFlow()) as Flow<StreamFrame>
-            frames.toList().toMessageResponses()
-        }
-        val toolCall = responses.filterIsInstance<Message.Tool.Call>().single()
-
-        assertThat(toolCall.id).isEqualTo("call_diag")
-        assertThat(toolCall.tool).isEqualTo("Diagnostics")
-        assertThat(toolCall.content).isEqualTo("""{"file_path":"/tmp/mainwindow.cpp","filter":"all"}""")
-    }
-}