refactor: sse message parsing
Some checks are pending
Build / Build (push) Waiting to run
Build / Verify Plugin (push) Blocked by required conditions

This commit is contained in:
Carl-Robert Linnupuu 2025-05-29 12:15:57 +01:00
parent 01d6b51acf
commit 4e54a2f90b
5 changed files with 541 additions and 232 deletions

View file

@ -7,18 +7,15 @@ import com.intellij.openapi.editor.ex.EditorEx
import com.intellij.openapi.project.Project
import com.intellij.openapi.vfs.LocalFileSystem
import com.intellij.openapi.vfs.VirtualFile
import com.intellij.openapi.vfs.readText
import ee.carlrobert.codegpt.CodeGPTKeys
import ee.carlrobert.codegpt.completions.AutoApplyParameters
import ee.carlrobert.codegpt.completions.CompletionRequestService
import ee.carlrobert.codegpt.toolwindow.chat.editor.ResponseEditorPanel
import ee.carlrobert.codegpt.toolwindow.chat.editor.ResponseEditorPanel.Companion.RESPONSE_EDITOR_STATE_KEY
import ee.carlrobert.codegpt.toolwindow.chat.editor.RetryListener
import ee.carlrobert.codegpt.toolwindow.chat.editor.AutoApplyListener
import ee.carlrobert.codegpt.toolwindow.chat.editor.diff.DiffEditorManager
import ee.carlrobert.codegpt.toolwindow.chat.editor.factory.EditorFactory
import ee.carlrobert.codegpt.toolwindow.chat.parser.Code
import ee.carlrobert.codegpt.toolwindow.chat.parser.Segment
import ee.carlrobert.codegpt.toolwindow.chat.parser.SseMessageParser
@Service(Service.Level.PROJECT)
class EditorStateManager(private val project: Project) {
@ -42,22 +39,13 @@ class EditorStateManager(private val project: Project) {
return state
}
fun handleRetryForFailedSearch(replaceContent: String) {
val editor = currentState?.editor ?: return
val virtualFile =
CodeGPTKeys.TOOLWINDOW_EDITOR_FILE_DETAILS.get(editor)?.virtualFile ?: return
getCodeEditsAsync(replaceContent, virtualFile, editor)
}
fun getCodeEditsAsync(
content: String,
virtualFile: VirtualFile,
editor: EditorEx,
) {
val params = AutoApplyParameters(content, virtualFile)
val messageParser = SseMessageParser()
val listener = RetryListener(project, messageParser, this) { newEditor ->
val listener = AutoApplyListener(project, this) { newEditor ->
val responseEditorPanel = editor.component.parent as? ResponseEditorPanel
?: throw IllegalStateException("Expected parent to be ResponseEditorPanel")
responseEditorPanel.replaceEditor(editor, newEditor)

View file

@ -39,4 +39,4 @@ data class SearchReplace(
val replace: String,
override val language: String,
override val filePath: String?
) : Segment(search, language, filePath)
) : Segment(replace, language, filePath)

View file

@ -1,239 +1,332 @@
package ee.carlrobert.codegpt.toolwindow.chat.parser
enum class State { OUTSIDE, CODE_HEADER_WAITING, IN_CODE, IN_SEARCH, IN_REPLACE, IN_THINKING }
class SseMessageParser : MessageParser {
var state = State.OUTSIDE
private val buffer = StringBuilder()
private val parsedSegments = mutableListOf<Segment>()
private companion object {
const val CODE_FENCE = "```"
const val THINK_START = "<think>"
const val THINK_END = "</think>"
const val SEARCH_MARKER = "<<<<<<< SEARCH"
const val SEPARATOR_MARKER = "======="
const val REPLACE_MARKER = ">>>>>>> REPLACE"
const val NEWLINE = "\n"
const val HEADER_DELIMITER = ":"
const val HEADER_PARTS_LIMIT = 2
}
private var currentCodeHeader: CodeHeader? = null
private val codeBuilder = StringBuilder()
private val headerBuilder = StringBuilder()
private val searchBuilder = StringBuilder()
private val replaceBuilder = StringBuilder()
private val thinkingBuilder = StringBuilder()
private var parserState: ParserState = ParserState.Outside
private val buffer = StringBuilder()
fun clear() {
state = State.OUTSIDE
parserState = ParserState.Outside
buffer.clear()
parsedSegments.clear()
currentCodeHeader = null
codeBuilder.clear()
headerBuilder.clear()
searchBuilder.clear()
replaceBuilder.clear()
thinkingBuilder.clear()
}
/**
* Parse incoming partial text and return any completed segments.
* Leftover text remains in buffer until more input arrives.
*/
override fun parse(input: String): List<Segment> {
buffer.append(input)
val output = mutableListOf<Segment>()
val segments = mutableListOf<Segment>()
loop@ while (true) {
when (state) {
State.OUTSIDE -> {
val fenceIdx = buffer.indexOf("```")
val thinkStartIdx = buffer.indexOf("<think>")
while (processNextSegment(segments)) {
// Continue processing until no more complete segments can be extracted
}
when {
fenceIdx != -1 && (thinkStartIdx == -1 || fenceIdx < thinkStartIdx) -> {
if (fenceIdx > 0) {
output += Text(buffer.substring(0, fenceIdx))
}
buffer.delete(0, fenceIdx + 3)
state = State.CODE_HEADER_WAITING
headerBuilder.clear()
continue@loop
}
segments.addAll(getPendingSegments())
thinkStartIdx != -1 -> {
if (thinkStartIdx > 0) {
output += Text(buffer.substring(0, thinkStartIdx))
}
buffer.delete(0, thinkStartIdx + "<think>".length)
state = State.IN_THINKING
thinkingBuilder.clear()
continue@loop
}
return segments
}
else -> break@loop
}
private fun processNextSegment(segments: MutableList<Segment>): Boolean {
return when (val state = parserState) {
is ParserState.Outside -> processOutsideState(segments)
is ParserState.CodeHeaderWaiting -> processCodeHeaderState(segments, state)
is ParserState.InCode -> processInCodeState(segments, state)
is ParserState.InSearch -> processInSearchState(segments, state)
is ParserState.InReplace -> processInReplaceState(segments, state)
is ParserState.InThinking -> processInThinkingState(segments, state)
}
}
private fun processOutsideState(segments: MutableList<Segment>): Boolean {
val fenceIdx = buffer.indexOf(CODE_FENCE)
val thinkStartIdx = buffer.indexOf(THINK_START)
return when {
shouldProcessCodeFence(fenceIdx, thinkStartIdx) -> {
extractTextBeforeIndex(fenceIdx)?.let { segments.add(it) }
consumeFromBuffer(fenceIdx + CODE_FENCE.length)
parserState = ParserState.CodeHeaderWaiting()
true
}
thinkStartIdx != -1 -> {
extractTextBeforeIndex(thinkStartIdx)?.let { segments.add(it) }
consumeFromBuffer(thinkStartIdx + THINK_START.length)
parserState = ParserState.InThinking()
true
}
else -> false
}
}
private fun processCodeHeaderState(
segments: MutableList<Segment>,
state: ParserState.CodeHeaderWaiting
): Boolean {
val nlIdx = buffer.indexOf(NEWLINE)
if (nlIdx < 0) return false
val headerLine = buffer.substring(0, nlIdx).trim()
consumeFromBuffer(nlIdx + 1)
val updatedHeader = state.content + headerLine
val header = parseCodeHeader(updatedHeader)
return if (header != null) {
segments.add(header)
parserState = ParserState.InCode(header)
true
} else {
segments.add(CodeHeaderWaiting(updatedHeader))
parserState = ParserState.CodeHeaderWaiting(updatedHeader)
false
}
}
private fun processInCodeState(
segments: MutableList<Segment>,
state: ParserState.InCode
): Boolean {
val nlIdx = buffer.indexOf(NEWLINE)
if (nlIdx < 0) return false
val line = buffer.substring(0, nlIdx)
consumeFromBuffer(nlIdx + 1)
return when {
line.trim() == CODE_FENCE -> {
if (state.content.isNotEmpty()) {
segments.add(Code(state.content, state.header.language, state.header.filePath))
}
segments.add(CodeEnd(""))
parserState = ParserState.Outside
true
}
State.CODE_HEADER_WAITING -> {
val nlIdx = buffer.indexOf("\n")
if (nlIdx < 0) break@loop
val headerLine = buffer.substring(0, nlIdx).trim()
buffer.delete(0, nlIdx + 1)
headerBuilder.append(headerLine)
line.trimStart().startsWith(SEARCH_MARKER) -> {
segments.add(SearchWaiting("", state.header.language, state.header.filePath))
parserState = ParserState.InSearch(state.header, "")
true
}
val headerText = headerBuilder.toString()
val parts = headerText.split(":", limit = 2)
val language = parts.getOrNull(0) ?: ""
val fileName = parts.getOrNull(1)
if (parts.size > 0) {
currentCodeHeader = CodeHeader(language, fileName)
output += currentCodeHeader!!
state = State.IN_CODE
codeBuilder.clear()
headerBuilder.clear()
} else {
output += CodeHeaderWaiting(headerText)
}
}
State.IN_CODE -> {
val idx = buffer.indexOf("\n")
if (idx < 0) break@loop
val line = buffer.substring(0, idx)
buffer.delete(0, idx + 1)
when {
line.trim() == "```" -> {
if (codeBuilder.isNotEmpty()) {
output += Code(
codeBuilder.toString(),
currentCodeHeader!!.language,
currentCodeHeader!!.filePath
)
}
output += CodeEnd("")
state = State.OUTSIDE
}
line.trimStart().startsWith("<<<<<<< SEARCH") -> {
state = State.IN_SEARCH
searchBuilder.clear()
output += SearchWaiting(
"",
currentCodeHeader!!.language,
currentCodeHeader!!.filePath
)
}
else -> codeBuilder.appendLine(line)
}
}
State.IN_SEARCH -> {
val idx = buffer.indexOf("\n")
if (idx < 0) break@loop
val line = buffer.substring(0, idx)
buffer.delete(0, idx + 1)
if (line.trim() == "=======") {
state = State.IN_REPLACE
replaceBuilder.clear()
output += ReplaceWaiting(
searchBuilder.toString(),
"",
currentCodeHeader!!.language,
currentCodeHeader!!.filePath
)
} else {
searchBuilder.appendLine(line)
output += SearchWaiting(
searchBuilder.toString(),
currentCodeHeader!!.language,
currentCodeHeader!!.filePath
)
}
}
State.IN_REPLACE -> {
val idx = buffer.indexOf("\n")
if (idx < 0) break@loop
val line = buffer.substring(0, idx)
buffer.delete(0, idx + 1)
if (line.trim().startsWith(">>>>>>> REPLACE")) {
output += SearchReplace(
search = searchBuilder.toString(),
replace = replaceBuilder.toString(),
language = currentCodeHeader!!.language,
filePath = currentCodeHeader!!.filePath
)
state = State.IN_CODE
} else {
replaceBuilder.appendLine(line)
output += ReplaceWaiting(
searchBuilder.toString(),
replaceBuilder.toString(),
currentCodeHeader!!.language,
currentCodeHeader!!.filePath
)
}
}
State.IN_THINKING -> {
val thinkEndIdx = buffer.indexOf("</think>")
if (thinkEndIdx < 0) {
if (buffer.isNotEmpty()) {
thinkingBuilder.append(buffer)
output += Thinking(thinkingBuilder.toString())
buffer.clear()
}
break@loop
}
thinkingBuilder.append(buffer.substring(0, thinkEndIdx))
output += Thinking(thinkingBuilder.toString())
buffer.delete(0, thinkEndIdx + "</think>".length)
state = State.OUTSIDE
thinkingBuilder.clear()
continue@loop
}
else -> {
val newContent =
if (state.content.isEmpty()) line else state.content + NEWLINE + line
parserState = ParserState.InCode(state.header, newContent)
false
}
}
when (state) {
State.OUTSIDE ->
if (buffer.isNotBlank())
output += Text(buffer.toString())
State.CODE_HEADER_WAITING ->
if (headerBuilder.isNotBlank())
output += CodeHeaderWaiting(headerBuilder.toString())
State.IN_CODE ->
if (codeBuilder.isNotBlank())
output += Code(
codeBuilder.toString(),
currentCodeHeader!!.language,
currentCodeHeader!!.filePath
)
State.IN_SEARCH ->
if (searchBuilder.isNotBlank())
output += SearchWaiting(
searchBuilder.toString(),
currentCodeHeader!!.language,
currentCodeHeader!!.filePath
)
State.IN_REPLACE ->
if (replaceBuilder.isNotBlank())
output += ReplaceWaiting(
searchBuilder.toString(),
replaceBuilder.toString(),
currentCodeHeader!!.language,
currentCodeHeader!!.filePath
)
State.IN_THINKING ->
if (thinkingBuilder.isNotBlank() || buffer.isNotBlank()) {
thinkingBuilder.append(buffer)
buffer.clear()
output += Thinking(thinkingBuilder.toString())
}
}
parsedSegments.addAll(output)
return output
}
}
private fun processInSearchState(
segments: MutableList<Segment>,
state: ParserState.InSearch
): Boolean {
val nlIdx = buffer.indexOf(NEWLINE)
if (nlIdx < 0) return false
val line = buffer.substring(0, nlIdx)
consumeFromBuffer(nlIdx + 1)
return if (line.trim() == SEPARATOR_MARKER) {
segments.add(
ReplaceWaiting(
state.searchContent,
"",
state.header.language,
state.header.filePath
)
)
parserState = ParserState.InReplace(state.header, state.searchContent, "")
true
} else {
val newSearch =
if (state.searchContent.isEmpty()) line else state.searchContent + NEWLINE + line
segments.add(SearchWaiting(newSearch, state.header.language, state.header.filePath))
parserState = ParserState.InSearch(state.header, newSearch)
false
}
}
private fun processInReplaceState(
segments: MutableList<Segment>,
state: ParserState.InReplace
): Boolean {
val nlIdx = buffer.indexOf(NEWLINE)
if (nlIdx < 0) return false
val line = buffer.substring(0, nlIdx)
consumeFromBuffer(nlIdx + 1)
return when {
line.trim().startsWith(REPLACE_MARKER) -> {
segments.add(
SearchReplace(
search = state.searchContent,
replace = state.replaceContent,
language = state.header.language,
filePath = state.header.filePath
)
)
parserState = ParserState.InCode(state.header)
true
}
line.trim() == CODE_FENCE -> {
// Invalid search/replace block - missing REPLACE marker
// Mark done
segments.add(CodeEnd(""))
parserState = ParserState.Outside
true
}
else -> {
val newReplace =
if (state.replaceContent.isEmpty()) line else state.replaceContent + NEWLINE + line
segments.add(
ReplaceWaiting(
state.searchContent,
newReplace,
state.header.language,
state.header.filePath
)
)
parserState = ParserState.InReplace(state.header, state.searchContent, newReplace)
false
}
}
}
private fun processInThinkingState(
segments: MutableList<Segment>,
state: ParserState.InThinking
): Boolean {
val thinkEndIdx = buffer.indexOf(THINK_END)
return if (thinkEndIdx >= 0) {
val thinkingContent = state.content + buffer.substring(0, thinkEndIdx)
segments.add(Thinking(thinkingContent))
consumeFromBuffer(thinkEndIdx + THINK_END.length)
parserState = ParserState.Outside
true
} else {
if (buffer.isNotEmpty()) {
val newContent = state.content + buffer.toString()
segments.add(Thinking(newContent))
buffer.clear()
parserState = ParserState.InThinking(newContent)
}
false
}
}
private fun getPendingSegments(): List<Segment> {
return when (val state = parserState) {
is ParserState.Outside -> {
if (buffer.isNotBlank()) listOf(Text(buffer.toString()))
else emptyList()
}
is ParserState.CodeHeaderWaiting -> {
if (state.content.isNotBlank()) listOf(CodeHeaderWaiting(state.content))
else emptyList()
}
is ParserState.InCode -> {
if (state.content.isNotBlank()) {
listOf(Code(state.content, state.header.language, state.header.filePath))
} else emptyList()
}
is ParserState.InSearch -> {
if (state.searchContent.isNotBlank()) {
listOf(
SearchWaiting(
state.searchContent,
state.header.language,
state.header.filePath
)
)
} else emptyList()
}
is ParserState.InReplace -> {
if (state.replaceContent.isNotBlank()) {
listOf(
ReplaceWaiting(
state.searchContent,
state.replaceContent,
state.header.language,
state.header.filePath
)
)
} else emptyList()
}
is ParserState.InThinking -> {
val fullContent = state.content + buffer.toString()
buffer.clear()
if (fullContent.isNotBlank()) listOf(Thinking(fullContent))
else emptyList()
}
}
}
private fun shouldProcessCodeFence(fenceIdx: Int, thinkStartIdx: Int): Boolean {
return fenceIdx != -1 && (thinkStartIdx == -1 || fenceIdx < thinkStartIdx)
}
private fun extractTextBeforeIndex(index: Int): Text? {
return if (index > 0) Text(buffer.substring(0, index)) else null
}
private fun consumeFromBuffer(length: Int) {
buffer.delete(0, length)
}
private fun parseCodeHeader(headerText: String): CodeHeader? {
val parts = headerText.split(HEADER_DELIMITER, limit = HEADER_PARTS_LIMIT)
return if (parts.isNotEmpty()) {
CodeHeader(
language = parts.getOrNull(0) ?: "",
filePath = parts.getOrNull(1)
)
} else null
}
private sealed class ParserState {
object Outside : ParserState()
data class CodeHeaderWaiting(
val content: String = ""
) : ParserState()
data class InCode(
val header: CodeHeader,
val content: String = ""
) : ParserState()
data class InSearch(
val header: CodeHeader,
val searchContent: String = ""
) : ParserState()
data class InReplace(
val header: CodeHeader,
val searchContent: String,
val replaceContent: String = ""
) : ParserState()
data class InThinking(
val content: String = ""
) : ParserState()
}
}

View file

@ -0,0 +1,228 @@
package ee.carlrobert.codegpt.toolwindow.chat.parser
import org.assertj.core.api.Assertions.assertThat
import org.junit.Test
import kotlin.random.Random
class SseMessageParserStreamTest {
/**
* Simulates streaming by breaking input into random chunks
*/
private fun simulateStreaming(
parser: SseMessageParser,
input: String,
minChunkSize: Int = 1,
maxChunkSize: Int = 10,
seed: Long? = null
): List<Segment> {
val random = seed?.let { Random(it) } ?: Random
val allSegments = mutableListOf<Segment>()
var position = 0
while (position < input.length) {
val chunkSize = random.nextInt(minChunkSize, maxChunkSize + 1)
.coerceAtMost(input.length - position)
val chunk = input.substring(position, position + chunkSize)
val segments = parser.parse(chunk)
allSegments.addAll(segments)
position += chunkSize
// Simulate network delay
Thread.sleep(random.nextLong(5, 20))
}
return allSegments
}
@Test
fun shouldHandleStreamedCodeBlock() {
val parser = SseMessageParser()
val input = """
Here is some code:
```kotlin:MyFile.kt
fun main() {
println("Hello, World!")
}
```
Done!
""".trimIndent()
val segments = simulateStreaming(parser, input, seed = 42)
val segmentTypes = segments.map { it::class.simpleName }
assertThat(segmentTypes).contains("Text", "CodeHeader", "Code", "CodeEnd")
val codeSegments = segments.filterIsInstance<Code>()
assertThat(codeSegments).isNotEmpty
assertThat(codeSegments.last().content).contains("println(\"Hello, World!\")")
}
@Test
fun shouldHandleStreamedSearchReplace() {
val parser = SseMessageParser()
val input = """
```kotlin:MyFile.kt
<<<<<<< SEARCH
fun oldFunction() {
return "old"
}
=======
fun newFunction() {
return "new"
}
>>>>>>> REPLACE
```
""".trimIndent()
val segments = simulateStreaming(parser, input, minChunkSize = 3, maxChunkSize = 15, seed = 123)
val searchReplaceSegments = segments.filterIsInstance<SearchReplace>()
assertThat(searchReplaceSegments).hasSize(1)
val sr = searchReplaceSegments[0]
assertThat(sr.search).contains("oldFunction")
assertThat(sr.replace).contains("newFunction")
assertThat(sr.language).isEqualTo("kotlin")
assertThat(sr.filePath).isEqualTo("MyFile.kt")
}
@Test
fun shouldHandleSearchReplaceWithInvalidEnding() {
val parser = SseMessageParser()
val input = """
Here's some text.
```kotlin:MyFile.kt
<<<<<<< SEARCH
fun oldFunction() {
return "old"
}
=======
fun newFunction() {
return "new"
}
```
Here's some other text.
""".trimIndent()
val segments = simulateStreaming(parser, input, minChunkSize = 3, maxChunkSize = 15, seed = 123)
val searchReplaceSegments = segments.filterIsInstance<SearchReplace>()
assertThat(searchReplaceSegments).isEmpty()
val codeEndSegments = segments.filterIsInstance<CodeEnd>()
assertThat(codeEndSegments).hasSize(1)
val textSegments = segments.filterIsInstance<Text>()
assertThat(textSegments.any { it.content.contains("Here's some other text") }).isTrue
val replaceWaitingSegments = segments.filterIsInstance<ReplaceWaiting>()
assertThat(replaceWaitingSegments.size).isLessThan(10)
}
@Test
fun shouldHandleStreamedThinkingBlock() {
val parser = SseMessageParser()
val input = """
Let me analyze this...
<think>
First, I need to understand the requirements.
Then, I'll design a solution.
Finally, I'll implement it.
</think>
Here's my solution:
""".trimIndent()
val segments = simulateStreaming(parser, input, minChunkSize = 2, maxChunkSize = 8, seed = 456)
val thinkingSegments = segments.filterIsInstance<Thinking>()
assertThat(thinkingSegments).isNotEmpty
val finalThinking = thinkingSegments.last()
assertThat(finalThinking.content).contains("understand the requirements")
assertThat(finalThinking.content).contains("design a solution")
assertThat(finalThinking.content).contains("implement it")
}
@Test
fun shouldHandleMultipleCodeBlocksStreamed() {
val parser = SseMessageParser()
val input = """
First:
```java
System.out.println("1");
```
Second:
```python
print("2")
```
Third:
```javascript
console.log("3");
```
""".trimIndent()
val segments = simulateStreaming(parser, input, minChunkSize = 5, maxChunkSize = 20, seed = 789)
val codeHeaders = segments.filterIsInstance<CodeHeader>()
assertThat(codeHeaders).hasSize(3)
assertThat(codeHeaders.map { it.language }).containsExactly("java", "python", "javascript")
val codeSegments = segments.filterIsInstance<Code>()
assertThat(codeSegments.any { it.content.contains("System.out.println") }).isTrue
assertThat(codeSegments.any { it.content.contains("print(\"2\")") }).isTrue
assertThat(codeSegments.any { it.content.contains("console.log") }).isTrue
}
@Test
fun shouldHandleMixedContentStreamed() {
val parser = SseMessageParser()
val input = """
Starting analysis...
<think>
Processing request...
</think>
Here's the code:
```kotlin:Solution.kt
<<<<<<< SEARCH
val old = 1
=======
val new = 2
>>>>>>> REPLACE
```
And a simple block:
```python
print("done")
```
""".trimIndent()
val segments = simulateStreaming(parser, input, minChunkSize = 3, maxChunkSize = 12, seed = 999)
val segmentTypeSet = segments.map { it::class.simpleName }.toSet()
assertThat(segmentTypeSet).contains(
"Text", "Thinking", "CodeHeader", "SearchWaiting",
"ReplaceWaiting", "SearchReplace", "Code", "CodeEnd"
)
}
@Test
fun shouldHandleRandomChunkingConsistently() {
val input = """
```kotlin:Test.kt
class Test {
fun method() {
println("Hello")
}
}
```
""".trimIndent()
repeat(5) { iteration ->
val parser = SseMessageParser()
val segments = simulateStreaming(parser, input, seed = iteration.toLong())
val codeSegments = segments.filterIsInstance<Code>()
assertThat(codeSegments).isNotEmpty
val finalCode = codeSegments.last().content
assertThat(finalCode).contains("class Test")
assertThat(finalCode).contains("println(\"Hello\")")
}
}
}