Revert "fix: use /infill for llama.cpp code-completions (#513)" (#533)

This reverts commit 8de72b3301.
Author: Phil, committed by GitHub, 2024-05-08 15:06:14 +02:00
Parent: ee16bfee10
Commit: dcd0a3fc51
3 changed files with 12 additions and 13 deletions


@@ -126,7 +126,7 @@ public final class CompletionRequestService {
           CodeCompletionRequestFactory.buildCustomRequest(requestDetails),
           new OpenAITextCompletionEventSourceListener(eventListener));
       case LLAMA_CPP -> CompletionClientProvider.getLlamaClient()
-          .getInfillAsync(
+          .getChatCompletionAsync(
               CodeCompletionRequestFactory.buildLlamaRequest(requestDetails),
               eventListener);
       case OLLAMA -> CompletionClientProvider.getOllamaClient().getCompletionAsync(
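
Note: getInfillAsync targets llama.cpp's /infill endpoint, which assembles the fill-in-the-middle (FIM) prompt server-side from input_prefix and input_suffix; the restored getChatCompletionAsync path posts one pre-built prompt string to /completion. A minimal Kotlin sketch of that distinction (the data classes below are illustrative stand-ins, not the actual llm-client types):

    // Illustrative request shapes only; the real llm-client classes differ.
    data class InfillRequest(val inputPrefix: String, val inputSuffix: String) // POST /infill
    data class CompletionRequest(val prompt: String)                          // POST /completion

    fun buildBoth(prefix: String, suffix: String, buildPrompt: (String, String) -> String) {
        val viaInfill = InfillRequest(prefix, suffix)                      // server builds the FIM prompt
        val viaCompletion = CompletionRequest(buildPrompt(prefix, suffix)) // client builds it up front
    }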


@@ -13,7 +13,6 @@ import ee.carlrobert.codegpt.settings.service.llama.LlamaSettingsState
 import ee.carlrobert.codegpt.settings.service.ollama.OllamaSettings
 import ee.carlrobert.codegpt.settings.service.openai.OpenAISettings
 import ee.carlrobert.llm.client.llama.completion.LlamaCompletionRequest
-import ee.carlrobert.llm.client.llama.completion.LlamaInfillRequest
 import ee.carlrobert.llm.client.ollama.completion.request.OllamaCompletionRequest
 import ee.carlrobert.llm.client.ollama.completion.request.OllamaParameters
 import ee.carlrobert.llm.client.openai.completion.request.OpenAITextCompletionRequest
@@ -82,16 +81,16 @@ object CodeCompletionRequestFactory {
     }

     @JvmStatic
-    fun buildLlamaRequest(details: InfillRequestDetails): LlamaInfillRequest {
+    fun buildLlamaRequest(details: InfillRequestDetails): LlamaCompletionRequest {
         val settings = LlamaSettings.getCurrentState()
         val promptTemplate = getLlamaInfillPromptTemplate(settings)
-        return LlamaInfillRequest(
-            LlamaCompletionRequest.Builder(null)
-                .setN_predict(settings.codeCompletionMaxTokens)
-                .setStream(true)
-                .setTemperature(0.4)
-                .setStop(promptTemplate.stopTokens), details.prefix, details.suffix
-        )
+        val prompt = promptTemplate.buildPrompt(details.prefix, details.suffix)
+        return LlamaCompletionRequest.Builder(prompt)
+            .setN_predict(settings.codeCompletionMaxTokens)
+            .setStream(true)
+            .setTemperature(0.4)
+            .setStop(promptTemplate.stopTokens)
+            .build()
     }

     fun buildOllamaRequest(details: InfillRequestDetails): OllamaCompletionRequest {
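
Note: with the completion endpoint restored, the FIM prompt is assembled client-side by promptTemplate.buildPrompt(prefix, suffix). A self-contained sketch of what an infill template such as CODE_LLAMA plausibly produces follows; the <PRE>/<SUF>/<MID> markers are the published Code Llama FIM format, but the exact template string and stop tokens used by the plugin are an assumption:

    // Sketch of client-side FIM prompt assembly. Assumes the Code Llama
    // format; the plugin's real InfillPromptTemplate may differ in spacing
    // and stop tokens.
    enum class FimTemplate(val stopTokens: List<String>) {
        CODE_LLAMA(listOf("<EOT>")) {
            override fun buildPrompt(prefix: String, suffix: String) =
                "<PRE> $prefix <SUF>$suffix <MID>"
        };

        abstract fun buildPrompt(prefix: String, suffix: String): String
    }

    fun main() {
        // Prints: <PRE> fun sum(a: Int, b: Int) = <SUF> <MID>
        println(FimTemplate.CODE_LLAMA.buildPrompt("fun sum(a: Int, b: Int) =", ""))
    }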


@@ -35,11 +35,11 @@ class CodeCompletionServiceTest : IntegrationTest() {
       ${"z".repeat(247)}
       """.trimIndent() // 128 tokens

     expectLlama(StreamHttpExchange { request: RequestEntity ->
-      assertThat(request.uri.path).isEqualTo("/infill")
+      assertThat(request.uri.path).isEqualTo("/completion")
       assertThat(request.method).isEqualTo("POST")
       assertThat(request.body)
-        .extracting("input_prefix", "input_suffix")
-        .containsExactly(prefix, suffix)
+        .extracting("prompt")
+        .isEqualTo(InfillPromptTemplate.CODE_LLAMA.buildPrompt(prefix, suffix))
       listOf(jsonMapResponse(e("content", expectedCompletion), e("stop", true)))
     })
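
Note: the behavioral difference the test pins down can also be observed directly against a running llama.cpp server. A hedged sketch using only the JDK HTTP client; the endpoint paths and field names follow llama.cpp's server documentation, while the host, port, and payload values are made-up examples:

    import java.net.URI
    import java.net.http.HttpClient
    import java.net.http.HttpRequest
    import java.net.http.HttpResponse

    // Posts a JSON body to a local llama.cpp server and returns the raw response.
    fun post(path: String, body: String): String {
        val request = HttpRequest.newBuilder(URI.create("http://localhost:8080" + path))
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(body))
            .build()
        return HttpClient.newHttpClient()
            .send(request, HttpResponse.BodyHandlers.ofString())
            .body()
    }

    fun main() {
        // /completion expects a fully built FIM prompt from the caller...
        post("/completion", """{"prompt": "<PRE> def add(a, b): <SUF> <MID>", "n_predict": 16}""")
        // ...whereas /infill assembles the prompt server-side from prefix/suffix.
        post("/infill", """{"input_prefix": "def add(a, b): ", "input_suffix": "", "n_predict": 16}""")
    }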