#178 - Add support for running local LLMs via LLaMA C/C++ port (#249)

* Initial implementation of integrating llama.cpp to run LLaMA models locally

* Move submodule

* Copy llama submodule to bundle

* Support for downloading models from IDE

* Code cleanup

* Store port field

* Replace service selection radio group with dropdown

* Add quantization support + other fixes

* Add option to override host

* Fix override host handler

* Disable port field when override host enabled

* Design updates

* Fix llama settings configuration, design changes, clean up code

* Improve You.com coupon design

* Add new Phind model and help tooltip

* Fetch you.com subscription

* Add CodeBooga model, fix downloadable model selection

* Chat history support

* Code refactoring, minor bug fixes

* UI updates, several bug fixes, removed code llama python model

* Code cleanup, enable llama port only on macOS

* Change downloaded gguf models path

* Move some of the labels to codegpt bundle

* Minor fixes

* Remove ToRA model, add help texts

* Fix test

* Modify description
This commit is contained in:
Carl-Robert 2023-11-03 12:00:24 +02:00 committed by GitHub
parent ca2eb9b6fa
commit 45908e69df
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
71 changed files with 2748 additions and 533 deletions

View file

@ -2,19 +2,23 @@ package ee.carlrobert.codegpt.completions;
import static java.util.stream.Collectors.toList;
import com.intellij.openapi.application.ApplicationManager;
import com.intellij.openapi.diagnostic.Logger;
import ee.carlrobert.codegpt.CodeGPTPlugin;
import ee.carlrobert.codegpt.EncodingManager;
import ee.carlrobert.codegpt.completions.llama.LlamaModel;
import ee.carlrobert.codegpt.conversations.Conversation;
import ee.carlrobert.codegpt.conversations.ConversationsState;
import ee.carlrobert.codegpt.conversations.message.Message;
import ee.carlrobert.codegpt.settings.configuration.ConfigurationState;
import ee.carlrobert.codegpt.settings.state.LlamaSettingsState;
import ee.carlrobert.codegpt.settings.state.SettingsState;
import ee.carlrobert.codegpt.settings.state.YouSettingsState;
import ee.carlrobert.codegpt.telemetry.core.configuration.TelemetryConfiguration;
import ee.carlrobert.codegpt.telemetry.core.service.TelemetryService;
import ee.carlrobert.codegpt.telemetry.core.service.UserId;
import ee.carlrobert.codegpt.util.ApplicationUtils;
import ee.carlrobert.embedding.EmbeddingsService;
import ee.carlrobert.llm.client.llama.completion.LlamaCompletionRequest;
import ee.carlrobert.llm.client.openai.completion.chat.OpenAIChatCompletionModel;
import ee.carlrobert.llm.client.openai.completion.chat.request.OpenAIChatCompletionMessage;
import ee.carlrobert.llm.client.openai.completion.chat.request.OpenAIChatCompletionRequest;
@ -36,17 +40,22 @@ public class CompletionRequestProvider {
"Follow the user's requirements carefully & to the letter.\n" +
"Your responses should be informative and logical.\n" +
"You should always adhere to technical information.\n" +
"If the user asks for code or technical questions, you must provide code suggestions and adhere to technical information.\n" +
"If the question is related to a developer, CodeGPT must respond with content related to a developer.\n" +
"First think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.\n" +
"If the user asks for code or technical questions, you must provide code suggestions and " +
"adhere to technical information.\n" +
"If the question is related to a developer, CodeGPT must respond with " +
"content related to a developer.\n" +
"First think step-by-step - describe your plan for what to build in pseudocode, " +
"written out in great detail.\n" +
"Then output the code in a single code block.\n" +
"Minimize any other prose.\n" +
"Keep your answers short and impersonal.\n" +
"Use Markdown formatting in your answers.\n" +
"Make sure to include the programming language name at the start of the Markdown code blocks.\n" +
"Make sure to include the programming language name at the start of the " +
"Markdown code blocks.\n" +
"Avoid wrapping the whole response in triple backticks.\n" +
"The user works in an IDE built by JetBrains which has a concept for editors with open files, integrated unit test support, " +
"and output pane that shows the output of running the code as well as an integrated terminal.\n" +
"The user works in an IDE built by JetBrains which has a concept for editors " +
"with open files, integrated unit test support, and output pane that shows " +
"the output of running the code as well as an integrated terminal.\n" +
"You can only give one reply for each conversation turn.";
private final EncodingManager encodingManager = EncodingManager.getInstance();
@ -60,6 +69,20 @@ public class CompletionRequestProvider {
this.conversation = conversation;
}
public LlamaCompletionRequest buildLlamaCompletionRequest(Message message) {
var settings = LlamaSettingsState.getInstance();
var promptTemplate = settings.isUseCustomModel() ?
settings.getPromptTemplate() :
LlamaModel.findByHuggingFaceModel(settings.getHuggingFaceModel()).getPromptTemplate();
var prompt = promptTemplate.buildPrompt(
COMPLETION_SYSTEM_PROMPT,
message.getPrompt(),
conversation.getMessages());
return new LlamaCompletionRequest.Builder(prompt)
.setN_predict(512)
.build();
}
public YouCompletionRequest buildYouCompletionRequest(Message message) {
var requestBuilder = new YouCompletionRequest.Builder(message.getPrompt())
.setUseGPT4Model(YouSettingsState.getInstance().isUseGPT4Model())
@ -68,7 +91,8 @@ public class CompletionRequestProvider {
prevMessage.getPrompt(),
prevMessage.getResponse()))
.collect(toList()));
if (TelemetryConfiguration.getInstance().isEnabled()) {
if (TelemetryConfiguration.getInstance().isEnabled() &&
!ApplicationManager.getApplication().isUnitTestMode()) {
requestBuilder.setUserId(UUID.fromString(UserId.INSTANCE.get()));
}
return requestBuilder.build();