#178 - Add support for running local LLMs via LLaMA C/C++ port (#249)

* Initial implementation of integrating llama.cpp to run LLaMA models locally * Move submodule * Copy llama submodule to bundle * Support for downloading models from IDE * Code cleanup * Store port field * Replace service selection radio group with dropdown * Add quantization support + other fixes * Add option to override host * Fix override host handler * Disable port field when override host enabled * Design updates * Fix llama settings configuration, design changes, clean up code * Improve You.com coupon design * Add new Phind model and help tooltip * Fetch you.com subscription * Add CodeBooga model, fix downloadable model selection * Chat history support * Code refactoring, minor bug fixes * UI updates, several bug fixes, removed code llama python model * Code cleanup, enable llama port only on macOS * Change downloaded gguf models path * Move some of the labels to codegpt bundle * Minor fixes * Remove ToRA model, add help texts * Fix test * Modify description
2026-05-11 04:50:31 +00:00 · 2023-11-03 12:00:24 +02:00 · 2023-11-03 12:00:24 +02:00 · 45908e69df
commit 45908e69df
parent ca2eb9b6fa
71 changed files with 2748 additions and 533 deletions
--- a/src/main/java/ee/carlrobert/codegpt/completions/CompletionRequestProvider.java
+++ b/src/main/java/ee/carlrobert/codegpt/completions/CompletionRequestProvider.java
@ -2,19 +2,23 @@ package ee.carlrobert.codegpt.completions;

 import static java.util.stream.Collectors.toList;

+import com.intellij.openapi.application.ApplicationManager;
 import com.intellij.openapi.diagnostic.Logger;
 import ee.carlrobert.codegpt.CodeGPTPlugin;
 import ee.carlrobert.codegpt.EncodingManager;
+import ee.carlrobert.codegpt.completions.llama.LlamaModel;
 import ee.carlrobert.codegpt.conversations.Conversation;
 import ee.carlrobert.codegpt.conversations.ConversationsState;
 import ee.carlrobert.codegpt.conversations.message.Message;
 import ee.carlrobert.codegpt.settings.configuration.ConfigurationState;
+import ee.carlrobert.codegpt.settings.state.LlamaSettingsState;
 import ee.carlrobert.codegpt.settings.state.SettingsState;
 import ee.carlrobert.codegpt.settings.state.YouSettingsState;
 import ee.carlrobert.codegpt.telemetry.core.configuration.TelemetryConfiguration;
-import ee.carlrobert.codegpt.telemetry.core.service.TelemetryService;
 import ee.carlrobert.codegpt.telemetry.core.service.UserId;
+import ee.carlrobert.codegpt.util.ApplicationUtils;
 import ee.carlrobert.embedding.EmbeddingsService;
+import ee.carlrobert.llm.client.llama.completion.LlamaCompletionRequest;
 import ee.carlrobert.llm.client.openai.completion.chat.OpenAIChatCompletionModel;
 import ee.carlrobert.llm.client.openai.completion.chat.request.OpenAIChatCompletionMessage;
 import ee.carlrobert.llm.client.openai.completion.chat.request.OpenAIChatCompletionRequest;
@ -36,17 +40,22 @@ public class CompletionRequestProvider {
      "Follow the user's requirements carefully & to the letter.\n" +
      "Your responses should be informative and logical.\n" +
      "You should always adhere to technical information.\n" +
-      "If the user asks for code or technical questions, you must provide code suggestions and adhere to technical information.\n" +
-      "If the question is related to a developer, CodeGPT must respond with content related to a developer.\n" +
-      "First think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.\n" +
+      "If the user asks for code or technical questions, you must provide code suggestions and " +
+      "adhere to technical information.\n" +
+      "If the question is related to a developer, CodeGPT must respond with " +
+      "content related to a developer.\n" +
+      "First think step-by-step - describe your plan for what to build in pseudocode, " +
+      "written out in great detail.\n" +
      "Then output the code in a single code block.\n" +
      "Minimize any other prose.\n" +
      "Keep your answers short and impersonal.\n" +
      "Use Markdown formatting in your answers.\n" +
-      "Make sure to include the programming language name at the start of the Markdown code blocks.\n" +
+      "Make sure to include the programming language name at the start of the " +
+      "Markdown code blocks.\n" +
      "Avoid wrapping the whole response in triple backticks.\n" +
-      "The user works in an IDE built by JetBrains which has a concept for editors with open files, integrated unit test support, " +
-      "and output pane that shows the output of running the code as well as an integrated terminal.\n" +
+      "The user works in an IDE built by JetBrains which has a concept for editors " +
+      "with open files, integrated unit test support, and output pane that shows " +
+      "the output of running the code as well as an integrated terminal.\n" +
      "You can only give one reply for each conversation turn.";

  private final EncodingManager encodingManager = EncodingManager.getInstance();
@ -60,6 +69,20 @@ public class CompletionRequestProvider {
    this.conversation = conversation;
  }

+  public LlamaCompletionRequest buildLlamaCompletionRequest(Message message) { // Builds the llama completion request for the given message.
+    var settings = LlamaSettingsState.getInstance();
+    var promptTemplate = settings.isUseCustomModel() ? // custom model: use the template stored in the settings
+        settings.getPromptTemplate() :
+        LlamaModel.findByHuggingFaceModel(settings.getHuggingFaceModel()).getPromptTemplate(); // otherwise: template of the LlamaModel matching the configured HuggingFace model
+    var prompt = promptTemplate.buildPrompt( // combines system prompt, the new user prompt and the conversation's messages
+        COMPLETION_SYSTEM_PROMPT,
+        message.getPrompt(),
+        conversation.getMessages());
+    return new LlamaCompletionRequest.Builder(prompt)
+        .setN_predict(512) // NOTE(review): presumably the max number of tokens to generate; hard-coded here - confirm against the llama client
+        .build();
+  }
+
  public YouCompletionRequest buildYouCompletionRequest(Message message) {
    var requestBuilder = new YouCompletionRequest.Builder(message.getPrompt())
        .setUseGPT4Model(YouSettingsState.getInstance().isUseGPT4Model())
@ -68,7 +91,8 @@ public class CompletionRequestProvider {
                prevMessage.getPrompt(),
                prevMessage.getResponse()))
            .collect(toList()));
-    if (TelemetryConfiguration.getInstance().isEnabled()) {
+    if (TelemetryConfiguration.getInstance().isEnabled() &&
+        !ApplicationManager.getApplication().isUnitTestMode()) {
      requestBuilder.setUserId(UUID.fromString(UserId.INSTANCE.get()));
    }
    return requestBuilder.build();