feat: Support Phi-3 Mini model (#516)

2026-05-19 16:28:46 +00:00 · 2024-04-27 22:50:03 +02:00 · 2024-04-27 22:50:03 +02:00 · 6d6e0a3ccb
commit 6d6e0a3ccb
parent 1415f387ff
5 changed files with 106 additions and 3 deletions
--- a/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java
+++ b/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java
@ -52,7 +52,14 @@ public enum HuggingFaceModel {
  LLAMA_3_8B_Q8_0(8, 8, "Meta-Llama-3-8B-Instruct-Q8_0.gguf", "lmstudio-community", 8.54),
  LLAMA_3_70B_IQ1(70, 1, "Meta-Llama-3-70B-Instruct-IQ1_M.gguf", "lmstudio-community", 16.8),
  LLAMA_3_70B_IQ2_XS(70, 2, "Meta-Llama-3-70B-Instruct-IQ2_XS.gguf", "lmstudio-community", 21.1),
-  LLAMA_3_70B_Q4_K_M(70, 4, "Meta-Llama-3-70B-Instruct-Q4_K_M.gguf", "lmstudio-community", 42.5);
+  LLAMA_3_70B_Q4_K_M(70, 4, "Meta-Llama-3-70B-Instruct-Q4_K_M.gguf", "lmstudio-community", 42.5),
+  // NOTE(review): Q5_K_S (2.82) > Q5_K_M (2.64) looks swapped; K_S is usually smaller - verify.
+  PHI_3_3_8B_4K_IQ4_NL(4, 4, "Phi-3-mini-4k-instruct-IQ4_NL.gguf", "lmstudio-community", 2.18),
+  PHI_3_3_8B_4K_Q5_K_M(4, 5, "Phi-3-mini-4k-instruct-Q5_K_M.gguf", "lmstudio-community", 2.64),
+  PHI_3_3_8B_4K_Q5_K_S(4, 5, "Phi-3-mini-4k-instruct-Q5_K_S.gguf", "lmstudio-community", 2.82),
+  PHI_3_3_8B_4K_Q6_K(4, 6, "Phi-3-mini-4k-instruct-Q6_K.gguf", "lmstudio-community", 3.14),
+  PHI_3_3_8B_4K_Q8_0(4, 8, "Phi-3-mini-4k-instruct-Q8_0.gguf", "lmstudio-community", 4.06),
+  PHI_3_3_8B_4K_FP16(4, 16, "Phi-3-mini-4k-instruct-fp16.gguf", "lmstudio-community", 7.64);

  private final int parameterSize;
  private final int quantization;
--- a/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java
+++ b/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java
@ -99,7 +99,21 @@ public enum LlamaModel {
          HuggingFaceModel.LLAMA_3_8B_Q8_0,
          HuggingFaceModel.LLAMA_3_70B_IQ1,
          HuggingFaceModel.LLAMA_3_70B_IQ2_XS,
-          HuggingFaceModel.LLAMA_3_70B_Q4_K_M));
+          HuggingFaceModel.LLAMA_3_70B_Q4_K_M)),
+  PHI_3( // Phi-3 Mini entry: two strings, a prompt template, then a list of model files
+      "Phi-3 Mini", // presumably the display label - confirm against the constructor
+      "Phi-3 Mini is a 3.8B parameters, lightweight, state-of-the-art open model. "
+          + "When assessed against benchmarks testing common sense, language understanding, math, "
+          + "code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust "
+          + "and state-of-the-art performance among models with less than 13 billion parameters.",
+      PromptTemplate.PHI_3, // prompt template constant paired with this model
+      List.of( // the six Phi-3-mini-4k-instruct constants declared in HuggingFaceModel
+          HuggingFaceModel.PHI_3_3_8B_4K_IQ4_NL,
+          HuggingFaceModel.PHI_3_3_8B_4K_Q5_K_M,
+          HuggingFaceModel.PHI_3_3_8B_4K_Q5_K_S,
+          HuggingFaceModel.PHI_3_3_8B_4K_Q6_K,
+          HuggingFaceModel.PHI_3_3_8B_4K_Q8_0,
+          HuggingFaceModel.PHI_3_3_8B_4K_FP16));

  private final String label;
  private final String description;
--- a/src/main/java/ee/carlrobert/codegpt/completions/llama/PromptTemplate.java
+++ b/src/main/java/ee/carlrobert/codegpt/completions/llama/PromptTemplate.java
@ -125,6 +125,25 @@ public enum PromptTemplate {
          .toString();
    }
  },
+  PHI_3("Phi-3 Mini", List.of("<|end|>")) { // 2nd arg: presumably stop tokens - confirm
+    @Override
+    public String buildPrompt(String systemPrompt, String userPrompt, List<Message> history) {
+      StringBuilder prompt = new StringBuilder(); // note: systemPrompt is never read here
+
+      for (Message message : history) { // replay each past exchange as two turns
+        prompt.append("<|user|>\n")
+            .append(message.getPrompt())
+            .append("<|end|>\n<|assistant|>\n") // end user turn, start assistant turn
+            .append(message.getResponse())
+            .append("<|end|>\n"); // end assistant turn
+      }
+
+      return prompt.append("<|user|>\n") // the new user message goes last
+          .append(userPrompt)
+          .append("<|end|>\n<|assistant|>") // no "\n" after the tag, unlike the history turns
+          .toString();
+    }
+  },
  ALPACA("Alpaca/Vicuna") {
    @Override
    public String buildPrompt(String systemPrompt, String userPrompt, List<Message> history) {