From 4e5b28048bb19d4807364d61e8ec5259f9d24ccc Mon Sep 17 00:00:00 2001 From: Carl-Robert Linnupuu Date: Sun, 1 Dec 2024 00:01:26 +0000 Subject: [PATCH] feat: add latest qwen 2.5 coder models and adjust stop tokens --- .../codegpt/completions/HuggingFaceModel.java | 27 ++++++++++++++++--- .../codegpt/completions/llama/LlamaModel.java | 11 +++++++- .../codecompletions/InfillPromptTemplate.kt | 21 +++++++++++++-- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java b/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java index 1bd3ac98..c5e3d8a9 100644 --- a/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java +++ b/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java @@ -125,12 +125,33 @@ public enum HuggingFaceModel { CODE_QWEN_2_5_1_5B_Q8_0(1, 8, "Qwen2.5-Coder-1.5B-Instruct-GGUF", "qwen2.5-coder-1.5b-instruct-q8_0.gguf", "Qwen", 1.89), + CODE_QWEN_2_5_3B_Q4_K_M(3, 4, "Qwen2.5-Coder-3B-Instruct-GGUF", + "qwen2.5-coder-3b-instruct-q4_k_m.gguf", "Qwen", 2.10), + CODE_QWEN_2_5_3B_Q6_K(3, 6, "Qwen2.5-Coder-3B-Instruct-GGUF", + "qwen2.5-coder-3b-instruct-q6_k.gguf", "Qwen", 2.79), + CODE_QWEN_2_5_3B_Q8_0(3, 8, "Qwen2.5-Coder-3B-Instruct-GGUF", + "qwen2.5-coder-3b-instruct-q8_0.gguf", "Qwen", 3.62), + CODE_QWEN_2_5_7B_Q4_K_M(7, 4, "Qwen2.5-Coder-7B-Instruct-GGUF", - "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf", "bartowski", 4.68), + "qwen2.5-coder-7b-instruct-q4_k_m.gguf", "Qwen", 4.68), CODE_QWEN_2_5_7B_Q6_K(7, 6, "Qwen2.5-Coder-7B-Instruct-GGUF", - "Qwen2.5-Coder-7B-Instruct-Q6_K.gguf", "bartowski", 6.25), + "qwen2.5-coder-7b-instruct-q6_k.gguf", "Qwen", 6.25), CODE_QWEN_2_5_7B_Q8_0(7, 8, "Qwen2.5-Coder-7B-Instruct-GGUF", - "Qwen2.5-Coder-7B-Instruct-Q8_0.gguf", "bartowski", 8.1), + "qwen2.5-coder-7b-instruct-q8_0.gguf", "Qwen", 8.10), + + CODE_QWEN_2_5_14B_Q4_K_M(14, 4, "Qwen2.5-Coder-14B-Instruct-GGUF", + 
"qwen2.5-coder-14b-instruct-q4_k_m.gguf", "Qwen", 8.99), + CODE_QWEN_2_5_14B_Q6_K(14, 6, "Qwen2.5-Coder-14B-Instruct-GGUF", + "qwen2.5-coder-14b-instruct-q6_k.gguf", "Qwen", 12.10), + CODE_QWEN_2_5_14B_Q8_0(14, 8, "Qwen2.5-Coder-14B-Instruct-GGUF", + "qwen2.5-coder-14b-instruct-q8_0.gguf", "Qwen", 15.70), + + CODE_QWEN_2_5_32B_Q4_K_M(32, 4, "Qwen2.5-Coder-32B-Instruct-GGUF", + "qwen2.5-coder-32b-instruct-q4_k_m.gguf", "Qwen", 19.90), + CODE_QWEN_2_5_32B_Q6_K(32, 6, "Qwen2.5-Coder-32B-Instruct-GGUF", + "qwen2.5-coder-32b-instruct-q6_k.gguf", "Qwen", 26.90), + CODE_QWEN_2_5_32B_Q8_0(32, 8, "Qwen2.5-Coder-32B-Instruct-GGUF", + "qwen2.5-coder-32b-instruct-q8_0.gguf", "Qwen", 34.80), STABLE_CODE_3B_Q3_K_M(SC3, 3, "stable-code-instruct-3b-Q3_K_M.gguf", 1.39), STABLE_CODE_3B_Q4_K_M(SC3, 4, "stable-code-instruct-3b-Q4_K_M.gguf", 1.71), diff --git a/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java b/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java index f7441ea2..2f63ab09 100644 --- a/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java +++ b/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java @@ -189,9 +189,18 @@ public enum LlamaModel { List.of( HuggingFaceModel.CODE_QWEN_2_5_1_5B_Q6_K, HuggingFaceModel.CODE_QWEN_2_5_1_5B_Q8_0, + HuggingFaceModel.CODE_QWEN_2_5_3B_Q4_K_M, + HuggingFaceModel.CODE_QWEN_2_5_3B_Q6_K, + HuggingFaceModel.CODE_QWEN_2_5_3B_Q8_0, HuggingFaceModel.CODE_QWEN_2_5_7B_Q4_K_M, HuggingFaceModel.CODE_QWEN_2_5_7B_Q6_K, - HuggingFaceModel.CODE_QWEN_2_5_7B_Q8_0)), + HuggingFaceModel.CODE_QWEN_2_5_7B_Q8_0, + HuggingFaceModel.CODE_QWEN_2_5_14B_Q4_K_M, + HuggingFaceModel.CODE_QWEN_2_5_14B_Q6_K, + HuggingFaceModel.CODE_QWEN_2_5_14B_Q8_0, + HuggingFaceModel.CODE_QWEN_2_5_32B_Q4_K_M, + HuggingFaceModel.CODE_QWEN_2_5_32B_Q6_K, + HuggingFaceModel.CODE_QWEN_2_5_32B_Q8_0)), STABLE_CODE( "Stable Code Instruct", """ stable-code-instruct-3b is a 2.7B billion parameter decoder-only language model 
tuned from \ diff --git a/src/main/kotlin/ee/carlrobert/codegpt/codecompletions/InfillPromptTemplate.kt b/src/main/kotlin/ee/carlrobert/codegpt/codecompletions/InfillPromptTemplate.kt index a7d96699..126c7061 100644 --- a/src/main/kotlin/ee/carlrobert/codegpt/codecompletions/InfillPromptTemplate.kt +++ b/src/main/kotlin/ee/carlrobert/codegpt/codecompletions/InfillPromptTemplate.kt @@ -50,7 +50,23 @@ enum class InfillPromptTemplate(val label: String, val stopTokens: List<String>? } } }, - CODE_QWEN_2_5("CodeQwen2.5", listOf()) { + CODE_QWEN_2_5( + "CodeQwen2.5", + listOf( + "package ", + "import ", + "<|endoftext|>", + "<|fim_prefix|>", + "<|fim_middle|>", + "<|fim_suffix|>", + "<|fim_pad|>", + "<|cursor|>", + "<|repo_name|>", + "<|file_sep|>", + "<|im_start|>", + "<|im_end|>" + ) + ) { override fun buildPrompt(infillDetails: InfillRequest): String { val infillPrompt = "<|fim_prefix|> ${infillDetails.prefix} <|fim_suffix|>${infillDetails.suffix} <|fim_middle|>" @@ -110,7 +126,8 @@ enum class InfillPromptTemplate(val label: String, val stopTokens: List<String>? CODESTRAL("Codestral", listOf("</s>")) { override fun buildPrompt(infillDetails: InfillRequest): String { // see https://github.com/mistralai/mistral-common/blob/master/src/mistral_common/tokens/tokenizers/base.py - val infillPrompt = "[SUFFIX]${infillDetails.suffix}[PREFIX]${infillDetails.prefix}[MIDDLE]" + val infillPrompt = + "[SUFFIX]${infillDetails.suffix}[PREFIX]${infillDetails.prefix}[MIDDLE]" return createDefaultMultiFilePrompt(infillDetails, infillPrompt) } };