From 3797126de40bee0e3ccd2c353f1d35abcafde458 Mon Sep 17 00:00:00 2001 From: Carl-Robert Linnupuu Date: Thu, 23 Nov 2023 17:22:48 +0200 Subject: [PATCH] Add deepseek coder instruct models (1-33B) --- .../codegpt/completions/HuggingFaceModel.java | 10 ++++++++++ .../codegpt/completions/llama/LlamaModel.java | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java b/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java index a3a8784e..e95c44f1 100644 --- a/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java +++ b/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java @@ -21,6 +21,16 @@ public enum HuggingFaceModel { CODE_BOOGA_34B_Q4(34, 4, "CodeBooga-34B-v0.1-GGUF"), CODE_BOOGA_34B_Q5(34, 5, "CodeBooga-34B-v0.1-GGUF"), + DEEPSEEK_CODER_1_3B_Q3(1, 3, "deepseek-coder-1.3b-instruct-GGUF"), + DEEPSEEK_CODER_1_3B_Q4(1, 4, "deepseek-coder-1.3b-instruct-GGUF"), + DEEPSEEK_CODER_1_3B_Q5(1, 5, "deepseek-coder-1.3b-instruct-GGUF"), + DEEPSEEK_CODER_6_7B_Q3(7, 3, "deepseek-coder-6.7b-instruct-GGUF"), + DEEPSEEK_CODER_6_7B_Q4(7, 4, "deepseek-coder-6.7b-instruct-GGUF"), + DEEPSEEK_CODER_6_7B_Q5(7, 5, "deepseek-coder-6.7b-instruct-GGUF"), + DEEPSEEK_CODER_33B_Q3(33, 3, "deepseek-coder-33b-instruct-GGUF"), + DEEPSEEK_CODER_33B_Q4(33, 4, "deepseek-coder-33b-instruct-GGUF"), + DEEPSEEK_CODER_33B_Q5(33, 5, "deepseek-coder-33b-instruct-GGUF"), + PHIND_CODE_LLAMA_34B_Q3(34, 3, "Phind-CodeLlama-34B-v2-GGUF"), PHIND_CODE_LLAMA_34B_Q4(34, 4, "Phind-CodeLlama-34B-v2-GGUF"), PHIND_CODE_LLAMA_34B_Q5(34, 5, "Phind-CodeLlama-34B-v2-GGUF"), diff --git a/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java b/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java index 8cbacc6f..4e0afd49 100644 --- a/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java +++ b/src/main/java/ee/carlrobert/codegpt/completions/llama/LlamaModel.java @@ -38,6 +38,23 @@ public enum LlamaModel { HuggingFaceModel.CODE_BOOGA_34B_Q3, HuggingFaceModel.CODE_BOOGA_34B_Q4, HuggingFaceModel.CODE_BOOGA_34B_Q5)), + DEEPSEEK_CODER( + "Deepseek Coder", + "Deepseek Coder is composed of a series of code language models, each trained " + + "from scratch on 2T tokens, with a composition of 87% code and 13% natural language " + + "in both English and Chinese. It achieves state-of-the-art performance among " + + "open-source code models on multiple programming languages and various benchmarks.", + PromptTemplate.ALPACA, + List.of( + HuggingFaceModel.DEEPSEEK_CODER_1_3B_Q3, + HuggingFaceModel.DEEPSEEK_CODER_1_3B_Q4, + HuggingFaceModel.DEEPSEEK_CODER_1_3B_Q5, + HuggingFaceModel.DEEPSEEK_CODER_6_7B_Q3, + HuggingFaceModel.DEEPSEEK_CODER_6_7B_Q4, + HuggingFaceModel.DEEPSEEK_CODER_6_7B_Q5, + HuggingFaceModel.DEEPSEEK_CODER_33B_Q3, + HuggingFaceModel.DEEPSEEK_CODER_33B_Q4, + HuggingFaceModel.DEEPSEEK_CODER_33B_Q5)), PHIND_CODE_LLAMA( "Phind Code Llama", "This model is fine-tuned from Phind-CodeLlama-34B-v1 on an additional 1.5B tokens "