package ee.carlrobert.codegpt.completions.llama; import static java.lang.String.format; import static java.util.stream.Collectors.toSet; import ee.carlrobert.codegpt.codecompletions.InfillPromptTemplate; import ee.carlrobert.codegpt.completions.HuggingFaceModel; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; import java.util.function.BiConsumer; import org.jetbrains.annotations.NotNull; public enum LlamaModel { CODE_LLAMA( "Code Llama", "Code Llama is a family of large language models for code based on Llama 2 " + "providing state-of-the-art performance among open models, infilling capabilities, " + "support for large input contexts, and zero-shot instruction following ability for " + "programming tasks.", PromptTemplate.LLAMA, InfillPromptTemplate.CODE_LLAMA, List.of( HuggingFaceModel.CODE_LLAMA_7B_Q3, HuggingFaceModel.CODE_LLAMA_7B_Q4, HuggingFaceModel.CODE_LLAMA_7B_Q5, HuggingFaceModel.CODE_LLAMA_13B_Q3, HuggingFaceModel.CODE_LLAMA_13B_Q4, HuggingFaceModel.CODE_LLAMA_13B_Q5, HuggingFaceModel.CODE_LLAMA_34B_Q3, HuggingFaceModel.CODE_LLAMA_34B_Q4, HuggingFaceModel.CODE_LLAMA_34B_Q5) ), CODE_BOOGA( "CodeBooga", "CodeBooga is a high-performing code instruct model created by merging two existing" + " code models: " + "
  1. Phind-CodeLlama-34B-v2
  2. WizardCoder-Python-34B-V1.0
", PromptTemplate.ALPACA, List.of( HuggingFaceModel.CODE_BOOGA_34B_Q3, HuggingFaceModel.CODE_BOOGA_34B_Q4, HuggingFaceModel.CODE_BOOGA_34B_Q5)), DEEPSEEK_CODER( "Deepseek Coder", "Deepseek Coder is composed of a series of code language models, each trained " + "from scratch on 2T tokens, with a composition of 87% code and 13% natural language " + "in both English and Chinese. It achieves state-of-the-art performance among " + "open-source code models on multiple programming languages and various benchmarks.", PromptTemplate.ALPACA, InfillPromptTemplate.DEEPSEEK_CODER, List.of( HuggingFaceModel.DEEPSEEK_CODER_1_3B_Q3, HuggingFaceModel.DEEPSEEK_CODER_1_3B_Q4, HuggingFaceModel.DEEPSEEK_CODER_1_3B_Q5, HuggingFaceModel.DEEPSEEK_CODER_6_7B_Q3, HuggingFaceModel.DEEPSEEK_CODER_6_7B_Q4, HuggingFaceModel.DEEPSEEK_CODER_6_7B_Q5, HuggingFaceModel.DEEPSEEK_CODER_33B_Q3, HuggingFaceModel.DEEPSEEK_CODER_33B_Q4, HuggingFaceModel.DEEPSEEK_CODER_33B_Q5)), DEEPSEEK_R1( "Deepseek R1", "DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) " + "without supervised fine-tuning (SFT) as a preliminary step, demonstrated remarkable " + "performance on reasoning. DeepSeek-R1 achieves performance comparable to OpenAI-o1 " + "across math, code, and reasoning tasks.", PromptTemplate.DEEPSEEK_R1, InfillPromptTemplate.DEEPSEEK_CODER, List.of( HuggingFaceModel.DEEPSEEK_R1_1_5B_Q6, HuggingFaceModel.DEEPSEEK_R1_7B_Q4, HuggingFaceModel.DEEPSEEK_R1_7B_Q6, HuggingFaceModel.DEEPSEEK_R1_14B_Q4, HuggingFaceModel.DEEPSEEK_R1_14B_Q6)), PHIND_CODE_LLAMA( "Phind Code Llama", "This model is fine-tuned from Phind-CodeLlama-34B-v1 on an additional 1.5B tokens " + "high-quality programming-related data, achieving 73.8% pass@1 on HumanEval. " + "It's the current state-of-the-art amongst open-source models.", PromptTemplate.ALPACA, List.of( HuggingFaceModel.PHIND_CODE_LLAMA_34B_Q3, HuggingFaceModel.PHIND_CODE_LLAMA_34B_Q4, HuggingFaceModel.PHIND_CODE_LLAMA_34B_Q5)), WIZARD_CODER_PYTHON( "WizardCoder - Python", "WizardCoder, a Code Evol-Instruct fine-tuned Code LLM, which achieves " + "the 73.2 pass@1 and surpasses GPT4 (2023/03/15), ChatGPT-3.5, " + "and Claude2 on the HumanEval Benchmarks.", PromptTemplate.ALPACA, List.of( HuggingFaceModel.WIZARD_CODER_PYTHON_7B_Q3, HuggingFaceModel.WIZARD_CODER_PYTHON_7B_Q4, HuggingFaceModel.WIZARD_CODER_PYTHON_7B_Q5, HuggingFaceModel.WIZARD_CODER_PYTHON_13B_Q3, HuggingFaceModel.WIZARD_CODER_PYTHON_13B_Q4, HuggingFaceModel.WIZARD_CODER_PYTHON_13B_Q5, HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q3, HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q4, HuggingFaceModel.WIZARD_CODER_PYTHON_34B_Q5)), LLAMA_3( "Llama 3", "Llama 3 is a family of large language models (LLMs), a collection of pretrained and " + "instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction " + "tuned models are optimized for dialogue use cases and outperform many of the available" + " open source chat models on common industry benchmarks. Further, in developing these " + "models, we took great care to optimize helpfulness and safety.", PromptTemplate.LLAMA_3, List.of( HuggingFaceModel.LLAMA_3_8B_IQ3_M, HuggingFaceModel.LLAMA_3_8B_Q4_K_M, HuggingFaceModel.LLAMA_3_8B_Q5_K_M, HuggingFaceModel.LLAMA_3_8B_Q6_K, HuggingFaceModel.LLAMA_3_8B_Q8_0, HuggingFaceModel.LLAMA_3_70B_IQ1, HuggingFaceModel.LLAMA_3_70B_IQ2_XS, HuggingFaceModel.LLAMA_3_70B_Q4_K_M)), PHI_3( "Phi-3 Mini", "Phi-3 Mini is a 3.8B parameters, lightweight, state-of-the-art open model. " + "When assessed against benchmarks testing common sense, language understanding, math, " + "code, long context and logical reasoning, Phi-3 Mini-4K-Instruct showcased a robust " + "and state-of-the-art performance among models with less than 13 billion parameters.", PromptTemplate.PHI_3, List.of( HuggingFaceModel.PHI_3_3_8B_4K_IQ4_NL, HuggingFaceModel.PHI_3_3_8B_4K_Q5_K_M, HuggingFaceModel.PHI_3_3_8B_4K_Q6_K, HuggingFaceModel.PHI_3_3_8B_4K_Q8_0, HuggingFaceModel.PHI_3_3_8B_4K_FP16)), PHI_3_MEDIUM( "Phi-3 Medium 128K", """ The Phi-3-Medium-128K-Instruct is a 14B parameters, lightweight, state-of-the-art open model \ trained with the Phi-3 datasets that includes both synthetic data and the filtered publicly \ available websites data with a focus on high-quality and reasoning dense properties. \ The model has underwent a post-training process that incorporates both supervised fine-tuning\ and direct preference optimization for the instruction following and safety measures. \ When assessed against benchmarks testing common sense, language understanding, math, code, \ long context and logical reasoning, Phi-3-Medium-128K-Instruct showcased a robust and \ state-of-the-art performance among models of the same-size and next-size-up.""", PromptTemplate.PHI_3, List.of( HuggingFaceModel.PHI_3_14B_128K_IQ3_M, HuggingFaceModel.PHI_3_14B_128K_Q3_K_M, HuggingFaceModel.PHI_3_14B_128K_IQ4_NL, HuggingFaceModel.PHI_3_14B_128K_Q4_K_M, HuggingFaceModel.PHI_3_14B_128K_Q5_K_M, HuggingFaceModel.PHI_3_14B_128K_Q6_K, HuggingFaceModel.PHI_3_14B_128K_Q8_0)), CODE_GEMMA( "CodeGemma Instruct", "CodeGemma Instruct is the first in a series of coding models released by Google. " + "As an instruct model, it specializes in being asked coding related questions, but can " + "also function as an autocomplete/fill-in-middle model for tools like co-pilot.\n" + "This model is perfect for general coding questions or code generation.", PromptTemplate.CODE_GEMMA, InfillPromptTemplate.CODE_GEMMA, List.of( HuggingFaceModel.CODE_GEMMA_7B_Q3_K_L, HuggingFaceModel.CODE_GEMMA_7B_Q4_K_M, HuggingFaceModel.CODE_GEMMA_7B_Q5_K_M, HuggingFaceModel.CODE_GEMMA_7B_Q6_K, HuggingFaceModel.CODE_GEMMA_7B_Q8_0)), CODE_QWEN( "CodeQwen1.5", """ A specialized codeLLM built upon the Qwen1.5 language model. \ CodeQwen1.5-7B has been pretrained with around 3 trillion tokens of code-related data. \ It supports an extensive repertoire of 92 programming languages, and it exhibits \ exceptional capacity in long-context understanding and generation with the ability to \ process information of 64K tokens. In terms of performance, CodeQwen1.5 demonstrates \ impressive capabilities in basic code generation, long-context modelling, code editing \ and SQL. We believe this model can significantly enhance developer productivity and \ streamline software development workflows within diverse technological environments.""", PromptTemplate.CODE_QWEN, InfillPromptTemplate.CODE_QWEN, List.of( HuggingFaceModel.CODE_QWEN_1_5_7B_Q3_K_M, HuggingFaceModel.CODE_QWEN_1_5_7B_Q4_K_M, HuggingFaceModel.CODE_QWEN_1_5_7B_Q5_K_M, HuggingFaceModel.CODE_QWEN_1_5_7B_Q6_K)), CODE_QWEN2_5_CODER( "CodeQwen2.5 Coder", """ Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models \ (formerly known as CodeQwen). It brings the following improvements upon CodeQwen1.5: - Significantly improvements in code generation, code reasoning and code fixing. \ Base on the strong Qwen2.5, we scale up the training tokens into 5.5 trillion including \ source code, text-code grounding, Synthetic data, etc. - A more comprehensive foundation for real-world applications such as Code Agents. \ Not only enhancing coding capabilities but also maintaining its strengths in \ mathematics and general competencies. - Long-context Support up to 128K tokens. """, PromptTemplate.CODE_QWEN, InfillPromptTemplate.CODE_QWEN_2_5, List.of( HuggingFaceModel.CODE_QWEN_2_5_1_5B_Q6_K, HuggingFaceModel.CODE_QWEN_2_5_1_5B_Q8_0, HuggingFaceModel.CODE_QWEN_2_5_3B_Q4_K_M, HuggingFaceModel.CODE_QWEN_2_5_3B_Q6_K, HuggingFaceModel.CODE_QWEN_2_5_3B_Q8_0, HuggingFaceModel.CODE_QWEN_2_5_7B_Q4_K_M, HuggingFaceModel.CODE_QWEN_2_5_7B_Q6_K, HuggingFaceModel.CODE_QWEN_2_5_7B_Q8_0, HuggingFaceModel.CODE_QWEN_2_5_14B_Q4_K_M, HuggingFaceModel.CODE_QWEN_2_5_14B_Q6_K, HuggingFaceModel.CODE_QWEN_2_5_14B_Q8_0, HuggingFaceModel.CODE_QWEN_2_5_32B_Q4_K_M, HuggingFaceModel.CODE_QWEN_2_5_32B_Q6_K, HuggingFaceModel.CODE_QWEN_2_5_32B_Q8_0)), STABLE_CODE( "Stable Code Instruct", """ stable-code-instruct-3b is a 2.7B billion parameter decoder-only language model tuned from \ stable-code-3b. This model was trained on a mix of publicly available datasets, synthetic \ datasets using Direct Preference Optimization (DPO). This instruct tune demonstrates state-of-the-art performance (compared to models of similar \ size) on the MultiPL-E metrics across multiple programming languages tested using BigCode's \ Evaluation Harness, and on the code portions of MT Bench. The model is fine tuned to make it \ usable in tasks like general purpose Code/Software Engineering like conversations and \ SQL related generation and conversation.""", PromptTemplate.STABLE_CODE, InfillPromptTemplate.CODE_QWEN, List.of( HuggingFaceModel.STABLE_CODE_3B_Q3_K_M, HuggingFaceModel.STABLE_CODE_3B_Q4_K_M, HuggingFaceModel.STABLE_CODE_3B_Q5_K_M, HuggingFaceModel.STABLE_CODE_3B_Q6_K, HuggingFaceModel.STABLE_CODE_3B_Q8_0)), CODESTRAL( "Codestral", """ Codestral is an open-weight generative AI model explicitly designed for code generation \ tasks. It helps developers write and interact with code through a shared instruction and \ completion API endpoint. As it masters code and English, it can be used to design advanced \ AI applications for software developers. Codestral is trained on a diverse dataset of 80+ \ programming languages. Codestral saves developers time and effort: it can complete coding \ functions, write tests, and complete any partial code using a fill-in-the-middle mechanism. \ Interacting with Codestral will help level up the developer’s coding game and reduce the \ risk of errors and bugs.""", PromptTemplate.MIXTRAL_INSTRUCT, InfillPromptTemplate.CODESTRAL, List.of( HuggingFaceModel.CODESTRAL_22B_32K_Q3_K_M, HuggingFaceModel.CODESTRAL_22B_32K_Q4_K_M, HuggingFaceModel.CODESTRAL_22B_32K_Q5_K_M, HuggingFaceModel.CODESTRAL_22B_32K_Q6_K, HuggingFaceModel.CODESTRAL_22B_32K_Q8_0)), ; private final String label; private final String description; private final PromptTemplate promptTemplate; private final InfillPromptTemplate infillPromptTemplate; private final List huggingFaceModels; LlamaModel( String label, String description, PromptTemplate promptTemplate, List huggingFaceModels) { this(label, description, promptTemplate, null, huggingFaceModels); } LlamaModel( String label, String description, PromptTemplate promptTemplate, InfillPromptTemplate infillPromptTemplate, List huggingFaceModels) { this.label = label; this.description = description; this.promptTemplate = promptTemplate; this.infillPromptTemplate = infillPromptTemplate; this.huggingFaceModels = huggingFaceModels; } public static @NotNull LlamaModel findByHuggingFaceModel(HuggingFaceModel huggingFaceModel) { return Arrays.stream(LlamaModel.values()) .filter(model -> model.getHuggingFaceModels().contains(huggingFaceModel)) .findFirst() .orElseThrow(() -> new RuntimeException("Unable to find correct LLM")); } public @NotNull List filterSelectedModelsBySize(ModelSize selectedModelSize) { return selectedModelSize != null ? getHuggingFaceModels().stream() .filter(model -> selectedModelSize.size() == model.getParameterSize()) .toList() : List.of(); } public boolean anyDownloaded() { return huggingFaceModels.stream().anyMatch(HuggingFaceModel::isDownloaded); } public String getDownloadedMarker() { return getDownloadedMarker(anyDownloaded()); } public static String getDownloadedMarker(boolean downloaded) { return downloaded ? "✓" : "\u2001"; } public static @NotNull Path getLlamaModelsPath() { return Paths.get(System.getProperty("user.home"), ".codegpt/models/gguf"); } @Override public String toString() { return String.join(" ", getDownloadedMarker(), label, getFormattedModelSizeRange()); } /** * Server started: {@code CodeLlama 7B 4-bit}. */ public @NotNull String toString(@NotNull HuggingFaceModel hfm) { return "%s %dB %d-bit".formatted(label, hfm.getParameterSize(), hfm.getQuantization()); } public String getLabel() { return label; } public String getDescription() { return description; } public PromptTemplate getPromptTemplate() { return promptTemplate; } public InfillPromptTemplate getInfillPromptTemplate() { return infillPromptTemplate; } public List getHuggingFaceModels() { return huggingFaceModels; } /** * Downloaded model with the biggest parameter size, otherwise first. */ public HuggingFaceModel getLastExistingModelOrFirst() { return huggingFaceModels.stream() .filter(HuggingFaceModel::isDownloaded) .max(Comparator.comparing(HuggingFaceModel::getParameterSize)) .orElse(huggingFaceModels.get(0)); } public String getFormattedModelSizeRange() { var parameters = huggingFaceModels.stream() .map(HuggingFaceModel::getParameterSize) .collect(toSet()); if (parameters.size() == 1) { return parameters.iterator().next() + "B"; } return format("(%dB - %dB)", Collections.min(parameters), Collections.max(parameters)); } public List getSortedUniqueModelSizes() { return huggingFaceModels.stream() .map(hfm -> new ModelSize(hfm.getParameterSize(), hfm.isDownloaded())) .sorted() .collect(LinkedHashSet::new, ModelSize.skipSameSize(), Set::addAll) .stream().toList(); } public static List getSorted() { return Arrays.stream(values()).sorted(Comparator.comparing(Enum::name)).toList(); } public record ModelSize(int size, boolean downloaded) implements Comparable { // Sort by size, but downloaded comes first: [ 7B, ✓ 13B, 13B, 34B] private static final Comparator sizeDownloadedFirst = Comparator .comparing(ModelSize::size) .thenComparing(Comparator.comparing(ModelSize::downloaded).reversed()); @Override public int compareTo(@NotNull ModelSize other) { return sizeDownloadedFirst.compare(this, other); } private static @NotNull BiConsumer, ModelSize> skipSameSize() { return (s, e) -> { if (s.stream().noneMatch(v -> v.size == e.size)) { s.add(e); } }; } @Override public String toString() { return "%s %dB".formatted(getDownloadedMarker(downloaded), size); } } }