ProxyAI/src/main/java/ee/carlrobert/codegpt/completions/HuggingFaceModel.java
Carl-Robert 45908e69df
#178 - Add support for running local LLMs via LLaMA C/C++ port (#249)
* Initial implementation of integrating llama.cpp to run LLaMA models locally

* Move submodule

* Copy llama submodule to bundle

* Support for downloading models from IDE

* Code cleanup

* Store port field

* Replace service selection radio group with dropdown

* Add quantization support + other fixes

* Add option to override host

* Fix override host handler

* Disable port field when override host enabled

* Design updates

* Fix llama settings configuration, design changes, clean up code

* Improve You.com coupon design

* Add new Phind model and help tooltip

* Fetch you.com subscription

* Add CodeBooga model, fix downloadable model selection

* Chat history support

* Code refactoring, minor bug fixes

* UI updates, several bug fixes, removed code llama python model

* Code cleanup, enable llama port only on macOS

* Change downloaded gguf models path

* Move some of the labels to codegpt bundle

* Minor fixes

* Remove ToRA model, add help texts

* Fix test

* Modify description
2023-11-03 12:00:24 +02:00

85 lines
2.8 KiB
Java

package ee.carlrobert.codegpt.completions;
import static java.lang.String.format;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Locale;
/**
 * Catalog of locally runnable GGUF models hosted under the
 * {@code TheBloke} organization on Hugging Face.
 *
 * <p>Each constant captures the parameter size (in billions), the
 * quantization bit-width, and the Hugging Face repository name, from
 * which the download file name and URLs are derived.
 */
public enum HuggingFaceModel {

  CODE_LLAMA_7B_Q3(7, 3, "CodeLlama-7B-Instruct-GGUF"),
  CODE_LLAMA_7B_Q4(7, 4, "CodeLlama-7B-Instruct-GGUF"),
  CODE_LLAMA_7B_Q5(7, 5, "CodeLlama-7B-Instruct-GGUF"),
  CODE_LLAMA_13B_Q3(13, 3, "CodeLlama-13B-Instruct-GGUF"),
  CODE_LLAMA_13B_Q4(13, 4, "CodeLlama-13B-Instruct-GGUF"),
  CODE_LLAMA_13B_Q5(13, 5, "CodeLlama-13B-Instruct-GGUF"),
  CODE_LLAMA_34B_Q3(34, 3, "CodeLlama-34B-Instruct-GGUF"),
  CODE_LLAMA_34B_Q4(34, 4, "CodeLlama-34B-Instruct-GGUF"),
  CODE_LLAMA_34B_Q5(34, 5, "CodeLlama-34B-Instruct-GGUF"),
  CODE_BOOGA_34B_Q3(34, 3, "CodeBooga-34B-v0.1-GGUF"),
  CODE_BOOGA_34B_Q4(34, 4, "CodeBooga-34B-v0.1-GGUF"),
  CODE_BOOGA_34B_Q5(34, 5, "CodeBooga-34B-v0.1-GGUF"),
  PHIND_CODE_LLAMA_34B_Q3(34, 3, "Phind-CodeLlama-34B-v2-GGUF"),
  PHIND_CODE_LLAMA_34B_Q4(34, 4, "Phind-CodeLlama-34B-v2-GGUF"),
  PHIND_CODE_LLAMA_34B_Q5(34, 5, "Phind-CodeLlama-34B-v2-GGUF"),
  WIZARD_CODER_PYTHON_7B_Q3(7, 3, "WizardCoder-Python-7B-V1.0-GGUF"),
  WIZARD_CODER_PYTHON_7B_Q4(7, 4, "WizardCoder-Python-7B-V1.0-GGUF"),
  WIZARD_CODER_PYTHON_7B_Q5(7, 5, "WizardCoder-Python-7B-V1.0-GGUF"),
  WIZARD_CODER_PYTHON_13B_Q3(13, 3, "WizardCoder-Python-13B-V1.0-GGUF"),
  WIZARD_CODER_PYTHON_13B_Q4(13, 4, "WizardCoder-Python-13B-V1.0-GGUF"),
  WIZARD_CODER_PYTHON_13B_Q5(13, 5, "WizardCoder-Python-13B-V1.0-GGUF"),
  WIZARD_CODER_PYTHON_34B_Q3(34, 3, "WizardCoder-Python-34B-V1.0-GGUF"),
  WIZARD_CODER_PYTHON_34B_Q4(34, 4, "WizardCoder-Python-34B-V1.0-GGUF"),
  WIZARD_CODER_PYTHON_34B_Q5(34, 5, "WizardCoder-Python-34B-V1.0-GGUF");

  /** Model parameter count, in billions (7, 13, or 34). */
  private final int parameterSize;
  /** Quantization bit-width used in the GGUF file name (Q3/Q4/Q5). */
  private final int quantization;
  /** Hugging Face repository name under the TheBloke organization. */
  private final String modelName;

  HuggingFaceModel(int parameterSize, int quantization, String modelName) {
    this.parameterSize = parameterSize;
    this.quantization = quantization;
    this.modelName = modelName;
  }

  /** Returns the parameter count in billions. */
  public int getParameterSize() {
    return parameterSize;
  }

  /** Returns the quantization bit-width. */
  public int getQuantization() {
    return quantization;
  }

  /** Returns the stable code identifying this model (the enum constant name). */
  public String getCode() {
    return name();
  }

  /**
   * Returns the GGUF file name to download, e.g.
   * {@code codellama-7b-instruct.Q4_K_M.gguf}.
   *
   * <p>Derived by lowercasing the repository name and swapping the
   * {@code -gguf} suffix for the quantization-specific file suffix.
   */
  public String getFileName() {
    // Locale.ROOT avoids locale-sensitive lowercasing (e.g. Turkish
    // dotless-i turning "Instruct" into "ınstruct"), which would break
    // the download file name and URL.
    return modelName.toLowerCase(Locale.ROOT)
        .replace("-gguf", format(".Q%d_K_M.gguf", quantization));
  }

  /**
   * Returns the direct download URL for the quantized model file.
   *
   * @throws RuntimeException if the constructed URL is malformed
   *     (unreachable in practice, since all components are hard-coded)
   */
  public URL getFileURL() {
    try {
      return new URL(
          format("https://huggingface.co/TheBloke/%s/resolve/main/%s", modelName, getFileName()));
    } catch (MalformedURLException ex) {
      throw new RuntimeException(ex);
    }
  }

  /**
   * Returns the Hugging Face repository page URL for this model.
   *
   * @throws RuntimeException if the constructed URL is malformed
   *     (unreachable in practice, since all components are hard-coded)
   */
  public URL getHuggingFaceURL() {
    try {
      return new URL("https://huggingface.co/TheBloke/" + modelName);
    } catch (MalformedURLException ex) {
      throw new RuntimeException(ex);
    }
  }

  /** Human-readable quantization label shown in the model dropdown. */
  @Override
  public String toString() {
    return format("%d-bit precision", quantization);
  }
}