mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
updated model recs (+1 squashed commits)
Squashed commits: [3e0431ae1] updated model recs
This commit is contained in:
parent
f430916a71
commit
fa815f76c9
3 changed files with 20 additions and 8 deletions
|
@ -65,7 +65,7 @@ KoboldCpp can now also be run on Novita AI, a newer alternative GPU cloud provid
|
|||
|
||||
## Obtaining a GGUF model
|
||||
- KoboldCpp uses GGUF models. They are not included with KoboldCpp, but you can download GGUF files from other places such as [Bartowski's Huggingface](https://huggingface.co/bartowski). Search for "GGUF" on huggingface.co for plenty of compatible models in the `.gguf` format.
|
||||
- For beginners, we recommend the models [Airoboros Mistral 7B](https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf) (smaller and weaker) or [Tiefighter 13B](https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf) (larger model) or [Beepo 22B](https://huggingface.co/concedo/Beepo-22B-GGUF/resolve/main/Beepo-22B-Q4_K_S.gguf) (largest and most powerful)
|
||||
- For beginners, we recommend the models [L3-8B-Stheno-v3.2](https://huggingface.co/bartowski/L3-8B-Stheno-v3.2-GGUF/resolve/main/L3-8B-Stheno-v3.2-Q4_K_S.gguf) (smaller and weaker) or [Tiefighter 13B](https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf) (old but very versatile model) or [Gemma-3-27B Abliterated](https://huggingface.co/mlabonne/gemma-3-27b-it-abliterated-GGUF/resolve/main/gemma-3-27b-it-abliterated.q4_k_m.gguf) (largest and most powerful)
|
||||
- [Alternatively, you can download the tools to convert models to the GGUF format yourself here](https://kcpptools.concedo.workers.dev). Run `convert-hf-to-gguf.py` to convert them, then `quantize_gguf.exe` to quantize the result.
|
||||
- Other models for Whisper (speech recognition), Image Generation, Text to Speech or Image Recognition [can be found on the Wiki](https://github.com/LostRuins/koboldcpp/wiki#what-models-does-koboldcpp-support-what-architectures-are-supported)
|
||||
|
||||
|
@ -204,7 +204,7 @@ and it will install everything required. Alternatively, you can download the abo
|
|||
|
||||
# Where can I download AI model files?
|
||||
- The best place to get GGUF text models is huggingface. For image models, CivitAI has a good selection. Here are some to get started.
|
||||
- Text Generation: [Airoboros Mistral 7B](https://huggingface.co/TheBloke/airoboros-mistral2.2-7B-GGUF/resolve/main/airoboros-mistral2.2-7b.Q4_K_S.gguf) (smaller and weaker) or [Tiefighter 13B](https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf) (larger model) or [Beepo 22B](https://huggingface.co/concedo/Beepo-22B-GGUF/resolve/main/Beepo-22B-Q4_K_S.gguf) (largest and most powerful)
|
||||
- Text Generation: [L3-8B-Stheno-v3.2](https://huggingface.co/bartowski/L3-8B-Stheno-v3.2-GGUF/resolve/main/L3-8B-Stheno-v3.2-Q4_K_S.gguf) (smaller and weaker) or [Tiefighter 13B](https://huggingface.co/KoboldAI/LLaMA2-13B-Tiefighter-GGUF/resolve/main/LLaMA2-13B-Tiefighter.Q4_K_S.gguf) (old but very versatile model) or [Gemma-3-27B Abliterated](https://huggingface.co/mlabonne/gemma-3-27b-it-abliterated-GGUF/resolve/main/gemma-3-27b-it-abliterated.q4_k_m.gguf) (largest and most powerful)
|
||||
- Image Generation: [Anything v3](https://huggingface.co/admruul/anything-v3.0/resolve/main/Anything-V3.0-pruned-fp16.safetensors) or [Deliberate V2](https://huggingface.co/Yntec/Deliberate2/resolve/main/Deliberate_v2.safetensors) or [Dreamshaper SDXL](https://huggingface.co/Lykon/dreamshaper-xl-v2-turbo/resolve/main/DreamShaperXL_Turbo_v2_1.safetensors)
|
||||
- Image Recognition MMproj: [Pick the correct one for your model architecture here](https://huggingface.co/koboldcpp/mmproj/tree/main)
|
||||
- Speech Recognition: [Whisper models for Speech-To-Text](https://huggingface.co/koboldcpp/whisper/tree/main)
|
||||
|
|
17
klite.embd
17
klite.embd
|
@ -13839,6 +13839,20 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("documentdb_snippetestimate").innerText = `Estimate of context usage: ${snippetEstimate / 3} tokens (${snippetEstimate} characters). Total chunks: ${numberOfChunks}. Cached chunks: ${Object.keys(embeddings_cache).length}`;
|
||||
}
|
||||
|
||||
// Insert a [DOCUMENT BREAK] marker into the TextDB textarea at the current
// cursor position (replacing any selected text), then place the caret
// immediately after the inserted marker and keep the field focused.
function add_textdb_separator()
{
	const field = document.getElementById('documentdb_data');
	const marker = '[DOCUMENT BREAK]';
	const selStart = field.selectionStart;
	const selEnd = field.selectionEnd;
	// Splice the marker in place of the current selection.
	field.value = field.value.substring(0, selStart) + marker + field.value.substring(selEnd);
	// Restore the caret just past the marker so typing continues after it.
	const caret = selStart + marker.length;
	field.selectionStart = field.selectionEnd = caret;
	field.focus(); // Keep focus
}
|
||||
|
||||
function confirm_memory() {
|
||||
current_memory = document.getElementById("memorytext").value;
|
||||
current_anote = document.getElementById("anotetext").value;
|
||||
|
@ -24447,7 +24461,8 @@ Current version indicated by LITEVER below.
|
|||
<div class="settinglabel settingsmall" style="padding: 4px;">Please note, as with world info this can cause context reprocessing. It is recommended to have at least 8K context size and a good prompt processing speed to use this feature.</div>
|
||||
<div class="settinglabel">
|
||||
<div class="justifyleft"><br>TextDB Storage<span class="helpicon">?<span
|
||||
class="helptext">Paste as much raw text data here as you like. E.g. background information, reference documents, etc. This text will populate the database that will be chunked and searched by TextDB. This can be split using "[DOCUMENT BREAK]" to split the overall database into smaller documents, and titles can be added with "[DOCUMENT BREAK][Title of document]This is the content of the document".</span></span></div>
|
||||
class="helptext">Paste as much raw text data here as you like. E.g. background information, reference documents, etc. This text will populate the database that will be chunked and searched by TextDB. This can be split using "[DOCUMENT BREAK]" to split the overall database into smaller documents, and titles can be added with "[DOCUMENT BREAK][Title of document]This is the content of the document".</span></span>
|
||||
<button type="button" class="btn btn-primary" style="font-size:10px;padding:2px 5px;margin-left:4px;margin:2px;" onclick="add_textdb_separator()">Add Document Separator</button></div>
|
||||
</div>
|
||||
<textarea title="Edit TextDB" class="form-control menuinput_multiline" oninput="estimate_and_show_textDB_usage()" id="documentdb_data" style="height: 120px;"
|
||||
placeholder="Paste as much text data here as you like. This text will populate the database that will be searched by TextDB. This can be split using [DOCUMENT BREAK] to split the overall database into smaller documents, and titles can be added with [DOCUMENT BREAK][Title of document]This is the content of the document."></textarea>
|
||||
|
|
|
@ -5769,9 +5769,6 @@ def show_gui():
|
|||
import_vars(dict)
|
||||
pass
|
||||
|
||||
def display_help():
    # Open the main KoboldCpp wiki page in the user's default browser.
    wiki_url = "https://github.com/LostRuins/koboldcpp/wiki"
    LaunchWebbrowser(wiki_url,"Cannot launch help in browser.")
|
||||
|
||||
def display_help_models():
    # Open the wiki section that lists the model types and architectures
    # KoboldCpp supports, to help the user find a compatible GGUF model.
    models_url = "https://github.com/LostRuins/koboldcpp/wiki#what-models-does-koboldcpp-support-what-architectures-are-supported"
    LaunchWebbrowser(models_url,"Cannot launch help in browser.")
|
||||
|
||||
|
@ -5783,7 +5780,7 @@ def show_gui():
|
|||
ctk.CTkButton(tabs , text = "Update", fg_color="#9900cc", hover_color="#aa11dd", command = display_updates, width=90, height = 35 ).grid(row=1,column=0, stick="sw", padx= 5, pady=5)
|
||||
ctk.CTkButton(tabs , text = "Save Config", fg_color="#084a66", hover_color="#085a88", command = save_config_gui, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 5, pady=5)
|
||||
ctk.CTkButton(tabs , text = "Load Config", fg_color="#084a66", hover_color="#085a88", command = load_config_gui, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 92, pady=5)
|
||||
ctk.CTkButton(tabs , text = "Help (Find Models)", fg_color="#992222", hover_color="#bb3333", command = display_help, width=100, height = 35 ).grid(row=1,column=1, stick="sw", padx= 180, pady=5)
|
||||
ctk.CTkButton(tabs , text = "Help (Find Models)", fg_color="#992222", hover_color="#bb3333", command = display_help_models, width=100, height = 35 ).grid(row=1,column=1, stick="sw", padx= 180, pady=5)
|
||||
|
||||
# start a thread that tries to get actual gpu names and layer counts
|
||||
gpuinfo_thread = threading.Thread(target=auto_set_backend_gui)
|
||||
|
@ -5817,7 +5814,7 @@ def show_gui():
|
|||
print("")
|
||||
time.sleep(0.5)
|
||||
if using_gui_launcher:
|
||||
givehelp = show_gui_yesnobox("No Model Loaded","No text or image model file was selected. Cannot continue.\n\nDo you want help finding a GGUF model?")
|
||||
givehelp = show_gui_yesnobox("No Model Loaded","No text or image model file was selected. Need a model to continue.\n\nDo you want help finding a GGUF model?")
|
||||
if givehelp == 'yes':
|
||||
display_help_models()
|
||||
else:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue