Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
added link for novita AI, added legacy warning for old GGML models
parent c78690737c
commit 27bbdf7d2a
2 changed files with 20 additions and 13 deletions
@@ -34,6 +34,9 @@ After running this command you can launch Koboldcpp from the current directory u
 ## Run on RunPod
 - KoboldCpp can now be used on RunPod cloud GPUs! This is an easy way to get started in a minute or two without installing anything, and it is very scalable, capable of running 70B+ models at affordable cost. [Try our RunPod image here!](https://koboldai.org/runpodcpp).
+
+## Run on Novita AI
+KoboldCpp can now also be run on Novita AI, a newer alternative GPU cloud provider which also offers a quick-launch KoboldCpp template. [Check it out here!](https://koboldai.org/novitacpp)
 
 ## Docker
 - The official docker can be found at https://hub.docker.com/r/koboldai/koboldcpp
 - If you're building your own docker, remember to set CUDA_DOCKER_ARCH or enable LLAMA_PORTABLE
expose.cpp (30 changed lines)
@@ -78,7 +78,7 @@ extern "C"
 
     if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3 || file_format==FileFormat::GPTJ_4 || file_format==FileFormat::GPTJ_5)
     {
-        printf("\n---\nIdentified as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+        printf("\n---\nIdentified as Legacy GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
         ModelLoadResult lr = gpttype_load_model(inputs, file_format, file_format_meta);
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
@@ -87,14 +87,14 @@ extern "C"
             //if we tried 1 first, then try 3 and lastly 2
             //otherwise if we tried 3 first, then try 2
             file_format = FileFormat::GPTJ_4;
-            printf("\n---\nRetrying as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nRetrying as Legacy GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             lr = gpttype_load_model(inputs, file_format, file_format_meta);
         }
 
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
             file_format = FileFormat::GPTJ_3;
-            printf("\n---\nRetrying as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nRetrying as Legacy GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             lr = gpttype_load_model(inputs, file_format, file_format_meta);
         }
 
@@ -102,7 +102,7 @@ extern "C"
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
             file_format = FileFormat::GPTJ_2;
-            printf("\n---\nRetrying as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nRetrying as Legacy GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             lr = gpttype_load_model(inputs, file_format, file_format_meta);
         }
     }
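For context, the printf lines changed above live inside koboldcpp's retry cascade: the loader first tries the detected format version, and whenever gpttype_load_model returns ModelLoadResult::RETRY_LOAD it falls back to the next older revision (GPTJ_4, then GPTJ_3, then GPTJ_2). Below is a minimal standalone sketch of that control flow; the FileFormat values, the stub loader, and load_with_fallbacks are simplified stand-ins for illustration, not koboldcpp's real API.

```cpp
#include <cstdio>
#include <vector>

// Simplified stand-ins for koboldcpp's real types (assumption: the real
// gpttype_load_model also takes the load inputs and format metadata).
enum FileFormat { GPTJ_2 = 2, GPTJ_3 = 3, GPTJ_4 = 4, GPTJ_5 = 5 };
enum class ModelLoadResult { SUCCESS, FAIL, RETRY_LOAD };

// Stub loader: the real one parses the model file and may ask for a retry.
static ModelLoadResult gpttype_load_model(FileFormat fmt)
{
    return fmt == GPTJ_3 ? ModelLoadResult::SUCCESS
                         : ModelLoadResult::RETRY_LOAD;
}

// Try the detected format first, then walk the fallback list while the
// loader keeps returning RETRY_LOAD (mirrors GPTJ_5 -> 4 -> 3 -> 2 above).
static ModelLoadResult load_with_fallbacks(FileFormat detected,
                                           const std::vector<FileFormat> &fallbacks)
{
    ModelLoadResult lr = gpttype_load_model(detected);
    for (FileFormat fmt : fallbacks)
    {
        if (lr != ModelLoadResult::RETRY_LOAD)
        {
            break; // success or hard failure: stop retrying
        }
        printf("Retrying as Legacy GPT-J model: (ver %d)\n", (int)fmt);
        lr = gpttype_load_model(fmt);
    }
    return lr;
}

int main()
{
    ModelLoadResult lr = load_with_fallbacks(GPTJ_5, {GPTJ_4, GPTJ_3, GPTJ_2});
    return lr == ModelLoadResult::SUCCESS ? 0 : 1;
}
```

The same try-then-downgrade pattern repeats for the GPT-2 and GPT-NEO-X branches in the hunks that follow.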
@@ -118,18 +118,18 @@ extern "C"
     }
     else if(file_format==FileFormat::GPT2_1||file_format==FileFormat::GPT2_2||file_format==FileFormat::GPT2_3||file_format==FileFormat::GPT2_4)
     {
-        printf("\n---\nIdentified as GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+        printf("\n---\nIdentified as Legacy GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
         ModelLoadResult lr = gpttype_load_model(inputs, file_format, file_format_meta);
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
             file_format = FileFormat::GPT2_3;
-            printf("\n---\nRetrying as GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nRetrying as Legacy GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             lr = gpttype_load_model(inputs, file_format, file_format_meta);
         }
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
             file_format = FileFormat::GPT2_2;
-            printf("\n---\nRetrying as GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nRetrying as Legacy GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             lr = gpttype_load_model(inputs, file_format, file_format_meta);
         }
         if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
@@ -143,27 +143,27 @@ extern "C"
     }
     else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_3 || file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5 || file_format==FileFormat::NEOX_6 || file_format==FileFormat::NEOX_7)
     {
-        printf("\n---\nIdentified as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+        printf("\n---\nIdentified as Legacy GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
         ModelLoadResult lr = gpttype_load_model(inputs, file_format, file_format_meta);
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
             if(file_format==FileFormat::NEOX_2)
             {
                 file_format = FileFormat::NEOX_3;
-                printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nRetrying as Legacy GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                 lr = gpttype_load_model(inputs, file_format, file_format_meta);
             }
             else
             {
                 file_format = FileFormat::NEOX_5;
-                printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nRetrying as Legacy GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                 lr = gpttype_load_model(inputs, file_format, file_format_meta);
             }
         }
         if (lr == ModelLoadResult::RETRY_LOAD)
         {
             file_format = FileFormat::NEOX_1;
-            printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nRetrying as Legacy GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             lr = gpttype_load_model(inputs, file_format, file_format_meta);
         }
         if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
@@ -179,16 +179,20 @@ extern "C"
     {
         if(file_format==FileFormat::MPT_1)
         {
-            printf("\n---\nIdentified as MPT model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nIdentified as Legacy MPT model: (ver %d)\nAttempting to Load...\n---\n", file_format);
         }
         else if(file_format==FileFormat::RWKV_1 || file_format==FileFormat::RWKV_2)
         {
-            printf("\n---\nIdentified as RWKV model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nIdentified as Legacy RWKV model: (ver %d)\nAttempting to Load...\n---\n", file_format);
         }
         else if(file_format==FileFormat::GGUF_GENERIC)
         {
             printf("\n---\nIdentified as GGUF model: (ver %d)\nAttempting to Load...\n---\n", file_format);
         }
+        else if(file_format==FileFormat::GGML || file_format==FileFormat::GGHF || file_format==FileFormat::GGJT || file_format==FileFormat::GGJT_2 || file_format==FileFormat::GGJT_3)
+        {
+            printf("\n---\nIdentified as Legacy GGML model: (ver %d)\nYou are STRONGLY ENCOURAGED to obtain a newer GGUF model!\nAttempting to Load...\n---\n", file_format);
+        }
         else
         {
             printf("\n---\nUnidentified Model Encountered: (ver %d)\n---\n", file_format);
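The new branch above is what produces the legacy warning named in the commit message: pre-GGUF containers (GGML, GGHF, GGJT v1-v3) now print a strong nudge toward GGUF. As background, these container generations are distinguishable by the four magic bytes at the start of the file. The sketch below shows one way such a check can look; the magic constants match those used historically by the ggml/llama.cpp family, but warn_if_legacy_ggml and the little-endian read are illustrative assumptions, not koboldcpp's actual detection code.

```cpp
#include <cstdint>
#include <cstdio>

// Magic values as read into a little-endian uint32 from the file start.
constexpr uint32_t MAGIC_GGML = 0x67676d6c; // unversioned "ggml" container
constexpr uint32_t MAGIC_GGMF = 0x67676d66; // "ggmf" (GGHF in koboldcpp naming)
constexpr uint32_t MAGIC_GGJT = 0x67676a74; // "ggjt" (GGJT v1-v3)
constexpr uint32_t MAGIC_GGUF = 0x46554747; // "GGUF", the current format

// Hypothetical classifier: returns true (and warns) for legacy containers.
bool warn_if_legacy_ggml(const char *path)
{
    FILE *f = fopen(path, "rb");
    if (!f) { return false; }
    uint32_t magic = 0;
    size_t n = fread(&magic, sizeof(magic), 1, f);
    fclose(f);
    if (n != 1) { return false; }
    if (magic == MAGIC_GGML || magic == MAGIC_GGMF || magic == MAGIC_GGJT)
    {
        printf("Legacy GGML-era model detected. You are STRONGLY ENCOURAGED "
               "to obtain a newer GGUF model!\n");
        return true;
    }
    return false; // GGUF or unknown: no legacy warning needed
}
```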