added link for novita AI, added legacy warning for old GGML models

Concedo 2024-09-09 11:19:32 +08:00
parent c78690737c
commit 27bbdf7d2a
2 changed files with 20 additions and 13 deletions

@@ -34,6 +34,9 @@ After running this command you can launch Koboldcpp from the current directory u
 ## Run on RunPod
 - KoboldCpp can now be used on RunPod cloud GPUs! This is an easy way to get started without installing anything in a minute or two, and is very scalable, capable of running 70B+ models at affordable cost. [Try our RunPod image here!](https://koboldai.org/runpodcpp).
+## Run on Novita AI
+KoboldCpp can now also be run on Novita AI, a newer alternative GPU cloud provider that offers a quick-launch KoboldCpp template. [Check it out here!](https://koboldai.org/novitacpp)
 ## Docker
 - The official docker can be found at https://hub.docker.com/r/koboldai/koboldcpp
 - If you're building your own docker, remember to set CUDA_DOCKER_ARCH or enable LLAMA_PORTABLE

@@ -78,7 +78,7 @@ extern "C"
         if(file_format==FileFormat::GPTJ_1 || file_format==FileFormat::GPTJ_2 || file_format==FileFormat::GPTJ_3 || file_format==FileFormat::GPTJ_4 || file_format==FileFormat::GPTJ_5)
         {
-            printf("\n---\nIdentified as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nIdentified as Legacy GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             ModelLoadResult lr = gpttype_load_model(inputs, file_format, file_format_meta);
             if (lr == ModelLoadResult::RETRY_LOAD)
             {
@@ -87,14 +87,14 @@ extern "C"
                 //if we tried 1 first, then try 3 and lastly 2
                 //otherwise if we tried 3 first, then try 2
                 file_format = FileFormat::GPTJ_4;
-                printf("\n---\nRetrying as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nRetrying as Legacy GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                 lr = gpttype_load_model(inputs, file_format, file_format_meta);
             }
             if (lr == ModelLoadResult::RETRY_LOAD)
             {
                 file_format = FileFormat::GPTJ_3;
-                printf("\n---\nRetrying as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nRetrying as Legacy GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                 lr = gpttype_load_model(inputs, file_format, file_format_meta);
             }
@@ -102,7 +102,7 @@ extern "C"
             if (lr == ModelLoadResult::RETRY_LOAD)
             {
                 file_format = FileFormat::GPTJ_2;
-                printf("\n---\nRetrying as GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nRetrying as Legacy GPT-J model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                 lr = gpttype_load_model(inputs, file_format, file_format_meta);
             }
         }
@@ -118,18 +118,18 @@ extern "C"
         }
         else if(file_format==FileFormat::GPT2_1||file_format==FileFormat::GPT2_2||file_format==FileFormat::GPT2_3||file_format==FileFormat::GPT2_4)
         {
-            printf("\n---\nIdentified as GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nIdentified as Legacy GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             ModelLoadResult lr = gpttype_load_model(inputs, file_format, file_format_meta);
             if (lr == ModelLoadResult::RETRY_LOAD)
             {
                 file_format = FileFormat::GPT2_3;
-                printf("\n---\nRetrying as GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nRetrying as Legacy GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                 lr = gpttype_load_model(inputs, file_format, file_format_meta);
             }
             if (lr == ModelLoadResult::RETRY_LOAD)
             {
                 file_format = FileFormat::GPT2_2;
-                printf("\n---\nRetrying as GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nRetrying as Legacy GPT-2 model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                 lr = gpttype_load_model(inputs, file_format, file_format_meta);
             }
             if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
@@ -143,27 +143,27 @@ extern "C"
         }
         else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_3 || file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5 || file_format==FileFormat::NEOX_6 || file_format==FileFormat::NEOX_7)
         {
-            printf("\n---\nIdentified as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+            printf("\n---\nIdentified as Legacy GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             ModelLoadResult lr = gpttype_load_model(inputs, file_format, file_format_meta);
             if (lr == ModelLoadResult::RETRY_LOAD)
             {
                 if(file_format==FileFormat::NEOX_2)
                 {
                     file_format = FileFormat::NEOX_3;
-                    printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                    printf("\n---\nRetrying as Legacy GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                     lr = gpttype_load_model(inputs, file_format, file_format_meta);
                 }
                 else
                 {
                     file_format = FileFormat::NEOX_5;
-                    printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                    printf("\n---\nRetrying as Legacy GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                     lr = gpttype_load_model(inputs, file_format, file_format_meta);
                 }
             }
             if (lr == ModelLoadResult::RETRY_LOAD)
             {
                 file_format = FileFormat::NEOX_1;
-                printf("\n---\nRetrying as GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nRetrying as Legacy GPT-NEO-X model: (ver %d)\nAttempting to Load...\n---\n", file_format);
                 lr = gpttype_load_model(inputs, file_format, file_format_meta);
             }
             if (lr == ModelLoadResult::FAIL || lr == ModelLoadResult::RETRY_LOAD)
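
The three hunks above (GPT-J, GPT-2, GPT-NEO-X) all apply the same retry cascade: attempt the detected legacy format version, and on each RETRY_LOAD result fall through to an older sibling version until something loads or the chain runs out. Below is a minimal, self-contained sketch of that pattern; the enum, the `try_load()` stand-in, and the hard-coded version list are hypothetical simplifications for illustration, not KoboldCpp's actual code.

```cpp
#include <cstdio>
#include <vector>

enum class ModelLoadResult { SUCCESS, FAIL, RETRY_LOAD };

// Hypothetical stand-in for gpttype_load_model(): pretend only ver 2 loads.
static ModelLoadResult try_load(int ver)
{
    return ver == 2 ? ModelLoadResult::SUCCESS : ModelLoadResult::RETRY_LOAD;
}

int main()
{
    // e.g. a file detected as ver 5 first, then falling back through 4, 3, 2
    const std::vector<int> fallback_order = {5, 4, 3, 2};
    ModelLoadResult lr = ModelLoadResult::FAIL;
    for (int ver : fallback_order)
    {
        printf("\n---\nAttempting as Legacy model: (ver %d)\n---\n", ver);
        lr = try_load(ver);
        if (lr != ModelLoadResult::RETRY_LOAD)
        {
            break; // loaded successfully, or failed hard: stop retrying
        }
    }
    return (lr == ModelLoadResult::SUCCESS) ? 0 : 1;
}
```

A table-driven loop like this avoids repeating the printf/reload block per version, but the real dispatcher keeps explicit branches because the fallback order is conditional: as the NEOX hunk shows, the next candidate depends on which version was detected first.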
@@ -179,16 +179,20 @@ extern "C"
         {
             if(file_format==FileFormat::MPT_1)
             {
-                printf("\n---\nIdentified as MPT model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nIdentified as Legacy MPT model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             }
             else if(file_format==FileFormat::RWKV_1 || file_format==FileFormat::RWKV_2)
             {
-                printf("\n---\nIdentified as RWKV model: (ver %d)\nAttempting to Load...\n---\n", file_format);
+                printf("\n---\nIdentified as Legacy RWKV model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             }
             else if(file_format==FileFormat::GGUF_GENERIC)
             {
                 printf("\n---\nIdentified as GGUF model: (ver %d)\nAttempting to Load...\n---\n", file_format);
             }
+            else if(file_format==FileFormat::GGML || file_format==FileFormat::GGHF || file_format==FileFormat::GGJT || file_format==FileFormat::GGJT_2 || file_format==FileFormat::GGJT_3)
+            {
+                printf("\n---\nIdentified as Legacy GGML model: (ver %d)\nYou are STRONGLY ENCOURAGED to obtain a newer GGUF model!\nAttempting to Load...\n---\n", file_format);
+            }
             else
             {
                 printf("\n---\nUnidentified Model Encountered: (ver %d)\n---\n", file_format);