mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
Updated Lite; added automatic RoPE configuration based on the model's trained context length; hotfix: disabled GPU layer offload for GGUF Falcon models, which is currently broken
This commit is contained in:
parent
89495c0716
commit
d4c22a8b02
4 changed files with 654 additions and 74 deletions
|
@ -328,7 +328,7 @@ static std::string FileFormatTokenizeID(int id, FileFormat file_format)
|
|||
{
|
||||
return std::string(llama_v3_token_to_str(llama_ctx_v3, id));
|
||||
}
|
||||
else if( file_format == FileFormat::GGUF_LLAMA)
|
||||
else if(file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
|
||||
{
|
||||
return std::string(llama_token_to_str(llama_ctx_v4, id));
|
||||
}
|
||||
|
@ -340,7 +340,7 @@ static std::string FileFormatTokenizeID(int id, FileFormat file_format)
|
|||
|
||||
static void TokenizeString(const std::string & str_to_tokenize, std::vector<int> & output_tokens, FileFormat file_format)
|
||||
{
|
||||
if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_LLAMA)
|
||||
if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
|
||||
{
|
||||
if(file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 )
|
||||
{
|
||||
|
@ -432,7 +432,13 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
else
|
||||
{
|
||||
//approximate NTK aware ctx
|
||||
rope_freq_base = (params.n_ctx <= 3072 ? 26000.0f : (params.n_ctx <= 4096 ? 32000.0f : (params.n_ctx <= 6144 ? 54000.0f : (params.n_ctx <= 8192 ? 82684.0f : (params.n_ctx <= 12288 ? 140000.0f : 200000.0f)))));
|
||||
auto effectivenctx = params.n_ctx;
|
||||
if((file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON) && llama_ctx_v4->model.hparams.n_ctx_train>2048)
|
||||
{
|
||||
float factor = llama_ctx_v4->model.hparams.n_ctx_train/2048;
|
||||
effectivenctx = effectivenctx/factor;
|
||||
}
|
||||
rope_freq_base = (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f : (effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : 200000.0f)))));
|
||||
|
||||
}
|
||||
|
||||
|
@ -585,7 +591,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
}
|
||||
return ModelLoadResult::SUCCESS;
|
||||
}
|
||||
else if(file_format==FileFormat::GGUF_LLAMA)
|
||||
else if(file_format==FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
|
||||
{
|
||||
llama_context_params llama_ctx_params = llama_context_default_params();
|
||||
llama_ctx_params.n_ctx = inputs.max_context_length;
|
||||
|
@ -598,6 +604,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
llama_ctx_params.use_mmap = inputs.use_mmap;
|
||||
llama_ctx_params.use_mlock = inputs.use_mlock;
|
||||
llama_ctx_params.n_gpu_layers = inputs.gpulayers;
|
||||
if(file_format==FileFormat::GGUF_FALCON && llama_ctx_params.n_gpu_layers>0)
|
||||
{
|
||||
printf("\nGPU layer offload for GGUF FALCON is known to have issues, it has been set to 0.\n");
|
||||
llama_ctx_params.n_gpu_layers = 0;
|
||||
}
|
||||
llama_ctx_params.main_gpu = cu_parseinfo_maindevice;
|
||||
llama_ctx_params.rope_freq_base = rope_freq_base;
|
||||
llama_ctx_params.rope_freq_scale = rope_freq_scale;
|
||||
|
@ -1120,7 +1131,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
{
|
||||
//for non llama, limit to 256
|
||||
int bbs = blasbatchsize;
|
||||
if (file_format != FileFormat::GGML && file_format != FileFormat::GGHF && file_format != FileFormat::GGJT && file_format != FileFormat::GGJT_2 && file_format != FileFormat::GGJT_3 && file_format != FileFormat::GGUF_LLAMA)
|
||||
if (file_format != FileFormat::GGML && file_format != FileFormat::GGHF && file_format != FileFormat::GGJT && file_format != FileFormat::GGJT_2 && file_format != FileFormat::GGJT_3 && file_format != FileFormat::GGUF_LLAMA && file_format!=FileFormat::GGUF_FALCON)
|
||||
{
|
||||
bbs = (blasbatchsize > 256 ? 256 : blasbatchsize);
|
||||
}
|
||||
|
@ -1180,7 +1191,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
{
|
||||
n_vocab = llama_v3_n_vocab(llama_ctx_v3);
|
||||
}
|
||||
else if(file_format == FileFormat::GGUF_LLAMA)
|
||||
else if(file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
|
||||
{
|
||||
n_vocab = llama_n_vocab(llama_ctx_v4);
|
||||
}
|
||||
|
@ -1331,7 +1342,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
{
|
||||
evalres = (llama_v3_eval(llama_ctx_v3, embd.data(), embdsize, n_past, params.n_threads)==0);
|
||||
}
|
||||
else if(file_format == FileFormat::GGUF_LLAMA)
|
||||
else if(file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
|
||||
{
|
||||
evalres = (llama_eval(llama_ctx_v4, embd.data(), embdsize, n_past, params.n_threads)==0);
|
||||
}
|
||||
|
@ -1439,9 +1450,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
unsigned int eosID = 0;
|
||||
float * logitsPtr;
|
||||
int btsize = banned_token_ids.size();
|
||||
if(file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_LLAMA)
|
||||
if(file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3 || file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
|
||||
{
|
||||
if(file_format == FileFormat::GGUF_LLAMA)
|
||||
if(file_format == FileFormat::GGUF_LLAMA || file_format==FileFormat::GGUF_FALCON)
|
||||
{
|
||||
logitsPtr = llama_get_logits(llama_ctx_v4);
|
||||
eosID = llama_token_eos(llama_ctx_v4);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue