From e84596ec1a34b12d7c745fdde33c1339c8491641 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 15 Mar 2025 19:53:06 +0800
Subject: [PATCH] add config for default gen tokens and bos toggle

---
 expose.h            |  1 +
 gpttype_adapter.cpp | 17 ++++++-----------
 koboldcpp.py        | 33 ++++++++++++++++++++++++---------
 3 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/expose.h b/expose.h
index f13d786c9..7cbe4a3a1 100644
--- a/expose.h
+++ b/expose.h
@@ -60,6 +60,7 @@ struct load_model_inputs
     const float rope_freq_scale = 1.0f;
     const float rope_freq_base = 10000.0f;
     const int moe_experts = -1;
+    const bool no_bos_token = false;
     const bool flash_attention = false;
     const float tensor_split[tensor_split_max] = {};
     const int quant_k = 0;
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 66de0e731..487d5d573 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -133,6 +133,7 @@ static std::string concat_output = "";
 static std::string concat_output_reader_copy_poll = ""; //for streaming
 static std::string concat_output_reader_copy_res = ""; //for gen response
 static std::vector<logit_bias> logit_biases;
+static bool add_bos_token = true; // if set to false, mmproj handling breaks. dont disable unless you know what you're doing
 
 static int delayed_generated_tokens_limit = 0;
 std::deque<std::string> delayed_generated_tokens; //for use with antislop sampling
@@ -1905,6 +1906,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     kcpp_data->n_ctx = clamped_max_context_length;
     max_context_limit_at_load = clamped_max_context_length;
 
+    add_bos_token = !inputs.no_bos_token;
+    if(!add_bos_token)
+    {
+        printf("\n======\nBOS token prefix was disabled! Your output may be degraded!\n======\n");
+    }
     neox_ctx_v2.hparams.n_ctx = neox_ctx_v3.hparams.n_ctx
     = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx
@@ -2877,17 +2883,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
 
     bool llava_images_changed = false;
 
-    bool add_bos_token = true; //if set to false, mmproj handling breaks
-    // if(file_format == FileFormat::GGUF_GENERIC && mmproj_filename == "")
-    // {
-    //     const llama_vocab * tmpvocab = llama_model_get_vocab(llama_get_model(llama_ctx_v4));
-    //     add_bos_token = llama_vocab_get_add_bos(tmpvocab);
-    //     if(!add_bos_token && debugmode==1)
-    //     {
-    //         printf("\nBOS token prefix was disabled for this model.");
-    //     }
-    // }
-
     for(int x=0;x
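
Note: the koboldcpp.py hunk is truncated above. For context on how the new
no_bos_token field crosses the C++/Python boundary: koboldcpp.py mirrors
load_model_inputs as a ctypes structure, so the expose.h change only takes
effect if the Python _fields_ list declares the same field at the same
position. The following is a minimal, self-contained sketch of that pattern,
not the actual patch contents; the structure is reduced to the three fields
visible in the expose.h hunk, and the --nobostoken flag named in the comment
is hypothetical.

    import ctypes

    # Reduced, illustrative mirror of load_model_inputs from expose.h. The
    # real structure must declare every field of the C++ struct, in the same
    # order and with matching types, or the Python<->C++ ABI misaligns.
    class load_model_inputs(ctypes.Structure):
        _fields_ = [
            ("moe_experts", ctypes.c_int),       # const int moe_experts = -1;
            ("no_bos_token", ctypes.c_bool),     # new: const bool no_bos_token = false;
            ("flash_attention", ctypes.c_bool),  # const bool flash_attention = false;
        ]

    inputs = load_model_inputs()
    inputs.no_bos_token = True  # e.g. set from a hypothetical --nobostoken CLI flag

Because ctypes lays fields out in declaration order with C alignment,
inserting no_bos_token between moe_experts and flash_attention on the C++
side requires inserting it at the same position in _fields_; appending it at
the end would silently shift every later field.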