add config for default gen tokens and bos toggle

Concedo 2025-03-15 19:53:06 +08:00
parent bfc30066c9
commit e84596ec1a
3 changed files with 31 additions and 20 deletions

@@ -133,6 +133,7 @@ static std::string concat_output = "";
 static std::string concat_output_reader_copy_poll = ""; //for streaming
 static std::string concat_output_reader_copy_res = ""; //for gen response
 static std::vector<logit_bias> logit_biases;
+static bool add_bos_token = true; // if set to false, mmproj handling breaks. don't disable unless you know what you're doing
 static int delayed_generated_tokens_limit = 0;
 std::deque<std::string> delayed_generated_tokens; //for use with antislop sampling
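
For context, here is a minimal sketch (not part of the diff) of how a global BOS toggle like the one added above is typically consumed at tokenization time. `tokenize_prompt` and `BOS_ID` are illustrative stand-ins, not identifiers from this codebase:

```cpp
#include <string>
#include <vector>

static bool add_bos_token = true; // mirrors the new global above
static const int BOS_ID = 1;      // hypothetical BOS token id

// Prepend BOS only when the toggle allows it, then hand off to the
// model's real tokenizer (elided here).
std::vector<int> tokenize_prompt(const std::string &prompt)
{
    std::vector<int> toks;
    if (add_bos_token) {
        toks.push_back(BOS_ID);
    }
    // ... real tokenizer output for `prompt` would be appended here
    (void)prompt;
    return toks;
}
```
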
@@ -1905,6 +1906,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     kcpp_data->n_ctx = clamped_max_context_length;
     max_context_limit_at_load = clamped_max_context_length;
+    add_bos_token = !inputs.no_bos_token;
+    if(!add_bos_token)
+    {
+        printf("\n======\nBOS token prefix was disabled! Your output may be degraded!\n======\n");
+    }
     neox_ctx_v2.hparams.n_ctx = neox_ctx_v3.hparams.n_ctx
     = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gptj_ctx_v3.hparams.n_ctx
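
As a usage sketch, the caller side would flip the new `no_bos_token` field before loading. The pared-down struct below is a stand-in; only the field name is taken from the diff, everything else is illustrative:

```cpp
#include <cstdio>

// Stand-in for the real load_model_inputs struct; only no_bos_token
// comes from the diff, the rest of the fields are elided.
struct load_model_inputs { bool no_bos_token = false; };

static bool add_bos_token = true;

// Mirrors the load-time wiring in the hunk above.
void apply_bos_config(const load_model_inputs &inputs)
{
    add_bos_token = !inputs.no_bos_token;
    if (!add_bos_token) {
        printf("\n======\nBOS token prefix was disabled! Your output may be degraded!\n======\n");
    }
}

int main()
{
    load_model_inputs inputs;
    inputs.no_bos_token = true; // opt out of BOS prepending
    apply_bos_config(inputs);   // prints the loud warning
    return 0;
}
```
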
@@ -2877,17 +2883,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     bool llava_images_changed = false;
-    bool add_bos_token = true; //if set to false, mmproj handling breaks
-    // if(file_format == FileFormat::GGUF_GENERIC && mmproj_filename == "")
-    // {
-    //     const llama_vocab * tmpvocab = llama_model_get_vocab(llama_get_model(llama_ctx_v4));
-    //     add_bos_token = llama_vocab_get_add_bos(tmpvocab);
-    //     if(!add_bos_token && debugmode==1)
-    //     {
-    //         printf("\nBOS token prefix was disabled for this model.");
-    //     }
-    // }
     for(int x=0;x<inputs.stop_sequence_len;++x)
     {
         std::string stopper = inputs.stop_sequence[x];
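
For reference, the per-model probe that the deleted comment block described would look roughly like this when enabled. The calls are taken from the removed lines themselves, but exact llama.cpp signatures vary by version, and `llama_ctx_v4` and `debugmode` are globals from the surrounding file:

```cpp
// Per-model BOS probe, reconstructed from the removed comments above.
// Reads whether the model's vocab wants a BOS token prepended.
const llama_vocab * tmpvocab = llama_model_get_vocab(llama_get_model(llama_ctx_v4));
bool model_wants_bos = llama_vocab_get_add_bos(tmpvocab);
if (!model_wants_bos && debugmode == 1) {
    printf("\nBOS token prefix was disabled for this model.");
}
```

The commit drops this per-model probe in favor of the single flag set at load time; per the comments in the diff, forcing BOS off is what breaks mmproj handling, so the global now defaults to keeping BOS on unless the user explicitly opts out.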