mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-09 16:44:35 +00:00
fix qwen3, fixed sd, fixed glm4
This commit is contained in:
parent
4d8a7a6594
commit
c2802af9e8
7 changed files with 99 additions and 24 deletions
|
@ -1915,6 +1915,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
kcpp_data->n_ctx = clamped_max_context_length;
|
||||
max_context_limit_at_load = clamped_max_context_length;
|
||||
add_bos_token = !inputs.no_bos_token;
|
||||
|
||||
if(!add_bos_token)
|
||||
{
|
||||
printf("\n======\nBOS token prefix was disabled! Your output may be degraded unless model was designed for it!\n======\n");
|
||||
|
@ -2368,6 +2369,14 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
}
|
||||
}
|
||||
|
||||
//we cannot really trust the add_bos flag in the vocab, because old models don't set it.
|
||||
// instead, we EXPLICITLY need to find the add_bos_token key==false to automatically turn it off.
|
||||
if(!llamamodel->vocab.get_add_bos() && add_bos_token && file_format_meta.explicitly_no_bos)
|
||||
{
|
||||
printf("\nThis architecture has explicitly disabled the BOS token - if you need it, you must add it manually.\n");
|
||||
add_bos_token = false;
|
||||
}
|
||||
|
||||
//warmup at least 33 tokens to trigger batch
|
||||
std::vector<int> tmp;
|
||||
for (int i = 1; i <= 33; ++i) {
|
||||
|
@ -3180,6 +3189,30 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
}
|
||||
}
|
||||
|
||||
//need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
|
||||
if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
|
||||
std::string temp = gpttype_get_chat_template();
|
||||
if (temp.find("[gMASK]<sop>") != std::string::npos) {
|
||||
if (addedmemory == "") {
|
||||
if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
|
||||
kcpp_data->prompt.erase(0, 7);
|
||||
}
|
||||
if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
|
||||
kcpp_data->prompt.erase(0, 5);
|
||||
}
|
||||
addedmemory = "<sop>";
|
||||
} else {
|
||||
if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
|
||||
addedmemory.erase(0, 7);
|
||||
}
|
||||
if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
|
||||
addedmemory.erase(0, 5);
|
||||
}
|
||||
addedmemory = "<sop>" + addedmemory;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool stream_sse = inputs.stream_sse;
|
||||
bool allow_regular_prints = (!is_quiet && debugmode!=-1);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue