fix qwen3, fixed sd, fixed glm4

This commit is contained in:
Concedo 2025-04-29 20:50:46 +08:00
parent 4d8a7a6594
commit c2802af9e8
7 changed files with 99 additions and 24 deletions

View file

@ -1915,6 +1915,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
kcpp_data->n_ctx = clamped_max_context_length;
max_context_limit_at_load = clamped_max_context_length;
add_bos_token = !inputs.no_bos_token;
if(!add_bos_token)
{
printf("\n======\nBOS token prefix was disabled! Your output may be degraded unless model was designed for it!\n======\n");
@ -2368,6 +2369,14 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
}
}
// We cannot really trust the add-BOS flag in the vocab: old models don't set it.
// Instead, we EXPLICITLY need to find the add_bos_token key == false to automatically turn it off.
if(!llamamodel->vocab.get_add_bos() && add_bos_token && file_format_meta.explicitly_no_bos)
{
printf("\nThis architecture has explicitly disabled the BOS token - if you need it, you must add it manually.\n");
add_bos_token = false;
}
//warmup at least 33 tokens to trigger batch
std::vector<int> tmp;
for (int i = 1; i <= 33; ++i) {
@ -3180,6 +3189,30 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
}
}
// GLM4 coherency hack: when the chat template contains "[gMASK]<sop>", strip any
// leading "[gMASK]" and then any leading "<sop>" from the text that would come first
// (addedmemory if it is non-empty, otherwise the prompt), and make addedmemory
// begin with a single "<sop>".
if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
    const std::string chat_template = gpttype_get_chat_template();
    const bool template_has_gmask_sop = (chat_template.find("[gMASK]<sop>") != std::string::npos);
    if (template_has_gmask_sop) {
        // Removes `prefix` from the front of `text` only when `text` starts with it.
        auto drop_prefix = [](std::string &text, const std::string &prefix) {
            if (text.compare(0, prefix.size(), prefix) == 0) {
                text.erase(0, prefix.size());
            }
        };

        // With no memory text, the prompt is what gets cleaned; otherwise the memory is.
        std::string &leading_text = addedmemory.empty() ? kcpp_data->prompt : addedmemory;
        drop_prefix(leading_text, "[gMASK]");
        drop_prefix(leading_text, "<sop>");

        // Yields exactly "<sop>" when addedmemory was empty, "<sop>" + cleaned memory otherwise.
        addedmemory = "<sop>" + addedmemory;
    }
}
bool stream_sse = inputs.stream_sse;
bool allow_regular_prints = (!is_quiet && debugmode!=-1);