Disabling the [gMASK] prefix for GLM-4 completions

This commit is contained in:
Concedo 2025-05-21 17:29:24 +08:00
parent 49305942ab
commit 8b6dfbd1be
3 changed files with 3 additions and 26 deletions

View file

@@ -2439,7 +2439,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
         std::string temp = gpttype_get_chat_template();
         if (temp.find("[gMASK]<sop>") != std::string::npos) {
-            printf("GLM-4 special BOS handling used.\n");
+            printf("GLM-4 will have no automatic BOS token.\n");
             add_bos_token = false;
         }
     }
@@ -3262,30 +3262,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
     }
-    //need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
-    // if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
-    //     std::string temp = gpttype_get_chat_template();
-    //     if (temp.find("[gMASK]<sop>") != std::string::npos) {
-    //         if (addedmemory == "") {
-    //             if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 7);
-    //             }
-    //             if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>";
-    //         } else {
-    //             if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 7);
-    //             }
-    //             if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>" + addedmemory;
-    //         }
-    //     }
-    // }
     bool stream_sse = inputs.stream_sse;
     bool allow_regular_prints = (!is_quiet && debugmode!=-1);

View file

@@ -115,6 +115,7 @@
     "search": ["[gMASK]<sop>"],
     "name": "GLM-4",
     "adapter": {
+        "chat_start": "[gMASK]<sop>",
         "system_start": "<|system|>\n",
         "system_end": "",
         "user_start": "<|user|>\n",

View file

@@ -2079,7 +2079,7 @@ def transform_genparams(genparams, api_format):
     if api_format==4 or api_format==7: #handle ollama chat here too
         # translate openai chat completion messages format into one big string.
         messages_array = genparams.get('messages', [])
-        messages_string = "" #chat start no longer needed, handled internally
+        messages_string = adapter_obj.get("chat_start", "")
         system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
         system_message_end = adapter_obj.get("system_end", "")
         user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")