mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
Disable the [gMASK] prefix for GLM-4 completions
This commit is contained in:
parent
49305942ab
commit
8b6dfbd1be
3 changed files with 3 additions and 26 deletions
|
@ -2439,7 +2439,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
|
if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
|
||||||
std::string temp = gpttype_get_chat_template();
|
std::string temp = gpttype_get_chat_template();
|
||||||
if (temp.find("[gMASK]<sop>") != std::string::npos) {
|
if (temp.find("[gMASK]<sop>") != std::string::npos) {
|
||||||
printf("GLM-4 special BOS handling used.\n");
|
printf("GLM-4 will have no automatic BOS token.\n");
|
||||||
add_bos_token = false;
|
add_bos_token = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3262,30 +3262,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
|
|
||||||
// if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
|
|
||||||
// std::string temp = gpttype_get_chat_template();
|
|
||||||
// if (temp.find("[gMASK]<sop>") != std::string::npos) {
|
|
||||||
// if (addedmemory == "") {
|
|
||||||
// if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
|
|
||||||
// kcpp_data->prompt.erase(0, 7);
|
|
||||||
// }
|
|
||||||
// if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
|
|
||||||
// kcpp_data->prompt.erase(0, 5);
|
|
||||||
// }
|
|
||||||
// addedmemory = "[gMASK]<sop>";
|
|
||||||
// } else {
|
|
||||||
// if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
|
|
||||||
// addedmemory.erase(0, 7);
|
|
||||||
// }
|
|
||||||
// if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
|
|
||||||
// addedmemory.erase(0, 5);
|
|
||||||
// }
|
|
||||||
// addedmemory = "[gMASK]<sop>" + addedmemory;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
bool stream_sse = inputs.stream_sse;
|
bool stream_sse = inputs.stream_sse;
|
||||||
bool allow_regular_prints = (!is_quiet && debugmode!=-1);
|
bool allow_regular_prints = (!is_quiet && debugmode!=-1);
|
||||||
|
|
||||||
|
|
|
@ -115,6 +115,7 @@
|
||||||
"search": ["[gMASK]<sop>"],
|
"search": ["[gMASK]<sop>"],
|
||||||
"name": "GLM-4",
|
"name": "GLM-4",
|
||||||
"adapter": {
|
"adapter": {
|
||||||
|
"chat_start": "[gMASK]<sop>",
|
||||||
"system_start": "<|system|>\n",
|
"system_start": "<|system|>\n",
|
||||||
"system_end": "",
|
"system_end": "",
|
||||||
"user_start": "<|user|>\n",
|
"user_start": "<|user|>\n",
|
||||||
|
|
|
@ -2079,7 +2079,7 @@ def transform_genparams(genparams, api_format):
|
||||||
if api_format==4 or api_format==7: #handle ollama chat here too
|
if api_format==4 or api_format==7: #handle ollama chat here too
|
||||||
# translate openai chat completion messages format into one big string.
|
# translate openai chat completion messages format into one big string.
|
||||||
messages_array = genparams.get('messages', [])
|
messages_array = genparams.get('messages', [])
|
||||||
messages_string = "" #chat start no longer needed, handled internally
|
messages_string = adapter_obj.get("chat_start", "")
|
||||||
system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
|
system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
|
||||||
system_message_end = adapter_obj.get("system_end", "")
|
system_message_end = adapter_obj.get("system_end", "")
|
||||||
user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
|
user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue