diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 8da5600de..22081e958 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -2439,7 +2439,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
         std::string temp = gpttype_get_chat_template();
         if (temp.find("[gMASK]") != std::string::npos) {
-            printf("GLM-4 special BOS handling used.\n");
+            printf("GLM-4 will have no automatic BOS token.\n");
             add_bos_token = false;
         }
     }
@@ -3262,30 +3262,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
     }
 
-    //need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
-    // if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
-    //     std::string temp = gpttype_get_chat_template();
-    //     if (temp.find("[gMASK]") != std::string::npos) {
-    //         if (addedmemory == "") {
-    //             if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 7);
-    //             }
-    //             if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
-    //                 kcpp_data->prompt.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>";
-    //         } else {
-    //             if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 7);
-    //             }
-    //             if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
-    //                 addedmemory.erase(0, 5);
-    //             }
-    //             addedmemory = "[gMASK]<sop>" + addedmemory;
-    //         }
-    //     }
-    // }
-
     bool stream_sse = inputs.stream_sse;
     bool allow_regular_prints = (!is_quiet && debugmode!=-1);
 
diff --git a/kcpp_adapters/AutoGuess.json b/kcpp_adapters/AutoGuess.json
index d7faa18c3..3fb0e4e5f 100644
--- a/kcpp_adapters/AutoGuess.json
+++ b/kcpp_adapters/AutoGuess.json
@@ -115,6 +115,7 @@
         "search": ["[gMASK]"],
         "name": "GLM-4",
         "adapter": {
+            "chat_start": "[gMASK]<sop>",
             "system_start": "<|system|>\n",
             "system_end": "",
             "user_start": "<|user|>\n",
diff --git a/koboldcpp.py b/koboldcpp.py
index b35065bfc..254fbc345 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -2079,7 +2079,7 @@ def transform_genparams(genparams, api_format):
     if api_format==4 or api_format==7: #handle ollama chat here too
         # translate openai chat completion messages format into one big string.
         messages_array = genparams.get('messages', [])
-        messages_string = "" #chat start no longer needed, handled internally
+        messages_string = adapter_obj.get("chat_start", "")
         system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
         system_message_end = adapter_obj.get("system_end", "")
         user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
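
Taken together, these changes move the GLM-4 "[gMASK]<sop>" prefix injection out of the (previously commented-out) C++ hack and into the chat adapter: the prefix is declared as "chat_start" in AutoGuess.json, the OpenAI-compatible message flattener in koboldcpp.py seeds its output string with that field, and gpttype_adapter.cpp suppresses the automatic BOS token so the prefix is not duplicated.

Below is a minimal, self-contained Python sketch of how the flattener consumes "chat_start" after this change. It is an illustration, not the actual koboldcpp.py code: flatten_messages is a hypothetical helper name, and the adapter dict simply mirrors the GLM-4 entry added above.

adapter_obj = {
    "chat_start": "[gMASK]<sop>",
    "system_start": "<|system|>\n", "system_end": "",
    "user_start": "<|user|>\n", "user_end": "",
    "assistant_start": "<|assistant|>\n", "assistant_end": "",
}

def flatten_messages(messages, adapter_obj):
    # Seed the prompt with chat_start instead of "" (the koboldcpp.py change above).
    out = adapter_obj.get("chat_start", "")
    for msg in messages:
        role = msg.get("role", "user")  # "system", "user" or "assistant"
        out += adapter_obj.get(role + "_start", "")
        out += msg.get("content", "")
        out += adapter_obj.get(role + "_end", "")
    # Open an assistant turn so the model continues from there.
    out += adapter_obj.get("assistant_start", "")
    return out

print(flatten_messages(
    [{"role": "system", "content": "Be concise."},
     {"role": "user", "content": "Hello"}],
    adapter_obj))
# resulting string (newlines shown as \n):
# [gMASK]<sop><|system|>\nBe concise.<|user|>\nHello<|assistant|>\n

Because add_bos_token is now forced off whenever a GLM-4 model's template contains "[gMASK]", the assembled prompt starts with the template's own "[gMASK]<sop>" prefix exactly once, rather than a BOS token followed by a possibly duplicated prefix.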