diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index d83628e98..cb5121bf5 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -1907,16 +1907,19 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         printf("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.\n");
         clamped_max_context_length = 16384;
     }
-    if (isGguf && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4 && kcpp_data->n_batch > 16) {
+
+    #if defined(GGML_USE_VULKAN)
+    if (isGguf && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4 && kcpp_data->n_ubatch > 16) {
         if(debugmode==1)
         {
-            printf("GLM-4 is broken on larger batch sizes. Clamp ignored in debug.\n");
+            printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamp ignored in debug.\n");
         }
         else
         {
-            printf("GLM-4 is broken on larger batch sizes. Clamping batch size to 16.\n");
-            kcpp_data->n_batch = kcpp_data->n_ubatch = 16;
+            printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 16.\n");
+            kcpp_data->n_ubatch = 16;
         }
     }
+    #endif
 
     kcpp_data->n_ctx = clamped_max_context_length;
     max_context_limit_at_load = clamped_max_context_length;
diff --git a/klite.embd b/klite.embd
index 597f4ba6c..deab4a05b 100644
--- a/klite.embd
+++ b/klite.embd
@@ -3440,7 +3440,7 @@
         "name":"GLM-4",
         "user":"<|user|>\\n",
         "user_end":"",
-        "assistant":"<|assistant|>",
+        "assistant":"<|assistant|>\\n",
         "assistant_end":"",
         "system":"<|system|>\\n",
         "system_end":"",