diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index bf14e230b..93fa12016 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -1907,6 +1907,10 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in_file_format
         printf("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.\n");
         clamped_max_context_length = 16384;
     }
+    if (isGguf && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4 && kcpp_data->n_batch > 16) {
+        printf("GLM-4 is broken on larger batch sizes. Clamping batch size to 16.\n");
+        kcpp_data->n_batch = kcpp_data->n_ubatch = 16;
+    }
     kcpp_data->n_ctx = clamped_max_context_length;
     max_context_limit_at_load = clamped_max_context_length;
 
diff --git a/model_adapter.cpp b/model_adapter.cpp
index 3c0117b4e..6fcb8a070 100644
--- a/model_adapter.cpp
+++ b/model_adapter.cpp
@@ -329,6 +329,10 @@ void print_tok_vec(std::vector<int> &embd)
         {
             fileformatmeta->model_architecture = GGUFArch::ARCH_RWKV;
         }
+        else if(modelarch=="glm4")
+        {
+            fileformatmeta->model_architecture = GGUFArch::ARCH_GLM4;
+        }
         printf("Arch Category: %d\n",fileformatmeta->model_architecture);
     }
 
diff --git a/model_adapter.h b/model_adapter.h
index 9b1859de6..246799ae9 100644
--- a/model_adapter.h
+++ b/model_adapter.h
@@ -61,6 +61,7 @@ enum GGUFArch
     ARCH_RWKV = 6,
     ARCH_QWEN2VL = 7,
     ARCH_GEMMA3 = 8,
+    ARCH_GLM4 = 9,
 };
 
 struct FileFormatExtraMeta
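
For reviewers, below is a minimal, self-contained sketch of the batch-clamping logic this patch adds to gpttype_adapter.cpp. The `GGUFArch::ARCH_GLM4` value, the log message, and the `n_batch`/`n_ubatch` assignments mirror the diff; the `KcppData` struct and the `clamp_glm4_batch` helper are hypothetical stand-ins for koboldcpp's real `kcpp_data` state, introduced only so the snippet compiles on its own.

```cpp
#include <cstdio>

// Hypothetical stand-ins: the real GGUFArch enum lives in model_adapter.h,
// and n_batch/n_ubatch live on koboldcpp's kcpp_data context parameters.
enum class GGUFArch { ARCH_DEFAULT = 0, ARCH_GLM4 = 9 };

struct KcppData {
    int n_batch  = 512; // logical batch size requested by the user
    int n_ubatch = 512; // physical (micro) batch size
};

// Mirrors the patch: when a GGUF model is detected as GLM-4 and the
// requested batch exceeds 16, clamp both batch sizes down to 16.
void clamp_glm4_batch(bool is_gguf, GGUFArch arch, KcppData &data) {
    if (is_gguf && arch == GGUFArch::ARCH_GLM4 && data.n_batch > 16) {
        printf("GLM-4 is broken on larger batch sizes. Clamping batch size to 16.\n");
        data.n_batch = data.n_ubatch = 16;
    }
}

int main() {
    KcppData data;
    clamp_glm4_batch(true, GGUFArch::ARCH_GLM4, data);
    printf("n_batch=%d n_ubatch=%d\n", data.n_batch, data.n_ubatch); // 16 16
    return 0;
}
```

Clamping both values together keeps them consistent, since the micro-batch size must not exceed the logical batch size.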