mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 00:54:41 +00:00
GLM4 batch clamp
This commit is contained in:
parent
3f545eadbe
commit
4decd6bea1
3 changed files with 9 additions and 0 deletions
|
@ -1907,6 +1907,10 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
printf("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.\n");
|
printf("Warning: Only GGUF models can use max context above 16k. Max context lowered to 16k.\n");
|
||||||
clamped_max_context_length = 16384;
|
clamped_max_context_length = 16384;
|
||||||
}
|
}
|
||||||
|
if (isGguf && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4 && kcpp_data->n_batch > 16) {
|
||||||
|
printf("GLM-4 is broken on larger batch sizes. Clamping batch size to 16.\n");
|
||||||
|
kcpp_data->n_batch = kcpp_data->n_ubatch = 16;
|
||||||
|
}
|
||||||
|
|
||||||
kcpp_data->n_ctx = clamped_max_context_length;
|
kcpp_data->n_ctx = clamped_max_context_length;
|
||||||
max_context_limit_at_load = clamped_max_context_length;
|
max_context_limit_at_load = clamped_max_context_length;
|
||||||
|
|
|
@ -329,6 +329,10 @@ void print_tok_vec(std::vector<float> &embd)
|
||||||
{
|
{
|
||||||
fileformatmeta->model_architecture = GGUFArch::ARCH_RWKV;
|
fileformatmeta->model_architecture = GGUFArch::ARCH_RWKV;
|
||||||
}
|
}
|
||||||
|
else if(modelarch=="glm4")
|
||||||
|
{
|
||||||
|
fileformatmeta->model_architecture = GGUFArch::ARCH_GLM4;
|
||||||
|
}
|
||||||
printf("Arch Category: %d\n",fileformatmeta->model_architecture);
|
printf("Arch Category: %d\n",fileformatmeta->model_architecture);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,6 +61,7 @@ enum GGUFArch
|
||||||
ARCH_RWKV = 6,
|
ARCH_RWKV = 6,
|
||||||
ARCH_QWEN2VL = 7,
|
ARCH_QWEN2VL = 7,
|
||||||
ARCH_GEMMA3 = 8,
|
ARCH_GEMMA3 = 8,
|
||||||
|
ARCH_GLM4 = 9,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FileFormatExtraMeta
|
struct FileFormatExtraMeta
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue