Merge commit 'f0adb80bf7' into concedo_experimental

# Conflicts:
#	docs/backend/CANN.md
#	docs/backend/SYCL.md
#	docs/docker.md
#	examples/sycl/run-llama2.sh
#	examples/sycl/win-run-llama2.bat
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	tools/llama-bench/README.md
This commit is contained in:
Concedo 2025-05-21 19:10:57 +08:00
commit 3fefb3bdf2
7 changed files with 209 additions and 134 deletions

View file

@@ -1936,18 +1936,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
clamped_max_context_length = 16384;
}
#if defined(GGML_USE_VULKAN)
if (isGguf && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4 && kcpp_data->n_ubatch > 16) {
if(debugmode==1)
{
printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamp ignored in debug.\n");
} else {
printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 8.\n");
kcpp_data->n_ubatch = 8;
}
}
#endif
kcpp_data->n_ctx = clamped_max_context_length;
max_context_limit_at_load = clamped_max_context_length;
add_bos_token = !inputs.no_bos_token;