llama : disable graph reuse with pipeline parallelism (#20463)

This commit is contained in:
Georgi Gerganov 2026-03-12 21:04:13 +02:00 committed by GitHub
parent 557fe2d913
commit 57819b8d4b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 17 additions and 19 deletions

View file

@ -342,6 +342,14 @@ llama_context::llama_context(
// Pipeline parallelism: announce that it is enabled and force graph reuse off,
// since the two features cannot currently be combined (see the warning below).
if (cparams.pipeline_parallel) {
LLAMA_LOG_INFO("%s: pipeline parallelism enabled\n", __func__);
// Only warn and override when graph reuse has not already been disabled.
if (!graph_reuse_disable) {
// TODO: figure out a way to make graph reuse work with pipeline parallelism
// ref: https://github.com/ggml-org/llama.cpp/pull/20463
LLAMA_LOG_WARN("%s: graph reuse is currently not compatible with pipeline parallelism - disabling\n", __func__);
graph_reuse_disable = true;
}
}
// NOTE(review): called after graph_reuse_disable has been finalized above;
// presumably this ordering is intentional - confirm before moving either statement.
sched_reserve();