llama : disable graph reuse with pipeline parallelism (#20463)

2026-05-08 01:41:37 +00:00 · 2026-03-12 21:04:13 +02:00 · 2026-03-12 21:04:13 +02:00 · 57819b8d4b
commit 57819b8d4b
parent 557fe2d913
3 changed files with 17 additions and 19 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -342,6 +342,14 @@ llama_context::llama_context(

        if (cparams.pipeline_parallel) {
            LLAMA_LOG_INFO("%s: pipeline parallelism enabled\n", __func__);
+
+            if (!graph_reuse_disable) {
+                // TODO: figure out a way to make graph reuse work with pipeline parallelism
+                // ref: https://github.com/ggml-org/llama.cpp/pull/20463
+                LLAMA_LOG_WARN("%s: graph reuse is currently not compatible with pipeline parallelism - disabling\n", __func__);
+
+                graph_reuse_disable = true;
+            }
        }

        sched_reserve();