move pipeline parallelism to a --pipelineparallel launch flag

This commit is contained in:
Concedo 2025-12-11 21:03:41 +08:00
parent b7428048fc
commit fd0d0cab03
5 changed files with 20 additions and 3 deletions

View file

@@ -17,6 +17,9 @@
// llama_context
//
// kcpp: file-scope toggle for pipeline parallelism, kept as a global so it can be
// flipped without touching the upstream llama_context_params struct ("ctx params").
// Defaults to off; presumably enabled by the --pipelineparallel launch flag named in
// the commit title — confirm against the launcher code.
static bool kcpp_pipeline_parallelism = false;
llama_context::llama_context(
const llama_model & model,
llama_context_params params) :
@@ -264,6 +267,11 @@ llama_context::llama_context(
cparams.offload_kqv &&
!model.has_tensor_overrides();
if(!kcpp_pipeline_parallelism)
{
pipeline_parallel = false;
}
// pipeline parallelism requires support for async compute and events in all devices
if (pipeline_parallel) {
for (auto & backend : backends) {