move pipeline parallelism to a --pipelineparallel launch flag

This commit is contained in:
Concedo 2025-12-11 21:03:41 +08:00
parent b7428048fc
commit fd0d0cab03
5 changed files with 20 additions and 3 deletions

View file

@@ -17,6 +17,9 @@
// llama_context
//
// kcpp: file-scope toggle for pipeline parallelism, kept as a global so it can be
// flipped without touching the upstream llama_context_params struct ("ctx params").
// Defaults to off; presumably enabled by the --pipelineparallel launch flag named in
// the commit title — confirm against the launcher code.
static bool kcpp_pipeline_parallelism = false;
llama_context::llama_context(
const llama_model & model,
llama_context_params params) :
@@ -264,6 +267,11 @@ llama_context::llama_context(
cparams.offload_kqv &&
!model.has_tensor_overrides();
if(!kcpp_pipeline_parallelism)
{
pipeline_parallel = false;
}
// pipeline parallelism requires support for async compute and events in all devices
if (pipeline_parallel) {
for (auto & backend : backends) {