mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-10 04:00:53 +00:00
move pipeline parallelism to a --pipelineparallel launch flag
This commit is contained in:
parent
b7428048fc
commit
fd0d0cab03
5 changed files with 20 additions and 3 deletions
|
|
@ -17,6 +17,9 @@
|
|||
// llama_context
|
||||
//
|
||||
|
||||
// kcpp: pipeline parallelism is toggled through a file-local global flag (off by default) so that the context params do not have to be modified
|
||||
static bool kcpp_pipeline_parallelism = false;
|
||||
|
||||
llama_context::llama_context(
|
||||
const llama_model & model,
|
||||
llama_context_params params) :
|
||||
|
|
@ -264,6 +267,11 @@ llama_context::llama_context(
|
|||
cparams.offload_kqv &&
|
||||
!model.has_tensor_overrides();
|
||||
|
||||
if(!kcpp_pipeline_parallelism)
|
||||
{
|
||||
pipeline_parallel = false;
|
||||
}
|
||||
|
||||
// pipeline parallelism requires support for async compute and events in all devices
|
||||
if (pipeline_parallel) {
|
||||
for (auto & backend : backends) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue