mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 01:41:37 +00:00
llama : disable graph reuse with pipeline parallelism (#20463)
This commit is contained in:
parent
557fe2d913
commit
57819b8d4b
3 changed files with 17 additions and 19 deletions
|
|
@@ -342,6 +342,14 @@ llama_context::llama_context(
|
|||
|
||||
if (cparams.pipeline_parallel) {
|
||||
LLAMA_LOG_INFO("%s: pipeline parallelism enabled\n", __func__);
|
||||
|
||||
if (!graph_reuse_disable) {
|
||||
// TODO: figure out a way to make graph reuse work with pipeline parallelism
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/20463
|
||||
LLAMA_LOG_WARN("%s: graph reuse is currently not compatible with pipeline parallelism - disabling\n", __func__);
|
||||
|
||||
graph_reuse_disable = true;
|
||||
}
|
||||
}
|
||||
|
||||
sched_reserve();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue