mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 01:41:37 +00:00
graph : fix nkvo offload with FA (#19105)
This commit is contained in:
parent
142cbe2ac6
commit
8f80d1b254
2 changed files with 5 additions and 7 deletions
|
|
@@ -2173,13 +2173,6 @@ llm_graph_cb llama_context::graph_get_cb() const {
|
|||
ggml_set_name(cur, name);
|
||||
}
|
||||
|
||||
if (!cparams.offload_kqv) {
|
||||
if (strcmp(name, "kqv_merged_cont") == 0) {
|
||||
// all nodes between the KV store and the attention output are run on the CPU
|
||||
ggml_backend_sched_set_tensor_backend(sched.get(), cur, backend_cpu);
|
||||
}
|
||||
}
|
||||
|
||||
// norm may be automatically assigned to the backend of the previous layer, increasing data transfer between backends
|
||||
// FIXME: fix in ggml_backend_sched
|
||||
const bool full_offload = model.n_gpu_layers() > model.hparams.n_layer;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue