mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 23:19:02 +00:00
fix buf_compute_meta size
This commit is contained in:
parent
b2f806a572
commit
e543fcd55f
1 changed file with 4 additions and 2 deletions
|
@@ -20423,10 +20423,12 @@ void * llama_context_setup_backend(
|
|||
}
|
||||
}
|
||||
|
||||
const size_t max_nodes = llama_model_max_nodes(*model);
|
||||
const size_t max_nodes = llama_model_max_nodes(*model);
|
||||
const size_t n_window_sum = std::accumulate(cparams.n_layer_window, cparams.n_layer_window + 32, 0u);
|
||||
const size_t n_graphs = static_cast<size_t>(std::ceil(llama_n_layer(model) / n_window_sum));
|
||||
|
||||
// buffer used to store the computation graph and the tensor meta data
|
||||
ctx->buf_compute_meta.resize(ggml_tensor_overhead()*max_nodes + ggml_graph_overhead_custom(max_nodes, false));
|
||||
ctx->buf_compute_meta.resize(ggml_tensor_overhead()*max_nodes + ggml_graph_overhead_custom(max_nodes, false)*n_graphs);
|
||||
|
||||
// TODO: move these checks to ggml_backend_sched
|
||||
// enabling pipeline parallelism in the scheduler increases memory usage, so it is only done when necessary
|
||||
|
|
Loading…
Add table
Reference in a new issue