mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 23:19:02 +00:00
fix n_gpu_layers
This commit is contained in:
parent
3c8dbd11fc
commit
4fd3b6679e
1 changed files with 1 additions and 1 deletions
|
@@ -20866,7 +20866,7 @@ void llama_model_kvcache_size(
     uint64_t ne_k = static_cast<uint64_t>(hparams.n_embd_k_gqa()) * cparams.n_ctx * ggml_type_size(cparams.type_k);
     uint64_t ne_v = static_cast<uint64_t>(hparams.n_embd_v_gqa()) * cparams.n_ctx * ggml_type_size(cparams.type_v);
     if (use_gpu) {
-        int n_gpu_layers = cparams.n_gpu_layers;
+        int n_gpu_layers = std::min(cparams.n_gpu_layers, hparams.n_layer);
         *gpu_cache = (ne_k + ne_v) * n_gpu_layers;
         *cpu_cache = (ne_k + ne_v) * (llama_model_n_layers(model) - n_gpu_layers);
     } else {
|
Loading…
Add table
Reference in a new issue