mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-10 14:14:41 +00:00
reduce kv cache from available memory
This commit is contained in:
parent
9858d90ce4
commit
45a1e55eec
5 changed files with 25 additions and 7 deletions
|
@ -523,6 +523,9 @@ extern "C" {
|
|||
// Returns the total number of parameters in the model
|
||||
LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model);
|
||||
|
||||
// Returns the size of the KV cache for the model under the given context parameters
|
||||
LLAMA_API uint64_t llama_model_kvcache_size(const struct llama_model * model, const struct llama_context_params cparams);
|
||||
|
||||
// Return the total number of floating-point operations in the model
|
||||
LLAMA_API void llama_model_n_flops(
|
||||
struct llama_model * model,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue