Mirror of https://github.com/Lizonghang/prima.cpp.git, synced 2025-09-10 11:24:40 +00:00.
add gpu support in llama_model_kvcache_size and llama_model_compute_buf_size
This commit is contained in:
parent
f8e9dc2713
commit
6f54a12c7d
3 changed files with 58 additions and 28 deletions
|
@@ -525,10 +525,20 @@ extern "C" {

     LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model);

     // Return the size of compute buffer size, including input tensors and activations
-    LLAMA_API uint64_t llama_model_compute_buf_size(const struct llama_model * model, const struct llama_context_params cparams, bool compress_memory);
+    LLAMA_API void llama_model_compute_buf_size(
+                       uint64_t * cpu_buf,
+                       uint64_t * gpu_buf,
+                       const struct llama_model * model,
+                       const struct llama_context_params cparams,
+                       bool use_gpu);

     // Return the size of KV cache in the model
-    LLAMA_API uint64_t llama_model_kvcache_size(const struct llama_model * model, const struct llama_context_params cparams);
+    LLAMA_API void llama_model_kvcache_size(
+                       uint64_t * cpu_cache,
+                       uint64_t * gpu_cache,
+                       const struct llama_model * model,
+                       const struct llama_context_params cparams,
+                       bool use_gpu);

     // Return the total number of float operations in the model
     LLAMA_API void llama_model_n_flops(
|
Loading…
Add table
Add a link
Reference in a new issue