Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	examples/run/run.cpp
#	ggml/src/ggml-cann/aclnn_ops.cpp
This commit is contained in:
Concedo 2025-03-15 19:54:19 +08:00
commit 67851e5415
9 changed files with 39 additions and 8 deletions

View file

@@ -947,6 +947,10 @@ extern "C" {
// If set to true, the model will only attend to the past tokens
LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
// Set whether the model is in warmup mode or not
// If true, all model tensors are activated during llama_decode() to load and cache their weights.
//   ctx    - the context whose warmup mode is being set
//   warmup - true to enable warmup mode, false to disable it
// NOTE(review): presumably the flag stays in effect for subsequent llama_decode() calls
// until it is set back to false - confirm against the implementation.
LLAMA_API void llama_set_warmup(struct llama_context * ctx, bool warmup);
// Set abort callback
//   ctx                 - the context on which the callback is registered
//   abort_callback      - the callback to install (see the ggml_abort_callback typedef for its signature)
//   abort_callback_data - opaque user pointer supplied together with the callback
// NOTE(review): presumably abort_callback_data is handed back to abort_callback on each
// invocation, and the callback's return value decides whether computation is aborted -
// confirm against the ggml_abort_callback documentation.
LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);