Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	examples/run/run.cpp
#	ggml/src/ggml-cann/aclnn_ops.cpp
2025-09-10 17:14:36 +00:00 · 2025-03-15 19:54:19 +08:00 · 2025-03-15 19:54:19 +08:00 · 67851e5415
commit 67851e5415
parent e84596ec1a 92a391327e
9 changed files with 39 additions and 8 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@ -39,6 +39,7 @@ llama_context::llama_context(
    cparams.flash_attn       = params.flash_attn;
    cparams.no_perf          = params.no_perf;
    cparams.pooling_type     = params.pooling_type;
+    cparams.warmup           = false;

    cparams.n_ctx            = params.n_ctx           == 0    ? hparams.n_ctx_train           : params.n_ctx;
    cparams.rope_freq_base   = params.rope_freq_base  == 0.0f ? hparams.rope_freq_base_train  : params.rope_freq_base;
@ -949,6 +950,12 @@ void llama_context::set_causal_attn(bool value) {
    cparams.causal_attn = value;
 }

+void llama_context::set_warmup(bool value) {
+    LLAMA_LOG_DEBUG("%s: value = %d\n", __func__, value);
+
+    cparams.warmup = value;
+}
+
 void llama_context::set_adapter_lora(
            llama_adapter_lora * adapter,
            float scale) {
@ -1595,7 +1602,7 @@ void llama_context::output_reorder() {
 //

 int32_t llama_context::graph_max_nodes() const {
-    return std::max<int32_t>(8192, 5*model.n_tensors());
+    return std::max<int32_t>(65536, 5*model.n_tensors());
 }

 ggml_cgraph * llama_context::graph_init() {
@ -2373,6 +2380,10 @@ void llama_set_causal_attn(llama_context * ctx, bool causal_attn) {
    ctx->set_causal_attn(causal_attn);
 }

+void llama_set_warmup(llama_context * ctx, bool warmup) {
+    ctx->set_warmup(warmup);
+}
+
 void llama_synchronize(llama_context * ctx) {
    ctx->synchronize();
 }