allow quantkv with contextshift

2025-09-10 17:14:36 +00:00 · 2025-03-16 21:48:42 +08:00 · 2025-03-16 21:48:42 +08:00 · 6888f5495d
commit 6888f5495d
parent e466ce65e2
2 changed files with 6 additions and 7 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -720,7 +720,7 @@ void llama_context::kv_self_update() {
            printf("\nWARNING: The current context does not support K-shift!\n");
        } else {

-        LLAMA_LOG_DEBUG("%s: applying K-shift\n", __func__);
+        // LLAMA_LOG_DEBUG("%s: applying K-shift\n", __func__);

        // apply K-shift if needed
        if (model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {
@@ -774,7 +774,7 @@ void llama_context::kv_self_update() {

    // reserve a worst case graph if needed
    if (need_reserve) {
-        LLAMA_LOG_DEBUG("%s: reserving a worst case graph\n", __func__);
+        // LLAMA_LOG_DEBUG("%s: reserving a worst case graph\n", __func__);

        // build worst-case graph
        uint32_t n_seqs = 1; // TODO: worst-case number of sequences