fix llama-cli pos sync: rank 0 now also sends its KV cache seq_rm/seq_add operations

2025-09-10 09:54:36 +00:00 · 2025-05-19 18:08:27 +04:00 · 2025-05-19 18:08:27 +04:00 · 421b3deca5
commit 421b3deca5
parent c54a6a0132
2 changed files with 32 additions and 2 deletions
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -348,6 +348,9 @@ int main(int argc, char ** argv) {

        // remove any "future" tokens that we might have inherited from the previous session
        llama_kv_cache_seq_rm(ctx, -1, n_matching_session_tokens, -1);
+        if (my_rank == 0) {
+            llama_send_kv_cache_seq_rm(ctx, -1, n_matching_session_tokens, -1);
+        }
    }

    LOG_DBG("recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n",
@@ -593,6 +596,11 @@ int main(int argc, char ** argv) {
                        llama_kv_cache_seq_rm (ctx, 0, params.n_keep            , params.n_keep + n_discard);
                        llama_kv_cache_seq_add(ctx, 0, params.n_keep + n_discard, n_past, -n_discard);

+                        if (my_rank == 0) {
+                            llama_send_kv_cache_seq_rm (ctx, 0, params.n_keep            , params.n_keep + n_discard);
+                            llama_send_kv_cache_seq_add(ctx, 0, params.n_keep + n_discard, n_past, -n_discard);
+                        }
+
                        n_past -= n_discard;

                        LOG_DBG("after swap: n_past = %d\n", n_past);