fix llama-cli pos sync

This commit is contained in:
Lizonghang 2025-05-19 18:08:27 +04:00
parent c54a6a0132
commit 421b3deca5
2 changed files with 32 additions and 2 deletions

View file

@@ -348,6 +348,9 @@ int main(int argc, char ** argv) {
// remove any "future" tokens that we might have inherited from the previous session
llama_kv_cache_seq_rm(ctx, -1, n_matching_session_tokens, -1);
if (my_rank == 0) {
llama_send_kv_cache_seq_rm(ctx, -1, n_matching_session_tokens, -1);
}
}
LOG_DBG("recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n",
@@ -593,6 +596,11 @@ int main(int argc, char ** argv) {
llama_kv_cache_seq_rm (ctx, 0, params.n_keep , params.n_keep + n_discard);
llama_kv_cache_seq_add(ctx, 0, params.n_keep + n_discard, n_past, -n_discard);
if (my_rank == 0) {
llama_send_kv_cache_seq_rm (ctx, 0, params.n_keep , params.n_keep + n_discard);
llama_send_kv_cache_seq_add(ctx, 0, params.n_keep + n_discard, n_past, -n_discard);
}
n_past -= n_discard;
LOG_DBG("after swap: n_past = %d\n", n_past);