fix-update-flashinfer_wrapper_local_chat

2025-09-10 06:14:58 +00:00 · 2025-02-25 12:47:31 +00:00 · 2025-02-25 12:47:31 +00:00 · 477ac28a9c
commit 477ac28a9c
parent 5474be5299
4 changed files with 15 additions and 4 deletions
--- a/ktransformers/util/utils.py
+++ b/ktransformers/util/utils.py
@@ -177,6 +177,7 @@ def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000, use_cud
        else:
            inputs_embeds = model.model.embed_tokens(inputs.to("cpu")).to(torch_device)
        if use_flashinfer_mla:
+            MLAWrapperSingleton.update_buffer(past_key_values.max_pages)
            MLAWrapperSingleton.need_plan_all()
            
        logits = model(