mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 06:14:58 +00:00
fix-update-flashinfer_wrapper_local_chat
This commit is contained in:
parent
5474be5299
commit
477ac28a9c
4 changed files with 15 additions and 4 deletions
|
@ -177,6 +177,7 @@ def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000, use_cud
|
|||
else:
|
||||
inputs_embeds = model.model.embed_tokens(inputs.to("cpu")).to(torch_device)
|
||||
if use_flashinfer_mla:
|
||||
MLAWrapperSingleton.update_buffer(past_key_values.max_pages)
|
||||
MLAWrapperSingleton.need_plan_all()
|
||||
|
||||
logits = model(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue