diff --git a/ktransformers/util/utils.py b/ktransformers/util/utils.py index 0297b57..f030257 100644 --- a/ktransformers/util/utils.py +++ b/ktransformers/util/utils.py @@ -170,7 +170,6 @@ def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000, use_cud past_key_values.cur_idx=cache_position start_time = time.time() - inputs_embeds = model.model.embed_tokens(inputs.to("cpu")).to(torch_device) if mode == "long_context": inputs_embeds = model.model.embed_tokens(inputs.to("cpu")) else: