diff --git a/ktransformers/util/utils.py b/ktransformers/util/utils.py index 7301572..308def1 100644 --- a/ktransformers/util/utils.py +++ b/ktransformers/util/utils.py @@ -243,7 +243,7 @@ def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000, use_cud elif torch.xpu.is_available(): torch.xpu.set_device(torch_device) else: - RuntimeError("The device: {torch_device} is not available") + raise RuntimeError(f"The device: {torch_device} is not available") inputs_embeds = model.model.embed_tokens(cur_token.to("cpu")).to(torch_device) # with torch.cuda.stream(custom_stream): logits=model(inputs_embeds=inputs_embeds, @@ -284,7 +284,7 @@ def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000, use_cud elif torch.xpu.is_available(): torch.xpu.set_device(torch_device) else: - RuntimeError("The device: {torch_device} is not available") + raise RuntimeError(f"The device: {torch_device} is not available") with torch.no_grad(): stream = TextStreamer(tokenizer)