support Moonlight

This commit is contained in:
Atream 2025-02-23 14:21:18 +00:00
parent 95d937c51d
commit e8e02e5ccc
3 changed files with 4 additions and 10 deletions

View file

@@ -58,13 +58,12 @@ def local_chat(
gguf_path: str | None = None,
max_new_tokens: int = 300,
cpu_infer: int = Config().cpu_infer,
use_cuda_graph: bool = False,
use_cuda_graph: bool = True,
prompt_file : str | None = None,
mode: str = "normal",
force_think: bool = False,
):
torch.set_grad_enabled(False)
Config().cpu_infer = cpu_infer
@@ -160,9 +159,6 @@ def local_chat(
input_tensor = tokenizer.apply_chat_template(
messages, add_generation_prompt=True, return_tensors="pt"
)
# input_tensor = torch.tensor([[0, 6657, 84646]], device=input_tensor.device)
if force_think:
token_thinks = torch.tensor([tokenizer.encode("<think>\\n",add_special_tokens=False)],device=input_tensor.device)
input_tensor = torch.cat(
@@ -184,6 +180,4 @@ def local_chat(
if __name__ == "__main__":
# fire.Fire(local_chat)
# local_chat(model_path="/mnt/data/model/DeepSeek-R1", gguf_path="/mnt/data/model/DeepseekV3-q4km-gguf", cpu_infer=33, force_think=False)
local_chat(model_path="/mnt/data/model/Moonlight-16B-A3B-Instruct", gguf_path="/mnt/data/model/Moonlight-16B-A3B-Instruct-GGUF", cpu_infer=33, force_think=False)
fire.Fire(local_chat)