Flush printed tokens in local_chat output (print(..., flush=True)) and change the default DeepSeek-V3 optimize YAML to the single-GPU configuration

This commit is contained in:
liam 2025-02-08 13:15:52 +08:00
parent 0262f954c7
commit c18ecd7b7f
2 changed files with 3 additions and 3 deletions

View file

@@ -329,14 +329,14 @@ class TransformersInterface(BackendInterfaceBase):
self.profiler.create_and_start_timer("prefill")
for t in self.prefill(input_ids, self.check_is_new(thread_id)):
if t is not None:
print(t, end="")
print(t, end="",flush=True)
yield t
self.profiler.pause_timer("prefill")
self.profiler.create_and_start_timer("decode")
for t in self.generate():
if t is not None:
print(t, end="")
print(t, end="",flush=True)
yield t
print("")
self.profiler.pause_timer("decode")