mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 15:29:39 +00:00
roll back ktransformers backend, add max_tokens, max_completion_tokens param
This commit is contained in:
parent
a1162eea01
commit
03a65d6bea
10 changed files with 144 additions and 161 deletions
|
@ -207,7 +207,7 @@ async def chat_completion(request: Request, create: ChatCompletionCreate):
|
|||
"<tools▁end>":"<|tool▁calls▁end|>"
|
||||
}
|
||||
# Use check_client_connected for early stopping
|
||||
async for res in interface.inference(input_message, id, create.temperature, create.top_p):
|
||||
async for res in interface.inference(input_message, id, create.temperature, create.top_p, create.max_tokens, create.max_completion_tokens):
|
||||
if isinstance(res, RawUsage):
|
||||
# The final item yielded is the usage statistics (RawUsage)
|
||||
raw_usage = res
|
||||
|
@ -371,7 +371,7 @@ async def chat_completion(request: Request, create: ChatCompletionCreate):
|
|||
"<tool▁end>":"<|tool▁call▁end|>",
|
||||
"<tools▁end>":"<|tool▁calls▁end|>"
|
||||
}
|
||||
async for res in interface.inference(input_message, id, create.temperature, create.top_p):
|
||||
async for res in interface.inference(input_message, id, create.temperature, create.top_p, create.max_tokens, create.max_completion_tokens):
|
||||
if isinstance(res, RawUsage):
|
||||
raw_usage = res
|
||||
usage = CompletionUsage(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue