# Mirror of https://github.com/kvcache-ai/ktransformers.git
# Synced 2025-09-06 20:49:55 +00:00
from enum import Enum
from typing import List, Optional

from openai.types.chat.chat_completion_chunk import Choice
from openai.types.completion_usage import CompletionUsage
from pydantic import BaseModel, Field
from typing_extensions import Literal

from ktransformers.server.schemas.base import Object


class Role(Enum):
    """Chat participant roles, mirroring the OpenAI message `role` field."""

    system = "system"
    user = "user"
    assistant = "assistant"
    tool = "tool"
    function = "function"


class Message(BaseModel):
    """A single chat message: who said it (`role`) and what was said (`content`)."""

    content: str
    role: Role
    # Optional display name; note it is NOT included in the tokenizer dict below.
    name: Optional[str] = None

    def to_tokenizer_message(self):
        """Return the plain {'content', 'role'} dict shape consumed by tokenizer chat templates."""
        return dict(content=self.content, role=self.role.value)


class ChatCompletionCreate(BaseModel):
    """Request body for chat-completion creation (subset of the OpenAI API)."""

    messages: List[Message]
    model: str
    stream: bool = False
    # Sampling knobs; both default to 1.0 as in the OpenAI API.
    temperature: Optional[float] = 1.0
    top_p: Optional[float] = 1.0

    def get_tokenizer_messages(self):
        """Convert every message to the dict form expected by tokenizer chat templates."""
        return [msg.to_tokenizer_message() for msg in self.messages]


class ChatCompletionChunk(BaseModel):
    """One streamed chunk of a chat completion, mirroring the OpenAI
    `chat.completion.chunk` object."""

    id: str
    choices: List[Choice]
    # Creation time as a unix timestamp — presumably seconds; TODO confirm against producer.
    created: int
    model: str
    object: Literal["chat.completion.chunk"]
    service_tier: Optional[Literal["scale", "default"]] = None
    system_fingerprint: Optional[str] = None
    usage: Optional[CompletionUsage] = None

    def to_stream_reply(self):
        """Serialize this chunk as a server-sent-events `data:` frame."""
        payload = self.model_dump_json()
        return "data: " + payload + "\n\n"


class RawUsage(BaseModel):
    """Raw timing and token-count measurements for one request.

    Carries per-phase wall-clock times and token counts; units of the
    time fields are presumably seconds — TODO confirm against the producer.
    """

    # Time spent tokenizing the prompt.
    tokenize_time: float
    # Time spent in the prefill phase.
    prefill_time: float
    # Time spent in the decode (generation) phase.
    decode_time: float
    # Number of tokens processed during prefill.
    prefill_count: int
    # Number of tokens generated during decode.
    decode_count: int