# mirror of https://github.com/kvcache-ai/ktransformers.git
# synced 2025-09-06 04:30:03 +00:00
from enum import Enum
from typing import List, Optional, Union, Dict, Any
from uuid import uuid4

from openai.types.chat.chat_completion_chunk import Choice
from pydantic import BaseModel, Field
from typing_extensions import Literal

from ktransformers.server.config.config import Config
from ktransformers.server.schemas.base import Object
class CompletionUsage(BaseModel):
|
|
prompt_tokens: int
|
|
completion_tokens: int
|
|
total_tokens: int
|
|
prompt_tokens_details: Optional[Dict[str, Any]] = None
|
|
completion_tokens_details: Optional[Dict[str, Any]] = None
|
|
prefill_time: Optional[float] = None
|
|
decode_time: Optional[float] = None
|
|
|
|
class Role(Enum):
|
|
system = 'system'
|
|
user = 'user'
|
|
assistant = 'assistant'
|
|
tool = 'tool'
|
|
function = 'function'
|
|
|
|
class Message(BaseModel):
|
|
content: Optional[str] = None
|
|
role: Role
|
|
name: Optional[str] = None
|
|
tool_calls: Optional[List[Dict[str, Any]]] = {}
|
|
tool_call_id: Optional[str] = None
|
|
|
|
def to_tokenizer_message(self):
|
|
message = {'role': self.role.value}
|
|
if self.content is not None:
|
|
message['content'] = self.content
|
|
if self.name is not None:
|
|
message['name'] = self.name
|
|
if self.tool_calls is not {}:
|
|
message['tool_calls'] = self.tool_calls
|
|
if self.tool_call_id is not None:
|
|
message['tool_call_id'] = self.tool_call_id
|
|
return message
|
|
|
|
class FunctionParameters(BaseModel):
|
|
type: str = "object"
|
|
properties: Dict[str, Any] = {}
|
|
required: Optional[List[str]] = None
|
|
|
|
class FunctionDefinition(BaseModel):
|
|
name: str
|
|
description: Optional[str] = None
|
|
parameters: FunctionParameters = Field(default_factory=FunctionParameters)
|
|
|
|
class ToolFunction(BaseModel):
|
|
function: FunctionDefinition
|
|
|
|
class Tool(BaseModel):
|
|
type: Literal["function"]
|
|
function: FunctionDefinition
|
|
|
|
class ChatCompletionCreate(BaseModel):
|
|
messages: List[Message]
|
|
model: str
|
|
stream: bool = False
|
|
temperature: Optional[float] = Field(default=Config().temperature)
|
|
top_p: Optional[float] = Field(default=Config().top_p)
|
|
tools: Optional[List[Tool]] = None
|
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = None
|
|
stream_options: Optional[Dict[str, Any]] = None
|
|
frequency_penalty: float = 0
|
|
presence_penalty: float = 0
|
|
max_tokens: Optional[int] = Field(default=Config().max_new_tokens)
|
|
max_completion_tokens: Optional[int] = Field(default=Config().max_new_tokens)
|
|
return_speed: Optional[bool] = Field(default=False)
|
|
def get_tokenizer_messages(self):
|
|
return [m.to_tokenizer_message() for m in self.messages]
|
|
|
|
class ChatCompletionChunk(BaseModel):
|
|
id: str
|
|
choices: List[Choice]
|
|
created: int
|
|
model: str
|
|
object: Literal["chat.completion.chunk"]
|
|
service_tier: Optional[Literal["scale", "default"]] = None
|
|
system_fingerprint: Optional[str] = None
|
|
usage: Optional[CompletionUsage] = None
|
|
|
|
def to_stream_reply(self):
|
|
return f"data: {self.model_dump_json()}\n\n"
|
|
|
|
class RawUsage(BaseModel):
|
|
tokenize_time: float
|
|
prefill_time: float
|
|
decode_time: float
|
|
prefill_count: int
|
|
decode_count: int |