from typing import AsyncIterator, Optional

import torch

from ..args import ConfigArgs, default_args
from ..base import BackendInterfaceBase, ThreadContext
from ktransformers.server.schemas.assistants.runs import RunObject


class ExllamaThreadContext(ThreadContext):
    """Thread context for the Exllama backend (placeholder, not yet implemented)."""

    def __init__(self, run: RunObject, args: ConfigArgs = default_args) -> None:
        super().__init__(run, args)

    def get_interface(self):
        # Placeholder: no Exllama interface is wired up yet.
        return

    def get_local_messages(self):
        raise NotImplementedError


class ExllamaInterface(BackendInterfaceBase):
    """Backend interface for Exllama. Every method below is still a stub."""

    def __init__(self, args: ConfigArgs = ...):
        raise NotImplementedError

    def tokenize_prompt(self, prompt: str) -> torch.Tensor:
        raise NotImplementedError

    async def inference(self, local_messages, request_unique_id: Optional[str]) -> AsyncIterator:
        raise NotImplementedError
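

# ---------------------------------------------------------------------------
# Illustrative sketch only, not part of the backend: a hypothetical toy
# subclass showing the shapes the stubbed methods above are expected to
# produce (a token-id tensor from tokenize_prompt, an async iterator of text
# chunks from inference). It does not call any real Exllama/ExLlamaV2 API and
# assumes BackendInterfaceBase imposes no further abstract requirements.
class ToyExllamaInterface(ExllamaInterface):
    def __init__(self, args: ConfigArgs = default_args):
        # Skip ExllamaInterface.__init__, which only raises NotImplementedError.
        self.args = args

    def tokenize_prompt(self, prompt: str) -> torch.Tensor:
        # Toy "tokenizer": UTF-8 bytes wrapped in a 1-D LongTensor of token ids.
        return torch.tensor(list(prompt.encode("utf-8")), dtype=torch.long)

    async def inference(self, local_messages, request_unique_id: Optional[str]) -> AsyncIterator:
        # Stream fixed text chunks back, matching the async-iterator contract.
        for chunk in ("hello ", "from ", "a ", "toy ", "backend"):
            yield chunk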