import torch
from typing import AsyncIterator, Dict, Optional, Tuple

from ktransformers.server.schemas.assistants.runs import RunObject

from ..args import ConfigArgs, default_args
from ..base import BackendInterfaceBase, ThreadContext


class ExllamaThreadContext(ThreadContext):
    """Thread context for the Exllama backend (stub)."""

    def __init__(self, run: RunObject, args: ConfigArgs = default_args) -> None:
        super().__init__(run, args)

    def get_interface(self):
        # Stub: no backend interface is bound for this thread context yet.
        return

    def get_local_messages(self):
        raise NotImplementedError


class ExllamaInterface(BackendInterfaceBase):
    """Backend interface for Exllama; all methods are left unimplemented."""

    def __init__(self, args: ConfigArgs = default_args):
        raise NotImplementedError

    def tokenize_prompt(self, prompt: str) -> torch.Tensor:
        raise NotImplementedError

    async def inference(self, local_messages, request_unique_id: Optional[str]) -> AsyncIterator:
        raise NotImplementedError
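

# A minimal sketch of a concrete subclass, showing how tokenize_prompt could be
# filled in. It assumes a Hugging Face tokenizer and a `model_dir` field on
# ConfigArgs; the subclass name and that field are illustrative assumptions,
# not part of the interface above.
class ExllamaInterfaceSketch(ExllamaInterface):
    def __init__(self, args: ConfigArgs = default_args):
        # Deliberately do not call ExllamaInterface.__init__, which raises NotImplementedError.
        self.args = args
        from transformers import AutoTokenizer  # local import keeps the stub module dependency-free
        self.tokenizer = AutoTokenizer.from_pretrained(args.model_dir)

    def tokenize_prompt(self, prompt: str) -> torch.Tensor:
        # encode() with return_tensors="pt" returns a (1, seq_len) tensor of token ids.
        return self.tokenizer.encode(prompt, return_tensors="pt")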