kvcache-ai-ktransformers/ktransformers/server/backend/interfaces/exllamav2.py
import os
import sys
from typing import AsyncIterator, Optional

import torch

from ktransformers.server.schemas.assistants.runs import RunObject

from ..args import ConfigArgs, default_args
from ..base import BackendInterfaceBase, ThreadContext

class ExllamaThreadContext(ThreadContext):
    """Thread context for the ExLlamaV2 backend (stub)."""

    def __init__(self, run: RunObject, args: ConfigArgs = default_args) -> None:
        super().__init__(run, args)

    def get_interface(self):
        # Stub: no backend interface is bound yet.
        return

    def get_local_messages(self):
        raise NotImplementedError


class ExllamaInterface(BackendInterfaceBase):
    """Backend interface for ExLlamaV2; all methods are unimplemented placeholders."""

    def __init__(self, args: ConfigArgs = ...):
        raise NotImplementedError

    def tokenize_prompt(self, prompt: str) -> torch.Tensor:
        raise NotImplementedError

    async def inference(self, local_messages, request_unique_id: Optional[str]) -> AsyncIterator:
        raise NotImplementedError
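

# --- Usage sketch (not part of the original file) ---------------------------
# A minimal illustration of how a concrete subclass of ExllamaInterface would
# be consumed once `inference` is implemented. `DummyExllamaInterface`, the
# hard-coded token stream, and the `_demo` coroutine below are hypothetical
# stand-ins; a real backend would yield generated text chunks from an
# ExLlamaV2 model instead.

import asyncio


class DummyExllamaInterface(ExllamaInterface):
    def __init__(self, args: ConfigArgs = default_args):
        # Skip the parent stub constructor, which raises NotImplementedError.
        self.args = args

    def tokenize_prompt(self, prompt: str) -> torch.Tensor:
        # Hypothetical tokenization: one integer id per whitespace-separated word.
        return torch.arange(len(prompt.split()), dtype=torch.long)

    async def inference(self, local_messages, request_unique_id: Optional[str]) -> AsyncIterator:
        # Stream a few placeholder chunks instead of real model output.
        for chunk in ("Hello", ", ", "world"):
            yield chunk


async def _demo() -> None:
    interface = DummyExllamaInterface()
    async for chunk in interface.inference([{"role": "user", "content": "hi"}], "req-1"):
        print(chunk, end="")


if __name__ == "__main__":
    asyncio.run(_demo())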