mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-11 07:44:35 +00:00
Implement multi-batch support for v2, v3, and r1 models with backend_type configured as ktransformers.
This commit is contained in:
parent
890b0f1622
commit
a6ab9e349c
6 changed files with 383 additions and 52 deletions
|
@@ -141,6 +141,7 @@ class TransformersInterface(BackendInterfaceBase):
|
|||
# thread_related
|
||||
last_request_id: Optional[str] = None
|
||||
ever_generated_ids: Set[int] = set()
|
||||
attention_mask: torch.Tensor
|
||||
|
||||
def __init__(self, args: ConfigArgs = default_args):
|
||||
self.args = args
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue