Implement multi-batch support for the v2, v3, and r1 models when backend_type is set to ktransformers.

This commit is contained in:
jiafei96 2025-07-09 09:09:47 +00:00
parent 890b0f1622
commit a6ab9e349c
6 changed files with 383 additions and 52 deletions

View file

@@ -141,6 +141,7 @@ class TransformersInterface(BackendInterfaceBase):
# thread_related
last_request_id: Optional[str] = None
ever_generated_ids: Set[int] = set()
attention_mask: torch.Tensor
def __init__(self, args: ConfigArgs = default_args):
self.args = args