mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 14:51:06 +00:00
support chunk prefill, support 139K context for 24G VRAM
This commit is contained in:
parent
494469d4c5
commit
f35e8d41d8
10 changed files with 227 additions and 83 deletions
|
@ -23,7 +23,7 @@ class ConfigArgs(BaseModel):
|
|||
max_batch_size: int = Field(
|
||||
None, description="Max number of batches to run at once, assuming the sequences will fit within total_context"
|
||||
)
|
||||
max_chunk_size: int = Field(
|
||||
chunk_prefill_size: int = Field(
|
||||
None,
|
||||
description=(
|
||||
"Max chunk size. Determines the size of prefill operations. Can be reduced to reduce pauses whenever a new"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue