Mirror of https://github.com/kvcache-ai/ktransformers.git, synced 2025-09-10 15:29:39 +00:00
support chunk prefill, support 139K context for 24G VRAM
This commit is contained in:
parent 494469d4c5
commit f35e8d41d8

10 changed files with 227 additions and 83 deletions
@@ -105,7 +105,8 @@ class Config(metaclass=Singleton):
         self.total_context = self.model.get("total_context", 2**18)
         self.max_batch_size = self.model.get("max_batch_size", 20 if self.paged else 1)
         self.max_chunk_size = self.model.get("max_chunk_size", 2048)
+        self.chunk_prefill_size = self.model.get("chunk_prefill_size", 8192)
         self.max_new_tokens = self.model.get("max_new_tokens", 2000)
         self.json_mode = self.model.get("json_mode", False)
         self.healing = self.model.get("healing", False)
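For context, here is a minimal sketch of the chunked-prefill technique the new chunk_prefill_size option controls. It assumes a hypothetical model.forward(input_ids, past_key_values) API; the actual ktransformers implementation differs, but the idea is to run the prompt through the model in chunk_prefill_size-token slices so peak activation memory is bounded by the chunk size rather than by the full prompt length.

import torch

def chunked_prefill(model, input_ids: torch.Tensor, chunk_prefill_size: int = 8192):
    """Prefill a long prompt in fixed-size chunks.

    A sketch only: `model.forward` is a hypothetical API that returns
    (logits, past_key_values) and appends each chunk's keys/values to the cache.
    """
    past_key_values = None
    logits = None
    for start in range(0, input_ids.shape[1], chunk_prefill_size):
        chunk = input_ids[:, start:start + chunk_prefill_size]
        # Only the last chunk's logits are needed to generate the next token;
        # earlier passes exist solely to populate the KV cache.
        logits, past_key_values = model.forward(chunk, past_key_values)
    return logits, past_key_values

At the default chunk_prefill_size of 8192, a 139K-token prompt would be prefilled in roughly 17 passes, which is presumably how this commit fits a 139K context into 24G of VRAM: the KV cache still grows with context length, but per-pass activations stay bounded.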