Mirror of https://github.com/kvcache-ai/ktransformers.git (synced 2025-09-09 13:55:27 +00:00)
Fix bug with non-base-multiple chunk_size, update test examples, and resolve issue with writing model_config. Hugging Face URL input is still unsupported.
parent 64e6aa026a
commit be84d04253
4 changed files with 65 additions and 71 deletions
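The chunk_size fix itself is not visible in the hunk below. As a hedged illustration of the class of bug named in the commit message, a prefill loop that walks a prompt in fixed-size chunks must clamp the final chunk when the prompt length is not a multiple of chunk_size; the function and variable names here are hypothetical sketches, not taken from the repository:

import torch

def chunked_prefill(input_ids: torch.Tensor, chunk_size: int):
    # Walk the prompt in fixed-size chunks; the last chunk may be shorter
    # when the total length is not a multiple of chunk_size.
    total = input_ids.shape[0]
    for start in range(0, total, chunk_size):
        end = min(start + chunk_size, total)  # clamp instead of over-reading
        yield input_ids[start:end]

# 10 tokens with chunk_size=4 yield chunks of length 4, 4, and 2.
for chunk in chunked_prefill(torch.arange(10), chunk_size=4):
    print(chunk.shape[0])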
@@ -43,10 +43,10 @@ class KDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
     def init_wrapper(self, use_cuda_graph, device, max_batch_size, max_pages):
         self.use_cuda_graph = use_cuda_graph
         self.workspace_buffer = torch.empty(128 * 1024 * 1024, dtype=torch.int8).to(0)
-        self.qo_indptr_buf = torch.empty((max_batch_size+1,), dtype=torch.int32, device=device)
-        self.paged_kv_indptr_buf = torch.empty((max_batch_size+1,), dtype=torch.int32, device=device)
+        self.qo_indptr_buf = torch.empty((max_batch_size+2,), dtype=torch.int32, device=device)
+        self.paged_kv_indptr_buf = torch.empty((max_batch_size+2,), dtype=torch.int32, device=device)
         self.paged_kv_indices_buf = torch.empty((max_pages,), dtype=torch.int32, device=device)
-        self.paged_kv_len_buf = torch.empty((max_batch_size,), dtype=torch.int32, device=device)
+        self.paged_kv_len_buf = torch.empty((max_batch_size+1,), dtype=torch.int32, device=device)
         self.bsz_tensor_buf = torch.empty((1, ), dtype=torch.int32, device=device)
 
 
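For context on the buffer sizes in this hunk: in a CSR-style paged-KV layout, an indptr array for a batch of B requests needs B+1 entries, since request i owns the page ids paged_kv_indices[indptr[i]:indptr[i+1]]. The bump from max_batch_size+1 to max_batch_size+2 presumably reserves one extra slot (for example, for a padded dummy request under CUDA graph capture), though the commit message does not say so. A minimal sketch of how such metadata is laid out, with hypothetical names:

import torch

def build_paged_kv_metadata(pages_per_req: list[int], device="cpu"):
    # For B requests, indptr has B+1 entries: request i owns the page ids
    # indices[indptr[i]:indptr[i+1]].  This is why the preallocated indptr
    # buffers above need at least max_batch_size+1 slots.
    bsz = len(pages_per_req)
    indptr = torch.zeros(bsz + 1, dtype=torch.int32, device=device)
    indptr[1:] = torch.cumsum(torch.tensor(pages_per_req, dtype=torch.int32), dim=0)
    # Pages are assumed contiguous here for simplicity; a real allocator
    # would hand out arbitrary page numbers.
    indices = torch.arange(int(indptr[-1]), dtype=torch.int32, device=device)
    return indptr, indices

indptr, indices = build_paged_kv_metadata([3, 1, 2])
print(indptr)   # tensor([0, 3, 4, 6], dtype=torch.int32)
print(indices)  # tensor([0, 1, 2, 3, 4, 5], dtype=torch.int32)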