mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 06:14:58 +00:00
add balance-serve, support concurrency
This commit is contained in:
parent
8d0292aa44
commit
25cee5810e
196 changed files with 22077 additions and 565 deletions
|
@@ -211,11 +211,11 @@ class KTransformersInterface(TransformersInterface):
|
|||
|
||||
chunk_start = 0
|
||||
while chunk_start < input_ids_length:
|
||||
- chunk_end = min(chunk_start + self.args.chunk_prefill_size, input_ids_length)
|
||||
+ chunk_end = min(chunk_start + self.args.chunk_size, input_ids_length)
|
||||
if self.cache != None:
|
||||
self.cache.cur_idx=cache_position[chunk_start:chunk_end]
|
||||
logits = chunk_prefill(input_ids[:, chunk_start:chunk_end], cache_position[chunk_start:chunk_end])
|
||||
- chunk_start += self.args.chunk_prefill_size
|
||||
+ chunk_start += self.args.chunk_size
|
||||
|
||||
if flashinfer_enabled:
|
||||
MLAWrapperSingleton.reset_buffer()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue