llama : remove KV cache defragmentation logic (#15473)

ggml-ci
This commit is contained in:
Georgi Gerganov 2025-08-22 12:22:13 +03:00 committed by GitHub
parent ad5c975c2d
commit 9ebebef62f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 32 additions and 440 deletions

View file

@@ -274,7 +274,6 @@ def start_server_background(args):
server_args.extend(['--batch-size', args.batch_size])
server_args.extend(['--ubatch-size', args.ubatch_size])
server_args.extend(['--n-predict', args.max_tokens * 2])
server_args.extend(['--defrag-thold', "0.1"])
server_args.append('--cont-batching')
server_args.append('--metrics')
server_args.append('--flash-attn')