llama : remove KV cache defragmentation logic (#15473)

ggml-ci
This commit is contained in:
Georgi Gerganov 2025-08-22 12:22:13 +03:00 committed by GitHub
parent ad5c975c2d
commit 9ebebef62f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 32 additions and 440 deletions

View file

@ -28,7 +28,6 @@ LLAMA_BENCH_DB_FIELDS = [
"model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads",
"cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers",
"split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "tensor_buft_overrides",
"defrag_thold",
"use_mmap", "embeddings", "no_op_offload", "n_prompt", "n_gen", "n_depth",
"test_time", "avg_ns", "stddev_ns", "avg_ts", "stddev_ts",
]