llama : remove KV cache defragmentation logic (#15473)

ggml-ci
2025-09-10 17:14:36 +00:00 · 2025-08-22 12:22:13 +03:00 · 2025-08-22 12:22:13 +03:00 · 9ebebef62f
commit 9ebebef62f
parent ad5c975c2d
16 changed files with 32 additions and 440 deletions
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -28,7 +28,6 @@ LLAMA_BENCH_DB_FIELDS = [
    "model_type",   "model_size",   "model_n_params", "n_batch",    "n_ubatch",     "n_threads",
    "cpu_mask",     "cpu_strict",   "poll",           "type_k",     "type_v",       "n_gpu_layers",
    "split_mode",   "main_gpu",     "no_kv_offload",  "flash_attn", "tensor_split", "tensor_buft_overrides",
-    "defrag_thold",
    "use_mmap",     "embeddings",   "no_op_offload",  "n_prompt",   "n_gen",        "n_depth",
    "test_time",    "avg_ns",       "stddev_ns",      "avg_ts",     "stddev_ts",
 ]