update api docs and lite

2025-09-10 17:14:36 +00:00 · 2025-03-29 15:39:25 +08:00 · 2025-03-29 15:39:25 +08:00 · 396875e1c4
commit 396875e1c4
parent 6a709be50a 3714c3ee1a
43 changed files with 4776 additions and 2051 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -2317,11 +2317,6 @@ llama_context * llama_init_from_model(
        params.flash_attn = false;
    }

-    if (params.flash_attn && model->hparams.n_embd_head_k != model->hparams.n_embd_head_v) {
-        LLAMA_LOG_WARN("%s: flash_attn requires n_embd_head_k == n_embd_head_v - forcing off\n", __func__);
-        params.flash_attn = false;
-    }
-
    if (ggml_is_quantized(params.type_v) && !params.flash_attn) {
        LLAMA_LOG_ERROR("%s: V cache quantization requires flash_attn\n", __func__);
        return nullptr;