update api docs and lite

This commit is contained in:
Concedo 2025-03-29 15:39:25 +08:00
commit 396875e1c4
43 changed files with 4776 additions and 2051 deletions

View file

@@ -2317,11 +2317,6 @@ llama_context * llama_init_from_model(
params.flash_attn = false;
}
if (params.flash_attn && model->hparams.n_embd_head_k != model->hparams.n_embd_head_v) {
LLAMA_LOG_WARN("%s: flash_attn requires n_embd_head_k == n_embd_head_v - forcing off\n", __func__);
params.flash_attn = false;
}
if (ggml_is_quantized(params.type_v) && !params.flash_attn) {
LLAMA_LOG_ERROR("%s: V cache quantization requires flash_attn\n", __func__);
return nullptr;