CUDA: fix crash on uneven context without FA (#16988)

2026-05-10 04:00:53 +00:00 · 2025-11-06 14:05:47 +01:00 · 2025-11-06 14:05:47 +01:00 · aa374175c3
commit aa374175c3
parent 5b180c3d60
7 changed files with 44 additions and 38 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -21,6 +21,8 @@ llama_context::llama_context(
              llama_context_params params) :
    model(model),
    balloc(std::make_unique<llama_batch_allocr>(model.hparams.n_pos_per_embd())) {
+    // TODO warning when creating llama_context with awkward ctx size that is not a power of 2,
+    //     may need to be backend-dependent
    LLAMA_LOG_INFO("%s: constructing llama_context\n", __func__);

    t_start_us = model.t_start_us;