fast forwarding for rwkv for unmodified contexts

2025-09-11 01:24:36 +00:00 · 2023-04-19 15:09:35 +08:00 · 2023-04-19 15:09:35 +08:00 · 45ec09d31b
commit 45ec09d31b
parent f39def81d4
8 changed files with 70 additions and 46 deletions
--- a/llama_adapter.cpp
+++ b/llama_adapter.cpp
@@ -145,7 +145,7 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
    std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
    n_past = 0;

-    ContextFastForward(current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, useSmartContext);
+    ContextFastForward(current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, useSmartContext,false);

    //if using BLAS and prompt is big enough, switch to single thread and use a huge batch
    bool blasmode = (embd_inp.size() >= 32 && ggml_cpu_has_blas());