larger warmup batch

Concedo 2025-04-05 10:57:04 +08:00
parent 59c02aa1a6
commit b3143384b4

@@ -2297,8 +2297,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
            }
        }
        //determine mem per token
-       std::vector<int> tmp = {1, 2, 3, 4};
+       //warmup at least 33 tokens to trigger batch
+       std::vector<int> tmp;
+       for (int i = 1; i <= 33; ++i) {
+           tmp.push_back(i);
+       }
        llama_kv_self_clear(llama_ctx_v4);
        auto er = llama_decode(llama_ctx_v4, llama_batch_get_one(tmp.data(), tmp.size()));
        if(er!=0)
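
For context, the warmup pattern this hunk changes can be isolated as a standalone helper. The following is a minimal, hypothetical sketch rather than code from the repository: it assumes only the llama.cpp C API calls the diff itself uses (llama_kv_self_clear, llama_batch_get_one, llama_decode), and the helper name warmup_decode is invented for illustration. Per the in-code comment, a 4-token warmup is too small to trigger the batched decode path, so the warmup now pushes 33 dummy tokens through llama_decode once at load time.

// Hypothetical helper illustrating the warmup pattern above; not repository code.
#include <vector>
#include "llama.h"

static bool warmup_decode(llama_context * ctx, int n_tokens)
{
    // Dummy token IDs; only the batch size matters here, the output is discarded.
    std::vector<llama_token> tmp;
    for (llama_token i = 1; i <= n_tokens; ++i) {
        tmp.push_back(i);
    }
    llama_kv_self_clear(ctx); // start from an empty KV cache, as in the diff
    const int32_t er = llama_decode(ctx, llama_batch_get_one(tmp.data(), (int32_t)tmp.size()));
    return er == 0; // a nonzero return from llama_decode signals failure
}

Usage mirroring the diff would be warmup_decode(llama_ctx_v4, 33), where 33 is the "at least 33 tokens" threshold named in the commit's comment.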