From b3143384b4f443d385731fddb2fe20893948c69a Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 5 Apr 2025 10:57:04 +0800
Subject: [PATCH] larger warmup batch

---
 gpttype_adapter.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 46c7bee99..a97712c3d 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -2297,8 +2297,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
             }
         }
 
-        //determine mem per token
-        std::vector<int> tmp = {1, 2, 3, 4};
+        //warmup at least 33 tokens to trigger batch
+        std::vector<int> tmp;
+        for (int i = 1; i <= 33; ++i) {
+            tmp.push_back(i);
+        }
         llama_kv_self_clear(llama_ctx_v4);
         auto er = llama_decode(llama_ctx_v4, llama_batch_get_one(tmp.data(), tmp.size()));
         if(er!=0)