From da31acbe6a42093cf0f898cbb1abe6bc3dff42a0 Mon Sep 17 00:00:00 2001 From: leeetao <3122669219@qq.com> Date: Fri, 4 Jul 2025 08:52:15 +0000 Subject: [PATCH] Modified batch backend_buffer size to actual size --- src/llama.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/llama.cpp b/src/llama.cpp index 1020277c..d316e400 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -3052,8 +3052,7 @@ struct llama_sbatch { ubatch_token.resize(!has_embd ? n_ubatch : 0); ubatch_embd.resize(has_embd ? n_embd * n_ubatch : 0); - // TODO: just a guess and test, need to be removed(from tao) - ubatch_backend_embd.resize(n_embd * n_tokens * 3); + ubatch_backend_embd.resize(n_embd * n_tokens + n_tokens); ubatch_out_embd.resize(n_embd * n_tokens); ubatch_pos.resize(n_ubatch);