From 65ad14140a3875fc9d8191cf6eb695687cc32fb0 Mon Sep 17 00:00:00 2001
From: Lizonghang <870644199@qq.com>
Date: Tue, 11 Feb 2025 17:10:11 +0400
Subject: [PATCH] do not check loaded tensors due to increased latency

---
 src/llama.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 5ceefb10..70a7195b 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17794,8 +17794,6 @@ static void manage_graph_tensors(struct ggml_cgraph * cgraph, int advice, bool f
             if (lower_name.find("cuda") != std::string::npos) continue;
         }
 
-        if (is_tensor_loaded(cur)) continue;
-
         size_t size = ggml_nbytes(cur);
         size_t first = reinterpret_cast<size_t>(cur->data);
         size_t last = first + size;