From 65ad14140a3875fc9d8191cf6eb695687cc32fb0 Mon Sep 17 00:00:00 2001
From: Lizonghang <870644199@qq.com>
Date: Tue, 11 Feb 2025 17:10:11 +0400
Subject: [PATCH] do not check loaded tensors due to increased latency

---
 src/llama.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 5ceefb10..70a7195b 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17794,8 +17794,6 @@ static void manage_graph_tensors(struct ggml_cgraph * cgraph, int advice, bool f
             if (lower_name.find("cuda") != std::string::npos) continue;
         }
 
-        if (is_tensor_loaded(cur)) continue;
-
         size_t size = ggml_nbytes(cur);
         size_t first = reinterpret_cast<size_t>(cur->data);
         size_t last = first + size;