Do not check whether tensors are already loaded, to avoid the added latency of the check

This commit is contained in:
Lizonghang 2025-02-11 17:10:11 +04:00
parent 3dd3138207
commit 65ad14140a

View file

@ -17794,8 +17794,6 @@ static void manage_graph_tensors(struct ggml_cgraph * cgraph, int advice, bool f
if (lower_name.find("cuda") != std::string::npos) continue;
}
if (is_tensor_loaded(cur)) continue;
size_t size = ggml_nbytes(cur);
size_t first = reinterpret_cast<size_t>(cur->data);
size_t last = first + size;