diff --git a/src/llama.cpp b/src/llama.cpp
index 70a7195b..9f613743 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17823,13 +17823,16 @@ static void manage_graph_tensors(struct ggml_cgraph * cgraph, int advice, bool f
     }
 
     for (const auto & segment : merged_segments) {
+        size_t prefetch_dense = 4;
         size_t len = std::max(segment.end - segment.start, static_cast<size_t>(page_size));
         posix_madvise(reinterpret_cast<void *>(segment.start), len, advice); // hint to load into memory
         // force to prefetch data
         if (force && advice == POSIX_MADV_WILLNEED) {
             volatile char * ptr = reinterpret_cast<volatile char *>(segment.start);
-            for (size_t off = 0; off < len; off += page_size) {
-                (void)ptr[off];
+            for (size_t off = 0; off < len; off += prefetch_dense * page_size) {
+                for (size_t i = 0; i < prefetch_dense; i++) {
+                    if (off + i * page_size < len) (void)ptr[off + i * page_size];
+                }
             }
         }
     }