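Set POSIX_MADV_WILLNEED for the next subgraph unconditionally; release the current subgraph (POSIX_MADV_DONTNEED) only when unloading is enabled and n_world > 1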

This commit is contained in:
Lizonghang 2025-01-30 13:29:34 +04:00
parent f9b4c46b74
commit b680cb74fe

View file

@ -18066,16 +18066,17 @@ static int llama_decode_internal(
} }
// overlap memory scheduling with other nodes' communication and computing // overlap memory scheduling with other nodes' communication and computing
if (cparams.unload) { {
timer(manage_graph_tensors); timer(manage_graph_tensors);
if (n_world != 1) {
manage_graph_tensors(sub_gf, POSIX_MADV_DONTNEED);
int next_gf_id = (i + 1) % gf.size(); int next_gf_id = (i + 1) % gf.size();
manage_graph_tensors(gf[next_gf_id], POSIX_MADV_WILLNEED, false); manage_graph_tensors(gf[next_gf_id], POSIX_MADV_WILLNEED, false);
if (my_rank == 0 && (is_last_l || (next_gf_id == (int)gf.size() - 1))) { if (my_rank == 0 && (is_last_l || (next_gf_id == (int)gf.size() - 1))) {
manage_graph_tensors(gf[0], POSIX_MADV_WILLNEED, false); manage_graph_tensors(gf[0], POSIX_MADV_WILLNEED, false);
} }
if (cparams.unload && n_world > 1) {
manage_graph_tensors(sub_gf, POSIX_MADV_DONTNEED);
} }
} }
} }