mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 09:49:02 +00:00
fix gpu underutilization
This commit is contained in:
parent
e219fada4e
commit
07a397360b
1 changed file with 14 additions and 9 deletions
|
@ -1362,17 +1362,22 @@ static bool assign_layers_to_device(
|
||||||
}
|
}
|
||||||
|
|
||||||
// check the solution
|
// check the solution
|
||||||
bool is_set_suboptimal = false;
|
bool has_free_gpu_memory = false, has_overload = false;
|
||||||
for (uint32_t m = 0; m < n_world; ++m) {
|
for (uint32_t m = 0; m < n_world; ++m) {
|
||||||
uint32_t w_m = best_solution[m], n_m = best_solution[m + n_world];
|
uint32_t w_m = best_solution[m], n_m = best_solution[m + n_world];
|
||||||
// if w[m] > n[m] and there is still free VRAM, the GPU is not fully utilized,
|
|
||||||
// indicating that the memory constraints are too strict, and the set assignment is suboptimal.
|
// if there is still free GPU memory
|
||||||
if (w_m > n_m && n_m < static_cast<uint32_t>(std::round(W * vec_z_gpu[m]))) {
|
if (n_m < static_cast<uint32_t>(std::round(W * vec_z_gpu[m]))) {
|
||||||
is_set_suboptimal = true;
|
has_free_gpu_memory = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if there is device overloaded
|
||||||
|
if (w_m > n_m) {
|
||||||
|
has_overload = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_set_suboptimal) {
|
if (has_free_gpu_memory && has_overload) {
|
||||||
int worst_device = -1;
|
int worst_device = -1;
|
||||||
float worst_speed = std::numeric_limits<float>::max();
|
float worst_speed = std::numeric_limits<float>::max();
|
||||||
|
|
||||||
|
@ -1422,8 +1427,8 @@ static bool assign_layers_to_device(
|
||||||
LOG_INF(" - N Layer Window : %d\n", w[m]);
|
LOG_INF(" - N Layer Window : %d\n", w[m]);
|
||||||
LOG_INF(" - N GPU Layers : %d\n", n[m]);
|
LOG_INF(" - N GPU Layers : %d\n", n[m]);
|
||||||
}
|
}
|
||||||
LOG_INF("\nEstimated Latency: %.3f ms\n", final_objective);
|
// LOG_INF("\nEstimated Latency: %.3f ms\n", final_objective);
|
||||||
LOG_INF("------------------------------------------");
|
// LOG_INF("------------------------------------------");
|
||||||
|
|
||||||
// copy value from w and n to n_layer_window and n_gpu_layers, respectively
|
// copy value from w and n to n_layer_window and n_gpu_layers, respectively
|
||||||
std::copy(w.begin(), w.end(), n_layer_window);
|
std::copy(w.begin(), w.end(), n_layer_window);
|
||||||
|
|
Loading…
Add table
Reference in a new issue