Merge commit 'acd38efee3' into concedo_experimental

# Conflicts:
#	.devops/cpu.Dockerfile
#	.devops/vulkan.Dockerfile
#	.github/workflows/build.yml
#	.github/workflows/docker.yml
#	CMakeLists.txt
#	README.md
#	cmake/llama-config.cmake.in
#	examples/simple-cmake-pkg/.gitignore
#	ggml/CMakeLists.txt
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-hip/CMakeLists.txt
2025-09-11 09:34:37 +00:00 · 2025-01-28 18:16:44 +08:00 · 2025-01-28 18:16:44 +08:00 · c5d4e07664
commit c5d4e07664
parent 6bf0b2d062 acd38efee3
11 changed files with 395 additions and 61 deletions
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@ -1314,10 +1314,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
    // Number of layers that will actually be placed on non-CPU devices:
    // 0 when no devices are available, otherwise n_gpu_layers capped at n_layer + 1.
    // NOTE(review): the "+ 1" presumably accounts for the output layer - confirm.
    const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
    // Picks the device and the matching buffer-type list for layer index `il`,
    // and emits a debug log line naming the chosen device.
    //
    // Layers with il < i_gpu_start, or at/after i_gpu_start + act_gpu_layers, are
    // assigned to cpu_dev. Every other layer is mapped onto one of `devices` by
    // looking up its relative position inside the offloaded range in `splits`.
    auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
        // Outside the offloaded range -> CPU. This guard also covers
        // act_gpu_layers == 0, so the division below never sees a zero divisor.
        if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
+            LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(cpu_dev));
            return {cpu_dev, &pimpl->cpu_buft_list};
        }
        // Fractional position of this layer within the offloaded range, in [0, 1).
        // upper_bound returns the first entry of splits[0 .. n_devices()) that is
        // strictly greater than that fraction; its index selects the device.
        // NOTE(review): assumes `splits` holds ascending cumulative fractions - confirm.
        const int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + n_devices(), float(il - i_gpu_start)/act_gpu_layers) - splits.begin();
        auto * dev = devices.at(layer_gpu);
+        LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(dev));
        // Return the device together with a pointer to its entry in gpu_buft_list.
        return {dev, &pimpl->gpu_buft_list.at(dev)};
    };