Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-12 09:59:41 +00:00)
Commit a83f2d5fce: reduce some spamminess
Parent: 816d9b7989
2 changed files with 5 additions and 4 deletions
@@ -75,7 +75,7 @@ bool llama_kv_cache_init(
         const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(i) + hparams.n_embd_k_s();
         const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(i) + hparams.n_embd_v_s();
 
-        LLAMA_LOG_DEBUG("%s: layer %d: n_embd_k_gqa = %d, n_embd_v_gqa = %d\n", __func__, i, n_embd_k_gqa, n_embd_v_gqa);
+        // LLAMA_LOG_DEBUG("%s: layer %d: n_embd_k_gqa = %d, n_embd_v_gqa = %d\n", __func__, i, n_embd_k_gqa, n_embd_v_gqa);
 
         ggml_backend_buffer_type_t buft;
         if (offload) {
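The commented-out debug line reported the per-layer K/V sizes that the KV cache is allocated with. As a rough illustration of what those two quantities mean, here is a minimal standalone sketch, assuming the usual grouped-query-attention layout (key/value head size times number of KV heads, plus an optional recurrent-state term); the hyperparameter values and the sketch itself are illustrative, not the actual llama.cpp implementation.

// Hedged sketch of the per-layer quantities in the commented-out debug line.
// All numbers below are hypothetical; they stand in for hparams fields.
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t n_embd_head_k = 128; // size of one key head (assumed)
    const uint32_t n_embd_head_v = 128; // size of one value head (assumed)
    const uint32_t n_head_kv     = 8;   // KV heads for grouped-query attention (assumed)
    const uint32_t n_embd_k_s    = 0;   // extra recurrent-state size, 0 for pure transformers
    const uint32_t n_embd_v_s    = 0;

    // Mirrors hparams.n_embd_k_gqa(i) + hparams.n_embd_k_s() (and the V analogue) in the diff.
    const uint32_t n_embd_k_gqa = n_embd_head_k * n_head_kv + n_embd_k_s;
    const uint32_t n_embd_v_gqa = n_embd_head_v * n_head_kv + n_embd_v_s;

    std::printf("n_embd_k_gqa = %u, n_embd_v_gqa = %u\n", n_embd_k_gqa, n_embd_v_gqa);
    return 0;
}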
@@ -1280,7 +1280,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
     bool use_mmap_buffer = true;
 
-    LLAMA_LOG_INFO("%s: loading model tensors, this can take a while... (mmap = %s)\n", __func__, use_mmap_buffer ? "true" : "false");
+    LLAMA_LOG_INFO("%s: loading model tensors, this can take a while...", __func__);
+    // LLAMA_LOG_INFO("%s: loading model tensors, this can take a while... (mmap = %s)\n", __func__, use_mmap_buffer ? "true" : "false");
 
     // build a list of buffer types for the CPU and GPU devices
     pimpl->cpu_buft_list = make_cpu_buft_list(devices);
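The only user-visible effect of this hunk is the startup message. Assuming LLAMA_LOG_INFO prints its format string verbatim and __func__ expands to load_tensors here, the output changes roughly from

    load_tensors: loading model tensors, this can take a while... (mmap = true)

to

    load_tensors: loading model tensors, this can take a while...

with the mmap variant kept in the source as a comment for anyone who wants it back.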
@@ -1328,12 +1329,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
     auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
         if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
-            LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(cpu_dev));
+            // LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(cpu_dev));
             return {cpu_dev, &pimpl->cpu_buft_list};
         }
         const int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + n_devices(), float(il - i_gpu_start)/act_gpu_layers) - splits.begin();
         auto * dev = devices.at(layer_gpu);
-        LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(dev));
+        // LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(dev));
         return {dev, &pimpl->gpu_buft_list.at(dev)};
     };
 
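The lambda in this hunk maps each layer index to a device: layers outside the offload window stay on the CPU, and offloaded layers are binned into GPUs by searching the cumulative split fractions with std::upper_bound. A minimal standalone sketch of that mapping follows; the layer counts, the 60/40 split, and the names used are hypothetical, and plain indices stand in for the ggml backend devices handled by the real code.

// Hedged sketch of the split-based layer -> device mapping, not the actual implementation.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    const int n_layer        = 32;                     // hypothetical model depth
    const int n_gpu_layers   = 24;                     // hypothetical offload setting
    const int i_gpu_start    = n_layer - n_gpu_layers; // first layer eligible for offload
    const int act_gpu_layers = std::min(n_gpu_layers, n_layer + 1);

    // Cumulative split fractions, one entry per device (e.g. a 60%/40% tensor split).
    const std::vector<float> splits = {0.6f, 1.0f};

    for (int il = 0; il < n_layer; ++il) {
        if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
            std::printf("layer %3d -> CPU\n", il);
            continue;
        }
        // Same idea as the std::upper_bound call in the diff: bin the layer's
        // relative position into the cumulative split fractions.
        const int layer_gpu = std::upper_bound(splits.begin(), splits.end(),
                                  float(il - i_gpu_start) / act_gpu_layers) - splits.begin();
        std::printf("layer %3d -> GPU %d\n", il, layer_gpu);
    }
    return 0;
}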