keep the output layer weights in shared memory by default

Lizonghang 2025-01-25 23:31:43 +04:00
parent f3dd5776eb
commit 1ca9a43bd1
4 changed files with 16 additions and 16 deletions


@@ -19781,7 +19781,7 @@ struct llama_model_params llama_model_default_params() {
/*.use_mmap =*/ true,
/*.use_mlock =*/ false,
/*.check_tensors =*/ false,
-    /*.keep_out_in_metal =*/ false,
+    /*.keep_out_in_metal =*/ true,
};
#ifdef GGML_USE_METAL
@@ -19799,7 +19799,7 @@ struct llama_context_params llama_context_default_params() {
/*.n_layer_window =*/ {32},
/*.n_gpu_layers =*/ 0,
/*.unload =*/ false,
-    /*.keep_out_in_metal =*/ false,
+    /*.keep_out_in_metal =*/ true,
/*.master_ip =*/ nullptr,
/*.next_node_ip =*/ nullptr,
/*.n_ctx =*/ 512,
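
With this change, both the model and context defaults set keep_out_in_metal to true, so the output layer weights stay in Metal shared memory unless the caller opts out. The following is a minimal sketch of how a caller could restore the old behavior, assuming this fork keeps the upstream llama.cpp loader API (llama_load_model_from_file, llama_new_context_with_model); the exact entry-point names in this fork may differ:

    /* sketch: opt out of the new keep_out_in_metal default;
       field names come from the hunks above, loader names are assumed */
    #include "llama.h"

    int main(void) {
        struct llama_model_params mparams = llama_model_default_params();
        mparams.keep_out_in_metal = false;   /* restore the pre-commit behavior */

        struct llama_context_params cparams = llama_context_default_params();
        cparams.keep_out_in_metal = false;   /* keep model and context consistent */

        struct llama_model *model = llama_load_model_from_file("model.gguf", mparams);
        if (model == NULL) {
            return 1;
        }

        struct llama_context *ctx = llama_new_context_with_model(model, cparams);
        if (ctx == NULL) {
            llama_free_model(model);
            return 1;
        }

        /* ... run inference as usual ... */

        llama_free(ctx);
        llama_free_model(model);
        return 0;
    }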