more consistency fixes

This commit is contained in:
Concedo 2025-08-13 19:28:53 +08:00
parent 955cf66bbc
commit 4b2ca1169c
3 changed files with 5 additions and 1 deletions

View file

@ -120,6 +120,8 @@ bool embeddingstype_load_model(const embeddings_load_model_inputs inputs)
model_params.use_mmap = inputs.use_mmap;
model_params.use_mlock = false;
model_params.n_gpu_layers = inputs.gpulayers; //offload if possible
int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu<=0?0:inputs.kcpp_main_gpu;
model_params.main_gpu = kcpp_parseinfo_maindevice;
model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
llama_model * embeddingsmodel = llama_model_load_from_file(modelfile.c_str(), model_params);