mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.github/workflows/build.yml
#	examples/embedding/embedding.cpp
#	tools/imatrix/imatrix.cpp
#	tools/perplexity/perplexity.cpp
This commit is contained in:
commit
2439014a03
40 changed files with 2058 additions and 429 deletions
|
@ -559,7 +559,6 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll
|
|||
draft_model_params.use_mlock = base_model_params.use_mlock;
|
||||
draft_model_params.n_gpu_layers = draft_gpulayers; //layers offload the speculative model.
|
||||
draft_ctx_params.n_ctx = base_ctx_params.n_ctx;
|
||||
draft_ctx_params.logits_all = false;
|
||||
draft_ctx_params.offload_kqv = base_ctx_params.offload_kqv;
|
||||
draft_model_params.main_gpu = base_model_params.main_gpu;
|
||||
draft_model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
|
||||
|
@ -2147,7 +2146,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
}
|
||||
|
||||
llama_ctx_params.offload_kqv = !inputs.low_vram;
|
||||
llama_ctx_params.logits_all = false;
|
||||
model_params.use_mmap = inputs.use_mmap;
|
||||
model_params.use_mlock = inputs.use_mlock;
|
||||
model_params.n_gpu_layers = inputs.gpulayers;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue