mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 00:54:41 +00:00
default kv_unified to true, handle LLAMA_SET_ROWS.
This commit is contained in:
parent
30675b0798
commit
6d50def409
5 changed files with 18 additions and 1 deletion
|
@@ -585,6 +585,7 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll
|
|||
draft_ctx_params.offload_kqv = base_ctx_params.offload_kqv;
|
||||
draft_model_params.main_gpu = base_model_params.main_gpu;
|
||||
draft_model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
|
||||
draft_ctx_params.kv_unified = base_ctx_params.kv_unified;
|
||||
#if defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN)
|
||||
bool ts_all_zero = true;
|
||||
for (int i = 0; i < tensor_split_max; ++i) {
|
||||
|
@@ -2183,6 +2184,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
}
|
||||
|
||||
llama_ctx_params.offload_kqv = !inputs.low_vram;
|
||||
llama_ctx_params.kv_unified = true;
|
||||
model_params.use_mmap = inputs.use_mmap;
|
||||
model_params.use_mlock = inputs.use_mlock;
|
||||
model_params.n_gpu_layers = inputs.gpulayers;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue