mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
added new binding fields for quant k and quant v
This commit is contained in:
parent
039cc392d1
commit
10a1d628ad
4 changed files with 67 additions and 38 deletions
|
@ -1107,6 +1107,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
}
|
||||
|
||||
llama_ctx_params.flash_attn = kcpp_params->flash_attn;
|
||||
llama_ctx_params.type_k = (inputs.quant_k>1?GGML_TYPE_Q4_0:(inputs.quant_k==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
|
||||
llama_ctx_params.type_v = (inputs.quant_v>1?GGML_TYPE_Q4_0:(inputs.quant_v==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
|
||||
llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params);
|
||||
|
||||
if (llama_ctx_v4 == NULL)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue