added new binding fields for quant k and quant v

This commit is contained in:
Concedo 2024-06-03 14:35:59 +08:00
parent 039cc392d1
commit 10a1d628ad
4 changed files with 67 additions and 38 deletions

View file

@ -1107,6 +1107,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
}
llama_ctx_params.flash_attn = kcpp_params->flash_attn;
llama_ctx_params.type_k = (inputs.quant_k>1?GGML_TYPE_Q4_0:(inputs.quant_k==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
llama_ctx_params.type_v = (inputs.quant_v>1?GGML_TYPE_Q4_0:(inputs.quant_v==1?GGML_TYPE_Q8_0:GGML_TYPE_F16));
llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params);
if (llama_ctx_v4 == NULL)