mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
llama-quant : default ftype param Q5_1 --> Q8_0 (#20828)
Change the default `ftype` in `llama_model_quantize_params` from `LLAMA_FTYPE_MOSTLY_Q5_1` to `LLAMA_FTYPE_MOSTLY_Q8_0`. In case some external program naively uses the default quantization params, we should probably default to a known-good type like Q8_0 rather than Q5_1, which is rather old.
This commit is contained in:
parent
8ea8fee966
commit
9d34231bb8
1 changed file with 1 addition and 1 deletion
|
|
@@ -1283,7 +1283,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
|
|||
llama_model_quantize_params llama_model_quantize_default_params() {
|
||||
llama_model_quantize_params result = {
|
||||
/*.nthread =*/ 0,
|
||||
/*.ftype =*/ LLAMA_FTYPE_MOSTLY_Q5_1,
|
||||
/*.ftype =*/ LLAMA_FTYPE_MOSTLY_Q8_0,
|
||||
/*.output_tensor_type =*/ GGML_TYPE_COUNT,
|
||||
/*.token_embedding_type =*/ GGML_TYPE_COUNT,
|
||||
/*.allow_requantize =*/ false,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue