mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
default to f32 KV cache; using 4 threads seems to produce better results
This commit is contained in:
parent
506cd62638
commit
119392f6f2
3 changed files with 5 additions and 2 deletions
|
@ -32,6 +32,7 @@ extern "C" {
|
|||
const int threads;
|
||||
const int max_context_length;
|
||||
const int batch_size;
|
||||
const bool f16_kv;
|
||||
const char * model_filename;
|
||||
const int n_parts_overwrite = -1;
|
||||
};
|
||||
|
@ -75,7 +76,7 @@ extern "C" {
|
|||
ctx_params.n_ctx = inputs.max_context_length;
|
||||
ctx_params.n_parts = inputs.n_parts_overwrite;
|
||||
ctx_params.seed = -1;
|
||||
ctx_params.f16_kv = true;
|
||||
ctx_params.f16_kv = inputs.f16_kv;
|
||||
ctx_params.logits_all = false;
|
||||
|
||||
ctx = llama_init_from_file(model.c_str(), ctx_params);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue