Defaulting to the f32 KV cache and 4 threads seems to produce better results

Concedo 2023-03-25 11:11:40 +08:00
parent 506cd62638
commit 119392f6f2
3 changed files with 5 additions and 2 deletions


@@ -32,6 +32,7 @@ extern "C" {
     const int threads;
     const int max_context_length;
     const int batch_size;
+    const bool f16_kv;
     const char * model_filename;
     const int n_parts_overwrite = -1;
 };
@@ -75,7 +76,7 @@ extern "C" {
     ctx_params.n_ctx = inputs.max_context_length;
     ctx_params.n_parts = inputs.n_parts_overwrite;
     ctx_params.seed = -1;
-    ctx_params.f16_kv = true;
+    ctx_params.f16_kv = inputs.f16_kv;
     ctx_params.logits_all = false;
     ctx = llama_init_from_file(model.c_str(), ctx_params);
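
For context, here is a minimal caller-side sketch of how the new flag could be populated. The struct name "load_model_inputs" and the load_model() entry point are assumptions for illustration; only the fields themselves come from the diff above. Setting f16_kv = false selects the f32 KV cache, and threads = 4 matches the default the commit message recommends.

    // Hypothetical caller-side sketch; struct name and load_model() are
    // assumptions, the fields match the diff above.
    extern "C" {
        struct load_model_inputs {
            const int threads;
            const int max_context_length;
            const int batch_size;
            const bool f16_kv;
            const char * model_filename;
            const int n_parts_overwrite = -1;
        };
    }

    int main() {
        load_model_inputs inputs = {
            /*threads=*/4,               // 4 threads, per the commit message
            /*max_context_length=*/2048,
            /*batch_size=*/8,
            /*f16_kv=*/false,            // false selects the f32 KV cache
            /*model_filename=*/"model.bin",
            // n_parts_overwrite keeps its in-struct default of -1
        };
        // load_model(inputs);  // hypothetical entry point; inside it, the
        //                      // loader forwards inputs.f16_kv into
        //                      // ctx_params.f16_kv as shown in the diff
        return 0;
    }

With this change, ctx_params.f16_kv = inputs.f16_kv passes the caller's choice straight through to llama_init_from_file instead of hard-coding true.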