merged q4_2 and q4_3 dequants and FIXED CLBLAST SLOWNESS!

This commit is contained in:
Concedo 2023-04-24 21:33:01 +08:00
parent e58f1d1336
commit 1b9b9068b1
4 changed files with 122 additions and 89 deletions

View file

@@ -366,7 +366,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
}
params.n_batch = bbs; //received reports of 1024 and above crashing on some models
-params.n_threads = 1;
+//params.n_threads = 1; //do not limit here anymore.
}
current_context_tokens.resize(n_past);