mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
Removed the OpenBLAS backend and merged it into the CPU backend (which uses llamafile for BLAS). The GPU backend is now selected automatically when running from the CLI, unless noblas is specified.
This commit is contained in:
parent
01c7d82185
commit
53bf0fb32d
14 changed files with 115 additions and 159 deletions
|
@ -1624,7 +1624,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
|
||||
gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;
|
||||
|
||||
//this is used for the mem_per_token eval, openblas needs more RAM
|
||||
//this is used for the mem_per_token eval, blas needs more RAM
|
||||
bool v3_use_scratch = ggml_v3_cpu_has_gpublas();
|
||||
|
||||
int cu_parseinfo_maindevice = inputs.cublas_info<=0?0:inputs.cublas_info;
|
||||
|
@ -2362,11 +2362,11 @@ int GetThreadsToUse(bool blasmode)
|
|||
{
|
||||
if(!ggml_cpu_has_gpublas())
|
||||
{
|
||||
return 1;
|
||||
return std::min(kcpp_data->n_blasthreads, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
return kcpp_data->n_blasthreads;
|
||||
return kcpp_data->n_blasthreads;
|
||||
}
|
||||
}
|
||||
return kcpp_data->n_threads;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue