Removed the OpenBLAS backend and merged it into the CPU backend (which uses llamafile for BLAS). The GPU backend is now automatically selected when running from the CLI unless noblas is specified.

Concedo 2024-09-15 19:21:52 +08:00
parent 01c7d82185
commit 53bf0fb32d
14 changed files with 115 additions and 159 deletions
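
The auto-selection described in the commit message boils down to a simple preference rule. Below is a minimal C++ sketch of that rule with entirely hypothetical names (Backend, select_backend, gpu_available, noblas_flag); KoboldCpp's real launcher logic lives elsewhere and is more involved:

// Hypothetical illustration of the selection rule from the commit
// message; none of these names exist in the KoboldCpp codebase.
enum class Backend { CPU, GPU };

Backend select_backend(bool gpu_available, bool noblas_flag)
{
    // noblas opts out of acceleration, falling back to the plain
    // CPU backend (which now handles BLAS itself via llamafile).
    if (noblas_flag) {
        return Backend::CPU;
    }
    // Otherwise prefer a GPU backend whenever one is usable.
    return gpu_available ? Backend::GPU : Backend::CPU;
}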

@@ -1624,7 +1624,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
     gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;
-    //this is used for the mem_per_token eval, openblas needs more RAM
+    //this is used for the mem_per_token eval, blas needs more RAM
     bool v3_use_scratch = ggml_v3_cpu_has_gpublas();
     int cu_parseinfo_maindevice = inputs.cublas_info<=0?0:inputs.cublas_info;
@@ -2362,11 +2362,11 @@ int GetThreadsToUse(bool blasmode)
     {
         if(!ggml_cpu_has_gpublas())
         {
-            return 1;
+            return std::min(kcpp_data->n_blasthreads, 4);
         }
         else
         {
-            return kcpp_data->n_blasthreads;
+            return kcpp_data->n_blasthreads;
         }
     }
     return kcpp_data->n_threads;
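
For readability, here is the whole function as it reads after this commit, reconstructed from the hunk above. The if (blasmode) guard and the enclosing braces are inferred from the hunk header and context lines, so treat this as a sketch rather than a verbatim copy of the file:

// Reconstructed view of GetThreadsToUse() after this commit.
int GetThreadsToUse(bool blasmode)
{
    if (blasmode) // inferred from context; the hunk starts just below
    {
        if (!ggml_cpu_has_gpublas())
        {
            // Was `return 1;`. With the OpenBLAS backend removed, BLAS
            // now runs on the CPU via llamafile, so a few ggml threads
            // help; the count is capped at 4.
            return std::min(kcpp_data->n_blasthreads, 4);
        }
        else
        {
            // GPU BLAS path: use the configured BLAS thread count as-is.
            return kcpp_data->n_blasthreads;
        }
    }
    // Non-BLAS evaluation uses the normal thread count.
    return kcpp_data->n_threads;
}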