gpu layer offloading disabled for phi models in clblast

2025-09-11 01:24:36 +00:00 · 2024-01-25 17:40:05 +08:00 · 2024-01-25 17:40:05 +08:00 · d9a7bd577a
commit d9a7bd577a
parent 0a70cc1ba7
4 changed files with 38 additions and 36 deletions
--- a/model_adapter.h
+++ b/model_adapter.h
@@ -21,7 +21,8 @@ enum FileFormat
    GGJT=3, // 3=(llama ggjt)
    GGJT_2=4, //newer llama format unshuffled
    GGJT_3=5, //using 16bit scalar
-    GGUF_LLAMA=6, //GGUF (llama newest ver)
+
+    GGUF_GENERIC=6, //GGUF (llama newest ver)

    GPTJ_1=100, //the very first super old GPTJ format
    GPTJ_2=101, //pygmalion, uses old ggml lib
@@ -47,14 +48,20 @@ enum FileFormat

    MPT_1=500, //first supported mpt version

-    GGUF_FALCON=600, //GGUF (falcon)
+};

+enum GGUFArch
+{
+    DEFAULT = 0, //used for llama and other generic gguf
+    FALCON = 1,
+    PHI = 2,
 };

 struct FileFormatExtraMeta
 {
    int n_ctx_train = 2048;
    int fileversion = 0;
+    GGUFArch model_architecture = GGUFArch::DEFAULT;
 };

 enum ModelLoadResult