integrated q5 formats

2025-09-10 17:14:36 +00:00 · 2023-04-28 12:58:39 +08:00 · 2023-04-28 12:58:39 +08:00 · 032a171867
commit 032a171867
parent e8a389f85b
13 changed files with 184 additions and 129 deletions
--- a/otherarch/neox.cpp
+++ b/otherarch/neox.cpp
@ -2,6 +2,7 @@
 #include "otherarch.h"

 #include "utils.h"
+#include "common-ggml.h"

 #include <cassert>
 #include <cmath>
@ -76,24 +77,13 @@ ModelLoadResult stablelm_model_load(const std::string & fname, stablelm_model &

    // for the big tensors, we have the option to store the data in 16-bit floats or quantized
    // in order to save memory and also to speed up the computation
-    ggml_type wtype = GGML_TYPE_COUNT;
-    switch (model.hparams.ftype) {
-        case 0: wtype = GGML_TYPE_F32;  break;
-        case 1: wtype = GGML_TYPE_F16;  break;
-        case 2: wtype = GGML_TYPE_Q4_0; break;
-        case 3: wtype = GGML_TYPE_Q4_1; break;
-        case 5: wtype = GGML_TYPE_Q4_2; break;
-        case 6: wtype = GGML_TYPE_Q4_3; break;
-        default:
-                {
-                    fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n",
-                            __func__, fname.c_str(), model.hparams.ftype);
-                    return ModelLoadResult::FAIL;
-                }
+    ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype));
+    if (wtype == GGML_TYPE_COUNT) {
+        fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n",
+                __func__, fname.c_str(), model.hparams.ftype);
+        return ModelLoadResult::FAIL;
    }

-    const ggml_type wtype2 = GGML_TYPE_F32;
-
    auto & ctx = model.ctx;

    size_t ctx_size = 0;