Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 17:44:38 +00:00)
Update license, add backwards compatibility with both ggml model formats, and fix context length issues.
This commit is contained in:
parent a2c10e0d2f
commit 8d39365af6

11 changed files with 807 additions and 15 deletions
main.cpp (18 changes)
@@ -89,8 +89,8 @@ struct llama_model {
     std::map<std::string, struct ggml_tensor *> tensors;
 };
 
-// load the model's weights from a file
-bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx, ggml_type memory_type = GGML_TYPE_F32, int n_parts_overwrite=-1) {
+// load the model's weights from a file. return val: 0=fail, 1=newformat, 2=legacy
+int llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx, ggml_type memory_type = GGML_TYPE_F32, int n_parts_overwrite=-1) {
     fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
 
     std::vector<char> f_buf(1024*1024);
@@ -102,6 +102,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         return false;
     }
 
+    bool legacy_file_format = false;
     // verify magic
     {
         uint32_t magic;
@@ -109,8 +110,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         if (magic == 0x67676d6c) {
-            fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n",
-                    __func__, fname.c_str());
-            return false;
+            legacy_file_format = true;
         }
+        else
+        {
         if (magic != 0x67676d66) {
             fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str());
             return false;
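
Note on the two magic constants checked above: read most-significant byte first, 0x67676d6c spells "ggml" (the legacy format this commit re-accepts) and 0x67676d66 spells "ggmf" (the newer versioned format). A minimal standalone sketch to verify this; the print_magic helper is illustrative and not part of the commit:

    #include <cstdint>
    #include <cstdio>

    // Illustrative helper: print a ggml-style magic value as ASCII,
    // most-significant byte first.
    static void print_magic(uint32_t magic) {
        for (int shift = 24; shift >= 0; shift -= 8) {
            putchar((int)((magic >> shift) & 0xff));
        }
        putchar('\n');
    }

    int main() {
        print_magic(0x67676d6c); // prints "ggml" - legacy format, no vocab scores
        print_magic(0x67676d66); // prints "ggmf" - newer format with a version field
        return 0;
    }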
@@ -124,6 +127,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
                     __func__, fname.c_str(), format_version);
             return false;
         }
+        }
     }
 
     int n_ff = 0;
@@ -173,12 +177,16 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
             word.resize(len);
             fin.read((char *) word.data(), len);
 
-            float score;
-            fin.read((char *) &score, sizeof(score));
+            if(!legacy_file_format)
+            {
+                float score;
+                fin.read((char *) &score, sizeof(score));
+                vocab.score[i] = score;
+            }
 
             vocab.token_to_id[word] = i;
             vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
 
             //if (i < 30000) {
             //    fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
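
The hunk above is the core of the backwards-compatibility change: in the newer format each vocab entry carries a trailing float score, while legacy files stop after the word bytes. A minimal sketch of the two on-disk layouts; the VocabEntry struct and read_vocab_entry name are illustrative (not from the commit), and the stream is assumed to already be positioned at a vocab entry:

    #include <cstdint>
    #include <fstream>
    #include <string>

    // Legacy ggml entry layout: [uint32 len][len bytes of word]
    // Newer ggmf entry layout:  [uint32 len][len bytes of word][float score]
    struct VocabEntry {
        std::string word;
        float score = 0.0f; // legacy files carry no score; keep the default
    };

    static VocabEntry read_vocab_entry(std::ifstream & fin, bool legacy_file_format) {
        VocabEntry entry;
        uint32_t len = 0;
        fin.read((char *) &len, sizeof(len));
        entry.word.resize(len);
        fin.read((char *) entry.word.data(), len);
        if (!legacy_file_format) {
            // only the newer format stores a per-token score
            fin.read((char *) &entry.score, sizeof(entry.score));
        }
        return entry;
    }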
@@ -531,7 +539,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         fin.close();
     }
 
-    return true;
+    return (legacy_file_format?2:true);
 }
 
 // evaluate the transformer
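
Since true converts to int 1, the new return statement implements the contract documented in the changed comment: 0 on failure, 1 for the new format, 2 for legacy. A hypothetical caller sketch; load_and_flag_legacy is illustrative, and only llama_model_load, llama_model, and gpt_vocab come from the file being patched:

    #include <cstdio>
    #include <string>

    // Hypothetical wrapper over the new return contract of llama_model_load:
    // 0 = load failed, 1 = new ggmf format, 2 = legacy ggml format.
    static bool load_and_flag_legacy(const std::string & fname, llama_model & model,
                                     gpt_vocab & vocab, int n_ctx, bool & legacy_out) {
        const int res = llama_model_load(fname, model, vocab, n_ctx);
        if (res == 0) {
            fprintf(stderr, "failed to load model from '%s'\n", fname.c_str());
            return false;
        }
        legacy_out = (res == 2); // legacy files lack per-token vocab scores
        return true;
    }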