Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
Merge branch 'master' into concedo

# Conflicts:
#	.github/workflows/build.yml
#	CMakeLists.txt
#	Makefile
#	README.md
#	main.cpp
Commit 86c7457e24

25 changed files with 3028 additions and 1944 deletions
extra.cpp (67 changed lines)
@@ -1,5 +1,6 @@
 #include "extra.h"
+#include "llama.cpp"
 
 #include <cassert>
 #include <cstring>
 
@@ -17,13 +18,41 @@
 #include <alloca.h>
 #endif
 
+//return val: 0=fail, 1=legacy, 2=newformat
+int check_file_format(const std::string & fname)
+{
+    std::vector<char> f_buf(1024*1024);
+
+    auto fin = std::ifstream(fname, std::ios::binary);
+    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
+    if (!fin) {
+        fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
+        return false;
+    }
+
+    int fileformat = 0;
+    uint32_t magic;
+    fin.read((char *) &magic, sizeof(magic));
+    if (magic == LLAMA_FILE_MAGIC_UNVERSIONED) {
+        fileformat = 1;
+    }else{
+        fileformat = 2;
+    }
+    fin.close();
+
+    return fileformat;
+}
+
+
+
+
 // TODO: Calculate this constant from the vocabulary
 #define MAX_TOKEN_LEN 18
 // SentencePiece implementation after https://guillaume-be.github.io/2020-05-30/sentence_piece
-std::vector<gpt_vocab::id> legacy_llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos) {
-    std::vector<gpt_vocab::id> res;
+std::vector<llama_token> legacy_llama_tokenize(const llama_vocab & vocab, const std::string & text, bool bos) {
+    std::vector<llama_token> res;
     std::vector<int> score;
-    std::vector<gpt_vocab::id> prev;
+    std::vector<llama_token> prev;
     int len = text.length();
 
     score.resize(len + 1);
@@ -50,14 +79,14 @@ std::vector<gpt_vocab::id> legacy_llama_tokenize(const gpt_vocab & vocab, const
     // Backward pass
     int i = len;
     while (i > 0) {
-        gpt_vocab::id token_id = prev[i];
+        llama_token token_id = prev[i];
         if (token_id == 0) {
             // TODO: Return error or something more meaningful
             printf("failed to tokenize string!\n");
             break;
         }
         res.push_back(token_id);
-        auto token = (*vocab.id_to_token.find(token_id)).second;
+        auto token = vocab.id_to_token[token_id].tok;
         i -= token.length();
     }
 
@@ -68,5 +97,33 @@ std::vector<gpt_vocab::id> legacy_llama_tokenize(const gpt_vocab & vocab, const
     // Pieces are in reverse order so correct that
     std::reverse(res.begin(), res.end());
 
     return res;
 }
+
+int legacy_llama_tokenize(
+        struct llama_context * ctx,
+        const char * text,
+        llama_token * tokens,
+        int n_max_tokens,
+        bool add_bos) {
+    auto res = legacy_llama_tokenize(ctx->vocab, text, add_bos);
+
+    if (n_max_tokens < (int) res.size()) {
+        fprintf(stderr, "%s: too many tokens\n", __func__);
+        return -((int) res.size());
+    }
+
+    for (size_t i = 0; i < res.size(); i++) {
+        tokens[i] = res[i];
+    }
+
+    return res.size();
+}
+
+std::vector<llama_token> legacy_llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
+    std::vector<llama_token> res(8096);
+    int n = legacy_llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
+    res.resize(n);
+
+    return res;
+}
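For orientation (not part of the commit): a minimal sketch of how a caller might use the two helpers this diff introduces, check_file_format and the llama_context overload of legacy_llama_tokenize. It assumes both are declared in extra.h, that llama_token and llama_context come from llama.h, and that the context has already been created elsewhere; the function sniff_and_tokenize and the surrounding names are hypothetical.

// Hypothetical caller (not in this commit), assuming extra.h declares
// check_file_format() and the llama_context overload of legacy_llama_tokenize().
#include <cstdio>
#include <string>
#include <vector>

#include "extra.h"
#include "llama.h"   // assumed source of llama_token / llama_context

void sniff_and_tokenize(struct llama_context * ctx,
                        const std::string & model_path,
                        const std::string & prompt) {
    // 0 = could not open, 1 = legacy unversioned magic, 2 = newer format
    int fmt = check_file_format(model_path);
    if (fmt == 0) {
        fprintf(stderr, "failed to open '%s'\n", model_path.c_str());
        return;
    }
    printf("'%s' looks like a %s model file\n",
           model_path.c_str(), fmt == 1 ? "legacy" : "new-format");

    // The vector overload allocates 8096 slots internally and shrinks
    // the result to the actual token count before returning it.
    std::vector<llama_token> toks = legacy_llama_tokenize(ctx, prompt, /*add_bos=*/true);
    printf("prompt produced %zu tokens\n", toks.size());
}

The pointer-and-length overload added above mirrors the convention of llama_tokenize in upstream llama.cpp: when n_max_tokens is smaller than the tokenized length it returns the negated token count, so a caller can detect the shortfall and retry with a larger buffer, which is what the 8096-slot vector overload does implicitly.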