Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 01:24:36 +00:00)
Merge branch 'master' into concedo

# Conflicts:
#	.github/workflows/build.yml
#	CMakeLists.txt
#	Makefile
#	README.md
#	main.cpp
Commit 86c7457e24

25 changed files with 3028 additions and 1944 deletions
extra.cpp (67 changed lines)
@@ -1,5 +1,6 @@
 #include "extra.h"
+#include "llama.cpp"
 
 #include <cassert>
 #include <cstring>
 
@@ -17,13 +18,41 @@
 #include <alloca.h>
 #endif
 
+//return val: 0=fail, 1=legacy, 2=newformat
+int check_file_format(const std::string & fname)
+{
+    std::vector<char> f_buf(1024*1024);
+
+    auto fin = std::ifstream(fname, std::ios::binary);
+    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
+    if (!fin) {
+        fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
+        return false;
+    }
+
+    int fileformat = 0;
+    uint32_t magic;
+    fin.read((char *) &magic, sizeof(magic));
+    if (magic == LLAMA_FILE_MAGIC_UNVERSIONED) {
+        fileformat = 1;
+    }else{
+        fileformat = 2;
+    }
+    fin.close();
+
+    return fileformat;
+}
+
+
+
+
 // TODO: Calculate this constant from the vocabulary
 #define MAX_TOKEN_LEN 18
 // SentencePiece implementation after https://guillaume-be.github.io/2020-05-30/sentence_piece
-std::vector<gpt_vocab::id> legacy_llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos) {
-    std::vector<gpt_vocab::id> res;
+std::vector<llama_token> legacy_llama_tokenize(const llama_vocab & vocab, const std::string & text, bool bos) {
+    std::vector<llama_token> res;
     std::vector<int> score;
-    std::vector<gpt_vocab::id> prev;
+    std::vector<llama_token> prev;
     int len = text.length();
 
     score.resize(len + 1);
@@ -50,14 +79,14 @@ std::vector<gpt_vocab::id> legacy_llama_tokenize(const gpt_vocab & vocab, const
     // Backward pass
     int i = len;
     while (i > 0) {
-        gpt_vocab::id token_id = prev[i];
+        llama_token token_id = prev[i];
         if (token_id == 0) {
             // TODO: Return error or something more meaningful
             printf("failed to tokenize string!\n");
             break;
         }
         res.push_back(token_id);
-        auto token = (*vocab.id_to_token.find(token_id)).second;
+        auto token = vocab.id_to_token[token_id].tok;
         i -= token.length();
     }
 
@@ -68,5 +97,33 @@ std::vector<gpt_vocab::id> legacy_llama_tokenize(const gpt_vocab & vocab, const
     // Pieces are in reverse order so correct that
     std::reverse(res.begin(), res.end());
 
     return res;
 }
+
+int legacy_llama_tokenize(
+        struct llama_context * ctx,
+        const char * text,
+        llama_token * tokens,
+        int n_max_tokens,
+        bool add_bos) {
+    auto res = legacy_llama_tokenize(ctx->vocab, text, add_bos);
+
+    if (n_max_tokens < (int) res.size()) {
+        fprintf(stderr, "%s: too many tokens\n", __func__);
+        return -((int) res.size());
+    }
+
+    for (size_t i = 0; i < res.size(); i++) {
+        tokens[i] = res[i];
+    }
+
+    return res.size();
+}
+
+std::vector<llama_token> legacy_llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
+    std::vector<llama_token> res(8096);
+    int n = legacy_llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
+    res.resize(n);
+
+    return res;
+}
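For orientation (not part of the commit): a minimal sketch of how a caller might use the two helpers this diff introduces, check_file_format and the llama_context overload of legacy_llama_tokenize. It assumes both are declared in extra.h, that llama_token and llama_context come from llama.h, and that the context has already been created elsewhere; the function sniff_and_tokenize and the surrounding names are hypothetical.

// Hypothetical caller (not in this commit), assuming extra.h declares
// check_file_format() and the llama_context overload of legacy_llama_tokenize().
#include <cstdio>
#include <string>
#include <vector>

#include "extra.h"
#include "llama.h"   // assumed source of llama_token / llama_context

void sniff_and_tokenize(struct llama_context * ctx,
                        const std::string & model_path,
                        const std::string & prompt) {
    // 0 = could not open, 1 = legacy unversioned magic, 2 = newer format
    int fmt = check_file_format(model_path);
    if (fmt == 0) {
        fprintf(stderr, "failed to open '%s'\n", model_path.c_str());
        return;
    }
    printf("'%s' looks like a %s model file\n",
           model_path.c_str(), fmt == 1 ? "legacy" : "new-format");

    // The vector overload allocates 8096 slots internally and shrinks
    // the result to the actual token count before returning it.
    std::vector<llama_token> toks = legacy_llama_tokenize(ctx, prompt, /*add_bos=*/true);
    printf("prompt produced %zu tokens\n", toks.size());
}

The pointer-and-length overload added above mirrors the convention of llama_tokenize in upstream llama.cpp: when n_max_tokens is smaller than the tokenized length it returns the negated token count, so a caller can detect the shortfall and retry with a larger buffer, which is what the 8096-slot vector overload does implicitly.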