Merge branch 'master' into concedo

# Conflicts:
#	.github/workflows/build.yml
#	CMakeLists.txt
#	Makefile
#	README.md
#	main.cpp
Concedo 2023-03-22 22:31:45 +08:00
commit 86c7457e24
25 changed files with 3028 additions and 1944 deletions

@@ -1,5 +1,6 @@
#include "extra.h"
#include "llama.cpp"
#include <cassert>
#include <cstring>
@@ -17,13 +18,41 @@
#include <alloca.h>
#endif
// return val: 0=fail, 1=legacy, 2=newformat
int check_file_format(const std::string & fname)
{
    std::vector<char> f_buf(1024*1024);

    auto fin = std::ifstream(fname, std::ios::binary);
    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
    if (!fin) {
        fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
        return false;
    }

    int fileformat = 0;
    uint32_t magic;
    fin.read((char *) &magic, sizeof(magic));
    if (magic == LLAMA_FILE_MAGIC_UNVERSIONED) {
        fileformat = 1;
    } else {
        fileformat = 2;
    }
    fin.close();

    return fileformat;
}
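
// --- Illustrative sketch, not part of this commit: one way a caller might act on
// check_file_format's 0/1/2 return code. The helper name describe_model_file and
// its messages are hypothetical; only the return-code contract comes from the
// function above.
static void describe_model_file(const std::string & fname)
{
    switch (check_file_format(fname)) {
        case 1:  printf("'%s': legacy (unversioned) model file\n", fname.c_str()); break;
        case 2:  printf("'%s': current-format model file\n", fname.c_str());       break;
        default: fprintf(stderr, "'%s': could not be opened or read\n", fname.c_str()); break;
    }
}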
// TODO: Calculate this constant from the vocabulary
#define MAX_TOKEN_LEN 18
// SentencePiece implementation after https://guillaume-be.github.io/2020-05-30/sentence_piece
-std::vector<gpt_vocab::id> legacy_llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos) {
-    std::vector<gpt_vocab::id> res;
+std::vector<llama_token> legacy_llama_tokenize(const llama_vocab & vocab, const std::string & text, bool bos) {
+    std::vector<llama_token> res;
     std::vector<int> score;
-    std::vector<gpt_vocab::id> prev;
+    std::vector<llama_token> prev;
     int len = text.length();
     score.resize(len + 1);
@@ -50,14 +79,14 @@ std::vector<gpt_vocab::id> legacy_llama_tokenize(const gpt_vocab & vocab, const
     // Backward pass
     int i = len;
     while (i > 0) {
-        gpt_vocab::id token_id = prev[i];
+        llama_token token_id = prev[i];
         if (token_id == 0) {
             // TODO: Return error or something more meaningful
             printf("failed to tokenize string!\n");
             break;
         }
         res.push_back(token_id);
-        auto token = (*vocab.id_to_token.find(token_id)).second;
+        auto token = vocab.id_to_token[token_id].tok;
         i -= token.length();
     }
@@ -68,5 +97,33 @@ std::vector<gpt_vocab::id> legacy_llama_tokenize(const gpt_vocab & vocab, const
    // Pieces are in reverse order so correct that
    std::reverse(res.begin(), res.end());
    return res;
}
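
// --- Illustrative toy, not part of this commit: how the backward pass above
// recovers token ids from prev[]. Suppose hypothetical tokens "he" (id 5, length 2)
// and "llo" (id 9, length 3) tile the 5-character string "hello"; the forward pass
// would then have set prev[2] = 5 and prev[5] = 9. Every name and value here is
// made up for illustration only.
static void demo_backward_pass()
{
    std::vector<int> toy_len(16, 0);
    toy_len[5] = 2;                  // toy token id 5 -> "he"
    toy_len[9] = 3;                  // toy token id 9 -> "llo"

    std::vector<int> prev(6, 0);
    prev[2] = 5;                     // best token ending at position 2
    prev[5] = 9;                     // best token ending at position 5

    std::vector<int> res;
    int i = 5;                       // length of "hello"
    while (i > 0) {
        int token_id = prev[i];
        res.push_back(token_id);     // collected back-to-front: 9, then 5
        i -= toy_len[token_id];
    }
    std::reverse(res.begin(), res.end());
    printf("%d %d\n", res[0], res[1]);   // prints: 5 9
}
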
int legacy_llama_tokenize(
        struct llama_context * ctx,
        const char * text,
        llama_token * tokens,
        int n_max_tokens,
        bool add_bos) {
    auto res = legacy_llama_tokenize(ctx->vocab, text, add_bos);
    if (n_max_tokens < (int) res.size()) {
        fprintf(stderr, "%s: too many tokens\n", __func__);
        return -((int) res.size());
    }
    for (size_t i = 0; i < res.size(); i++) {
        tokens[i] = res[i];
    }
    return res.size();
}

std::vector<llama_token> legacy_llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
    std::vector<llama_token> res(8096);
    int n = legacy_llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
    res.resize(n);
    return res;
}
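
// --- Illustrative sketch, not part of this commit: tokenizing a prompt through the
// std::string wrapper above and printing the resulting ids. How ctx is created is
// outside this diff; demo_tokenize and the prompt text are hypothetical.
static void demo_tokenize(struct llama_context * ctx)
{
    std::vector<llama_token> toks = legacy_llama_tokenize(ctx, "Hello world", /*add_bos=*/true);
    printf("prompt tokenized into %d tokens:", (int) toks.size());
    for (llama_token t : toks) {
        printf(" %d", t);
    }
    printf("\n");
}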