mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-12 18:09:42 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .flake8 # .github/workflows/bench.yml # .github/workflows/python-lint.yml # .pre-commit-config.yaml # Makefile # README.md # models/ggml-vocab-bert-bge.gguf.inp # models/ggml-vocab-bert-bge.gguf.out # models/ggml-vocab-deepseek-coder.gguf.inp # models/ggml-vocab-deepseek-coder.gguf.out # models/ggml-vocab-deepseek-llm.gguf.inp # models/ggml-vocab-deepseek-llm.gguf.out # models/ggml-vocab-falcon.gguf.inp # models/ggml-vocab-falcon.gguf.out # models/ggml-vocab-gpt-2.gguf.inp # models/ggml-vocab-gpt-2.gguf.out # models/ggml-vocab-llama-bpe.gguf.inp # models/ggml-vocab-llama-bpe.gguf.out # models/ggml-vocab-llama-spm.gguf.inp # models/ggml-vocab-llama-spm.gguf.out # models/ggml-vocab-mpt.gguf.inp # models/ggml-vocab-mpt.gguf.out # models/ggml-vocab-phi-3.gguf # models/ggml-vocab-phi-3.gguf.inp # models/ggml-vocab-phi-3.gguf.out # models/ggml-vocab-refact.gguf # models/ggml-vocab-starcoder.gguf.inp # models/ggml-vocab-starcoder.gguf.out # requirements/requirements-convert.txt # scripts/compare-llama-bench.py # scripts/run-with-preset.py # scripts/verify-checksum-models.py # tests/CMakeLists.txt # tests/test-tokenizer-0.cpp
This commit is contained in:
commit
6c000cbe7a
40 changed files with 1593 additions and 936 deletions
27
llama.cpp
27
llama.cpp
|
@ -2389,7 +2389,7 @@ static bool llama_kv_cache_init(
|
|||
cache.recurrent = model.arch == LLM_ARCH_MAMBA;
|
||||
cache.v_trans = !cparams.flash_attn;
|
||||
|
||||
// TODO: support mixed reccurent Transformer architectues
|
||||
// TODO: support mixed recurrent Transformer architectures
|
||||
// NOTE: (!a || b) is a logical implication (a -> b)
|
||||
GGML_ASSERT(!cache.recurrent || n_embd_k_gqa == hparams.n_embd_k_s());
|
||||
GGML_ASSERT(!cache.recurrent || n_embd_v_gqa == hparams.n_embd_v_s());
|
||||
|
@ -4437,6 +4437,12 @@ static void llm_load_vocab(
|
|||
} else if (
|
||||
tokenizer_pre == "gpt-2") {
|
||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT2;
|
||||
} else if (
|
||||
tokenizer_pre == "refact") {
|
||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_REFACT;
|
||||
} else if (
|
||||
tokenizer_pre == "command-r") {
|
||||
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_COMMAND_R;
|
||||
} else {
|
||||
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
|
||||
}
|
||||
|
@ -12022,7 +12028,7 @@ static bool llama_is_user_defined_token(const llama_vocab& vocab, llama_token id
|
|||
static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
|
||||
GGML_ASSERT(llama_vocab_get_type(vocab) != LLAMA_VOCAB_TYPE_NONE);
|
||||
GGML_ASSERT(llama_is_byte_token(vocab, id));
|
||||
const auto& token_data = vocab.id_to_token.at(id);
|
||||
const auto & token_data = vocab.id_to_token.at(id);
|
||||
switch (llama_vocab_get_type(vocab)) {
|
||||
case LLAMA_VOCAB_TYPE_SPM: {
|
||||
auto buf = token_data.text.substr(3, 2);
|
||||
|
@ -12503,14 +12509,13 @@ struct llm_tokenizer_bpe {
|
|||
"\\s?\\p{L}+",
|
||||
"\\s?\\p{P}+",
|
||||
"[一-龥ࠀ-一가-퟿]+",
|
||||
"\\p{N}+",
|
||||
"\\p{N}",
|
||||
});
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_FALCON:
|
||||
word_collection = unicode_regex_split(text, {
|
||||
"[\\p{P}\\$\\+<=>\\^~\\|]+",
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||
"\\p{N}+",
|
||||
"[0-9][0-9][0-9]",
|
||||
});
|
||||
break;
|
||||
|
@ -12526,6 +12531,13 @@ struct llm_tokenizer_bpe {
|
|||
});
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_STARCODER:
|
||||
case LLAMA_VOCAB_PRE_TYPE_REFACT:
|
||||
case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
|
||||
word_collection = unicode_regex_split(text, {
|
||||
"\\p{N}",
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||
});
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_GPT2:
|
||||
word_collection = unicode_regex_split(text, {
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||
|
@ -17782,9 +17794,10 @@ int32_t llama_tokenize(
|
|||
|
||||
static std::string llama_decode_text(const std::string & text) {
|
||||
std::string decoded_text;
|
||||
auto unicode_sequences = unicode_cpts_from_utf8(text);
|
||||
for (auto & unicode_sequence : unicode_sequences) {
|
||||
decoded_text += unicode_utf8_to_byte(unicode_cpt_to_utf8(unicode_sequence));
|
||||
|
||||
const auto cpts = unicode_cpts_from_utf8(text);
|
||||
for (const auto cpt : cpts) {
|
||||
decoded_text += unicode_utf8_to_byte(unicode_cpt_to_utf8(cpt));
|
||||
}
|
||||
|
||||
return decoded_text;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue