mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
fixed some old models failing due to tokenizer changes, update lite (+1 squashed commit)
Squashed commits: [9dee81ec] fixed some old models failing due to tokenizer changes, update lite tooltip (+3 squashed commits) Squashed commit: [5ab95a79] fixes [a561d5e2] fixed some old models failing due to tokenizer changes [95e65daf] lite updates
This commit is contained in:
parent
dd1d61ea6b
commit
cff75061fe
4 changed files with 72 additions and 26 deletions
18
llama.cpp
18
llama.cpp
|
@ -2200,7 +2200,13 @@ static void llm_load_vocab(
|
|||
const std::string word = gguf_get_arr_str(ctx, merges_keyidx, i);
|
||||
if (!OldBPETokenizerMode)
|
||||
{
|
||||
GGML_ASSERT_CONTINUE(codepoints_from_utf8(word).size() > 0);
|
||||
auto validcodepoints = codepoints_from_utf8(word).size() > 0;
|
||||
GGML_ASSERT_CONTINUE(validcodepoints);
|
||||
if(!validcodepoints)
|
||||
{
|
||||
OldBPETokenizerMode = true;
|
||||
printf("\nFalling Back to older tokenizer...");
|
||||
}
|
||||
}
|
||||
|
||||
std::string first;
|
||||
|
@ -2236,9 +2242,15 @@ static void llm_load_vocab(
|
|||
|
||||
for (uint32_t i = 0; i < n_vocab; i++) {
|
||||
std::string word = gguf_get_arr_str(ctx, token_idx, i);
|
||||
if (!OldBPETokenizerMode)
|
||||
if (!OldBPETokenizerMode)
|
||||
{
|
||||
GGML_ASSERT_CONTINUE(codepoints_from_utf8(word).size() > 0);
|
||||
auto validcodepoints = codepoints_from_utf8(word).size() > 0;
|
||||
GGML_ASSERT_CONTINUE(validcodepoints);
|
||||
if(!validcodepoints)
|
||||
{
|
||||
OldBPETokenizerMode = true;
|
||||
printf("\nFalling Back to older tokenizer...");
|
||||
}
|
||||
}
|
||||
|
||||
vocab.token_to_id[word] = i;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue