Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 09:34:37 +00:00)
kobo cheats death again (+1 squashed commits)
Squashed commits: [708e2429] kobo cheats death again

parent f9f1585a7f
commit b4dc29f425

10 changed files with 225 additions and 46 deletions
@@ -11,6 +11,10 @@
 #include <sstream>
 #include <stdexcept>

+#if defined(GGML_USE_CLBLAST)
+# include "ggml_v3b-opencl.h"
+#endif
+
 static const size_t kiB = 1024;
 static const size_t MiB = 1024*kiB;
 static const size_t GiB = 1024*MiB;
@@ -150,6 +154,9 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
         throw std::runtime_error(format("failed to create ggml context"));
     }

+#if defined(GGML_USE_CLBLAST)
+    ggml_cl_init();
+#endif
     ggml_backend_buffer_ptr buf { ggml_backend_buft_alloc_buffer(buft, 0) };
     ggml_tensor * op_tensor = fn(ctx.get());
     for (int i = 0; i < GGML_MAX_SRC; i++) {
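Taken together, the two hunks above gate everything OpenCL behind one compile-time switch: the header only enters the build, and ggml_cl_init() only runs, when GGML_USE_CLBLAST is defined, so non-CLBLAST builds pay nothing. A minimal sketch of that shape; the stub names (cl_init_stub, buft_supported_sketch) are mine, not koboldcpp's:

    #include <cstdio>

    #if defined(GGML_USE_CLBLAST)
    // Stand-in for ggml_cl_init(); the real call lives behind "ggml_v3b-opencl.h".
    static void cl_init_stub() { std::puts("OpenCL backend initialized"); }
    #endif

    // Sketch of the guarded-init shape of buft_supported(); the probe is stubbed.
    static bool buft_supported_sketch() {
    #if defined(GGML_USE_CLBLAST)
        cl_init_stub(); // bring OpenCL up before any trial buffer is allocated
    #endif
        return true;    // the real code allocates a buffer and tests the op
    }

    int main() {
        std::printf("supported: %d\n", buft_supported_sketch() ? 1 : 0);
    }

Build with -DGGML_USE_CLBLAST to compile the guarded path in; without the flag both the stub and its call vanish from the binary.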
@@ -1153,6 +1160,16 @@ void llm_load_vocab(llama_model_loader & ml, llama_model & model) {
         const int n_merges = gguf_get_arr_n(ctx, merges_keyidx);
         for (int i = 0; i < n_merges; i++) {
             const std::string word = gguf_get_arr_str(ctx, merges_keyidx, i);
+            if (!OldBPETokenizerMode)
+            {
+                auto validcodepoints = unicode_cpts_from_utf8(word).size() > 0;
+                GGML_ASSERT_CONTINUE(validcodepoints);
+                if(!validcodepoints)
+                {
+                    OldBPETokenizerMode = true;
+                    printf("\nFalling Back to older tokenizer...");
+                }
+            }
             GGML_ASSERT(unicode_cpts_from_utf8(word).size() > 0);

             std::string first;
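This hunk is the heart of the commit: instead of letting one malformed BPE merge entry abort model loading, the loader notes the failure and flips into OldBPETokenizerMode. GGML_ASSERT_CONTINUE is koboldcpp's non-fatal assert; a self-contained sketch of the pattern follows, with an assumed macro body and a stubbed validity check (the real definitions may differ):

    #include <cstdio>
    #include <initializer_list>
    #include <string>

    // Assumed body for GGML_ASSERT_CONTINUE: report the failed condition and
    // carry on, where a plain GGML_ASSERT would abort the process.
    #define GGML_ASSERT_CONTINUE(x) \
        do { if (!(x)) std::fprintf(stderr, "WARNING: assert failed: %s\n", #x); } while (0)

    static bool OldBPETokenizerMode = false;

    // Hypothetical stand-in for unicode_cpts_from_utf8(word).size() > 0.
    static bool has_valid_codepoints(const std::string & word) { return !word.empty(); }

    int main() {
        for (const std::string & word : { std::string("valid"), std::string("") }) {
            if (!OldBPETokenizerMode) {
                bool validcodepoints = has_valid_codepoints(word);
                GGML_ASSERT_CONTINUE(validcodepoints);
                if (!validcodepoints) {
                    OldBPETokenizerMode = true; // one bad merge flips the mode for good
                    std::printf("\nFalling Back to older tokenizer...\n");
                }
            }
        }
    }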
@@ -1398,10 +1415,13 @@ void llm_load_vocab(llama_model_loader & ml, llama_model & model) {
         for (uint32_t i = 0; i < n_vocab; i++) {
             std::string word = gguf_get_arr_str(ctx, token_idx, i);
-            if (word.empty()) {
+            if (!OldBPETokenizerMode)
+            {
+                if (word.empty()) {
                     LLAMA_LOG_WARN("%s: empty token at index %u\n", __func__, i);
                     word = "[EMPTY_" + std::to_string(i) + "]";
                 }
+            }

             vocab.token_to_id[word] = i;
             vocab.max_token_len = std::max(vocab.max_token_len, (int) word.size());
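The wrapper added here restricts the empty-token rescue to the modern path: a blank vocab entry is logged and renamed to a unique [EMPTY_i] placeholder so the word-to-id map stays collision-free. A small standalone sketch of that rescue (the map and the inputs are invented for illustration):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <unordered_map>

    int main() {
        std::unordered_map<std::string, uint32_t> token_to_id; // illustrative map
        const char * raw[] = { "hello", "", "world" };         // index 1 is a bad entry

        for (uint32_t i = 0; i < 3; i++) {
            std::string word = raw[i];
            if (word.empty()) {
                // a unique placeholder keeps the mapping injective; two empty
                // strings would otherwise collapse into a single map entry
                word = "[EMPTY_" + std::to_string(i) + "]";
            }
            token_to_id[word] = i;
        }
        std::printf("vocab entries: %zu\n", token_to_id.size()); // prints 3, not 2
    }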
@@ -1424,7 +1444,7 @@ void llm_load_vocab(llama_model_loader & ml, llama_model & model) {
             }
         }
     }
-    GGML_ASSERT(vocab.id_to_token.size() == vocab.token_to_id.size());
+    GGML_ASSERT_CONTINUE(vocab.id_to_token.size() == vocab.token_to_id.size());

     vocab.init_tokenizer();
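Relaxing this check is consistent with the hunks above, and plausibly motivated by them: on the old-tokenizer path empty or duplicate words are no longer renamed, so token_to_id can legitimately end up smaller than id_to_token, and the mismatch is now warned about and survived instead of killing the load.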
@@ -1681,8 +1701,8 @@ void llm_load_vocab(llama_model_loader & ml, llama_model & model) {
         } else {
             // token is control, but not marked as EOG -> print a debug log
             if (vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL && vocab.special_eog_ids.count(t.second) == 0) {
-                LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n",
-                        __func__, t.second, t.first.c_str());
+                // LLAMA_LOG_DEBUG("%s: control token: %6d '%s' is not marked as EOG\n",
+                //         __func__, t.second, t.first.c_str());
             }
         }
     }