Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 17:44:38 +00:00)
Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	README.md
#	examples/parallel/parallel.cpp
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-blas/CMakeLists.txt
#	ggml/src/ggml-sycl/CMakeLists.txt
#	ggml/src/gguf.cpp
#	scripts/sync-ggml.last
#	tests/test-gguf.cpp
Commit b42b618897: 8 changed files with 89 additions and 37 deletions
convert_hf_to_gguf.py

@@ -3814,7 +3814,7 @@ class BertModel(TextModel):
             remove_whitespaces = tokenizer.clean_up_tokenization_spaces
             precompiled_charsmap = b64decode(tokenizer_json["normalizer"]["precompiled_charsmap"])
 
-            vocab_size = self.hparams.get("vocab_size", tokenizer.vocab_size)
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size)
         else:
             sentencepiece_model = model.ModelProto()  # pyright: ignore[reportAttributeAccessIssue]
             sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
@@ -3827,7 +3827,7 @@ class BertModel(TextModel):
             tokenizer = SentencePieceProcessor()
             tokenizer.LoadFromFile(str(tokenizer_path))
 
-            vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size())
 
         tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
         scores: list[float] = [-10000.0] * vocab_size
@@ -3857,33 +3857,26 @@ class BertModel(TextModel):
             unk_token = tokenizer_config_json.get("unk_token")
             unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
 
-            for token_id in range(vocab_size):
+            for token_id in range(tokenizer.vocab_size):
                 piece = tokenizer._convert_id_to_token(token_id)
-                text = piece.encode("utf-8")
-                score = tokenizer_json["model"]["vocab"][token_id][1]
-
-                toktype = SentencePieceTokenTypes.NORMAL
-                if token_id == unk_token_id:
-                    toktype = SentencePieceTokenTypes.UNKNOWN
-                elif token_id in tokenizer.all_special_ids:
-                    toktype = SentencePieceTokenTypes.CONTROL
-                elif token_id in added_vocab.values():
-                    toktype = SentencePieceTokenTypes.USER_DEFINED
-                # No reliable way to detect this, but jina doesn't have any
-                # elif tokenizer.IsByte(token_id):
-                #     toktype = SentencePieceTokenTypes.BYTE
-
-                tokens[token_id] = text
-                scores[token_id] = score
-                toktypes[token_id] = toktype
-
-            if vocab_size > len(tokens):
-                pad_count = vocab_size - len(tokens)
-                logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
-                for i in range(1, pad_count + 1):
-                    tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
-                    scores.append(-1000.0)
-                    toktypes.append(SentencePieceTokenTypes.UNUSED)
+                if (piece := tokenizer._convert_id_to_token(token_id)) is not None:
+                    text = piece.encode("utf-8")
+                    score = tokenizer_json["model"]["vocab"][token_id][1]
+
+                    toktype = SentencePieceTokenTypes.NORMAL
+                    if token_id == unk_token_id:
+                        toktype = SentencePieceTokenTypes.UNKNOWN
+                    elif token_id in tokenizer.all_special_ids:
+                        toktype = SentencePieceTokenTypes.CONTROL
+                    elif token_id in added_vocab.values():
+                        toktype = SentencePieceTokenTypes.USER_DEFINED
+                    # No reliable way to detect this, but jina doesn't have any
+                    # elif tokenizer.IsByte(token_id):
+                    #     toktype = SentencePieceTokenTypes.BYTE
+
+                    tokens[token_id] = text
+                    scores[token_id] = score
+                    toktypes[token_id] = toktype
 
         if isinstance(tokenizer, SentencePieceProcessor):
             # realign tokens (see HF tokenizer code)
@@ -3896,6 +3889,12 @@ class BertModel(TextModel):
             SentencePieceTokenTypes.UNKNOWN,
         ] + toktypes[3:-1]
 
+        if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
+            # Add mask token missing from sentencepiece.bpe.model
+            tokens[250001] = b'<mask>'
+            scores[250001] = 0.0
+            toktypes[250001] = SentencePieceTokenTypes.CONTROL
+
         self.gguf_writer.add_tokenizer_model("t5")
         self.gguf_writer.add_tokenizer_pre("default")
         self.gguf_writer.add_token_list(tokens)
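Note on the conversion hunks above: vocab_size is now the larger of the model's hparams value and the tokenizer's count, every slot is pre-filled with a [PAD{i}] placeholder, and only ids the tokenizer actually defines get overwritten, which is what lets the NOMIC_BERT_MOE branch patch the <mask> slot (id 250001) by hand. A minimal C++ sketch of that sizing rule; the sizes below are illustrative assumptions, only the 250001 index comes from the diff:

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

int main() {
    // illustrative sizes, not taken from the commit
    const size_t hparams_vocab   = 250002; // embedding rows the model expects
    const size_t tokenizer_vocab = 250001; // ids the tokenizer actually defines
    const size_t vocab_size = std::max(hparams_vocab, tokenizer_vocab);

    // pre-fill every slot so undefined ids stay as deterministic placeholders
    std::vector<std::string> tokens(vocab_size);
    for (size_t i = 0; i < vocab_size; ++i) {
        tokens[i] = "[PAD" + std::to_string(i) + "]";
    }

    // ...tokenizer-defined pieces overwrite their own ids here...

    tokens[250001] = "<mask>"; // the slot the NOMIC_BERT_MOE branch patches by hand
    std::printf("%zu tokens, id 250001 = %s\n", tokens.size(), tokens[250001].c_str());
    return 0;
}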
ggml/include/ggml.h

@@ -2108,9 +2108,6 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
-    GGML_API void                 ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
 
ggml/src/ggml-impl.h

@@ -32,6 +32,8 @@
 extern "C" {
 #endif
 
+void ggml_print_backtrace(void);
+
 #ifndef MIN
 #    define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif
ggml/src/ggml-vulkan/ggml-vulkan.cpp

@@ -1668,7 +1668,7 @@ static std::array<uint32_t, 2> fa_rows_cols(FaCodePath path, uint32_t D, uint32_
         return {64, 32};
     }
     return {64, 64};
-};
+}
 
 static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vector<uint32_t>& warptile, bool mul_mat_id, ggml_type src0_type) {
 
ggml/src/ggml.c

@@ -134,7 +134,7 @@ static void ggml_print_backtrace_symbols(void) {
 }
 #endif
 
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
     if (GGML_NO_BACKTRACE) {
         return;
@@ -161,6 +161,10 @@ static void ggml_print_backtrace(void) {
     const int parent_pid = getpid();
     const int child_pid = fork();
     if (child_pid < 0) { // error
+#if defined(__linux__)
+        close(lock[1]);
+        close(lock[0]);
+#endif
         return;
     } else if (child_pid == 0) { // child
         char attach[32];
@@ -168,6 +172,7 @@ static void ggml_print_backtrace(void) {
 #if defined(__linux__)
         close(lock[1]);
         (void) !read(lock[0], lock, 1);
+        close(lock[0]);
 #endif
         // try gdb
         execlp("gdb", "gdb", "--batch",
@@ -196,7 +201,7 @@ static void ggml_print_backtrace(void) {
         }
     }
 #else
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
@@ -217,6 +222,8 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
     abort();
 }
 
+// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
+
 //
 // logging
 //
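Note on the two close() additions above: ggml_print_backtrace() opens a pipe named lock before fork() so the child can wait for the parent, and the fix closes both ends on the fork-error path plus the leftover read end in the child. A self-contained sketch of that pipe-and-fork discipline; the lock name mirrors the patch, everything else here is assumed:

#include <unistd.h>

int main() {
    int lock[2]; // [0] = read end, [1] = write end
    if (pipe(lock) != 0) {
        return 1;
    }
    const pid_t pid = fork();
    if (pid < 0) { // error: the pipe will never be used, release both ends
        close(lock[1]);
        close(lock[0]);
        return 1;
    } else if (pid == 0) { // child: block until the parent writes or exits
        close(lock[1]);
        char c;
        (void) !read(lock[0], &c, 1);
        close(lock[0]); // the read end the patch now closes
        _exit(0);
    }
    close(lock[0]); // parent: keep only the write end
    // ... parent work (e.g. let a debugger attach) ...
    close(lock[1]); // unblocks the child's read with EOF
    return 0;
}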
ggml/src/ggml.cpp (new file, +26)

@@ -0,0 +1,26 @@
+#include "ggml-impl.h"
+
+#include <cstdlib>
+#include <exception>
+
+static std::terminate_handler previous_terminate_handler;
+
+GGML_NORETURN static void ggml_uncaught_exception() {
+    ggml_print_backtrace();
+    if (previous_terminate_handler) {
+        previous_terminate_handler();
+    }
+    abort(); // unreachable unless previous_terminate_handler was nullptr
+}
+
+static bool ggml_uncaught_exception_init = []{
+    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
+    if (GGML_NO_BACKTRACE) {
+        return false;
+    }
+    const auto prev{std::get_terminate()};
+    GGML_ASSERT(prev != ggml_uncaught_exception);
+    previous_terminate_handler = prev;
+    std::set_terminate(ggml_uncaught_exception);
+    return true;
+}();
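Note on the new ggml/src/ggml.cpp above: a static lambda initializer chains ggml's handler in front of whatever std::terminate handler was already installed, so uncaught C++ exceptions also produce a backtrace, and GGML_NO_BACKTRACE skips registration entirely. The same pattern in isolation, as a sketch rather than the ggml code:

#include <cstdio>
#include <cstdlib>
#include <exception>

static std::terminate_handler prev_handler;

[[noreturn]] static void on_terminate() {
    std::fputs("uncaught exception, dumping state\n", stderr);
    if (prev_handler) {
        prev_handler(); // the previous handler normally aborts
    }
    std::abort(); // only reached if prev_handler was null
}

// runs before main(), like ggml_uncaught_exception_init above
static bool registered = [] {
    prev_handler = std::get_terminate();
    std::set_terminate(on_terminate);
    return true;
}();

int main() {
    throw 42; // lands in on_terminate instead of terminating silently
}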
ggml/src/gguf.cpp

@@ -358,13 +358,29 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     isggufv1 = false;
 
     if (ok && gr.read(ctx->version)) {
-        if (ctx->version == 1) {
+        if (ok && ctx->version == 0) {
+            GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        /*
+         * bit layout is different when reading non-native endian models.
+         * assuming that the GGUF version is 3, the non-native endian model
+         * would read it as 0x03000000. we can use the AND operation against
+         * the last 4 hexadecimal digits to check if the model is the same
+         * endianness as the host system.
+         */
+        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
+            GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
+            ok = false;
+        }
+
+        if (ok && ctx->version == 1) {
             fprintf(stderr, "%s: GGUFv1 is deprecated, please use a more up-to-date version!\n", __func__);
             // ok = false;
             isggufv1 = true;
         }
-        if (ctx->version > GGUF_VERSION) {
-            fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software is designed for version %d, things may break.\n",
+        if (ok && ctx->version > GGUF_VERSION) {
+            GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
                 __func__, ctx->version, GGUF_VERSION);
             // ok = false;
         }
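Note on the version checks above: GGUF stores the version as a little-endian uint32 right after the magic, so a file written on a machine of the opposite endianness reads back byte-swapped (version 3 becomes 0x03000000) with its low 16 bits all zero, which is what the new mask test catches. A quick sketch of that arithmetic; byteswap32 is a helper written for this example:

#include <cstdint>
#include <cstdio>

// portable 32-bit byte swap (helper written for this sketch)
static uint32_t byteswap32(uint32_t v) {
    return (v >> 24) | ((v >> 8) & 0x0000FF00u) |
           ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main() {
    const uint32_t version = 3;                   // what the writer stored
    const uint32_t foreign = byteswap32(version); // what a wrong-endian reader sees
    std::printf("native  0x%08x low16==0: %d\n", version, (version & 0x0000FFFFu) == 0);
    std::printf("swapped 0x%08x low16==0: %d\n", foreign, (foreign & 0x0000FFFFu) == 0);
    // every real GGUF version is a small number, so its low 16 bits are
    // never all zero; the swapped form always trips the check
    return 0;
}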
src/llama-vocab.cpp

@@ -2319,9 +2319,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
         std::string model_name;
         std::string tokenizer_pre;
+        std::string general_arch;
 
         ml.get_key(LLM_KV_GENERAL_NAME, model_name, false);
         ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
+        ml.get_key(LLM_KV_GENERAL_ARCHITECTURE, general_arch, false);
 
         // model name to lowercase
         std::transform(model_name.begin(), model_name.end(), model_name.begin(),
@@ -2330,8 +2332,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             }
         );
 
-        // set attributes by model/tokenizer name
-        if (_contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})) {
+        // set attributes by model/tokenizer/architecture name
+        if (false
+            || _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
+            || _contains_any(general_arch, {"nomic-bert-moe"})
+        ) {
             _set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
         } else if (_contains_any(model_name, {"phi-3", "phi3"})) {
             for (auto id : cache_special_tokens) {
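Note on the llama-vocab.cpp hunks above: the loader now also reads general.architecture so the <mask> LSTRIP attribute can be keyed on the nomic-bert-moe architecture rather than only on tokenizer_pre names, and the if (false || ...) layout keeps each condition on its own line for easy extension. A sketch of a _contains_any-style substring helper; the real helper lives in llama-vocab.cpp and its exact signature is assumed here:

#include <initializer_list>
#include <string>

// returns true if any needle occurs as a substring of haystack
static bool contains_any(const std::string & haystack,
                         std::initializer_list<const char *> needles) {
    for (const char * n : needles) {
        if (haystack.find(n) != std::string::npos) {
            return true;
        }
    }
    return false;
}

int main() {
    const std::string general_arch = "nomic-bert-moe";
    return contains_any(general_arch, {"jina-v2-de", "nomic-bert-moe"}) ? 0 : 1;
}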