Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	README.md
#	examples/parallel/parallel.cpp
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-blas/CMakeLists.txt
#	ggml/src/ggml-sycl/CMakeLists.txt
#	ggml/src/gguf.cpp
#	scripts/sync-ggml.last
#	tests/test-gguf.cpp
Concedo 2025-06-02 23:26:43 +08:00
commit b42b618897
8 changed files with 89 additions and 37 deletions


@@ -3814,7 +3814,7 @@ class BertModel(TextModel):
             remove_whitespaces = tokenizer.clean_up_tokenization_spaces
             precompiled_charsmap = b64decode(tokenizer_json["normalizer"]["precompiled_charsmap"])
-            vocab_size = self.hparams.get("vocab_size", tokenizer.vocab_size)
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size)
         else:
             sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue]
             sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
@@ -3827,7 +3827,7 @@ class BertModel(TextModel):
             tokenizer = SentencePieceProcessor()
             tokenizer.LoadFromFile(str(tokenizer_path))
-            vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size())
         tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
         scores: list[float] = [-10000.0] * vocab_size
@@ -3857,33 +3857,26 @@ class BertModel(TextModel):
             unk_token = tokenizer_config_json.get("unk_token")
             unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
-            for token_id in range(vocab_size):
+            for token_id in range(tokenizer.vocab_size):
-                piece = tokenizer._convert_id_to_token(token_id)
-                text = piece.encode("utf-8")
-                score = tokenizer_json["model"]["vocab"][token_id][1]
+                if (piece := tokenizer._convert_id_to_token(token_id)) is not None:
+                    text = piece.encode("utf-8")
+                    score = tokenizer_json["model"]["vocab"][token_id][1]
-                toktype = SentencePieceTokenTypes.NORMAL
-                if token_id == unk_token_id:
-                    toktype = SentencePieceTokenTypes.UNKNOWN
-                elif token_id in tokenizer.all_special_ids:
-                    toktype = SentencePieceTokenTypes.CONTROL
-                elif token_id in added_vocab.values():
-                    toktype = SentencePieceTokenTypes.USER_DEFINED
-                # No reliable way to detect this, but jina doesn't have any
-                # elif tokenizer.IsByte(token_id):
-                #     toktype = SentencePieceTokenTypes.BYTE
+                    toktype = SentencePieceTokenTypes.NORMAL
+                    if token_id == unk_token_id:
+                        toktype = SentencePieceTokenTypes.UNKNOWN
+                    elif token_id in tokenizer.all_special_ids:
+                        toktype = SentencePieceTokenTypes.CONTROL
+                    elif token_id in added_vocab.values():
+                        toktype = SentencePieceTokenTypes.USER_DEFINED
+                    # No reliable way to detect this, but jina doesn't have any
+                    # elif tokenizer.IsByte(token_id):
+                    #     toktype = SentencePieceTokenTypes.BYTE
-                tokens[token_id] = text
-                scores[token_id] = score
-                toktypes[token_id] = toktype
-            if vocab_size > len(tokens):
-                pad_count = vocab_size - len(tokens)
-                logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
-                for i in range(1, pad_count + 1):
-                    tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
-                    scores.append(-1000.0)
-                    toktypes.append(SentencePieceTokenTypes.UNUSED)
+                    tokens[token_id] = text
+                    scores[token_id] = score
+                    toktypes[token_id] = toktype
         if isinstance(tokenizer, SentencePieceProcessor):
             # realign tokens (see HF tokenizer code)
@@ -3896,6 +3889,12 @@ class BertModel(TextModel):
                 SentencePieceTokenTypes.UNKNOWN,
             ] + toktypes[3:-1]
+        if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
+            # Add mask token missing from sentencepiece.bpe.model
+            tokens[250001] = b'<mask>'
+            scores[250001] = 0.0
+            toktypes[250001] = SentencePieceTokenTypes.CONTROL
         self.gguf_writer.add_tokenizer_model("t5")
         self.gguf_writer.add_tokenizer_pre("default")
         self.gguf_writer.add_token_list(tokens)


@@ -2108,9 +2108,6 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
-    GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);


@@ -32,6 +32,8 @@
 extern "C" {
 #endif
+void ggml_print_backtrace(void);
 #ifndef MIN
 #    define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif


@@ -1668,7 +1668,7 @@ static std::array<uint32_t, 2> fa_rows_cols(FaCodePath path, uint32_t D, uint32_
         return {64, 32};
     }
     return {64, 64};
-};
+}
 static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vector<uint32_t>& warptile, bool mul_mat_id, ggml_type src0_type) {


@@ -134,7 +134,7 @@ static void ggml_print_backtrace_symbols(void) {
 }
 #endif
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
     if (GGML_NO_BACKTRACE) {
         return;
@@ -161,6 +161,10 @@ static void ggml_print_backtrace(void) {
     const int parent_pid = getpid();
     const int child_pid = fork();
     if (child_pid < 0) { // error
+#if defined(__linux__)
+        close(lock[1]);
+        close(lock[0]);
+#endif
         return;
     } else if (child_pid == 0) { // child
         char attach[32];
@@ -168,6 +172,7 @@ static void ggml_print_backtrace(void) {
 #if defined(__linux__)
         close(lock[1]);
         (void) !read(lock[0], lock, 1);
+        close(lock[0]);
 #endif
         // try gdb
         execlp("gdb", "gdb", "--batch",
@@ -196,7 +201,7 @@ static void ggml_print_backtrace(void) {
     }
 }
 #else
-static void ggml_print_backtrace(void) {
+void ggml_print_backtrace(void) {
     // platform not supported
 }
 #endif
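
The two ggml.c hunks above close the "lock" pipe descriptors on the fork-error path and in the child after the handshake byte is read. For context, ggml_print_backtrace forks a child that attaches gdb or lldb to the parent, and the pipe lets the parent release the child only once tracing has been permitted. Below is a minimal standalone sketch of that handshake pattern, not the ggml code itself; backtrace_via_child and its simplified error handling are illustrative only.

#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

// Illustrative sketch: a parent forks a child that would attach a debugger;
// a pipe ("lock") lets the parent release the child only after tracing is
// allowed. Every pipe end is closed on every path, which is what the added
// close() calls in the hunks above ensure.
static void backtrace_via_child(void) {
    int lock[2];
    if (pipe(lock) != 0) {
        return;
    }
    pid_t child = fork();
    if (child < 0) {            // fork failed: close both ends before returning
        close(lock[1]);
        close(lock[0]);
        return;
    } else if (child == 0) {    // child: wait for the parent's go-ahead byte
        close(lock[1]);
        char byte;
        (void) !read(lock[0], &byte, 1);
        close(lock[0]);
        // the real implementation exec()s gdb or lldb against the parent pid here
        _exit(0);
    } else {                    // parent: permit tracing, then release the child
        close(lock[0]);
        (void) !write(lock[1], "x", 1);
        close(lock[1]);
        waitpid(child, NULL, 0);
    }
}

int main(void) {
    backtrace_via_child();
    return 0;
}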
@@ -217,6 +222,8 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
     abort();
 }
+// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
 //
 // logging
 //

ggml/src/ggml.cpp (new file, 26 lines)

@@ -0,0 +1,26 @@
+#include "ggml-impl.h"
+#include <cstdlib>
+#include <exception>
+static std::terminate_handler previous_terminate_handler;
+GGML_NORETURN static void ggml_uncaught_exception() {
+    ggml_print_backtrace();
+    if (previous_terminate_handler) {
+        previous_terminate_handler();
+    }
+    abort(); // unreachable unless previous_terminate_handler was nullptr
+}
+static bool ggml_uncaught_exception_init = []{
+    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
+    if (GGML_NO_BACKTRACE) {
+        return false;
+    }
+    const auto prev{std::get_terminate()};
+    GGML_ASSERT(prev != ggml_uncaught_exception);
+    previous_terminate_handler = prev;
+    std::set_terminate(ggml_uncaught_exception);
+    return true;
+}();
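
The new translation unit above installs ggml_print_backtrace as the std::terminate handler during static initialization (skipped when GGML_NO_BACKTRACE is set) and chains to whatever handler was registered before it, so an uncaught C++ exception now prints a backtrace before the process aborts. A rough standalone illustration of the same mechanism follows, with a stderr message standing in for the real backtrace helper; on_uncaught_exception and handler_installed are illustrative names.

#include <cstdio>
#include <cstdlib>
#include <exception>
#include <stdexcept>

static std::terminate_handler previous_handler;

// Stand-in for ggml_print_backtrace(): runs when std::terminate() is reached
// (e.g. on an uncaught exception), then defers to the previously installed handler.
static void on_uncaught_exception() {
    std::fputs("uncaught exception: a backtrace would be printed here\n", stderr);
    if (previous_handler) {
        previous_handler();
    }
    std::abort(); // only reached if no previous handler was installed
}

// Mirrors ggml_uncaught_exception_init: register the handler before main() runs.
[[maybe_unused]] static bool handler_installed = [] {
    previous_handler = std::get_terminate();
    std::set_terminate(on_uncaught_exception);
    return true;
}();

int main() {
    throw std::runtime_error("boom"); // reaches on_uncaught_exception(), then aborts
}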


@@ -358,13 +358,29 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     isggufv1 = false;
     if (ok && gr.read(ctx->version)) {
-        if (ctx->version == 1) {
+        if (ok && ctx->version == 0) {
+            GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
+            ok = false;
+        }
+        /*
+         * bit layout is different when reading non-native endian models.
+         * assuming that the GGUF version is 3, the non-native endian model
+         * would read it as 0x30000000. we can use the AND operation against
+         * the last 4 hexadecimal digits to check if the model is the same
+         * endianness as the host system.
+         */
+        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
+            GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
+            ok = false;
+        }
+        if (ok && ctx->version == 1) {
             fprintf(stderr, "%s: GGUFv1 is deprecated, please use a more up-to-date version!\n", __func__);
             // ok = false;
             isggufv1 = true;
         }
-        if (ctx->version > GGUF_VERSION) {
-            fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software is designed for version %d, things may break.\n",
+        if (ok && ctx->version > GGUF_VERSION) {
+            GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
                 __func__, ctx->version, GGUF_VERSION);
             // ok = false;
         }
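
The endianness heuristic added above relies on real GGUF versions being small integers: byte-swapping a small 32-bit value leaves its low 16 bits zero, so (version & 0x0000FFFF) == 0 strongly suggests the file was written with the opposite byte order. Here is a small standalone illustration of the check, independent of gguf.cpp; bswap32 is a local helper for the demo, not a gguf API.

#include <cstdint>
#include <cstdio>

// Byte-swap a 32-bit value, mimicking what happens when a GGUF file written
// on a machine of one endianness is read verbatim on the other.
static uint32_t bswap32(uint32_t v) {
    return ((v & 0x000000FFu) << 24) |
           ((v & 0x0000FF00u) <<  8) |
           ((v & 0x00FF0000u) >>  8) |
           ((v & 0xFF000000u) >> 24);
}

int main() {
    const uint32_t version = 3;                 // a real GGUF version
    const uint32_t swapped = bswap32(version);  // what a mismatched host would read

    // The check from the hunk above: a sane version has non-zero low 16 bits,
    // while a byte-swapped small version does not.
    std::printf("native : 0x%08X -> %s\n", version,
                (version & 0x0000FFFFu) == 0 ? "endianness mismatch?" : "ok");
    std::printf("swapped: 0x%08X -> %s\n", swapped,
                (swapped & 0x0000FFFFu) == 0 ? "endianness mismatch?" : "ok");
    return 0;
}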


@@ -2319,9 +2319,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
     std::string model_name;
     std::string tokenizer_pre;
+    std::string general_arch;
     ml.get_key(LLM_KV_GENERAL_NAME, model_name, false);
     ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
+    ml.get_key(LLM_KV_GENERAL_ARCHITECTURE, general_arch, false);
     // model name to lowercase
     std::transform(model_name.begin(), model_name.end(), model_name.begin(),
@@ -2330,8 +2332,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         }
     );
-    // set attributes by model/tokenizer name
-    if (_contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})) {
+    // set attributes by model/tokenizer/architecture name
+    if (false
+        || _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
+        || _contains_any(general_arch, {"nomic-bert-moe"})
+        ) {
         _set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
     } else if (_contains_any(model_name, {"phi-3", "phi3"})) {
         for (auto id : cache_special_tokens) {
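
_contains_any in the hunk above does a substring match of a name against a list of candidates; the if (false || ... || ...) formatting simply lets each alternative sit on its own line. A simplified stand-in follows (the real helper in llama-vocab.cpp may differ in signature), showing how the new architecture-based match selects nomic-bert-moe; contains_any here is an illustrative name.

#include <initializer_list>
#include <string>

// Simplified stand-in for the _contains_any helper used above: true if
// `haystack` contains any of the given substrings.
static bool contains_any(const std::string & haystack,
                         std::initializer_list<std::string> needles) {
    for (const auto & n : needles) {
        if (haystack.find(n) != std::string::npos) {
            return true;
        }
    }
    return false;
}

int main() {
    const std::string general_arch = "nomic-bert-moe";
    // Mirrors the new condition: the <mask> token gets LSTRIP for this architecture.
    return contains_any(general_arch, {"nomic-bert-moe"}) ? 0 : 1;
}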