mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-06 08:01:27 +00:00
model: support youtu-vl model (#18479)
* Support Youtu-VL Model
* merge code
* fix bug
* revert qwen2 code & support rsplit in minja.hpp
* update warm info
* fix annotation
* u
* revert minja.hpp
* fix
* Do not write routed_scaling_factor to gguf when routed_scaling_factor is None
* fix expert_weights_scale
* LGTM after whitespace fixes
* fix
* fix
* fix
* layers to layer_index
* enum fix

---------

Co-authored-by: Xuan-Son Nguyen <son@huggingface.co>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
parent
3ccccc83f7
commit
ced765be44
17 changed files with 473 additions and 36 deletions
|
|
@@ -1683,7 +1683,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
ml.get_key(LLM_KV_ATTENTION_VALUE_LENGTH_MLA, hparams.n_embd_head_v_mla, false);
|
||||
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
|
||||
ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared);
|
||||
ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
|
||||
ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale, false);
|
||||
ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);
|
||||
ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
|
||||
if (hparams.expert_gating_func == LLAMA_EXPERT_GATING_FUNC_TYPE_NONE) {
|
||||
|
|
@@ -4785,7 +4785,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
|||
|
||||
// output
|
||||
output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
|
||||
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0);
|
||||
// try to load output.weight, if not found, use token_embd (tied embeddings)
|
||||
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
|
||||
if (!output) {
|
||||
output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_layer; ++i) {
|
||||
auto & layer = layers[i];
|
||||
|
|
@@ -4848,7 +4852,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
|||
|
||||
// output
|
||||
output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
|
||||
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0);
|
||||
// try to load output.weight, if not found, use token_embd (tied embeddings)
|
||||
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
|
||||
if (!output) {
|
||||
output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_layer; ++i) {
|
||||
auto & layer = layers[i];
|
||||
|
|
|
|||
|
|
@@ -314,6 +314,12 @@ struct llm_tokenizer_bpe : llm_tokenizer {
|
|||
"[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_YOUTU:
|
||||
regex_exprs = {
|
||||
"[가-힣ㄱ-ㆎ]+|[!…“”‘’—:;,、-〿︰-﹏]+|[ㄅ-ㄯ]+|[一-龥-ゟ゠-ヿ]+",
|
||||
"[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
||||
};
|
||||
break;
|
||||
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
|
||||
regex_exprs = {
|
||||
"[\r\n]",
|
||||
|
|
@@ -1861,6 +1867,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
tokenizer_pre == "deepseek-v3") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM;
|
||||
clean_spaces = false;
|
||||
} else if (
|
||||
tokenizer_pre == "youtu") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_YOUTU;
|
||||
clean_spaces = false;
|
||||
ignore_merges = true;
|
||||
} else if (
|
||||
tokenizer_pre == "falcon") {
|
||||
pre_type = LLAMA_VOCAB_PRE_TYPE_FALCON;
|
||||
|
|
|
|||
|
|
@@ -52,6 +52,7 @@ enum llama_vocab_pre_type {
|
|||
LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2 = 41,
|
||||
LLAMA_VOCAB_PRE_TYPE_AFMOE = 42,
|
||||
LLAMA_VOCAB_PRE_TYPE_SOLAR_OPEN = 43,
|
||||
LLAMA_VOCAB_PRE_TYPE_YOUTU = 44,
|
||||
};
|
||||
|
||||
struct LLM_KV;
|
||||
|
|
|
|||
|
|
@@ -215,7 +215,7 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr
|
|||
model.layers[il].ffn_exp_probs_b,
|
||||
n_expert, n_expert_used,
|
||||
LLM_FFN_SILU, hparams.expert_weights_norm,
|
||||
true, hparams.expert_weights_scale,
|
||||
hparams.expert_weights_scale, hparams.expert_weights_scale,
|
||||
(llama_expert_gating_func_type) hparams.expert_gating_func,
|
||||
il);
|
||||
cb(moe_out, "ffn_moe_out", il);
|
||||
|
|
|
|||
|
|
@@ -964,6 +964,11 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|||
{ "\\p{P}", unicode_cpt_flags::PUNCTUATION },
|
||||
{ "\\p{M}", unicode_cpt_flags::ACCENT_MARK },
|
||||
{ "\\p{S}", unicode_cpt_flags::SYMBOL },
|
||||
{ "\\p{Lu}", unicode_cpt_flags::LETTER }, // Uppercase letter
|
||||
{ "\\p{Ll}", unicode_cpt_flags::LETTER }, // Lowercase letter
|
||||
{ "\\p{Lt}", unicode_cpt_flags::LETTER }, // Titlecase letter
|
||||
{ "\\p{Lm}", unicode_cpt_flags::LETTER }, // Modifier letter
|
||||
{ "\\p{Lo}", unicode_cpt_flags::LETTER }, // Other letter
|
||||
};
|
||||
|
||||
static const std::map<int, int> k_ucat_cpt = {
|
||||
|
|
@@ -1074,22 +1079,26 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|||
continue;
|
||||
}
|
||||
|
||||
if (regex_expr[i + 0] == '\\' && i + 4 < regex_expr.size() &&
|
||||
// Match \p{...} Unicode properties of varying lengths
|
||||
if (regex_expr[i + 0] == '\\' && i + 3 < regex_expr.size() &&
|
||||
regex_expr[i + 1] == 'p' &&
|
||||
regex_expr[i + 2] == '{' &&
|
||||
regex_expr[i + 4] == '}') {
|
||||
const std::string pat = regex_expr.substr(i, 5);
|
||||
if (k_ucat_enum.find(pat) != k_ucat_enum.end()) {
|
||||
if (!inside) {
|
||||
regex_expr_collapsed += '[';
|
||||
regex_expr[i + 2] == '{') {
|
||||
// Find the closing brace
|
||||
size_t closing_brace = regex_expr.find('}', i + 3);
|
||||
if (closing_brace != std::string::npos && closing_brace <= i + 10) { // reasonable limit
|
||||
const std::string pat = regex_expr.substr(i, closing_brace - i + 1);
|
||||
if (k_ucat_enum.find(pat) != k_ucat_enum.end()) {
|
||||
if (!inside) {
|
||||
regex_expr_collapsed += '[';
|
||||
}
|
||||
regex_expr_collapsed += k_ucat_cpt.at(k_ucat_enum.at(pat));
|
||||
regex_expr_collapsed += k_ucat_map.at(k_ucat_enum.at(pat));
|
||||
if (!inside) {
|
||||
regex_expr_collapsed += ']';
|
||||
}
|
||||
i = closing_brace;
|
||||
continue;
|
||||
}
|
||||
regex_expr_collapsed += k_ucat_cpt.at(k_ucat_enum.at(pat));
|
||||
regex_expr_collapsed += k_ucat_map.at(k_ucat_enum.at(pat));
|
||||
if (!inside) {
|
||||
regex_expr_collapsed += ']';
|
||||
}
|
||||
i += 4;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue