fix qwen3, fixed sd, fixed glm4

Author: Concedo
Date:   2025-04-29 20:50:46 +08:00
commit c2802af9e8 (parent 4d8a7a6594)
7 changed files with 99 additions and 24 deletions

View file

@@ -1915,6 +1915,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     kcpp_data->n_ctx = clamped_max_context_length;
     max_context_limit_at_load = clamped_max_context_length;
     add_bos_token = !inputs.no_bos_token;
     if(!add_bos_token)
     {
         printf("\n======\nBOS token prefix was disabled! Your output may be degraded unless model was designed for it!\n======\n");
@@ -2368,6 +2369,14 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         }
     }
+
+    //we cannot really trust the add_bos flag in the vocab. old models don't set it.
+    //instead, we EXPLICITLY need to find an add_bos_token key == false to automatically turn it off.
+    if(!llamamodel->vocab.get_add_bos() && add_bos_token && file_format_meta.explicitly_no_bos)
+    {
+        printf("\nThis architecture has explicitly disabled the BOS token - if you need it, you must add it manually.\n");
+        add_bos_token = false;
+    }
     //warmup at least 33 tokens to trigger batch
     std::vector<int> tmp;
     for (int i = 1; i <= 33; ++i) {
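For reference, the decision above reduces to a small precedence rule: the user's no_bos_token setting wins, and the vocab's add_bos hint is only overridden when the GGUF metadata carries an explicit add_bos_token = false. A minimal standalone sketch of that rule, assuming a condensed view of the inputs (the struct and function names here are illustrative, not part of the codebase):

    // Illustrative condensation of the values involved in the hunk above.
    struct BosInputs {
        bool user_wants_bos;    // !inputs.no_bos_token
        bool vocab_add_bos;     // llamamodel->vocab.get_add_bos()
        bool explicitly_no_bos; // add_bos_token key exists in the GGUF AND is set to false
    };

    // Returns whether a BOS token should be prepended, mirroring the loader logic above:
    // the user setting wins, and only an explicit metadata 'false' can switch it off again.
    static bool resolve_add_bos(const BosInputs &in) {
        bool add_bos = in.user_wants_bos;
        if (!in.vocab_add_bos && add_bos && in.explicitly_no_bos) {
            add_bos = false;
        }
        return add_bos;
    }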
@@ -3180,6 +3189,30 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
     }
+
+    //need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
+    if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
+        std::string temp = gpttype_get_chat_template();
+        if (temp.find("[gMASK]<sop>") != std::string::npos) {
+            if (addedmemory == "") {
+                if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
+                    kcpp_data->prompt.erase(0, 7);
+                }
+                if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
+                    kcpp_data->prompt.erase(0, 5);
+                }
+                addedmemory = "<sop>";
+            } else {
+                if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
+                    addedmemory.erase(0, 7);
+                }
+                if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
+                    addedmemory.erase(0, 5);
+                }
+                addedmemory = "<sop>" + addedmemory;
+            }
+        }
+    }
     bool stream_sse = inputs.stream_sse;
     bool allow_regular_prints = (!is_quiet && debugmode!=-1);
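The rfind(prefix, 0) == 0 pattern used above is the usual C++ idiom for "starts with". A minimal sketch of the same normalization applied to one string, so leading [gMASK]/<sop> markers are dropped before a single <sop> is re-injected (the helper names are illustrative, not from the codebase):

    #include <string>

    // Remove prefix from s if s starts with it; returns true when something was stripped.
    static bool strip_prefix(std::string &s, const std::string &prefix) {
        if (s.rfind(prefix, 0) == 0) { // rfind anchored at position 0 == "starts with"
            s.erase(0, prefix.size());
            return true;
        }
        return false;
    }

    // Normalize a GLM4-style prompt/memory string: drop any leading [gMASK] or <sop>,
    // then guarantee exactly one leading <sop>.
    static std::string ensure_single_sop(std::string text) {
        strip_prefix(text, "[gMASK]");
        strip_prefix(text, "<sop>");
        return "<sop>" + text;
    }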

View file

@@ -0,0 +1,8 @@
+{
+    "system_start": "<|im_start|>system\n",
+    "system_end": "<|im_end|>\n",
+    "user_start": "<|im_start|>user\n",
+    "user_end": "<|im_end|>\n",
+    "assistant_start": "<|im_start|>assistant\n",
+    "assistant_end": "<|im_end|>\n<think>\n\n</think>\n"
+}

View file

@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
 -->
 <script>
-const LITEVER = 233;
+const LITEVER = 234;
 const urlParams = new URLSearchParams(window.location.search);
 var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
 const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -3171,6 +3171,7 @@ Current version indicated by LITEVER below.
     instruct_systag_end: "",
     instruct_sysprompt: "",
     instruct_has_markdown: true,
+    instruct_has_latex: true,
     placeholder_tags: true,
     render_special_tags: false,
     request_logprobs: false,
@@ -3330,6 +3331,16 @@ Current version indicated by LITEVER below.
     },
     {
         "id":3,
+        "name":"ChatML (No Think)",
+        "user":"<|im_start|>user\\n",
+        "user_end":"<|im_end|>\\n",
+        "assistant":"<|im_start|>assistant\\n<think>\\n\\n</think>\\n",
+        "assistant_end":"<|im_end|>\\n",
+        "system":"<|im_start|>system\\n",
+        "system_end":"<|im_end|>\\n",
+    },
+    {
+        "id":4,
         "name":"CommandR",
         "user":"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
         "user_end":"<|END_OF_TURN_TOKEN|>",
@@ -3339,7 +3350,7 @@ Current version indicated by LITEVER below.
         "system_end":"<|END_OF_TURN_TOKEN|>",
     },
     {
-        "id":4,
+        "id":5,
         "name":"Gemma 2 & 3",
         "user":"<start_of_turn>user\\n",
         "user_end":"<end_of_turn>\\n",
@@ -3349,7 +3360,7 @@ Current version indicated by LITEVER below.
         "system_end":"<end_of_turn>\\n",
     },
     {
-        "id":5,
+        "id":6,
         "name":"Llama 2 Chat",
         "user":"[INST] ",
         "user_end":"",
@@ -3359,7 +3370,7 @@ Current version indicated by LITEVER below.
         "system_end":"",
     },
     {
-        "id":6,
+        "id":7,
         "name":"Llama 3 Chat",
         "user":"<|start_header_id|>user<|end_header_id|>\\n\\n",
         "user_end":"<|eot_id|>",
@@ -3369,7 +3380,7 @@ Current version indicated by LITEVER below.
         "system_end":"<|eot_id|>",
     },
     {
-        "id":7,
+        "id":8,
         "name":"Llama 4 Chat",
         "user":"<|header_start|>user<|header_end|>\\n\\n",
         "user_end":"<|eot|>",
@@ -3379,7 +3390,7 @@ Current version indicated by LITEVER below.
         "system_end":"<|eot|>",
     },
     {
-        "id":8,
+        "id":9,
         "name":"Metharme",
         "user":"<|user|>",
         "user_end":"",
@@ -3389,7 +3400,7 @@ Current version indicated by LITEVER below.
         "system_end":"",
     },
     {
-        "id":9,
+        "id":10,
         "name":"Mistral V1",
         "user":" [INST] ",
         "user_end":"",
@@ -3399,7 +3410,7 @@ Current version indicated by LITEVER below.
         "system_end":"",
     },
     {
-        "id":10,
+        "id":11,
         "name":"Mistral V2 & V3",
         "user":"[INST] ",
         "user_end":"",
@@ -3409,7 +3420,7 @@ Current version indicated by LITEVER below.
         "system_end":"",
     },
     {
-        "id":11,
+        "id":12,
         "name":"Mistral V7 & V3-Tekken",
         "user":"[INST]",
         "user_end":"",
@@ -3419,7 +3430,7 @@ Current version indicated by LITEVER below.
         "system_end":"[/SYSTEM_PROMPT]",
     },
     {
-        "id":12,
+        "id":13,
         "name":"Phi-3 Mini",
         "user":"<|user|>\\n",
         "user_end":"<|end|>\\n",
@@ -3429,7 +3440,7 @@ Current version indicated by LITEVER below.
         "system_end":"<|end|>\\n",
     },
     {
-        "id":13,
+        "id":14,
         "name":"Vicuna",
         "user":"\\nUSER: ",
         "user_end":"",
@@ -3439,7 +3450,7 @@ Current version indicated by LITEVER below.
         "system_end":"",
     },
     {
-        "id":14,
+        "id":15,
         "name":"Deepseek V2.5",
         "user":"<User>",
         "user_end":"<end▁of▁sentence>",
@@ -3449,7 +3460,7 @@ Current version indicated by LITEVER below.
         "system_end":"",
     },
     {
-        "id":15,
+        "id":16,
         "name":"GLM-4",
         "user":"<|user|>\\n",
         "user_end":"",
@@ -5378,7 +5389,7 @@ Current version indicated by LITEVER below.
     navigator.clipboard.writeText(innercode);
 }

-function simpleMarkdown(text) {
+function simpleMarkdown(text, renderLatex) {
     const escapeHTML = (str) => str.replace(/</g, "&lt;").replace(/>/g, "&gt;");
     const highlightCode = (code) => {
         let cpybtn = `<button class="unselectable" onclick="return copyMarkdownCode(this)" style="float:right;">Copy</button>`;
@@ -5501,7 +5512,10 @@ Current version indicated by LITEVER below.
     .replace(/ \n/g, "\n<br/>");
     md = replaceTabbedCodeblocks(md);
     md = md.replace(/<\/code\><\/pre\>\n<pre\><code\>/g, "\n");
-    md = replaceLatex(md);
+    if(renderLatex)
+    {
+        md = replaceLatex(md);
+    }
     md = md.replace(/<\/ul>\n/gm, "</ul>").replace(/<\/ol>\n/gm, "</ol>");
     md = md.replace(/\\([`_~\*\+\-\.\^\\\<\>\(\)\[\]])/gm, "$1");
     return md;
@@ -11149,6 +11163,7 @@ Current version indicated by LITEVER below.
     document.getElementById("adventure_context_mod").checked = localsettings.adventure_context_mod;
     document.getElementById("chat_context_mod").checked = localsettings.chat_context_mod;
     document.getElementById("instruct_has_markdown").checked = localsettings.instruct_has_markdown;
+    document.getElementById("instruct_has_latex").checked = localsettings.instruct_has_latex;
     document.getElementById("placeholder_tags").checked = localsettings.placeholder_tags;
     document.getElementById("run_in_background").checked = run_in_background;
     document.getElementById("auto_ctxlen").checked = localsettings.auto_ctxlen;
@@ -11601,6 +11616,7 @@ Current version indicated by LITEVER below.
     localsettings.adventure_context_mod = (document.getElementById("adventure_context_mod").checked ? true : false);
     localsettings.chat_context_mod = (document.getElementById("chat_context_mod").checked ? true : false);
     localsettings.instruct_has_markdown = (document.getElementById("instruct_has_markdown").checked ? true : false);
+    localsettings.instruct_has_latex = (document.getElementById("instruct_has_latex").checked ? true : false);
     localsettings.placeholder_tags = (document.getElementById("placeholder_tags").checked ? true : false);
     run_in_background = (document.getElementById("run_in_background").checked ? true : false);
     background_audio_loop(run_in_background);
@@ -17833,7 +17849,7 @@ Current version indicated by LITEVER below.
     {
         fulltxt += "```"; //force end code block
     }
-    fulltxt = simpleMarkdown(fulltxt);
+    fulltxt = simpleMarkdown(fulltxt,localsettings.instruct_has_latex);
 }

 let instruct_turns = repack_instruct_turns(fulltxt, `%SpcStg%`,`%SpcEtg%`, true);
@@ -18598,7 +18614,10 @@ Current version indicated by LITEVER below.
     {
         processed_msg += "```"; //force end code block
     }
-    processed_msg = simpleMarkdown(processed_msg);
+    if(localsettings.instruct_has_markdown)
+    {
+        processed_msg = simpleMarkdown(processed_msg,localsettings.instruct_has_latex);
+    }

     //convert the msg into images
     processed_msg = processed_msg.replace(/\[<\|p\|.+?\|p\|>\]/g, function (m) {
@@ -20357,7 +20376,7 @@ Current version indicated by LITEVER below.
     replacedText = replacedText.replace(/&quot;(.*?)&quot;/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech".
     if(localsettings.instruct_has_markdown)
     {
-        replacedText = simpleMarkdown(replacedText);
+        replacedText = simpleMarkdown(replacedText,localsettings.instruct_has_latex);
     }
     return `<span>${replacedText}</span>`;
 });
@@ -21270,9 +21289,14 @@ Current version indicated by LITEVER below.
 <div class="settingitem">
     <div class="settinglabel">
         <div class="justifyleft settingsmall">Enable Markdown <span class="helpicon">?<span
-            class="helptext">Allows the UI to use markdown formatting such as quotes, LaTeX, and code blocks.</span></span></div>
+            class="helptext">Allows the UI to use markdown formatting such as quotes and code blocks.</span></span></div>
         <input type="checkbox" title="Enabled Markdown" id="instruct_has_markdown" style="margin:0px 0px 0px auto;">
     </div>
+    <div class="settinglabel">
+        <div class="justifyleft settingsmall">Enable LaTeX <span class="helpicon">?<span
+            class="helptext">Allows the UI to render LaTeX within markdown formatting (Needs Markdown).</span></span></div>
+        <input type="checkbox" title="Enable LaTeX (Needs Markdown)" id="instruct_has_latex" style="margin:0px 0px 0px auto;">
+    </div>
     <div class="settinglabel">
         <div class="justifyleft settingsmall">Trim Sentences <span class="helpicon">?<span
             class="helptext">Trims incomplete sentences in AI output.</span></span></div>

View file

@@ -860,7 +860,7 @@ def dump_gguf_metadata(file_path): #if you're gonna copy this into your own proj
         if dt_translated=="arr":
             print(f"{dt_translated}: {curr_key} = [{len(curr_val)}]")
         elif dt_translated=="str":
-            print(f"{dt_translated}: {curr_key} = {curr_val[:100]}")
+            print(f"{dt_translated}: {curr_key} = {curr_val[:256]}")
         else:
             print(f"{dt_translated}: {curr_key} = {curr_val}")
     print("\n*** GGUF TENSOR INFO ***")

View file

@@ -291,6 +291,15 @@ void print_tok_vec(std::vector<float> &embd)
     if (keyidx != -1) {
         freq_base_train = gguf_get_val_f32(ctx, keyidx);
     }
+    fkey = "tokenizer.ggml.add_bos_token";
+    keyidx = gguf_find_key(ctx, fkey.c_str());
+    if (keyidx != -1) {
+        bool result = gguf_get_val_bool(ctx, keyidx);
+        if(result==false)
+        {
+            fileformatmeta->explicitly_no_bos = true;
+        }
+    }
     int filever = gguf_get_version(ctx);
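Note that the flag is deliberately one-sided: a missing tokenizer.ggml.add_bos_token key leaves explicitly_no_bos at its default false, a key set to true also leaves it false, and only a key that is present and set to false flips it. A compact sketch of that rule, assuming the ggml GGUF headers are already included as in the surrounding file (gguf_find_key and gguf_get_val_bool are the same calls used above; the wrapper function name is illustrative):

    // True only when the add_bos_token key exists in the GGUF metadata AND is set to false.
    static bool detect_explicit_no_bos(const struct gguf_context * ctx) {
        const int keyidx = gguf_find_key(ctx, "tokenizer.ggml.add_bos_token");
        if (keyidx == -1) {
            return false; // key absent: the model states no opinion, leave BOS handling alone
        }
        return gguf_get_val_bool(ctx, keyidx) == false; // present and false => explicitly disabled
    }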

View file

@@ -71,6 +71,7 @@ struct FileFormatExtraMeta
     GGUFArch model_architecture = GGUFArch::ARCH_DEFAULT;
     int n_expert_count = 0;
     std::string model_architecture_str = "";
+    bool explicitly_no_bos = false; //only true if key exists AND is false
 };

 struct TopPicksData

View file

@@ -1639,15 +1639,15 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
     }
     struct ggml_init_params params;
-    params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
+    params.mem_size = static_cast<size_t>(20 * 1024 * 1024); // 20 MB increased by kcpp
     if (sd_version_is_sd3(sd_ctx->sd->version)) {
-        params.mem_size *= 3;
+        params.mem_size *= 2; //readjust by kcpp as above changed
     }
     if (sd_version_is_flux(sd_ctx->sd->version)) {
-        params.mem_size *= 4;
+        params.mem_size *= 2; //readjust by kcpp as above changed
     }
     if (sd_ctx->sd->stacked_id) {
-        params.mem_size += static_cast<size_t>(10 * 1024 * 1024); // 10 MB
+        params.mem_size += static_cast<size_t>(15 * 1024 * 1024); // 15 MB
     }
     params.mem_size += width * height * 3 * sizeof(float);
     params.mem_size *= batch_count;
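The net effect of these constants: the base scratch budget doubles from 10 MB to 20 MB, SD3 now gets 20*2 = 40 MB instead of 10*3 = 30 MB, Flux stays at 40 MB (20*2 versus the old 10*4), and the stacked-ID path adds 15 MB instead of 10 MB. A condensed sketch of the new sizing formula, with illustrative function and parameter names:

    #include <cstddef>

    // Mirrors the txt2img arena sizing above with the new constants.
    static size_t estimate_txt2img_arena(bool is_sd3, bool is_flux, bool stacked_id,
                                         int width, int height, int batch_count) {
        size_t mem = static_cast<size_t>(20) * 1024 * 1024;                   // 20 MB base
        if (is_sd3)     { mem *= 2; }                                         // 40 MB for SD3
        if (is_flux)    { mem *= 2; }                                         // 40 MB for Flux
        if (stacked_id) { mem += static_cast<size_t>(15) * 1024 * 1024; }     // +15 MB for stacked ID
        mem += static_cast<size_t>(width) * height * 3 * sizeof(float);       // decoded RGB float image
        mem *= static_cast<size_t>(batch_count);
        return mem;
    }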