fix qwen3, fixed sd, fixed glm4

This commit is contained in:
Concedo 2025-04-29 20:50:46 +08:00
parent 4d8a7a6594
commit c2802af9e8
7 changed files with 99 additions and 24 deletions

View file

@ -1915,6 +1915,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
kcpp_data->n_ctx = clamped_max_context_length;
max_context_limit_at_load = clamped_max_context_length;
add_bos_token = !inputs.no_bos_token;
if(!add_bos_token)
{
printf("\n======\nBOS token prefix was disabled! Your output may be degraded unless model was designed for it!\n======\n");
@ -2368,6 +2369,14 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
}
}
//we cannot really trust the add bos in vocab. old models don't set it.
// instead, we EXPLICITLY need to find the add_bos_token key==false to automatically set it off.
if(!llamamodel->vocab.get_add_bos() && add_bos_token && file_format_meta.explicitly_no_bos)
{
printf("\nThis architecture has explicitly disabled the BOS token - if you need it, you must add it manually.\n");
add_bos_token = false;
}
//warmup at least 33 tokens to trigger batch
std::vector<int> tmp;
for (int i = 1; i <= 33; ++i) {
@ -3180,6 +3189,30 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
}
}
//need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
std::string temp = gpttype_get_chat_template();
if (temp.find("[gMASK]<sop>") != std::string::npos) {
if (addedmemory == "") {
if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
kcpp_data->prompt.erase(0, 7);
}
if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
kcpp_data->prompt.erase(0, 5);
}
addedmemory = "<sop>";
} else {
if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
addedmemory.erase(0, 7);
}
if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
addedmemory.erase(0, 5);
}
addedmemory = "<sop>" + addedmemory;
}
}
}
bool stream_sse = inputs.stream_sse;
bool allow_regular_prints = (!is_quiet && debugmode!=-1);

View file

@ -0,0 +1,8 @@
{
"system_start": "<|im_start|>system\n",
"system_end": "<|im_end|>\n",
"user_start": "<|im_start|>user\n",
"user_end": "<|im_end|>\n",
"assistant_start": "<|im_start|>assistant\n<think>\n\n</think>\n",
"assistant_end": "<|im_end|>\n"
}

View file

@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
-->
<script>
const LITEVER = 233;
const LITEVER = 234;
const urlParams = new URLSearchParams(window.location.search);
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@ -3171,6 +3171,7 @@ Current version indicated by LITEVER below.
instruct_systag_end: "",
instruct_sysprompt: "",
instruct_has_markdown: true,
instruct_has_latex: true,
placeholder_tags: true,
render_special_tags: false,
request_logprobs: false,
@ -3330,6 +3331,16 @@ Current version indicated by LITEVER below.
},
{
"id":3,
"name":"ChatML (No Think)",
"user":"<|im_start|>user\\n",
"user_end":"<|im_end|>\\n",
"assistant":"<|im_start|>assistant\\n<think>\\n\\n</think>\\n",
"assistant_end":"<|im_end|>\\n",
"system":"<|im_start|>system\\n",
"system_end":"<|im_end|>\\n",
},
{
"id":4,
"name":"CommandR",
"user":"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
"user_end":"<|END_OF_TURN_TOKEN|>",
@ -3339,7 +3350,7 @@ Current version indicated by LITEVER below.
"system_end":"<|END_OF_TURN_TOKEN|>",
},
{
"id":4,
"id":5,
"name":"Gemma 2 & 3",
"user":"<start_of_turn>user\\n",
"user_end":"<end_of_turn>\\n",
@ -3349,7 +3360,7 @@ Current version indicated by LITEVER below.
"system_end":"<end_of_turn>\\n",
},
{
"id":5,
"id":6,
"name":"Llama 2 Chat",
"user":"[INST] ",
"user_end":"",
@ -3359,7 +3370,7 @@ Current version indicated by LITEVER below.
"system_end":"",
},
{
"id":6,
"id":7,
"name":"Llama 3 Chat",
"user":"<|start_header_id|>user<|end_header_id|>\\n\\n",
"user_end":"<|eot_id|>",
@ -3369,7 +3380,7 @@ Current version indicated by LITEVER below.
"system_end":"<|eot_id|>",
},
{
"id":7,
"id":8,
"name":"Llama 4 Chat",
"user":"<|header_start|>user<|header_end|>\\n\\n",
"user_end":"<|eot|>",
@ -3379,7 +3390,7 @@ Current version indicated by LITEVER below.
"system_end":"<|eot|>",
},
{
"id":8,
"id":9,
"name":"Metharme",
"user":"<|user|>",
"user_end":"",
@ -3389,7 +3400,7 @@ Current version indicated by LITEVER below.
"system_end":"",
},
{
"id":9,
"id":10,
"name":"Mistral V1",
"user":" [INST] ",
"user_end":"",
@ -3399,7 +3410,7 @@ Current version indicated by LITEVER below.
"system_end":"",
},
{
"id":10,
"id":11,
"name":"Mistral V2 & V3",
"user":"[INST] ",
"user_end":"",
@ -3409,7 +3420,7 @@ Current version indicated by LITEVER below.
"system_end":"",
},
{
"id":11,
"id":12,
"name":"Mistral V7 & V3-Tekken",
"user":"[INST]",
"user_end":"",
@ -3419,7 +3430,7 @@ Current version indicated by LITEVER below.
"system_end":"[/SYSTEM_PROMPT]",
},
{
"id":12,
"id":13,
"name":"Phi-3 Mini",
"user":"<|user|>\\n",
"user_end":"<|end|>\\n",
@ -3429,7 +3440,7 @@ Current version indicated by LITEVER below.
"system_end":"<|end|>\\n",
},
{
"id":13,
"id":14,
"name":"Vicuna",
"user":"\\nUSER: ",
"user_end":"",
@ -3439,7 +3450,7 @@ Current version indicated by LITEVER below.
"system_end":"",
},
{
"id":14,
"id":15,
"name":"Deepseek V2.5",
"user":"<User>",
"user_end":"<end▁of▁sentence>",
@ -3449,7 +3460,7 @@ Current version indicated by LITEVER below.
"system_end":"",
},
{
"id":15,
"id":16,
"name":"GLM-4",
"user":"<|user|>\\n",
"user_end":"",
@ -5378,7 +5389,7 @@ Current version indicated by LITEVER below.
navigator.clipboard.writeText(innercode);
}
function simpleMarkdown(text) {
function simpleMarkdown(text, renderLatex) {
const escapeHTML = (str) => str.replace(/</g, "&lt;").replace(/>/g, "&gt;");
const highlightCode = (code) => {
let cpybtn = `<button class="unselectable" onclick="return copyMarkdownCode(this)" style="float:right;">Copy</button>`;
@ -5501,7 +5512,10 @@ Current version indicated by LITEVER below.
.replace(/ \n/g, "\n<br/>");
md = replaceTabbedCodeblocks(md);
md = md.replace(/<\/code\><\/pre\>\n<pre\><code\>/g, "\n");
md = replaceLatex(md);
if(renderLatex)
{
md = replaceLatex(md);
}
md = md.replace(/<\/ul>\n/gm, "</ul>").replace(/<\/ol>\n/gm, "</ol>");
md = md.replace(/\\([`_~\*\+\-\.\^\\\<\>\(\)\[\]])/gm, "$1");
return md;
@ -11149,6 +11163,7 @@ Current version indicated by LITEVER below.
document.getElementById("adventure_context_mod").checked = localsettings.adventure_context_mod;
document.getElementById("chat_context_mod").checked = localsettings.chat_context_mod;
document.getElementById("instruct_has_markdown").checked = localsettings.instruct_has_markdown;
document.getElementById("instruct_has_latex").checked = localsettings.instruct_has_latex;
document.getElementById("placeholder_tags").checked = localsettings.placeholder_tags;
document.getElementById("run_in_background").checked = run_in_background;
document.getElementById("auto_ctxlen").checked = localsettings.auto_ctxlen;
@ -11601,6 +11616,7 @@ Current version indicated by LITEVER below.
localsettings.adventure_context_mod = (document.getElementById("adventure_context_mod").checked ? true : false);
localsettings.chat_context_mod = (document.getElementById("chat_context_mod").checked ? true : false);
localsettings.instruct_has_markdown = (document.getElementById("instruct_has_markdown").checked ? true : false);
localsettings.instruct_has_latex = (document.getElementById("instruct_has_latex").checked ? true : false);
localsettings.placeholder_tags = (document.getElementById("placeholder_tags").checked ? true : false);
run_in_background = (document.getElementById("run_in_background").checked ? true : false);
background_audio_loop(run_in_background);
@ -17833,7 +17849,7 @@ Current version indicated by LITEVER below.
{
fulltxt += "```"; //force end code block
}
fulltxt = simpleMarkdown(fulltxt);
fulltxt = simpleMarkdown(fulltxt,localsettings.instruct_has_latex);
}
let instruct_turns = repack_instruct_turns(fulltxt, `%SpcStg%`,`%SpcEtg%`, true);
@ -18598,7 +18614,10 @@ Current version indicated by LITEVER below.
{
processed_msg += "```"; //force end code block
}
processed_msg = simpleMarkdown(processed_msg);
if(localsettings.instruct_has_markdown)
{
processed_msg = simpleMarkdown(processed_msg,localsettings.instruct_has_latex);
}
//convert the msg into images
processed_msg = processed_msg.replace(/\[<\|p\|.+?\|p\|>\]/g, function (m) {
@ -20357,7 +20376,7 @@ Current version indicated by LITEVER below.
replacedText = replacedText.replace(/&quot;(.*?)&quot;/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech".
if(localsettings.instruct_has_markdown)
{
replacedText = simpleMarkdown(replacedText);
replacedText = simpleMarkdown(replacedText,localsettings.instruct_has_latex);
}
return `<span>${replacedText}</span>`;
});
@ -21270,9 +21289,14 @@ Current version indicated by LITEVER below.
<div class="settingitem">
<div class="settinglabel">
<div class="justifyleft settingsmall">Enable Markdown <span class="helpicon">?<span
class="helptext">Allows the UI to use markdown formatting such as quotes, LaTeX, and code blocks.</span></span></div>
class="helptext">Allows the UI to use markdown formatting such as quotes and code blocks.</span></span></div>
<input type="checkbox" title="Enable Markdown" id="instruct_has_markdown" style="margin:0px 0px 0px auto;">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Enable LaTeX <span class="helpicon">?<span
class="helptext">Allows the UI to render LaTeX within markdown formatting (Needs Markdown).</span></span></div>
<input type="checkbox" title="Enable LaTeX (Needs Markdown)" id="instruct_has_latex" style="margin:0px 0px 0px auto;">
</div>
<div class="settinglabel">
<div class="justifyleft settingsmall">Trim Sentences <span class="helpicon">?<span
class="helptext">Trims incomplete sentences in AI output.</span></span></div>

View file

@ -860,7 +860,7 @@ def dump_gguf_metadata(file_path): #if you're gonna copy this into your own proj
if dt_translated=="arr":
print(f"{dt_translated}: {curr_key} = [{len(curr_val)}]")
elif dt_translated=="str":
print(f"{dt_translated}: {curr_key} = {curr_val[:100]}")
print(f"{dt_translated}: {curr_key} = {curr_val[:256]}")
else:
print(f"{dt_translated}: {curr_key} = {curr_val}")
print("\n*** GGUF TENSOR INFO ***")

View file

@ -291,6 +291,15 @@ void print_tok_vec(std::vector<float> &embd)
if (keyidx != -1) {
freq_base_train = gguf_get_val_f32(ctx, keyidx);
}
fkey = "tokenizer.ggml.add_bos_token";
keyidx = gguf_find_key(ctx, fkey.c_str());
if (keyidx != -1) {
bool result = gguf_get_val_bool(ctx, keyidx);
if(result==false)
{
fileformatmeta->explicitly_no_bos = true;
}
}
int filever = gguf_get_version(ctx);

View file

@ -71,6 +71,7 @@ struct FileFormatExtraMeta
GGUFArch model_architecture = GGUFArch::ARCH_DEFAULT;
int n_expert_count = 0;
std::string model_architecture_str = "";
bool explicitly_no_bos = false; //only true if key exists AND is false
};
struct TopPicksData

View file

@ -1639,15 +1639,15 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
}
struct ggml_init_params params;
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
params.mem_size = static_cast<size_t>(20 * 1024 * 1024); // 20 MB increased by kcpp
if (sd_version_is_sd3(sd_ctx->sd->version)) {
params.mem_size *= 3;
params.mem_size *= 2; //readjust by kcpp as above changed
}
if (sd_version_is_flux(sd_ctx->sd->version)) {
params.mem_size *= 4;
params.mem_size *= 2; //readjust by kcpp as above changed
}
if (sd_ctx->sd->stacked_id) {
params.mem_size += static_cast<size_t>(10 * 1024 * 1024); // 10 MB
params.mem_size += static_cast<size_t>(15 * 1024 * 1024); // 15 MB
}
params.mem_size += width * height * 3 * sizeof(float);
params.mem_size *= batch_count;