mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
fix qwen3, fixed sd, fixed glm4
This commit is contained in:
parent
4d8a7a6594
commit
c2802af9e8
7 changed files with 99 additions and 24 deletions
|
@ -1915,6 +1915,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
kcpp_data->n_ctx = clamped_max_context_length;
|
||||
max_context_limit_at_load = clamped_max_context_length;
|
||||
add_bos_token = !inputs.no_bos_token;
|
||||
|
||||
if(!add_bos_token)
|
||||
{
|
||||
printf("\n======\nBOS token prefix was disabled! Your output may be degraded unless model was designed for it!\n======\n");
|
||||
|
@ -2368,6 +2369,14 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
}
|
||||
}
|
||||
|
||||
//we cannot really trust the add bos in vocab. old models don't set it.
|
||||
// instead, we EXPLICITLY need to find the add_bos_token key==false to automatically set it off.
|
||||
if(!llamamodel->vocab.get_add_bos() && add_bos_token && file_format_meta.explicitly_no_bos)
|
||||
{
|
||||
printf("\nThis architecture has explicitly disabled the BOS token - if you need it, you must add it manually.\n");
|
||||
add_bos_token = false;
|
||||
}
|
||||
|
||||
//warmup at least 33 tokens to trigger batch
|
||||
std::vector<int> tmp;
|
||||
for (int i = 1; i <= 33; ++i) {
|
||||
|
@ -3180,6 +3189,30 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
}
|
||||
}
|
||||
|
||||
//need to add a cursed hack to get coherency for GLM4, by ensuring injection for both sop and gmask
|
||||
if (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_GLM4) {
|
||||
std::string temp = gpttype_get_chat_template();
|
||||
if (temp.find("[gMASK]<sop>") != std::string::npos) {
|
||||
if (addedmemory == "") {
|
||||
if (kcpp_data->prompt.rfind("[gMASK]", 0) == 0) { //check startswith
|
||||
kcpp_data->prompt.erase(0, 7);
|
||||
}
|
||||
if (kcpp_data->prompt.rfind("<sop>", 0) == 0) { //check startswith
|
||||
kcpp_data->prompt.erase(0, 5);
|
||||
}
|
||||
addedmemory = "<sop>";
|
||||
} else {
|
||||
if (addedmemory.rfind("[gMASK]", 0) == 0) { //check startswith
|
||||
addedmemory.erase(0, 7);
|
||||
}
|
||||
if (addedmemory.rfind("<sop>", 0) == 0) { //check startswith
|
||||
addedmemory.erase(0, 5);
|
||||
}
|
||||
addedmemory = "<sop>" + addedmemory;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool stream_sse = inputs.stream_sse;
|
||||
bool allow_regular_prints = (!is_quiet && debugmode!=-1);
|
||||
|
||||
|
|
8
kcpp_adapters/ChatML-NoThink.json
Normal file
8
kcpp_adapters/ChatML-NoThink.json
Normal file
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"system_start": "<|im_start|>system\n",
|
||||
"system_end": "<|im_end|>\n",
|
||||
"user_start": "<|im_start|>user\n",
|
||||
"user_end": "<|im_end|>\n",
|
||||
"assistant_start": "<|im_start|>assistant\n",
|
||||
"assistant_end": "<|im_end|>\n<think>\n\n</think>\n"
|
||||
}
|
62
klite.embd
62
klite.embd
|
@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
|
|||
-->
|
||||
|
||||
<script>
|
||||
const LITEVER = 233;
|
||||
const LITEVER = 234;
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
|
||||
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
|
||||
|
@ -3171,6 +3171,7 @@ Current version indicated by LITEVER below.
|
|||
instruct_systag_end: "",
|
||||
instruct_sysprompt: "",
|
||||
instruct_has_markdown: true,
|
||||
instruct_has_latex: true,
|
||||
placeholder_tags: true,
|
||||
render_special_tags: false,
|
||||
request_logprobs: false,
|
||||
|
@ -3330,6 +3331,16 @@ Current version indicated by LITEVER below.
|
|||
},
|
||||
{
|
||||
"id":3,
|
||||
"name":"ChatML (No Think)",
|
||||
"user":"<|im_start|>user\\n",
|
||||
"user_end":"<|im_end|>\\n",
|
||||
"assistant":"<|im_start|>assistant\\n<think>\\n\\n</think>\\n",
|
||||
"assistant_end":"<|im_end|>\\n",
|
||||
"system":"<|im_start|>system\\n",
|
||||
"system_end":"<|im_end|>\\n",
|
||||
},
|
||||
{
|
||||
"id":4,
|
||||
"name":"CommandR",
|
||||
"user":"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
|
||||
"user_end":"<|END_OF_TURN_TOKEN|>",
|
||||
|
@ -3339,7 +3350,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"<|END_OF_TURN_TOKEN|>",
|
||||
},
|
||||
{
|
||||
"id":4,
|
||||
"id":5,
|
||||
"name":"Gemma 2 & 3",
|
||||
"user":"<start_of_turn>user\\n",
|
||||
"user_end":"<end_of_turn>\\n",
|
||||
|
@ -3349,7 +3360,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"<end_of_turn>\\n",
|
||||
},
|
||||
{
|
||||
"id":5,
|
||||
"id":6,
|
||||
"name":"Llama 2 Chat",
|
||||
"user":"[INST] ",
|
||||
"user_end":"",
|
||||
|
@ -3359,7 +3370,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"",
|
||||
},
|
||||
{
|
||||
"id":6,
|
||||
"id":7,
|
||||
"name":"Llama 3 Chat",
|
||||
"user":"<|start_header_id|>user<|end_header_id|>\\n\\n",
|
||||
"user_end":"<|eot_id|>",
|
||||
|
@ -3369,7 +3380,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"<|eot_id|>",
|
||||
},
|
||||
{
|
||||
"id":7,
|
||||
"id":8,
|
||||
"name":"Llama 4 Chat",
|
||||
"user":"<|header_start|>user<|header_end|>\\n\\n",
|
||||
"user_end":"<|eot|>",
|
||||
|
@ -3379,7 +3390,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"<|eot|>",
|
||||
},
|
||||
{
|
||||
"id":8,
|
||||
"id":9,
|
||||
"name":"Metharme",
|
||||
"user":"<|user|>",
|
||||
"user_end":"",
|
||||
|
@ -3389,7 +3400,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"",
|
||||
},
|
||||
{
|
||||
"id":9,
|
||||
"id":10,
|
||||
"name":"Mistral V1",
|
||||
"user":" [INST] ",
|
||||
"user_end":"",
|
||||
|
@ -3399,7 +3410,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"",
|
||||
},
|
||||
{
|
||||
"id":10,
|
||||
"id":11,
|
||||
"name":"Mistral V2 & V3",
|
||||
"user":"[INST] ",
|
||||
"user_end":"",
|
||||
|
@ -3409,7 +3420,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"",
|
||||
},
|
||||
{
|
||||
"id":11,
|
||||
"id":12,
|
||||
"name":"Mistral V7 & V3-Tekken",
|
||||
"user":"[INST]",
|
||||
"user_end":"",
|
||||
|
@ -3419,7 +3430,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"[/SYSTEM_PROMPT]",
|
||||
},
|
||||
{
|
||||
"id":12,
|
||||
"id":13,
|
||||
"name":"Phi-3 Mini",
|
||||
"user":"<|user|>\\n",
|
||||
"user_end":"<|end|>\\n",
|
||||
|
@ -3429,7 +3440,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"<|end|>\\n",
|
||||
},
|
||||
{
|
||||
"id":13,
|
||||
"id":14,
|
||||
"name":"Vicuna",
|
||||
"user":"\\nUSER: ",
|
||||
"user_end":"",
|
||||
|
@ -3439,7 +3450,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"",
|
||||
},
|
||||
{
|
||||
"id":14,
|
||||
"id":15,
|
||||
"name":"Deepseek V2.5",
|
||||
"user":"<|User|>",
|
||||
"user_end":"<|end▁of▁sentence|>",
|
||||
|
@ -3449,7 +3460,7 @@ Current version indicated by LITEVER below.
|
|||
"system_end":"",
|
||||
},
|
||||
{
|
||||
"id":15,
|
||||
"id":16,
|
||||
"name":"GLM-4",
|
||||
"user":"<|user|>\\n",
|
||||
"user_end":"",
|
||||
|
@ -5378,7 +5389,7 @@ Current version indicated by LITEVER below.
|
|||
navigator.clipboard.writeText(innercode);
|
||||
}
|
||||
|
||||
function simpleMarkdown(text) {
|
||||
function simpleMarkdown(text, renderLatex) {
|
||||
const escapeHTML = (str) => str.replace(/</g, "<").replace(/>/g, ">");
|
||||
const highlightCode = (code) => {
|
||||
let cpybtn = `<button class="unselectable" onclick="return copyMarkdownCode(this)" style="float:right;">Copy</button>`;
|
||||
|
@ -5501,7 +5512,10 @@ Current version indicated by LITEVER below.
|
|||
.replace(/ \n/g, "\n<br/>");
|
||||
md = replaceTabbedCodeblocks(md);
|
||||
md = md.replace(/<\/code\><\/pre\>\n<pre\><code\>/g, "\n");
|
||||
md = replaceLatex(md);
|
||||
if(renderLatex)
|
||||
{
|
||||
md = replaceLatex(md);
|
||||
}
|
||||
md = md.replace(/<\/ul>\n/gm, "</ul>").replace(/<\/ol>\n/gm, "</ol>");
|
||||
md = md.replace(/\\([`_~\*\+\-\.\^\\\<\>\(\)\[\]])/gm, "$1");
|
||||
return md;
|
||||
|
@ -11149,6 +11163,7 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("adventure_context_mod").checked = localsettings.adventure_context_mod;
|
||||
document.getElementById("chat_context_mod").checked = localsettings.chat_context_mod;
|
||||
document.getElementById("instruct_has_markdown").checked = localsettings.instruct_has_markdown;
|
||||
document.getElementById("instruct_has_latex").checked = localsettings.instruct_has_latex;
|
||||
document.getElementById("placeholder_tags").checked = localsettings.placeholder_tags;
|
||||
document.getElementById("run_in_background").checked = run_in_background;
|
||||
document.getElementById("auto_ctxlen").checked = localsettings.auto_ctxlen;
|
||||
|
@ -11601,6 +11616,7 @@ Current version indicated by LITEVER below.
|
|||
localsettings.adventure_context_mod = (document.getElementById("adventure_context_mod").checked ? true : false);
|
||||
localsettings.chat_context_mod = (document.getElementById("chat_context_mod").checked ? true : false);
|
||||
localsettings.instruct_has_markdown = (document.getElementById("instruct_has_markdown").checked ? true : false);
|
||||
localsettings.instruct_has_latex = (document.getElementById("instruct_has_latex").checked ? true : false);
|
||||
localsettings.placeholder_tags = (document.getElementById("placeholder_tags").checked ? true : false);
|
||||
run_in_background = (document.getElementById("run_in_background").checked ? true : false);
|
||||
background_audio_loop(run_in_background);
|
||||
|
@ -17833,7 +17849,7 @@ Current version indicated by LITEVER below.
|
|||
{
|
||||
fulltxt += "```"; //force end code block
|
||||
}
|
||||
fulltxt = simpleMarkdown(fulltxt);
|
||||
fulltxt = simpleMarkdown(fulltxt,localsettings.instruct_has_latex);
|
||||
}
|
||||
|
||||
let instruct_turns = repack_instruct_turns(fulltxt, `%SpcStg%`,`%SpcEtg%`, true);
|
||||
|
@ -18598,7 +18614,10 @@ Current version indicated by LITEVER below.
|
|||
{
|
||||
processed_msg += "```"; //force end code block
|
||||
}
|
||||
processed_msg = simpleMarkdown(processed_msg);
|
||||
if(localsettings.instruct_has_markdown)
|
||||
{
|
||||
processed_msg = simpleMarkdown(processed_msg,localsettings.instruct_has_latex);
|
||||
}
|
||||
|
||||
//convert the msg into images
|
||||
processed_msg = processed_msg.replace(/\[<\|p\|.+?\|p\|>\]/g, function (m) {
|
||||
|
@ -20357,7 +20376,7 @@ Current version indicated by LITEVER below.
|
|||
replacedText = replacedText.replace(/"(.*?)"/g, wrapperSpan(styleRole, 'speech')); // Apply the speech style to "speech".
|
||||
if(localsettings.instruct_has_markdown)
|
||||
{
|
||||
replacedText = simpleMarkdown(replacedText);
|
||||
replacedText = simpleMarkdown(replacedText,localsettings.instruct_has_latex);
|
||||
}
|
||||
return `<span>${replacedText}</span>`;
|
||||
});
|
||||
|
@ -21270,9 +21289,14 @@ Current version indicated by LITEVER below.
|
|||
<div class="settingitem">
|
||||
<div class="settinglabel">
|
||||
<div class="justifyleft settingsmall">Enable Markdown <span class="helpicon">?<span
|
||||
class="helptext">Allows the UI to use markdown formatting such as quotes, LaTeX, and code blocks.</span></span></div>
|
||||
class="helptext">Allows the UI to use markdown formatting such as quotes and code blocks.</span></span></div>
|
||||
<input type="checkbox" title="Enabled Markdown" id="instruct_has_markdown" style="margin:0px 0px 0px auto;">
|
||||
</div>
|
||||
<div class="settinglabel">
|
||||
<div class="justifyleft settingsmall">Enable LaTeX <span class="helpicon">?<span
|
||||
class="helptext">Allows the UI to render LaTeX within markdown formatting (Needs Markdown).</span></span></div>
|
||||
<input type="checkbox" title="Enable LaTeX (Needs Markdown)" id="instruct_has_latex" style="margin:0px 0px 0px auto;">
|
||||
</div>
|
||||
<div class="settinglabel">
|
||||
<div class="justifyleft settingsmall">Trim Sentences <span class="helpicon">?<span
|
||||
class="helptext">Trims incomplete sentences in AI output.</span></span></div>
|
||||
|
|
|
@ -860,7 +860,7 @@ def dump_gguf_metadata(file_path): #if you're gonna copy this into your own proj
|
|||
if dt_translated=="arr":
|
||||
print(f"{dt_translated}: {curr_key} = [{len(curr_val)}]")
|
||||
elif dt_translated=="str":
|
||||
print(f"{dt_translated}: {curr_key} = {curr_val[:100]}")
|
||||
print(f"{dt_translated}: {curr_key} = {curr_val[:256]}")
|
||||
else:
|
||||
print(f"{dt_translated}: {curr_key} = {curr_val}")
|
||||
print("\n*** GGUF TENSOR INFO ***")
|
||||
|
|
|
@ -291,6 +291,15 @@ void print_tok_vec(std::vector<float> &embd)
|
|||
if (keyidx != -1) {
|
||||
freq_base_train = gguf_get_val_f32(ctx, keyidx);
|
||||
}
|
||||
fkey = "tokenizer.ggml.add_bos_token";
|
||||
keyidx = gguf_find_key(ctx, fkey.c_str());
|
||||
if (keyidx != -1) {
|
||||
bool result = gguf_get_val_bool(ctx, keyidx);
|
||||
if(result==false)
|
||||
{
|
||||
fileformatmeta->explicitly_no_bos = true;
|
||||
}
|
||||
}
|
||||
|
||||
int filever = gguf_get_version(ctx);
|
||||
|
||||
|
|
|
@ -71,6 +71,7 @@ struct FileFormatExtraMeta
|
|||
GGUFArch model_architecture = GGUFArch::ARCH_DEFAULT;
|
||||
int n_expert_count = 0;
|
||||
std::string model_architecture_str = "";
|
||||
bool explicitly_no_bos = false; //only true if key exists AND is false
|
||||
};
|
||||
|
||||
struct TopPicksData
|
||||
|
|
|
@ -1639,15 +1639,15 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
|
|||
}
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
||||
params.mem_size = static_cast<size_t>(20 * 1024 * 1024); // 20 MB increased by kcpp
|
||||
if (sd_version_is_sd3(sd_ctx->sd->version)) {
|
||||
params.mem_size *= 3;
|
||||
params.mem_size *= 2; //readjust by kcpp as above changed
|
||||
}
|
||||
if (sd_version_is_flux(sd_ctx->sd->version)) {
|
||||
params.mem_size *= 4;
|
||||
params.mem_size *= 2; //readjust by kcpp as above changed
|
||||
}
|
||||
if (sd_ctx->sd->stacked_id) {
|
||||
params.mem_size += static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
||||
params.mem_size += static_cast<size_t>(15 * 1024 * 1024); // 15 MB
|
||||
}
|
||||
params.mem_size += width * height * 3 * sizeof(float);
|
||||
params.mem_size *= batch_count;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue