mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
fix for gemma3n
This commit is contained in:
parent
b8c1fc7c9e
commit
c45b8dc56f
4 changed files with 58 additions and 35 deletions
|
@ -2287,6 +2287,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
}
|
}
|
||||||
//handle override tensor
|
//handle override tensor
|
||||||
std::string tensoroverrides = inputs.override_tensors;
|
std::string tensoroverrides = inputs.override_tensors;
|
||||||
|
if(file_format_meta.model_architecture==GGUFArch::ARCH_GEMMA3N)
|
||||||
|
{
|
||||||
|
std::string forced = "per_layer_token_embd.weight=CPU"; //this tensor on gpu is problematic on unsloth q4_0
|
||||||
|
tensoroverrides = (tensoroverrides=="" ? forced: (forced+","+tensoroverrides));
|
||||||
|
}
|
||||||
if(tensoroverrides!="" && ggml_backend_dev_count()>1)
|
if(tensoroverrides!="" && ggml_backend_dev_count()>1)
|
||||||
{
|
{
|
||||||
printf("Handling Override Tensors for backends: ");
|
printf("Handling Override Tensors for backends: ");
|
||||||
|
|
83
klite.embd
83
klite.embd
|
@ -6221,7 +6221,9 @@ Current version indicated by LITEVER below.
|
||||||
.then((data) => {
|
.then((data) => {
|
||||||
if(data.data && data.data.length > 0)
|
if(data.data && data.data.length > 0)
|
||||||
{
|
{
|
||||||
onDone(data.data); //probe failed
|
let dropdown = get_custom_ep_model_dropdown("2");
|
||||||
|
update_oai_model_list(data,dropdown);
|
||||||
|
onDone(data.data); //probe success
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -10488,6 +10490,36 @@ Current version indicated by LITEVER below.
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
/**
 * Rebuilds a model-selection dropdown from a fetched model listing.
 *
 * All existing options are removed, one option per entry of `data.data`
 * (using each entry's `id`) is inserted in sorted order, and the option that
 * was last before the rebuild is re-attached at the end. Finally
 * `oai_model_change(true)` is invoked so the rest of the UI picks up the new
 * selection.
 *
 * @param {Object} data - Response object; `data.data` must be an array whose
 *     entries carry an `id` property.
 * @param {HTMLSelectElement} dropdown - The select element to repopulate.
 */
function update_oai_model_list(data, dropdown)
{
    let isOpenrouter = (document.getElementById("customapidropdown").value==3);

    // Remember the trailing option (presumably the hidden "[Custom]" entry -
    // confirm against the markup) so it survives the rebuild.
    let lastOption = dropdown.lastElementChild;

    // HTMLSelectElement.remove() takes an index. Passing the option element
    // only worked because it was coerced to index 0 on every iteration.
    for (let i = dropdown.options.length - 1; i >= 0; i--) {
        dropdown.remove(i);
    }

    let sortedarr = data.data.map((opt) => opt.id);
    sortedarr.sort();

    let selidx = 0;
    for (let i = 0; i < sortedarr.length; ++i)
    {
        let el = document.createElement("option");
        el.textContent = sortedarr[i];
        el.value = sortedarr[i];
        if(isOpenrouter && sortedarr[i]=="mistralai/mistral-7b-instruct")
        {
            // Preselect this model when the OpenRouter endpoint is active.
            selidx = i;
        }
        dropdown.appendChild(el);
    }

    // An initially empty dropdown has no trailing option; appendChild(null)
    // would throw, so only re-attach when there is something to re-attach.
    if(lastOption)
    {
        dropdown.appendChild(lastOption);
    }
    dropdown.selectedIndex = selidx;
    oai_model_change(true);
}
|
||||||
function oai_fetch_models()
|
function oai_fetch_models()
|
||||||
{
|
{
|
||||||
let desired_oai_key = document.getElementById("custom_oai_key").value.trim();
|
let desired_oai_key = document.getElementById("custom_oai_key").value.trim();
|
||||||
|
@ -10506,7 +10538,6 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let isOpenrouter = (document.getElementById("customapidropdown").value==3);
|
|
||||||
let dropdown = get_custom_ep_model_dropdown();
|
let dropdown = get_custom_ep_model_dropdown();
|
||||||
fetch((desired_oai_ep + oai_models_endpoint), {
|
fetch((desired_oai_ep + oai_models_endpoint), {
|
||||||
method: 'GET',
|
method: 'GET',
|
||||||
|
@ -10526,32 +10557,7 @@ Current version indicated by LITEVER below.
|
||||||
|
|
||||||
if (!data.error && data.data && data.data.length > 0)
|
if (!data.error && data.data && data.data.length > 0)
|
||||||
{
|
{
|
||||||
var lastOption = dropdown.lastElementChild;
|
update_oai_model_list(data,dropdown);
|
||||||
for (var i = dropdown.options.length - 1; i >= 0; i--) {
|
|
||||||
var option = dropdown.options[i];
|
|
||||||
dropdown.remove(option);
|
|
||||||
}
|
|
||||||
let selidx = 0;
|
|
||||||
let sortedarr = [];
|
|
||||||
for(var i = 0; i < data.data.length; i++) {
|
|
||||||
var opt = data.data[i];
|
|
||||||
sortedarr.push(opt.id);
|
|
||||||
}
|
|
||||||
sortedarr.sort();
|
|
||||||
for(var i=0;i<sortedarr.length;++i)
|
|
||||||
{
|
|
||||||
var el = document.createElement("option");
|
|
||||||
el.textContent = sortedarr[i];
|
|
||||||
el.value = sortedarr[i];
|
|
||||||
if(isOpenrouter && sortedarr[i]=="mistralai/mistral-7b-instruct")
|
|
||||||
{
|
|
||||||
selidx = i;
|
|
||||||
}
|
|
||||||
dropdown.appendChild(el);
|
|
||||||
}
|
|
||||||
dropdown.appendChild(lastOption);
|
|
||||||
dropdown.selectedIndex = selidx;
|
|
||||||
oai_model_change(true);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -16662,8 +16668,7 @@ Current version indicated by LITEVER below.
|
||||||
"max_tokens": submit_payload.params.max_length,
|
"max_tokens": submit_payload.params.max_length,
|
||||||
"model": custom_oai_model,
|
"model": custom_oai_model,
|
||||||
"temperature": submit_payload.params.temperature,
|
"temperature": submit_payload.params.temperature,
|
||||||
"top_p": submit_payload.params.top_p,
|
"top_p": submit_payload.params.top_p
|
||||||
"stop": get_stop_sequences().slice(0, 4), //lets try adding stop sequences, limit to first 4
|
|
||||||
}
|
}
|
||||||
if(localsettings.request_logprobs && !targetep.toLowerCase().includes("api.x.ai") && !targetep.toLowerCase().includes("api.mistral.ai"))
|
if(localsettings.request_logprobs && !targetep.toLowerCase().includes("api.x.ai") && !targetep.toLowerCase().includes("api.mistral.ai"))
|
||||||
{
|
{
|
||||||
|
@ -16675,7 +16680,12 @@ Current version indicated by LITEVER below.
|
||||||
oai_payload.logprobs = 5;
|
oai_payload.logprobs = 5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(!targetep.toLowerCase().includes("api.mistral.ai"))
|
if(!targetep.toLowerCase().includes("api.x.ai"))
|
||||||
|
{
|
||||||
|
//grok has no support for stop
|
||||||
|
oai_payload.stop = get_stop_sequences().slice(0, 4); //lets try adding stop sequences, limit to first 4
|
||||||
|
}
|
||||||
|
if(!targetep.toLowerCase().includes("api.mistral.ai") && !targetep.toLowerCase().includes("api.x.ai"))
|
||||||
{
|
{
|
||||||
//mistral api does not support presence pen
|
//mistral api does not support presence pen
|
||||||
oai_payload.presence_penalty = scaled_rep_pen;
|
oai_payload.presence_penalty = scaled_rep_pen;
|
||||||
|
@ -22551,7 +22561,7 @@ Current version indicated by LITEVER below.
|
||||||
}
|
}
|
||||||
|
|
||||||
function updateAestheticTextPreview() {
|
function updateAestheticTextPreview() {
|
||||||
let preview = `The shadows dance across the walls under the flickering candlelight of the quiet tavern.\n\nIt is well past dinnertime, and a cool breeze fills the room, which is nearly silent except for the hushed conversations from the few remaining patrons.\n{{[OUTPUT]}}\n*A small Kobold wearing a tattered brown cloak scurries up to you*\n\n"Excuse me, adventurer, I am Kobo the Kobold," he coughs softly and continues, "could you spare me a little coin? I haven't eaten for so long..." *kobo looks downcast with pleading eyes*\n{{[INPUT]}}\n*retrieves a small copper coin from a leather pouch, and places it on the table*\n\n"Hmm, that depends. Do you know to calculate the factorial of a number?", you chuckle.\n{{[OUTPUT]}}\nThe pathetic Kobold looks taken aback by your strange request, but then grudgingly agrees. *sighs heavily* "I guess..." *takes a few steps backwards, and starts scratching into the grimy floor with a stick*\n\n"Kobo just needs some food..." The kobold takes a deep breath and starts writing.\n\n\`\`\`\ndef factorial(n):\n if n == 0:\n return 1\n else:\n return n * factorial(n-1)\n\`\`\`\n*Kobo looks at you again* "Is that... acceptable?"\n{{[INPUT]}}\n*patting the sad kobold on his head, as he gratefully accepts the coin*\n\n"Aww there you go! Try not to spend it all it one place."\n\nYou watch as Kobo scampers off into the distance. Tomorrow comes.`;
|
let preview = `The shadows dance across the walls under the flickering candlelight of the quiet tavern.\n\nIt is well past dinnertime, and a cool breeze fills the room, which is nearly silent except for the hushed conversations from the few remaining patrons.\n{{[OUTPUT]}}\n*A small Kobold wearing a tattered brown cloak scurries up to you*\n\n"Excuse me, adventurer, I am Kobo the Kobold," he coughs softly and continues, "could you spare me a little coin? I haven't eaten for so long..." *kobo looks downcast with pleading eyes*\n{{[INPUT]}}\n*retrieves a small copper coin from a leather pouch, and places it on the table*\n\n"Hmm, that depends. Do you know to calculate the factorial of a number?", you chuckle.\n{{[OUTPUT]}}\nThe pathetic Kobold looks taken aback by your strange request, but then grudgingly agrees. *sighs heavily* "I guess..." *takes a few steps backwards, and starts scratching into the grimy floor with a stick*\n\n"Kobo just needs some food..." The kobold takes a deep breath and starts writing.\n\n\`\`\`\ndef factorial(n):\n if n == 0:\n return 1\n else:\n return n * factorial(n-1)\n\`\`\`\n*Kobo looks at you again* "Is that... acceptable?"\n{{[INPUT]}}\n*patting the sad kobold on his head, as he gratefully accepts the coin*\n\n"Aww there you go! Try not to spend it all it one place."\n\nYou watch as Kobo scampers off into the distance.`;
|
||||||
if(localsettings.opmode==3)
|
if(localsettings.opmode==3)
|
||||||
{
|
{
|
||||||
preview = replaceAll(preview,'\n{{[OUTPUT]}}\n', `\n${localsettings.chatopponent.split("||$||")[0]}: `);
|
preview = replaceAll(preview,'\n{{[OUTPUT]}}\n', `\n${localsettings.chatopponent.split("||$||")[0]}: `);
|
||||||
|
@ -24204,7 +24214,7 @@ Current version indicated by LITEVER below.
|
||||||
oninput="document.getElementById('top_k').value = this.value;"></div>
|
oninput="document.getElementById('top_k').value = this.value;"></div>
|
||||||
<div class="settingminmax">
|
<div class="settingminmax">
|
||||||
<div class="justifyleft">0</div>
|
<div class="justifyleft">0</div>
|
||||||
<div class="justifyright">1</div>
|
<div class="justifyright">100</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -25478,8 +25488,11 @@ Current version indicated by LITEVER below.
|
||||||
<option style="display:none;" class="custom_model_option" value="custom">[Custom]</option>
|
<option style="display:none;" class="custom_model_option" value="custom">[Custom]</option>
|
||||||
</select>
|
</select>
|
||||||
<select title="Grok AI Model Selection" style="padding:4px;display:inline;width:calc(100% - 220px)" class="form-control hidden" id="custom_grok_model" onchange="oai_model_change(true)">
|
<select title="Grok AI Model Selection" style="padding:4px;display:inline;width:calc(100% - 220px)" class="form-control hidden" id="custom_grok_model" onchange="oai_model_change(true)">
|
||||||
<option value="grok-beta">grok-beta</option>
|
<option value="grok-2-1212">grok-2-1212</option>
|
||||||
<option value="grok-3">grok-3</option>
|
<option value="grok-3" selected>grok-3</option>
|
||||||
|
<option value="grok-3-mini">grok-3-mini</option>
|
||||||
|
<option value="grok-3-mini-fast">grok-3-mini-fast</option>
|
||||||
|
<option value="grok-4-0709">grok-4-0709</option>
|
||||||
<option style="display:none;" class="custom_model_option" value="custom">[Custom]</option>
|
<option style="display:none;" class="custom_model_option" value="custom">[Custom]</option>
|
||||||
</select>
|
</select>
|
||||||
<select title="Pollinations AI Model Selection" style="padding:4px;display:inline;width:calc(100% - 220px)" class="form-control hidden" id="custom_pollinations_model" onchange="oai_model_change(true)">
|
<select title="Pollinations AI Model Selection" style="padding:4px;display:inline;width:calc(100% - 220px)" class="form-control hidden" id="custom_pollinations_model" onchange="oai_model_change(true)">
|
||||||
|
|
|
@ -340,6 +340,10 @@ void print_tok_vec(std::vector<float> &embd)
|
||||||
{
|
{
|
||||||
fileformatmeta->model_architecture = GGUFArch::ARCH_GEMMA3;
|
fileformatmeta->model_architecture = GGUFArch::ARCH_GEMMA3;
|
||||||
}
|
}
|
||||||
|
else if(modelarch=="gemma3n")
|
||||||
|
{
|
||||||
|
fileformatmeta->model_architecture = GGUFArch::ARCH_GEMMA3N;
|
||||||
|
}
|
||||||
else if(modelarch=="rwkv6" || modelarch=="rwkv7")
|
else if(modelarch=="rwkv6" || modelarch=="rwkv7")
|
||||||
{
|
{
|
||||||
fileformatmeta->model_architecture = GGUFArch::ARCH_RWKV;
|
fileformatmeta->model_architecture = GGUFArch::ARCH_RWKV;
|
||||||
|
|
|
@ -62,6 +62,7 @@ enum GGUFArch
|
||||||
ARCH_QWEN2VL = 7,
|
ARCH_QWEN2VL = 7,
|
||||||
ARCH_GEMMA3 = 8,
|
ARCH_GEMMA3 = 8,
|
||||||
ARCH_GLM4 = 9,
|
ARCH_GLM4 = 9,
|
||||||
|
ARCH_GEMMA3N = 10,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FileFormatExtraMeta
|
struct FileFormatExtraMeta
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue