Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 09:04:36 +00:00)

Commit cb1c182673, parent 4decd6bea1

add more warmup (+1 squashed commits)

Squashed commits: [9578d5352] updated lite

3 changed files with 14 additions and 8 deletions

gpttype_adapter.cpp:

@@ -2379,6 +2379,9 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
             {
                 printf("\nLLAMA EVAL returned nonzero: %d\n",er);
             }
+            tmp = {1};
+            llama_kv_self_clear(llama_ctx_v4);
+            er = llama_decode(llama_ctx_v4, llama_batch_get_one(tmp.data(), tmp.size()));
             return ModelLoadResult::SUCCESS;
         }
         else if (file_format == FileFormat::RWKV_1 || file_format==FileFormat::RWKV_2)
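
This hunk extends the post-load warmup: after the initial warmup eval (whose nonzero return is only logged, not treated as fatal), the KV cache is cleared and a single dummy token (id 1) is decoded through llama_decode, so the compute graph and GPU buffers are exercised once more before the loader reports SUCCESS. A minimal Python sketch of the same pattern; decode_fn and kv_clear_fn are illustrative stand-ins for the llama.cpp calls, not real binding names:

# Sketch of the warmup-at-load pattern this hunk adds. decode_fn and
# kv_clear_fn are illustrative stand-ins for llama_decode and
# llama_kv_self_clear; they are not real binding names.

def warmup_decode(decode_fn, kv_clear_fn, dummy_token=1):
    """Run a one-token decode on a clean KV cache.

    Warmup only exists to pre-build compute graphs and touch GPU
    buffers, so a nonzero return is logged but never aborts loading.
    """
    kv_clear_fn()                   # drop state from earlier warmup evals
    err = decode_fn([dummy_token])  # one-token batch, like llama_batch_get_one
    if err != 0:
        print(f"LLAMA EVAL returned nonzero: {err}")
    return err

# Usage with trivial stubs:
warmup_decode(decode_fn=lambda tokens: 0, kv_clear_fn=lambda: None)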

klite.embd (15 changed lines):
@@ -6142,7 +6142,10 @@ Current version indicated by LITEVER below.
             "prompt": prompt,
             "n": 1,
             "size": "1024x1024",
-            "response_format":"b64_json",
         }
+        if(localsettings.saved_dalle_model!="gpt-image-1")
+        {
+            dalle_payload["response_format"] = "b64_json";
+        }

         //remove all null fields
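
The payload change stops sending response_format unconditionally: gpt-image-1 always returns base64-encoded images and does not accept the response_format parameter, so the field is now added to dalle_payload only for the older DALL-E models. A small Python sketch of the same payload logic; the dict shape and the model check mirror the commit, the function name is mine:

# Python sketch of the payload logic from klite.embd; the dict shape
# and the model check mirror the commit, the function name is mine.

def build_dalle_payload(prompt, model):
    dalle_payload = {
        "prompt": prompt,
        "n": 1,
        "size": "1024x1024",
    }
    # gpt-image-1 always returns base64-encoded images and does not
    # accept response_format; only the older DALL-E models need it.
    if model != "gpt-image-1":
        dalle_payload["response_format"] = "b64_json"
    # the surrounding code removes all null fields next
    return {k: v for k, v in dalle_payload.items() if v is not None}

print(build_dalle_payload("a watercolor fox", "gpt-image-1"))
print(build_dalle_payload("a watercolor fox", "dall-e-3"))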
@@ -6164,17 +6167,17 @@ Current version indicated by LITEVER below.
             console.log(resp);
             if(resp.data && resp.data.length>0)
             {
-                onImagesDone(resp.data[0].b64_json);
+                onImagesDone(resp.data[0].b64_json,null);
             }
             else
             {
                 console.log("Generation Error!");
-                onImagesDone(null);
+                onImagesDone(null,JSON.stringify(resp));
             }

         }).catch((error) => {
             console.log("Generation Error: " + error);
-            onImagesDone(null);
+            onImagesDone(null,null);
         });

     }

@@ -15595,7 +15598,7 @@ Current version indicated by LITEVER below.
         image_db[imgid] = { done: false, queue: "Generating", result: "", prompt:sentence, poll_category:0 };
         image_db[imgid].aspect = 0;
         image_db[imgid].imsource = 0; //0=generated,1=uploaded
-        generate_dalle_image(genimg_payload,(outputimg)=>{
+        generate_dalle_image(genimg_payload,(outputimg,outputerr)=>{
             if(outputimg)
             {
                 //console.log(outputimg);
@@ -15607,7 +15610,7 @@ Current version indicated by LITEVER below.
                 }, true, imgres);
             }else{
                 image_db[imgid].queue = "Failed";
-                msgbox("Image Generation Failed!\n\nPlease make sure your OpenAI key is set correctly and you are allowed to use DALL-E.\n");
+                msgbox(`Image Generation Failed!\n\n${outputerr?(outputerr+"\n\n"):""}Please make sure your OpenAI key is set correctly and you are allowed to use DALL-E.\n`);
             }
         });
     }
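
Together these three hunks thread an error channel through the image-generation path: the onImagesDone callback gains a second argument, carrying JSON.stringify(resp) when the server answered without image data (and null when the network call itself failed), and the consumer interpolates that text into the failure msgbox. A compact Python sketch of the widened callback contract; fetch_fn and the other names here are stand-ins, not the real klite.embd identifiers:

# Sketch of the widened callback contract. fetch_fn stands in for the
# HTTP request; none of these names are from the real klite.embd code.

import json

def generate_image(fetch_fn, on_images_done):
    try:
        resp = fetch_fn()  # parsed JSON response, may raise
        if resp.get("data"):
            on_images_done(resp["data"][0]["b64_json"], None)
        else:
            # server answered but sent no image: forward its payload
            on_images_done(None, json.dumps(resp))
    except Exception:
        # network-level failure: nothing useful to show the user
        on_images_done(None, None)

def on_done(outputimg, outputerr):
    if outputimg:
        print("image received")
    else:
        # mirror the msgbox change: include the error text if present
        print("Image Generation Failed!" + (f"\n{outputerr}" if outputerr else ""))

generate_image(lambda: {"error": {"message": "invalid api key"}}, on_done)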

koboldcpp.py:
@@ -994,7 +994,7 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level): #shitty algo to dete
         fattn_discount = 1.0/(3.2 if qkv_level==2 else (1.6 if qkv_level==1 else 1.0))
         mem1 = layers*(4 if bbs <= 512 else (bbs/128))*headkvlen*cs*fattn_discount*4*1.45
         mem2 = layers*headcount*headkvlen*cs*fattn_discount*4*1.15
-        ratio = (mem - reservedmem - mem1) / (fsize + mem2)
+        ratio = max(ratio,(mem - reservedmem - mem1) / (fsize + mem2))
         layerlimit = min(int(ratio*layers), (layers + 3))
         layerlimit = (0 if layerlimit<=2 else layerlimit)
         return layerlimit
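
The one-line change in autoset_gpu_layers wraps the refined VRAM ratio in max(): ratio is presumably first computed from a cruder heuristic earlier in the function (not visible in this hunk), and the refined estimate can now only raise the offload fraction, never lower it. A worked example with made-up numbers; all input values are assumptions, only the formulas come from the hunk:

# Worked example of the changed ratio line. All input values are made
# up; only the formulas are from the hunk. mem, reservedmem, fsize,
# layers, headcount, headkvlen and cs come from earlier in the real
# autoset_gpu_layers function.

mem, reservedmem = 8 * 1024**3, int(1.5 * 1024**3)  # available and reserved VRAM, bytes (assumed)
fsize = 5 * 1024**3                                 # model file size, bytes (assumed)
layers, headcount, headkvlen, cs = 33, 32, 128, 4096
bbs, qkv_level = 512, 0

ratio = 0.55  # cruder estimate computed earlier in the function (assumed)

fattn_discount = 1.0/(3.2 if qkv_level==2 else (1.6 if qkv_level==1 else 1.0))
mem1 = layers*(4 if bbs <= 512 else (bbs/128))*headkvlen*cs*fattn_discount*4*1.45
mem2 = layers*headcount*headkvlen*cs*fattn_discount*4*1.15

refined = (mem - reservedmem - mem1) / (fsize + mem2)  # old: ratio = refined
ratio = max(ratio, refined)  # new: never drop below the earlier estimate

layerlimit = min(int(ratio*layers), (layers + 3))
layerlimit = (0 if layerlimit<=2 else layerlimit)
print(f"refined={refined:.2f} ratio={ratio:.2f} layers={layerlimit}")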

@@ -4325,7 +4325,7 @@ def show_gui():
     quick_gpuname_label = ctk.CTkLabel(quick_tab, text="")
     quick_gpuname_label.grid(row=3, column=1, padx=75, sticky="W")
     quick_gpuname_label.configure(text_color="#ffff00")
-    quick_gpu_layers_entry,quick_gpu_layers_label = makelabelentry(quick_tab,"GPU Layers:", gpulayers_var, 6, 50,tooltip="How many layers to offload onto the GPU.\nVRAM intensive, usage increases with model and context size.\nRequires some trial and error to find the best fit value.\n\nCommon values for total layers, accuracy not guaranteed.\n\nLlama/Mistral 7b/8b: 33\nSolar 10.7b/11b: 49\nLlama 13b: 41\nLlama 20b(stack): 63\nLlama/Yi 34b: 61\nMixtral 8x7b: 33\nLlama 70b: 81")
+    quick_gpu_layers_entry,quick_gpu_layers_label = makelabelentry(quick_tab,"GPU Layers:", gpulayers_var, 6, 50,tooltip="How many layers to offload onto the GPU.\nUsage varies based on model type and increases with model and context size.\nRequires some trial and error to find the best fit value.\n\nNote: The auto estimation is often inaccurate! Please set layers yourself for best results!")
     quick_layercounter_label = ctk.CTkLabel(quick_tab, text="")
     quick_layercounter_label.grid(row=6, column=1, padx=75, sticky="W")
     quick_layercounter_label.configure(text_color="#ffff00")