load embedding at current maxctx instead of max trained ctx by default

Concedo 2025-08-13 18:42:14 +08:00
parent 06a3ee4c3b
commit 955cf66bbc
2 changed files with 4 additions and 2 deletions


@@ -3431,6 +3431,7 @@ Current version indicated by LITEVER below.
 max_context_length: (localflag?4096:3072),
 max_length: (localflag?512:256),
+last_maxctx: 0,
 auto_ctxlen: true,
 auto_genamt: true,
 rep_pen: 1.06,
@@ -11156,9 +11157,10 @@ Current version indicated by LITEVER below.
 document.getElementById("max_length_slide").max = 4096;
 document.getElementById("max_length_slide_label").innerText = 4096;
 }
-if(localflag && localsettings.max_context_length==defaultsettings.max_context_length && ep_maxctx>4096)
+if(localflag && ep_maxctx>=4096 && localsettings.max_context_length<ep_maxctx && (localsettings.last_maxctx!=ep_maxctx || localsettings.max_context_length==defaultsettings.max_context_length))
 {
 localsettings.max_context_length = ep_maxctx;
+localsettings.last_maxctx = ep_maxctx;
 }
 }).catch(error => {
 console.log("Failed to get true max ctx: " + error);


@@ -1901,7 +1901,7 @@ def embeddings_load_model(model_filename):
 inputs.flash_attention = False
 inputs.threads = args.threads
 inputs.use_mmap = args.usemmap
-inputs.embeddingsmaxctx = args.embeddingsmaxctx
+inputs.embeddingsmaxctx = (args.embeddingsmaxctx if args.embeddingsmaxctx else args.contextsize) # for us to clamp to contextsize if embeddingsmaxctx unspecified
 inputs = set_backend_props(inputs)
 ret = handle.embeddings_load_model(inputs)
 return ret
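
This hunk is the headline change: when --embeddingsmaxctx is not specified, the embedding model is loaded at the current --contextsize instead of its full trained context. A minimal standalone sketch of the same fallback pattern (argument names follow the diff; the parser defaults here are illustrative assumptions, not the real CLI setup):

# Sketch of the fallback shown above: an unset embeddingsmaxctx falls back to contextsize,
# so the embedding model is loaded at the current max ctx rather than its trained max.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--contextsize", type=int, default=4096)     # assumed default
parser.add_argument("--embeddingsmaxctx", type=int, default=0)   # 0 means "unspecified"
args = parser.parse_args([])  # no flags supplied

embeddingsmaxctx = args.embeddingsmaxctx if args.embeddingsmaxctx else args.contextsize
print(embeddingsmaxctx)  # 4096: clamped to contextsize because embeddingsmaxctx was unspecified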