sd: support for dynamic LoRA loading from a directory (#2036)

* backend support for controlling LoRA cache and fixed multipliers

The generation LoRA multipliers are now added to the initial
multipliers, so e.g. a merged LCM model will behave the same as
a normal model with a preloaded LCM LoRA.

* frontend support
This commit is contained in:
Wagner Bruna 2026-03-16 09:39:21 -03:00 committed by GitHub
parent b88fc44d0e
commit feea014774
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 402 additions and 120 deletions

View file

@ -230,6 +230,7 @@ struct sd_generation_inputs
const char * cache_options = nullptr;
const bool upscale = false;
const int lora_len = 0;
const char ** lora_filenames = nullptr;
const float * lora_multipliers = nullptr;
};
struct sd_generation_outputs

View file

@ -89,7 +89,11 @@ ttsmodelpath = "" #if empty, not initialized
embeddingsmodelpath = "" #if empty, not initialized
musicllmmodelpath = "" #if empty, not initialized
musicdiffusionmodelpath = "" #if empty, not initialized
imglorainfo = []
imglora_preload = []
imglora_bypath = {}
imglora_name2path = {}
imglora_cached = True
imglora_initial_fixed = True
maxctx = 8192
maxhordectx = 0 #set to whatever maxctx is if 0
maxhordelen = 1024
@ -362,6 +366,7 @@ class sd_generation_inputs(ctypes.Structure):
("cache_options", ctypes.c_char_p),
("upscale", ctypes.c_bool),
("lora_len", ctypes.c_int),
("lora_filenames", ctypes.POINTER(ctypes.c_char_p)),
("lora_multipliers", ctypes.POINTER(ctypes.c_float))]
class sd_generation_outputs(ctypes.Structure):
@ -1175,25 +1180,33 @@ def get_capabilities():
admin_type = (2 if args.admin and args.admindir and args.adminpassword else (1 if args.admin and args.admindir else 0))
return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision_support,"audio":has_audio_support,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "embeddings":has_embeddings, "music":has_music, "savedata":(savedata_obj is not None), "admin": admin_type, "guidance": has_guidance, "jinja": has_jinja, "mcp":has_mcp}
def scan_directory(dirpath, valid_exts, depth):
files = []
for entry in sorted(os.listdir(dirpath)): # Scan top-level directory
full_path = os.path.join(dirpath, entry)
if os.path.isfile(full_path) and entry.lower().endswith(valid_exts): # If toplevel file
files.append(entry)
elif depth > 0 and os.path.isdir(full_path): #if dir, scan up to 1 level deep
for subentry in sorted(os.listdir(full_path)):
sub_full_path = os.path.join(full_path, subentry)
if os.path.isfile(sub_full_path) and subentry.endswith(valid_exts):
rel_path = os.path.join(entry, subentry)
files.append(rel_path)
return files
def get_current_admindir_list():
opts = []
if args.admin and args.admindir:
dirpath = os.path.abspath(args.admindir)
valid_exts = (".kcpps", ".kcppt", ".gguf")
for entry in sorted(os.listdir(dirpath)): # Scan top-level directory
full_path = os.path.join(dirpath, entry)
if os.path.isfile(full_path) and entry.endswith(valid_exts): # If toplevel file
opts.append(entry)
elif os.path.isdir(full_path): #if dir, scan up to 1 level deep
for subentry in sorted(os.listdir(full_path)):
sub_full_path = os.path.join(full_path, subentry)
if os.path.isfile(sub_full_path) and subentry.endswith(valid_exts):
rel_path = os.path.join(entry, subentry)
opts.append(rel_path)
opts = scan_directory(dirpath, valid_exts, 1)
opts.append("initial_model")
opts.append("unload_model")
return opts
def dump_gguf_metadata(file_path): #if you're gonna copy this into your own project at least credit concedo
chunk_size = 1024*1024*12 # read first 12mb of file
try:
@ -2018,7 +2031,7 @@ def sd_quant_option(value):
except Exception:
return 0
def sd_load_model(model_filename,vae_filename,lora_filenames,t5xxl_filename,clip1_filename,clip2_filename,photomaker_filename,upscaler_filename):
def sd_load_model(model_filename,vae_filename,t5xxl_filename,clip1_filename,clip2_filename,photomaker_filename,upscaler_filename):
global args
inputs = sd_load_model_inputs()
inputs.model_filename = model_filename.encode("UTF-8")
@ -2047,14 +2060,22 @@ def sd_load_model(model_filename,vae_filename,lora_filenames,t5xxl_filename,clip
inputs.photomaker_filename = photomaker_filename.encode("UTF-8")
inputs.upscaler_filename = upscaler_filename.encode("UTF-8")
lora_filenames = [lf.encode("UTF-8") for lf in lora_filenames[:lora_filenames_max] if lf]
lora_len = len(lora_filenames)
lora_multipliers = prepare_lora_multipliers([])
inputs.lora_len = lora_len
inputs.lora_filenames = (ctypes.c_char_p * lora_len)(*lora_filenames)
inputs.lora_multipliers = (ctypes.c_float * lora_len)(*lora_multipliers)
lora_filenames, lora_multipliers = prepare_initial_lora_multipliers()
inputs.lora_len = len(lora_filenames)
inputs.lora_filenames = (ctypes.c_char_p * inputs.lora_len)(*lora_filenames)
inputs.lora_multipliers = (ctypes.c_float * inputs.lora_len)(*lora_multipliers)
if 0 and inputs.lora_len:
print("Preloading LoRAs:")
for i in range(inputs.lora_len):
print(f" {inputs.lora_filenames[i]} @ {inputs.lora_multipliers[i]}")
# auto if no zero-weight lora, dynamic otherwise
inputs.lora_apply_mode = 3 if 0. in lora_multipliers else 0
lora_apply_mode = 0 # auto
if imglora_bypath:
lora_dynamic = 1 << 3 # accept changes at runtime
lora_cache = 1 << 4 if imglora_cached else 0 # cache the preloaded LoRAs
lora_fixed = 1 << 5 if imglora_initial_fixed else 0 # do not allow changes to the non-zero preloaded LoRAs
lora_apply_mode = lora_dynamic | lora_cache | lora_fixed
inputs.lora_apply_mode = lora_apply_mode
inputs.img_hard_limit = args.sdclamped
inputs.img_soft_limit = args.sdclampedsoft
@ -2177,23 +2198,57 @@ def sanitize_lora_multipliers(sdloramult):
sdloramult = [tryparsefloat(m, 0.) for m in sdloramult]
return sdloramult
def prepare_lora_multipliers(request_list):
orig_multipliers = [lora[3] for lora in imglorainfo]
req_by_path = {}
def prepare_initial_lora_multipliers():
res_paths = []
res_multipliers = []
num_loras = len(imglora_preload)
if num_loras > lora_filenames_max:
print(f'Warning: more than {lora_filenames_max} preloaded LoRAs, extra ones will be ignored')
num_loras = lora_filenames_max
for info in imglora_preload[:num_loras]:
res_paths.append(info['fullpath'].encode("UTF-8"))
res_multipliers.append(info['multiplier'])
return res_paths, res_multipliers
def prepare_lora_multipliers_backend(request_list, imglora_bypath):
req_dedup = {}
for r in request_list:
if not isinstance(r, dict):
continue
multiplier = tryparsefloat(r.get('multiplier'), 0.)
path = r.get('path')
if path and isinstance(path, str):
req_by_path[path] = req_by_path.get(path, 0.) + multiplier
result = []
for i, (fullpath, name, path, origmul) in enumerate(imglorainfo):
multiplier = orig_multipliers[i]
if multiplier == 0. and path in req_by_path:
multiplier = req_by_path[path]
result.append(multiplier)
return result
multiplier = tryparsefloat(r.get('multiplier'), 0.)
if not path or not isinstance(path, str) or not multiplier:
continue
info = imglora_bypath.get(path)
if info:
fullpath = info["fullpath"]
req_dedup[fullpath] = req_dedup.get(fullpath, 0.) + multiplier
res_paths = []
res_multipliers = []
for fullpath, multiplier in req_dedup.items():
if multiplier != 0.0:
res_paths.append(fullpath.encode("UTF-8"))
res_multipliers.append(multiplier)
# enforce lora_filenames_max
max_requests = lora_filenames_max - len(imglora_preload)
if len(res_paths) > max_requests:
msg_preloaded = ""
if len(imglora_preload) > 0:
msg_preloaded = f" (including {len(imglora_preload)} preloaded)"
print(f'Warning: more than {lora_filenames_max} requested LoRAs{msg_preloaded}, extra ones will be ignored')
res_paths = res_paths[:max_requests]
res_multipliers = res_multipliers[:max_requests]
return res_paths, res_multipliers
def prepare_lora_multipliers(request_list):
return prepare_lora_multipliers_backend(request_list, imglora_bypath)
def mk_sdapi_lora_list(imglora_bypath):
return [
{'name': info['name'], 'path': info['path']}
for info in imglora_bypath.values()
if info['multiplier'] == 0.0 # both preloaded and scanned
]
def extract_loras_from_prompt(prompt):
pattern = r'<lora:([^:>]+):([^>]+)>'
@ -2219,16 +2274,17 @@ def extract_loras_from_prompt(prompt):
return prompt, lora_data
def lora_map_name_to_path(request_list):
name2path = {}
for _, name, path, _ in imglorainfo:
name2path[name] = path
result = []
for req in request_list:
out = dict(req)
name = out.pop('name')
path = name2path.get(name)
if path:
out['path'] = path
path = imglora_name2path.get(name)
if not path:
print(f'LoRA {name} not found')
continue
info = imglora_bypath.get(path)
if info:
out['path'] = info['path']
result.append(out)
return result
@ -2283,6 +2339,7 @@ def sd_generate(genparams):
extra_images_arr = ([] if not extra_images_arr else extra_images_arr)
extra_images_arr = [img for img in extra_images_arr if img not in (None, "")]
extra_images_arr = extra_images_arr[:extra_images_max]
lora_filenames, lora_multipliers = prepare_lora_multipliers(genparams.get("lora", []))
#clean vars
cfg_scale = (1 if cfg_scale < 1 else (forced_maxcfg if cfg_scale > forced_maxcfg else cfg_scale))
@ -2334,9 +2391,8 @@ def sd_generate(genparams):
inputs.cache_mode = cache_mode.encode("UTF-8")
inputs.cache_options = cache_options.encode("UTF-8")
inputs.upscale = (True if tryparseint(genparams.get("enable_hr", 0),0) else False)
lora_multipliers = prepare_lora_multipliers(genparams.get("lora", []))
inputs.lora_len = len(lora_multipliers)
inputs.lora_len = len(lora_filenames)
inputs.lora_filenames = (ctypes.c_char_p * inputs.lora_len)(*lora_filenames)
inputs.lora_multipliers = (ctypes.c_float * inputs.lora_len)(*lora_multipliers)
ret = handle.sd_generate(inputs)
@ -4426,7 +4482,7 @@ Change Mode<br>
response_body = (json.dumps({"object":"list","data":mlist}).encode())
elif clean_path.endswith('/sdapi/v1/loras'):
response_body = (json.dumps([{'name': name, 'path': path} for _, name, path, multiplier in imglorainfo if multiplier == 0.])).encode()
response_body = (json.dumps(mk_sdapi_lora_list(imglora_bypath))).encode()
elif clean_path.endswith('/sdapi/v1/upscalers'):
if args.sdupscaler:
@ -8739,26 +8795,88 @@ def main(launch_args, default_args):
input()
def mk_lora_info(imgloras, multipliers):
# (full path, name, name+extension, can change multiplier)
# XXX for each LoRA, sdapi needs a name and a path; we could use
# the full filename as a path, but we don't know if we can expose it
used_lora_names = set()
result = []
def mk_lora_info(imgloras, multipliers, mock_filesystem=False):
first_multiplier = multipliers[0] if len(multipliers) > 0 else 1.
lora_files = []
lora_dirs = []
# identify files and dirs
for i, lora_path in enumerate(imgloras):
multiplier = multipliers[i] if i < len(multipliers) else first_multiplier
lora_file = os.path.basename(lora_path)
if mock_filesystem:
print('fake filesystem access')
if lora_path.endswith('/'):
lora_dirs.append(lora_path)
else:
lora_files.append(('', lora_path, multiplier))
elif os.path.isfile(lora_path):
lora_files.append(('', lora_path, multiplier))
elif os.path.isdir(lora_path):
lora_dirs.append(lora_path)
elif os.path.exists(lora_path):
print(f"Unexpected file type for SD LORA model file {lora_path}")
else:
print(f"Missing SD LORA model file {lora_path}...")
# scan all dirs
for lora_dir in lora_dirs:
print(f'Scanning {lora_dir} for LoRAs...')
if mock_filesystem:
print('fake directory scan')
files = ['lora1_makebelieve.gguf', 'lora2/makebelieve.gguf']
else:
files = scan_directory(lora_dir, ('.safetensors', '.gguf'), 1)
print(f' found {len(files)} files under {lora_dir}')
for file in files:
lora_files.append((lora_dir, file, 0.0))
# dedup and map all files
unique_lora_names = set()
lora_fullmap = {}
for i, (lora_dir, lora_path, multiplier) in enumerate(lora_files):
if lora_dir:
# lora_path is relative: we can show it on the interface and accept it
lora_fullpath = os.path.join(lora_dir, lora_path)
# NOTE: we are including the relative directory on the short name
lora_file = lora_path
preloaded = False
else:
lora_fullpath = lora_path
# we don't know which portion of the path we can show, so omit it
lora_file = os.path.basename(lora_path)
preloaded = True
if not mock_filesystem:
lora_fullpath = os.path.abspath(lora_fullpath)
# dedup paths (e.g. preloaded and on directory)
if lora_fullpath in lora_fullmap:
lora_fullmap[lora_fullpath]["multiplier"] += multiplier
continue
lora_name, lora_ext = os.path.splitext(lora_file)
# ensure unique names
i = 1
mapped_name = lora_name
while mapped_name in used_lora_names:
lora_uname = lora_name
while lora_uname in unique_lora_names:
i += 1
mapped_name = lora_name + '_' + str(i)
used_lora_names.add(mapped_name)
result.append((lora_path, mapped_name, mapped_name + lora_ext, multiplier))
return result
lora_uname = lora_name + '_' + str(i)
unique_lora_names.add(lora_uname)
lora_upath = lora_uname + lora_ext
lora_entry = {
'fullpath': lora_fullpath,
'name': lora_uname,
'path': lora_upath,
'multiplier': multiplier,
'preloaded': preloaded,
}
lora_fullmap[lora_fullpath] = lora_entry
# build the runtime tables
preloaded_table = []
lora_path_map = {}
lora_name_map = {}
for lora_entry in lora_fullmap.values():
# only map LoRAs that can be changed
if not imglora_initial_fixed or lora_entry["multiplier"] == 0.0:
lora_path_map[lora_entry["path"]] = lora_entry
lora_name_map[lora_entry["name"]] = lora_entry["path"]
if lora_entry["preloaded"]:
preloaded_table.append(lora_entry)
return preloaded_table, lora_path_map, lora_name_map
def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
@ -9201,16 +9319,9 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
imgclip2 = ""
imgphotomaker = ""
imgupscaler = ""
if args.sdlora and len(args.sdlora)>0:
for i in range (0,len(args.sdlora)):
curr = args.sdlora[i]
if os.path.exists(curr):
imgloras.append(os.path.abspath(curr))
else:
print(f"Missing SD LORA model file {curr}...")
global imglorainfo
global imglora_preload, imglora_bypath, imglora_name2path
args.sdloramult = sanitize_lora_multipliers(args.sdloramult)
imglorainfo = mk_lora_info(imgloras, args.sdloramult)
imglora_preload, imglora_bypath, imglora_name2path = mk_lora_info(args.sdlora, args.sdloramult)
if args.sdvae:
if os.path.exists(args.sdvae):
imgvae = os.path.abspath(args.sdvae)
@ -9247,7 +9358,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
friendlysdmodelname = os.path.basename(imgmodel)
friendlysdmodelname = os.path.splitext(friendlysdmodelname)[0]
friendlysdmodelname = sanitize_string(friendlysdmodelname)
loadok = sd_load_model(imgmodel,imgvae,imgloras,imgt5xxl,imgclip1,imgclip2,imgphotomaker,imgupscaler)
loadok = sd_load_model(imgmodel,imgvae,imgt5xxl,imgclip1,imgclip2,imgphotomaker,imgupscaler)
print("Load Image Model OK: " + str(loadok))
if not loadok:
exitcounter = 999

View file

@ -45,6 +45,62 @@
static_assert((int)SD_TYPE_COUNT == (int)GGML_TYPE_COUNT,
"inconsistency between SD_TYPE_COUNT and GGML_TYPE_COUNT");
struct LoraMap {
std::vector<std::pair<std::string, float>> items;
std::unordered_map<std::string, std::size_t> index;
void add_lora(const std::string& k, float v) {
auto it = index.find(k);
if (it == index.end()) {
index[k] = items.size();
items.emplace_back(k, v);
} else {
items[it->second].second += v;
}
}
float check_small_mult(float mult) {
if (mult > 1e-6 || mult < -1e-6)
return mult;
return 0.f;
}
float get_mult(const std::string& k) {
auto lora = index.find(k);
if (lora == index.end()) return 0.f;
return check_small_mult(items[lora->second].second);
}
std::vector<sd_lora_t> get_lora_specs(bool include_zeroes = false) {
std::vector<sd_lora_t> lora_specs;
for (const auto & lora: items) {
float multiplier = check_small_mult(lora.second);
if (include_zeroes || multiplier != 0.f) {
sd_lora_t spec = {};
spec.path = lora.first.c_str();
spec.multiplier = multiplier;
lora_specs.push_back(spec);
}
}
return lora_specs;
}
std::string get_lora_meta() {
std::stringstream lora_meta;
lora_meta << std::setprecision(6);
for (const auto & lora: items) {
float multiplier = check_small_mult(lora.second);
if (multiplier != 0.f) {
std::string lora_name = std::filesystem::path(lora.first).stem().string();
lora_meta << "<lora:" << lora_name << ":" << multiplier << ">";
}
}
return lora_meta.str();
}
};
struct SDParams {
int n_threads = -1;
std::string model_path;
@ -79,9 +135,9 @@ struct SDParams {
bool chroma_use_dit_mask = true;
std::vector<std::string> lora_paths;
std::vector<float> lora_multipliers;
LoraMap lora_map;
bool lora_dynamic = false;
bool lora_fixed = false;
std::string cache_mode;
std::string cache_options;
@ -211,12 +267,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
set_sd_quiet(sd_is_quiet);
executable_path = inputs.executable_path;
std::string taesdpath = "";
std::vector<std::string> lora_paths;
std::vector<float> lora_multipliers;
LoraMap lora_map;
for(int i=0;i<inputs.lora_len;++i)
{
lora_paths.push_back(inputs.lora_filenames[i]);
lora_multipliers.push_back(inputs.lora_multipliers[i]);
lora_map.add_lora(inputs.lora_filenames[i], inputs.lora_multipliers[i]);
}
std::string vaefilename = inputs.vae_filename;
std::string t5xxl_filename = inputs.t5xxl_filename;
@ -233,23 +287,32 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
int lora_apply_mode = LORA_APPLY_AT_RUNTIME;
bool lora_dynamic = false;
bool lora_cache = false;
bool lora_fixed = false;
if(inputs.lora_apply_mode >= 0 && inputs.lora_apply_mode <= 2) {
lora_apply_mode = inputs.lora_apply_mode;
}
else if(inputs.lora_apply_mode == 3) {
lora_dynamic = true;
else {
// bit 3: LoRAs can be changed dynamically
// bit 4: cache the initial LoRA list in VRAM
// bit 5: do not allow multiplier changes for the initial LoRAs
lora_dynamic = !!(inputs.lora_apply_mode & (1<<3));
lora_cache = lora_dynamic && !!(inputs.lora_apply_mode & (1<<4));
lora_fixed = lora_dynamic && !!(inputs.lora_apply_mode & (1<<5));
}
if(lora_paths.size() > 0)
if(lora_map.items.size() > 0)
{
const char* lora_apply_mode_name = lora_apply_mode == 1 ? "immediately"
: lora_apply_mode == 2 ? "at runtime"
: "auto";
const char * lora_dynamic_name = lora_dynamic ? " (dynamic)" : "";
printf("With LoRAs in apply mode %s%s:\n", lora_apply_mode_name, lora_dynamic_name);
for(int i=0;i<lora_paths.size();++i)
const char * lora_dynamic_name = lora_dynamic ? ", dynamic" : "";
const char * lora_cache_name = lora_cache ? ", with caching" : "";
printf("With LoRAs in apply mode %s%s%s:\n", lora_apply_mode_name, lora_dynamic_name, lora_cache_name);
for(auto lora: lora_map.items)
{
printf(" %s at %f power\n", lora_paths[i].c_str(),lora_multipliers[i]);
const char * lora_fixed_name = lora_fixed && lora.second != 0.f ? " (fixed)" : "";
printf(" %s at %f power%s\n", lora.first.c_str(), lora.second, lora_fixed_name);
}
}
@ -337,9 +400,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
sd_params->clip_l_path = clip1_filename;
sd_params->clip_g_path = clip2_filename;
sd_params->stacked_id_embeddings_path = photomaker_filename;
sd_params->lora_paths = lora_paths;
sd_params->lora_multipliers = lora_multipliers;
sd_params->lora_map = lora_map;
sd_params->lora_dynamic = lora_dynamic;
sd_params->lora_fixed = lora_fixed;
//if t5 is set, and model is a gguf, load it as a diffusion model path
bool endswithgguf = (sd_params->model_path.rfind(".gguf") == sd_params->model_path.size() - 5);
if((sd_params->t5xxl_path!="" || sd_params->clip_l_path!="" || sd_params->clip_g_path!="") && endswithgguf)
@ -429,21 +492,13 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
sdmodelfilename = mpath.filename().string();
// preload the LoRAs with the initial multipliers
std::vector<sd_lora_t> lora_specs;
for(int i=0;i<sd_params->lora_paths.size();++i)
{
if (!lora_dynamic && sd_params->lora_multipliers[i] == 0.)
continue;
sd_lora_t spec = {};
spec.path = sd_params->lora_paths[i].c_str();
spec.multiplier = sd_params->lora_multipliers[i];
lora_specs.push_back(spec);
}
std::vector<sd_lora_t> lora_specs = sd_params->lora_map.get_lora_specs(lora_dynamic&& lora_cache);
if(lora_specs.size()>0)
{
printf(" applying %zu LoRAs...\n", lora_specs.size());
sd_ctx->sd->kcpp_lora_cache_populate = lora_cache;
sd_ctx->sd->apply_loras(lora_specs.data(), lora_specs.size());
sd_ctx->sd->kcpp_lora_cache_populate = false;
}
input_extraimage_buffers.reserve(max_extra_images);
@ -1166,24 +1221,21 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
parse_cache_options(params.cache, sd_params->cache_mode, sd_params->cache_options);
params.batch_count = 1;
std::vector<sd_lora_t> lora_specs;
std::stringstream lora_meta;
lora_meta << std::setprecision(6);
for(size_t i=0;i<sd_params->lora_paths.size();++i)
{
float multiplier = sd_params->lora_multipliers[i];
if (sd_params->lora_dynamic) {
multiplier = i < inputs.lora_len ? inputs.lora_multipliers[i] : 0.;
}
if (multiplier != 0.f) {
sd_lora_t spec = {};
spec.path = sd_params->lora_paths[i].c_str();
spec.multiplier = multiplier;
lora_specs.push_back(spec);
std::string lora_name = std::filesystem::path(sd_params->lora_paths[i]).stem().string();
lora_meta << "<lora:" << lora_name << ":" << multiplier << ">";
LoraMap lora_map = sd_params->lora_map;
if (sd_params->lora_dynamic) {
for (int i = 0; i < inputs.lora_len; i++) {
// check if it was initially fixed
std::string path = inputs.lora_filenames[i];
float preloaded_mult = sd_params->lora_map.get_mult(path);
if (!sd_params->lora_fixed || preloaded_mult == 0.f) {
lora_map.add_lora(path, inputs.lora_multipliers[i]);
}
}
}
std::vector<sd_lora_t> lora_specs = lora_map.get_lora_specs();
std::string lora_meta = lora_map.get_lora_meta();
if(!sd_is_quiet && sddebugmode==1) {
if (lora_specs.size() > 0) {
printf("Applying LoRAs:\n");
@ -1424,9 +1476,9 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
{
printf("Upscaling output image...\n");
upscaled_image = upscale(upscaler_ctx, results[i], 2);
png = stbi_write_png_to_mem(upscaled_image.data, 0, upscaled_image.width, upscaled_image.height, upscaled_image.channel, &out_data_len, get_image_params(params, lora_meta.str()).c_str());
png = stbi_write_png_to_mem(upscaled_image.data, 0, upscaled_image.width, upscaled_image.height, upscaled_image.channel, &out_data_len, get_image_params(params, lora_meta).c_str());
} else {
png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params, lora_meta.str()).c_str());
png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params, lora_meta).c_str());
}
if (png != NULL)

View file

@ -139,6 +139,7 @@ public:
std::vector<std::shared_ptr<LoraModel>> first_stage_lora_models;
bool apply_lora_immediately = false;
std::map<std::string, std::shared_ptr<LoraModel>> kcpp_lora_cache;
bool kcpp_lora_cache_populate = false;
std::string taesd_path;
bool use_tiny_autoencoder = false;
@ -1209,7 +1210,6 @@ public:
return it->second;
}
}
// by construction, kcpp will always find the preloaded LoRAs on the cache
std::string lora_path = lora_id;
static std::string high_noise_tag = "|high_noise|";
@ -1224,13 +1224,13 @@ public:
LOG_WARN("load lora tensors from %s failed", lora_path.c_str());
// also cache negatives to avoid I/O at runtime
lora = nullptr;
if (kcpp_at_runtime)
if (kcpp_at_runtime && kcpp_lora_cache_populate)
kcpp_lora_cache[lora_key] = lora;
return lora;
}
lora->multiplier = multiplier;
if (kcpp_at_runtime)
if (kcpp_at_runtime && kcpp_lora_cache_populate)
kcpp_lora_cache[lora_key] = lora;
return lora;
}

View file

@ -53,16 +53,63 @@ def extract_loras_from_prompt(*args, **kwargs):
return koboldcpp.extract_loras_from_prompt(*args, **kwargs)
def mk_lora_info(*args, **kwargs):
def mk_lora_info(imgloras, multipliers):
"""
>>> mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [])
[('/x/lora1.safetensors', 'lora1', 'lora1.safetensors', 1.0), ('/y/lora2.gguf', 'lora2', 'lora2.gguf', 1.0)]
>>> mk_lora_info(['/x/lora1.safetensors', '/y/lora1.safetensors'], [0.3])
[('/x/lora1.safetensors', 'lora1', 'lora1.safetensors', 0.3), ('/y/lora1.safetensors', 'lora1_2', 'lora1_2.safetensors', 0.3)]
>>> mk_lora_info(['./lora1.gguf', '/y/lora2.gguf', 'lora3.gguf'], [0, 0.3])
[('./lora1.gguf', 'lora1', 'lora1.gguf', 0), ('/y/lora2.gguf', 'lora2', 'lora2.gguf', 0.3), ('lora3.gguf', 'lora3', 'lora3.gguf', 0)]
>>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [])
fake filesystem access
fake filesystem access
>>> pre
[{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 1.0, 'preloaded': True}, {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 1.0, 'preloaded': True}]
>>> path
{}
>>> name
{}
>>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [0.])
fake filesystem access
fake filesystem access
>>> pre
[{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.0, 'preloaded': True}, {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 0.0, 'preloaded': True}]
>>> path
{'lora1.safetensors': {'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.0, 'preloaded': True}, 'lora2.gguf': {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 0.0, 'preloaded': True}}
>>> name
{'lora1': 'lora1.safetensors', 'lora2': 'lora2.gguf'}
>>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora1.safetensors'], [0.3])
fake filesystem access
fake filesystem access
>>> pre
[{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.3, 'preloaded': True}, {'fullpath': '/y/lora1.safetensors', 'name': 'lora1_2', 'path': 'lora1_2.safetensors', 'multiplier': 0.3, 'preloaded': True}]
>>> path
{}
>>> pre, path, name = mk_lora_info(['/lora/dir/'], [0.3])
fake filesystem access
Scanning /lora/dir/ for LoRAs...
fake directory scan
found 2 files under /lora/dir/
>>> pre
[]
>>> expected = {
... 'lora1_makebelieve.gguf': {
... 'fullpath': '/lora/dir/lora1_makebelieve.gguf',
... 'name': 'lora1_makebelieve',
... 'path': 'lora1_makebelieve.gguf',
... 'multiplier': 0.0,
... 'preloaded': False},
... 'lora2/makebelieve.gguf': {
... 'fullpath': '/lora/dir/lora2/makebelieve.gguf',
... 'name': 'lora2/makebelieve',
... 'path': 'lora2/makebelieve.gguf',
... 'multiplier': 0.0,
... 'preloaded': False}}
>>> path == expected
True
>>> name
{'lora1_makebelieve': 'lora1_makebelieve.gguf', 'lora2/makebelieve': 'lora2/makebelieve.gguf'}
"""
return koboldcpp.mk_lora_info(*args, **kwargs)
return koboldcpp.mk_lora_info(imgloras, multipliers, True)
def sanitize_lora_multipliers(*args, **kwargs):
"""
@ -86,6 +133,77 @@ def sanitize_lora_multipliers(*args, **kwargs):
return koboldcpp.sanitize_lora_multipliers(*args, **kwargs)
def prepare_lora_multipliers(req_list, imglora_bypath):
"""
>>> req = [
... {"path": "a.gguf", "multiplier": "0.5"},
... {"path": "a.gguf", "multiplier": 1.0},
... ]
>>> imglora = {"a.gguf": {"fullpath": "/abs/a.gguf"}}
>>> paths, mults = prepare_lora_multipliers(req, imglora)
>>> paths == [b"/abs/a.gguf"], mults == [1.5]
(True, True)
>>> req = [
... {"path": "b.gguf", "multiplier": "2"},
... {"path": "c.gguf"},
... "not a dict",
... {"path": "", "multiplier": "3"},
... {"path": "b.gguf", "multiplier": 0},
... ]
>>> imglora = {"b.gguf": {"fullpath": "/abs/b.gguf"},
... "c.gguf": {"fullpath": "/abs/c.gguf"}}
>>> paths, mults = prepare_lora_multipliers(req, imglora)
>>> paths == [b"/abs/b.gguf"], mults == [2.0]
(True, True)
>>> req = [{"path": "missing.gguf", "multiplier": "5"}]
>>> imglora = {}
>>> paths, mults = prepare_lora_multipliers(req, imglora)
>>> paths == [], mults == []
(True, True)
>>> req = [
... {"path": "x.gguf", "multiplier": 1},
... {"path": "y.gguf", "multiplier": 2},
... ]
>>> imglora = {
... "x.gguf": {"fullpath": "/abs/x.gguf", "path": "x.gguf", "multiplier": 0.0},
... "y.gguf": {"fullpath": "/abs/y.gguf", "path": "y.gguf", "multiplier": 0.0},
... }
>>> paths, mults = prepare_lora_multipliers(req, imglora)
>>> paths == [b'/abs/x.gguf', b'/abs/y.gguf']
True
>>> mults == [1.0, 2.0]
True
"""
return koboldcpp.prepare_lora_multipliers_backend(req_list, imglora_bypath)
def mk_sdapi_lora_list(imglora_bypath):
'''
>>> imglora_bypath = {
... 'lora_a.safetensors': {'name': 'lora_a', 'path': 'lora_a.safetensors', 'multiplier': 0.0},
... 'lora_b.gguf' : {'name': 'lora_b', 'path': 'lora_b.gguf', 'multiplier': 0.0},
... 'lora_c.safetensors': {'name': 'lora_c', 'path': 'lora_c.safetensors', 'multiplier': 1.0},
... 'chars/waifu.gguf' : {'name': 'chars/waifu', 'path': 'chars/waifu.gguf', 'multiplier': 0.0}
... }
>>> mk_sdapi_lora_list(imglora_bypath)
[{'name': 'lora_a', 'path': 'lora_a.safetensors'}, {'name': 'lora_b', 'path': 'lora_b.gguf'}, {'name': 'chars/waifu', 'path': 'chars/waifu.gguf'}]
>>> empty_data = {}
>>> mk_sdapi_lora_list(empty_data)
[]
>>> mixed_data = {
... 'k1': {'name': 'X', 'path': 'p1', 'multiplier': 0.5},
... 'k2': {'name': 'Y', 'path': 'p2', 'multiplier': 0.0}
... }
>>> mk_sdapi_lora_list(mixed_data)
[{'name': 'Y', 'path': 'p2'}]
'''
return koboldcpp.mk_sdapi_lora_list(imglora_bypath)
def gendefaults_parse_meta_field(*args, **kwargs):
'''