sd: support for dynamic LoRA loading from a directory (#2036)

* backend support for controlling LoRA cache and fixed multipliers

  The generation LoRA multipliers are now added to the initial multipliers, so e.g. a merged LCM model will behave the same as a normal model with a preloaded LCM LoRA.

* frontend support
2026-05-18 23:49:46 +00:00 · 2026-03-16 09:39:21 -03:00 · 2026-03-16 09:39:21 -03:00 · feea014774
commit feea014774
parent b88fc44d0e
5 changed files with 402 additions and 120 deletions
--- a/expose.h
+++ b/expose.h
@ -230,6 +230,7 @@ struct sd_generation_inputs
    const char * cache_options = nullptr;
    const bool upscale = false;
    const int lora_len = 0;
+    const char ** lora_filenames = nullptr;
    const float * lora_multipliers = nullptr;
 };
 struct sd_generation_outputs
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -89,7 +89,11 @@ ttsmodelpath = "" #if empty, not initialized
 embeddingsmodelpath = "" #if empty, not initialized
 musicllmmodelpath = "" #if empty, not initialized
 musicdiffusionmodelpath = "" #if empty, not initialized
-imglorainfo = []
+imglora_preload = []
+imglora_bypath = {}
+imglora_name2path = {}
+imglora_cached = True
+imglora_initial_fixed = True
 maxctx = 8192
 maxhordectx = 0 #set to whatever maxctx is if 0
 maxhordelen = 1024
@ -362,6 +366,7 @@ class sd_generation_inputs(ctypes.Structure):
                ("cache_options", ctypes.c_char_p),
                ("upscale", ctypes.c_bool),
                ("lora_len", ctypes.c_int),
+                ("lora_filenames", ctypes.POINTER(ctypes.c_char_p)),
                ("lora_multipliers", ctypes.POINTER(ctypes.c_float))]

 class sd_generation_outputs(ctypes.Structure):
@ -1175,25 +1180,33 @@ def get_capabilities():
    admin_type = (2 if args.admin and args.admindir and args.adminpassword else (1 if args.admin and args.admindir else 0))
    return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision_support,"audio":has_audio_support,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "embeddings":has_embeddings, "music":has_music, "savedata":(savedata_obj is not None), "admin": admin_type, "guidance": has_guidance, "jinja": has_jinja, "mcp":has_mcp}

+
+def scan_directory(dirpath, valid_exts, depth):
+    files = []
+    for entry in sorted(os.listdir(dirpath)): # Scan top-level directory
+        full_path = os.path.join(dirpath, entry)
+        if os.path.isfile(full_path) and entry.lower().endswith(valid_exts): # If toplevel file
+            files.append(entry)
+        elif depth > 0 and os.path.isdir(full_path): #if dir, scan up to 1 level deep
+            for subentry in sorted(os.listdir(full_path)):
+                sub_full_path = os.path.join(full_path, subentry)
+                if os.path.isfile(sub_full_path) and subentry.endswith(valid_exts):
+                    rel_path = os.path.join(entry, subentry)
+                    files.append(rel_path)
+    return files
+
+
 def get_current_admindir_list():
    opts = []
    if args.admin and args.admindir:
        dirpath = os.path.abspath(args.admindir)
        valid_exts = (".kcpps", ".kcppt", ".gguf")
-        for entry in sorted(os.listdir(dirpath)): # Scan top-level directory
-            full_path = os.path.join(dirpath, entry)
-            if os.path.isfile(full_path) and entry.endswith(valid_exts): # If toplevel file
-                opts.append(entry)
-            elif os.path.isdir(full_path): #if dir, scan up to 1 level deep
-                for subentry in sorted(os.listdir(full_path)):
-                    sub_full_path = os.path.join(full_path, subentry)
-                    if os.path.isfile(sub_full_path) and subentry.endswith(valid_exts):
-                        rel_path = os.path.join(entry, subentry)
-                        opts.append(rel_path)
+        opts = scan_directory(dirpath, valid_exts, 1)
        opts.append("initial_model")
        opts.append("unload_model")
    return opts

+
 def dump_gguf_metadata(file_path): #if you're gonna copy this into your own project at least credit concedo
    chunk_size = 1024*1024*12  # read first 12mb of file
    try:
@ -2018,7 +2031,7 @@ def sd_quant_option(value):
    except Exception:
        return 0

-def sd_load_model(model_filename,vae_filename,lora_filenames,t5xxl_filename,clip1_filename,clip2_filename,photomaker_filename,upscaler_filename):
+def sd_load_model(model_filename,vae_filename,t5xxl_filename,clip1_filename,clip2_filename,photomaker_filename,upscaler_filename):
    global args
    inputs = sd_load_model_inputs()
    inputs.model_filename = model_filename.encode("UTF-8")
@ -2047,14 +2060,22 @@ def sd_load_model(model_filename,vae_filename,lora_filenames,t5xxl_filename,clip
    inputs.photomaker_filename = photomaker_filename.encode("UTF-8")
    inputs.upscaler_filename = upscaler_filename.encode("UTF-8")

-    lora_filenames = [lf.encode("UTF-8") for lf in lora_filenames[:lora_filenames_max] if lf]
-    lora_len = len(lora_filenames)
-    lora_multipliers = prepare_lora_multipliers([])
-    inputs.lora_len = lora_len
-    inputs.lora_filenames = (ctypes.c_char_p * lora_len)(*lora_filenames)
-    inputs.lora_multipliers = (ctypes.c_float * lora_len)(*lora_multipliers)
+    lora_filenames, lora_multipliers = prepare_initial_lora_multipliers()
+    inputs.lora_len = len(lora_filenames)
+    inputs.lora_filenames = (ctypes.c_char_p * inputs.lora_len)(*lora_filenames)
+    inputs.lora_multipliers = (ctypes.c_float * inputs.lora_len)(*lora_multipliers)
+    if 0 and inputs.lora_len:
+        print("Preloading LoRAs:")
+        for i in range(inputs.lora_len):
+            print(f"  {inputs.lora_filenames[i]} @ {inputs.lora_multipliers[i]}")
    # auto if no zero-weight lora, dynamic otherwise
-    inputs.lora_apply_mode = 3 if 0. in lora_multipliers else 0
+    lora_apply_mode = 0 # auto
+    if imglora_bypath:
+        lora_dynamic = 1 << 3 # accept changes at runtime
+        lora_cache   = 1 << 4 if imglora_cached else 0 # cache the preloaded LoRAs
+        lora_fixed   = 1 << 5 if imglora_initial_fixed else 0 # do not allow changes to the non-zero preloaded LoRAs
+        lora_apply_mode = lora_dynamic | lora_cache | lora_fixed
+    inputs.lora_apply_mode = lora_apply_mode

    inputs.img_hard_limit = args.sdclamped
    inputs.img_soft_limit = args.sdclampedsoft
@ -2177,23 +2198,57 @@ def sanitize_lora_multipliers(sdloramult):
    sdloramult = [tryparsefloat(m, 0.) for m in sdloramult]
    return sdloramult

-def prepare_lora_multipliers(request_list):
-    orig_multipliers = [lora[3] for lora in imglorainfo]
-    req_by_path = {}
+def prepare_initial_lora_multipliers():
+    res_paths = []
+    res_multipliers = []
+    num_loras = len(imglora_preload)
+    if num_loras > lora_filenames_max:
+        print(f'Warning: more than {lora_filenames_max} preloaded LoRAs, extra ones will be ignored')
+        num_loras = lora_filenames_max
+    for info in imglora_preload[:num_loras]:
+        res_paths.append(info['fullpath'].encode("UTF-8"))
+        res_multipliers.append(info['multiplier'])
+    return res_paths, res_multipliers
+
+def prepare_lora_multipliers_backend(request_list, imglora_bypath):
+    req_dedup = {}
    for r in request_list:
        if not isinstance(r, dict):
            continue
-        multiplier = tryparsefloat(r.get('multiplier'), 0.)
        path = r.get('path')
-        if path and isinstance(path, str):
-            req_by_path[path] = req_by_path.get(path, 0.) + multiplier
-    result = []
-    for i, (fullpath, name, path, origmul) in enumerate(imglorainfo):
-        multiplier = orig_multipliers[i]
-        if multiplier == 0. and path in req_by_path:
-            multiplier = req_by_path[path]
-        result.append(multiplier)
-    return result
+        multiplier = tryparsefloat(r.get('multiplier'), 0.)
+        if not path or not isinstance(path, str) or not multiplier:
+            continue
+        info = imglora_bypath.get(path)
+        if info:
+            fullpath = info["fullpath"]
+            req_dedup[fullpath] = req_dedup.get(fullpath, 0.) + multiplier
+    res_paths = []
+    res_multipliers = []
+    for fullpath, multiplier in req_dedup.items():
+        if multiplier != 0.0:
+            res_paths.append(fullpath.encode("UTF-8"))
+            res_multipliers.append(multiplier)
+    # enforce lora_filenames_max
+    max_requests = lora_filenames_max - len(imglora_preload)
+    if len(res_paths) > max_requests:
+        msg_preloaded = ""
+        if len(imglora_preload) > 0:
+            msg_preloaded = f" (including {len(imglora_preload)} preloaded)"
+        print(f'Warning: more than {lora_filenames_max} requested LoRAs{msg_preloaded}, extra ones will be ignored')
+        res_paths = res_paths[:max_requests]
+        res_multipliers = res_multipliers[:max_requests]
+    return res_paths, res_multipliers
+
+def prepare_lora_multipliers(request_list):
+    return prepare_lora_multipliers_backend(request_list, imglora_bypath)
+
+def mk_sdapi_lora_list(imglora_bypath):
+    return [
+        {'name': info['name'], 'path': info['path']}
+            for info in imglora_bypath.values()
+                if info['multiplier'] == 0.0 # both preloaded and scanned
+    ]

 def extract_loras_from_prompt(prompt):
    pattern = r'<lora:([^:>]+):([^>]+)>'
@ -2219,16 +2274,17 @@ def extract_loras_from_prompt(prompt):
    return prompt, lora_data

 def lora_map_name_to_path(request_list):
-    name2path = {}
-    for _, name, path, _ in imglorainfo:
-        name2path[name] = path
    result = []
    for req in request_list:
        out = dict(req)
        name = out.pop('name')
-        path = name2path.get(name)
-        if path:
-            out['path'] = path
+        path = imglora_name2path.get(name)
+        if not path:
+            print(f'LoRA {name} not found')
+            continue
+        info = imglora_bypath.get(path)
+        if info:
+            out['path'] = info['path']
            result.append(out)
    return result

@ -2283,6 +2339,7 @@ def sd_generate(genparams):
    extra_images_arr = ([] if not extra_images_arr else extra_images_arr)
    extra_images_arr = [img for img in extra_images_arr if img not in (None, "")]
    extra_images_arr = extra_images_arr[:extra_images_max]
+    lora_filenames, lora_multipliers = prepare_lora_multipliers(genparams.get("lora", []))

    #clean vars
    cfg_scale = (1 if cfg_scale < 1 else (forced_maxcfg if cfg_scale > forced_maxcfg else cfg_scale))
@ -2334,9 +2391,8 @@ def sd_generate(genparams):
    inputs.cache_mode = cache_mode.encode("UTF-8")
    inputs.cache_options = cache_options.encode("UTF-8")
    inputs.upscale = (True if tryparseint(genparams.get("enable_hr", 0),0) else False)
-
-    lora_multipliers = prepare_lora_multipliers(genparams.get("lora", []))
-    inputs.lora_len = len(lora_multipliers)
+    inputs.lora_len = len(lora_filenames)
+    inputs.lora_filenames = (ctypes.c_char_p * inputs.lora_len)(*lora_filenames)
    inputs.lora_multipliers = (ctypes.c_float * inputs.lora_len)(*lora_multipliers)

    ret = handle.sd_generate(inputs)
@ -4426,7 +4482,7 @@ Change Mode<br>
            response_body = (json.dumps({"object":"list","data":mlist}).encode())

        elif clean_path.endswith('/sdapi/v1/loras'):
-            response_body = (json.dumps([{'name': name, 'path': path} for _, name, path, multiplier in imglorainfo if multiplier == 0.])).encode()
+            response_body = (json.dumps(mk_sdapi_lora_list(imglora_bypath))).encode()

        elif clean_path.endswith('/sdapi/v1/upscalers'):
            if args.sdupscaler:
@ -8739,26 +8795,88 @@ def main(launch_args, default_args):
                input()


-def mk_lora_info(imgloras, multipliers):
-    # (full path, name, name+extension, can change multiplier)
-    # XXX for each LoRA, sdapi needs a name and a path; we could use
-    # the full filename as a path, but we don't know if we can expose it
-    used_lora_names = set()
-    result = []
+def mk_lora_info(imgloras, multipliers, mock_filesystem=False):
    first_multiplier = multipliers[0] if len(multipliers) > 0 else 1.
+    lora_files = []
+    lora_dirs = []
+    # identify files and dirs
    for i, lora_path in enumerate(imgloras):
        multiplier = multipliers[i] if i < len(multipliers) else first_multiplier
-        lora_file = os.path.basename(lora_path)
+        if mock_filesystem:
+            print('fake filesystem access')
+            if lora_path.endswith('/'):
+                lora_dirs.append(lora_path)
+            else:
+                lora_files.append(('', lora_path, multiplier))
+        elif os.path.isfile(lora_path):
+            lora_files.append(('', lora_path, multiplier))
+        elif os.path.isdir(lora_path):
+            lora_dirs.append(lora_path)
+        elif os.path.exists(lora_path):
+            print(f"Unexpected file type for SD LORA model file {lora_path}")
+        else:
+            print(f"Missing SD LORA model file {lora_path}...")
+    # scan all dirs
+    for lora_dir in lora_dirs:
+        print(f'Scanning {lora_dir} for LoRAs...')
+        if mock_filesystem:
+            print('fake directory scan')
+            files = ['lora1_makebelieve.gguf', 'lora2/makebelieve.gguf']
+        else:
+            files = scan_directory(lora_dir, ('.safetensors', '.gguf'), 1)
+        print(f'  found {len(files)} files under {lora_dir}')
+        for file in files:
+            lora_files.append((lora_dir, file, 0.0))
+    # dedup and map all files
+    unique_lora_names = set()
+    lora_fullmap = {}
+    for i, (lora_dir, lora_path, multiplier) in enumerate(lora_files):
+        if lora_dir:
+            # lora_path is relative: we can show it on the interface and accept it
+            lora_fullpath = os.path.join(lora_dir, lora_path)
+            # NOTE: we are including the relative directory on the short name
+            lora_file = lora_path
+            preloaded = False
+        else:
+            lora_fullpath = lora_path
+            # we don't know which portion of the path we can show, so omit it
+            lora_file = os.path.basename(lora_path)
+            preloaded = True
+        if not mock_filesystem:
+            lora_fullpath = os.path.abspath(lora_fullpath)
+        # dedup paths (e.g. preloaded and on directory)
+        if lora_fullpath in lora_fullmap:
+            lora_fullmap[lora_fullpath]["multiplier"] += multiplier
+            continue
        lora_name, lora_ext = os.path.splitext(lora_file)
        # ensure unique names
        i = 1
-        mapped_name = lora_name
-        while mapped_name in used_lora_names:
+        lora_uname = lora_name
+        while lora_uname in unique_lora_names:
            i += 1
-            mapped_name = lora_name + '_' + str(i)
-        used_lora_names.add(mapped_name)
-        result.append((lora_path, mapped_name, mapped_name + lora_ext, multiplier))
-    return result
+            lora_uname = lora_name + '_' + str(i)
+        unique_lora_names.add(lora_uname)
+        lora_upath = lora_uname + lora_ext
+        lora_entry = {
+            'fullpath': lora_fullpath,
+            'name': lora_uname,
+            'path': lora_upath,
+            'multiplier': multiplier,
+            'preloaded': preloaded,
+        }
+        lora_fullmap[lora_fullpath] = lora_entry
+    # build the runtime tables
+    preloaded_table = []
+    lora_path_map = {}
+    lora_name_map = {}
+    for lora_entry in lora_fullmap.values():
+        # only map LoRAs that can be changed
+        if not imglora_initial_fixed or lora_entry["multiplier"] == 0.0:
+            lora_path_map[lora_entry["path"]] = lora_entry
+            lora_name_map[lora_entry["name"]] = lora_entry["path"]
+        if lora_entry["preloaded"]:
+            preloaded_table.append(lora_entry)
+    return preloaded_table, lora_path_map, lora_name_map


 def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
@ -9201,16 +9319,9 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
            imgclip2 = ""
            imgphotomaker = ""
            imgupscaler = ""
-            if args.sdlora and len(args.sdlora)>0:
-                for i in range (0,len(args.sdlora)):
-                    curr = args.sdlora[i]
-                    if os.path.exists(curr):
-                        imgloras.append(os.path.abspath(curr))
-                    else:
-                        print(f"Missing SD LORA model file {curr}...")
-            global imglorainfo
+            global imglora_preload, imglora_bypath, imglora_name2path
            args.sdloramult = sanitize_lora_multipliers(args.sdloramult)
-            imglorainfo = mk_lora_info(imgloras, args.sdloramult)
+            imglora_preload, imglora_bypath, imglora_name2path = mk_lora_info(args.sdlora, args.sdloramult)
            if args.sdvae:
                if os.path.exists(args.sdvae):
                    imgvae = os.path.abspath(args.sdvae)
@ -9247,7 +9358,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
            friendlysdmodelname = os.path.basename(imgmodel)
            friendlysdmodelname = os.path.splitext(friendlysdmodelname)[0]
            friendlysdmodelname = sanitize_string(friendlysdmodelname)
-            loadok = sd_load_model(imgmodel,imgvae,imgloras,imgt5xxl,imgclip1,imgclip2,imgphotomaker,imgupscaler)
+            loadok = sd_load_model(imgmodel,imgvae,imgt5xxl,imgclip1,imgclip2,imgphotomaker,imgupscaler)
            print("Load Image Model OK: " + str(loadok))
            if not loadok:
                exitcounter = 999
--- a/otherarch/sdcpp/sdtype_adapter.cpp
+++ b/otherarch/sdcpp/sdtype_adapter.cpp
@ -45,6 +45,62 @@
 static_assert((int)SD_TYPE_COUNT == (int)GGML_TYPE_COUNT,
              "inconsistency between SD_TYPE_COUNT and GGML_TYPE_COUNT");

+struct LoraMap {
+    std::vector<std::pair<std::string, float>> items;
+    std::unordered_map<std::string, std::size_t> index;
+
+    void add_lora(const std::string& k, float v) {
+        auto it = index.find(k);
+        if (it == index.end()) {
+            index[k] = items.size();
+            items.emplace_back(k, v);
+        } else {
+            items[it->second].second += v;
+        }
+    }
+
+    float check_small_mult(float mult) {
+        if (mult > 1e-6 || mult < -1e-6)
+            return mult;
+        return 0.f;
+    }
+
+    float get_mult(const std::string& k) {
+        auto lora = index.find(k);
+        if (lora == index.end()) return 0.f;
+        return check_small_mult(items[lora->second].second);
+    }
+
+    std::vector<sd_lora_t> get_lora_specs(bool include_zeroes = false) {
+        std::vector<sd_lora_t> lora_specs;
+        for (const auto & lora: items) {
+            float multiplier = check_small_mult(lora.second);
+            if (include_zeroes || multiplier != 0.f) {
+                sd_lora_t spec = {};
+                spec.path = lora.first.c_str();
+                spec.multiplier = multiplier;
+                lora_specs.push_back(spec);
+            }
+        }
+        return lora_specs;
+    }
+
+    std::string get_lora_meta() {
+        std::stringstream lora_meta;
+        lora_meta << std::setprecision(6);
+        for (const auto & lora: items) {
+            float multiplier = check_small_mult(lora.second);
+            if (multiplier != 0.f) {
+                std::string lora_name = std::filesystem::path(lora.first).stem().string();
+                lora_meta << "<lora:" << lora_name << ":" << multiplier << ">";
+            }
+        }
+        return lora_meta.str();
+    }
+
+};
+
+
 struct SDParams {
    int n_threads = -1;
    std::string model_path;
@ -79,9 +135,9 @@ struct SDParams {

    bool chroma_use_dit_mask     = true;

-    std::vector<std::string> lora_paths;
-    std::vector<float> lora_multipliers;
+    LoraMap lora_map;
    bool lora_dynamic = false;
+    bool lora_fixed   = false;

    std::string cache_mode;
    std::string cache_options;
@ -211,12 +267,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
    set_sd_quiet(sd_is_quiet);
    executable_path = inputs.executable_path;
    std::string taesdpath = "";
-    std::vector<std::string> lora_paths;
-    std::vector<float> lora_multipliers;
+    LoraMap lora_map;
    for(int i=0;i<inputs.lora_len;++i)
    {
-        lora_paths.push_back(inputs.lora_filenames[i]);
-        lora_multipliers.push_back(inputs.lora_multipliers[i]);
+        lora_map.add_lora(inputs.lora_filenames[i], inputs.lora_multipliers[i]);
    }
    std::string vaefilename = inputs.vae_filename;
    std::string t5xxl_filename = inputs.t5xxl_filename;
@ -233,23 +287,32 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {

    int lora_apply_mode = LORA_APPLY_AT_RUNTIME;
    bool lora_dynamic = false;
+    bool lora_cache = false;
+    bool lora_fixed = false;
    if(inputs.lora_apply_mode >= 0 && inputs.lora_apply_mode <= 2) {
        lora_apply_mode = inputs.lora_apply_mode;
    }
-    else if(inputs.lora_apply_mode == 3) {
-        lora_dynamic = true;
+    else {
+        // bit 3: LoRAs can be changed dynamically
+        // bit 4: cache the initial LoRA list in VRAM
+        // bit 5: do not allow multiplier changes for the initial LoRAs
+        lora_dynamic = !!(inputs.lora_apply_mode & (1<<3));
+        lora_cache   = lora_dynamic && !!(inputs.lora_apply_mode & (1<<4));
+        lora_fixed   = lora_dynamic && !!(inputs.lora_apply_mode & (1<<5));
    }

-    if(lora_paths.size() > 0)
+    if(lora_map.items.size() > 0)
    {
        const char* lora_apply_mode_name = lora_apply_mode == 1 ? "immediately"
                                         : lora_apply_mode == 2 ? "at runtime"
                                         : "auto";
-        const char * lora_dynamic_name = lora_dynamic ? " (dynamic)" : "";
-        printf("With LoRAs in apply mode %s%s:\n", lora_apply_mode_name, lora_dynamic_name);
-        for(int i=0;i<lora_paths.size();++i)
+        const char * lora_dynamic_name = lora_dynamic ? ", dynamic" : "";
+        const char * lora_cache_name = lora_cache ? ", with caching" : "";
+        printf("With LoRAs in apply mode %s%s%s:\n", lora_apply_mode_name, lora_dynamic_name, lora_cache_name);
+        for(auto lora: lora_map.items)
        {
-            printf("  %s at %f power\n", lora_paths[i].c_str(),lora_multipliers[i]);
+            const char * lora_fixed_name = lora_fixed && lora.second != 0.f ? " (fixed)" : "";
+            printf("  %s at %f power%s\n", lora.first.c_str(), lora.second, lora_fixed_name);
        }
    }

@ -337,9 +400,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
    sd_params->clip_l_path = clip1_filename;
    sd_params->clip_g_path = clip2_filename;
    sd_params->stacked_id_embeddings_path = photomaker_filename;
-    sd_params->lora_paths = lora_paths;
-    sd_params->lora_multipliers = lora_multipliers;
+    sd_params->lora_map = lora_map;
    sd_params->lora_dynamic = lora_dynamic;
+    sd_params->lora_fixed   = lora_fixed;
    //if t5 is set, and model is a gguf, load it as a diffusion model path
    bool endswithgguf = (sd_params->model_path.rfind(".gguf") == sd_params->model_path.size() - 5);
    if((sd_params->t5xxl_path!="" || sd_params->clip_l_path!="" || sd_params->clip_g_path!="") && endswithgguf)
@ -429,21 +492,13 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
    sdmodelfilename = mpath.filename().string();

    // preload the LoRAs with the initial multipliers
-    std::vector<sd_lora_t> lora_specs;
-    for(int i=0;i<sd_params->lora_paths.size();++i)
-    {
-        if (!lora_dynamic && sd_params->lora_multipliers[i] == 0.)
-            continue;
-        sd_lora_t spec = {};
-        spec.path = sd_params->lora_paths[i].c_str();
-        spec.multiplier = sd_params->lora_multipliers[i];
-        lora_specs.push_back(spec);
-    }
-
+    std::vector<sd_lora_t> lora_specs = sd_params->lora_map.get_lora_specs(lora_dynamic&& lora_cache);
    if(lora_specs.size()>0)
    {
        printf("  applying %zu LoRAs...\n", lora_specs.size());
+        sd_ctx->sd->kcpp_lora_cache_populate = lora_cache;
        sd_ctx->sd->apply_loras(lora_specs.data(), lora_specs.size());
+        sd_ctx->sd->kcpp_lora_cache_populate = false;
    }

    input_extraimage_buffers.reserve(max_extra_images);
@ -1166,24 +1221,21 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
    parse_cache_options(params.cache, sd_params->cache_mode, sd_params->cache_options);
    params.batch_count = 1;

-    std::vector<sd_lora_t> lora_specs;
-    std::stringstream lora_meta;
-    lora_meta << std::setprecision(6);
-    for(size_t i=0;i<sd_params->lora_paths.size();++i)
-    {
-        float multiplier = sd_params->lora_multipliers[i];
-        if (sd_params->lora_dynamic) {
-            multiplier = i < inputs.lora_len ? inputs.lora_multipliers[i] : 0.;
-        }
-        if (multiplier != 0.f) {
-            sd_lora_t spec = {};
-            spec.path = sd_params->lora_paths[i].c_str();
-            spec.multiplier = multiplier;
-            lora_specs.push_back(spec);
-            std::string lora_name = std::filesystem::path(sd_params->lora_paths[i]).stem().string();
-            lora_meta << "<lora:" << lora_name << ":" << multiplier << ">";
+    LoraMap lora_map = sd_params->lora_map;
+    if (sd_params->lora_dynamic) {
+        for (int i = 0; i < inputs.lora_len; i++) {
+            // check if it was initially fixed
+            std::string path = inputs.lora_filenames[i];
+            float preloaded_mult = sd_params->lora_map.get_mult(path);
+            if (!sd_params->lora_fixed || preloaded_mult == 0.f) {
+                lora_map.add_lora(path, inputs.lora_multipliers[i]);
+            }
        }
    }
+
+    std::vector<sd_lora_t> lora_specs = lora_map.get_lora_specs();
+    std::string lora_meta = lora_map.get_lora_meta();
+
    if(!sd_is_quiet && sddebugmode==1) {
        if (lora_specs.size() > 0) {
            printf("Applying LoRAs:\n");
@ -1424,9 +1476,9 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
            {
                printf("Upscaling output image...\n");
                upscaled_image = upscale(upscaler_ctx, results[i], 2);
-                png = stbi_write_png_to_mem(upscaled_image.data, 0, upscaled_image.width, upscaled_image.height, upscaled_image.channel, &out_data_len, get_image_params(params, lora_meta.str()).c_str());
+                png = stbi_write_png_to_mem(upscaled_image.data, 0, upscaled_image.width, upscaled_image.height, upscaled_image.channel, &out_data_len, get_image_params(params, lora_meta).c_str());
            } else {
-                png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params, lora_meta.str()).c_str());
+                png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params, lora_meta).c_str());
            }

            if (png != NULL)
--- a/otherarch/sdcpp/stable-diffusion.cpp
+++ b/otherarch/sdcpp/stable-diffusion.cpp
@ -139,6 +139,7 @@ public:
    std::vector<std::shared_ptr<LoraModel>> first_stage_lora_models;
    bool apply_lora_immediately = false;
    std::map<std::string, std::shared_ptr<LoraModel>> kcpp_lora_cache;
+    bool kcpp_lora_cache_populate = false;

    std::string taesd_path;
    bool use_tiny_autoencoder            = false;
@ -1209,7 +1210,6 @@ public:
                return it->second;
            }
        }
-        // by construction, kcpp will always find the preloaded LoRAs on the cache

        std::string lora_path             = lora_id;
        static std::string high_noise_tag = "|high_noise|";
@ -1224,13 +1224,13 @@ public:
            LOG_WARN("load lora tensors from %s failed", lora_path.c_str());
            // also cache negatives to avoid I/O at runtime
            lora = nullptr;
-            if (kcpp_at_runtime)
+            if (kcpp_at_runtime && kcpp_lora_cache_populate)
                kcpp_lora_cache[lora_key] = lora;
            return lora;
        }

        lora->multiplier = multiplier;
-        if (kcpp_at_runtime)
+        if (kcpp_at_runtime && kcpp_lora_cache_populate)
            kcpp_lora_cache[lora_key] = lora;
        return lora;
    }
--- a/tests/test_koboldcpp.py
+++ b/tests/test_koboldcpp.py
@ -53,16 +53,63 @@ def extract_loras_from_prompt(*args, **kwargs):

    return koboldcpp.extract_loras_from_prompt(*args, **kwargs)

-def mk_lora_info(*args, **kwargs):
+def mk_lora_info(imgloras, multipliers):
    """
-    >>> mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [])
-    [('/x/lora1.safetensors', 'lora1', 'lora1.safetensors', 1.0), ('/y/lora2.gguf', 'lora2', 'lora2.gguf', 1.0)]
-    >>> mk_lora_info(['/x/lora1.safetensors', '/y/lora1.safetensors'], [0.3])
-    [('/x/lora1.safetensors', 'lora1', 'lora1.safetensors', 0.3), ('/y/lora1.safetensors', 'lora1_2', 'lora1_2.safetensors', 0.3)]
-    >>> mk_lora_info(['./lora1.gguf', '/y/lora2.gguf', 'lora3.gguf'], [0, 0.3])
-    [('./lora1.gguf', 'lora1', 'lora1.gguf', 0), ('/y/lora2.gguf', 'lora2', 'lora2.gguf', 0.3), ('lora3.gguf', 'lora3', 'lora3.gguf', 0)]
+    >>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [])
+    fake filesystem access
+    fake filesystem access
+    >>> pre
+    [{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 1.0, 'preloaded': True}, {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 1.0, 'preloaded': True}]
+    >>> path
+    {}
+    >>> name
+    {}
+
+    >>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [0.])
+    fake filesystem access
+    fake filesystem access
+    >>> pre
+    [{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.0, 'preloaded': True}, {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 0.0, 'preloaded': True}]
+    >>> path
+    {'lora1.safetensors': {'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.0, 'preloaded': True}, 'lora2.gguf': {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 0.0, 'preloaded': True}}
+    >>> name
+    {'lora1': 'lora1.safetensors', 'lora2': 'lora2.gguf'}
+
+    >>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora1.safetensors'], [0.3])
+    fake filesystem access
+    fake filesystem access
+    >>> pre
+    [{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.3, 'preloaded': True}, {'fullpath': '/y/lora1.safetensors', 'name': 'lora1_2', 'path': 'lora1_2.safetensors', 'multiplier': 0.3, 'preloaded': True}]
+    >>> path
+    {}
+
+    >>> pre, path, name = mk_lora_info(['/lora/dir/'], [0.3])
+    fake filesystem access
+    Scanning /lora/dir/ for LoRAs...
+    fake directory scan
+      found 2 files under /lora/dir/
+    >>> pre
+    []
+    >>> expected = {
+    ... 'lora1_makebelieve.gguf': {
+    ...     'fullpath': '/lora/dir/lora1_makebelieve.gguf',
+    ...     'name': 'lora1_makebelieve',
+    ...     'path': 'lora1_makebelieve.gguf',
+    ...     'multiplier': 0.0,
+    ...     'preloaded': False},
+    ... 'lora2/makebelieve.gguf': {
+    ...     'fullpath': '/lora/dir/lora2/makebelieve.gguf',
+    ...     'name': 'lora2/makebelieve',
+    ...     'path': 'lora2/makebelieve.gguf',
+    ...     'multiplier': 0.0,
+    ...     'preloaded': False}}
+    >>> path == expected
+    True
+    >>> name
+    {'lora1_makebelieve': 'lora1_makebelieve.gguf', 'lora2/makebelieve': 'lora2/makebelieve.gguf'}
+
    """
-    return koboldcpp.mk_lora_info(*args, **kwargs)
+    return koboldcpp.mk_lora_info(imgloras, multipliers, True)

 def sanitize_lora_multipliers(*args, **kwargs):
    """
@ -86,6 +133,77 @@ def sanitize_lora_multipliers(*args, **kwargs):
    return koboldcpp.sanitize_lora_multipliers(*args, **kwargs)


+def prepare_lora_multipliers(req_list, imglora_bypath):
+    """
+    >>> req = [
+    ...     {"path": "a.gguf", "multiplier": "0.5"},
+    ...     {"path": "a.gguf", "multiplier": 1.0},
+    ... ]
+    >>> imglora = {"a.gguf": {"fullpath": "/abs/a.gguf"}}
+    >>> paths, mults = prepare_lora_multipliers(req, imglora)
+    >>> paths == [b"/abs/a.gguf"], mults == [1.5]
+    (True, True)
+
+    >>> req = [
+    ...     {"path": "b.gguf", "multiplier": "2"},
+    ...     {"path": "c.gguf"},
+    ...     "not a dict",
+    ...     {"path": "", "multiplier": "3"},
+    ...     {"path": "b.gguf", "multiplier": 0},
+    ... ]
+    >>> imglora = {"b.gguf": {"fullpath": "/abs/b.gguf"},
+    ...            "c.gguf": {"fullpath": "/abs/c.gguf"}}
+    >>> paths, mults = prepare_lora_multipliers(req, imglora)
+    >>> paths == [b"/abs/b.gguf"], mults == [2.0]
+    (True, True)
+
+    >>> req = [{"path": "missing.gguf", "multiplier": "5"}]
+    >>> imglora = {}
+    >>> paths, mults = prepare_lora_multipliers(req, imglora)
+    >>> paths == [], mults == []
+    (True, True)
+
+    >>> req = [
+    ...     {"path": "x.gguf", "multiplier": 1},
+    ...     {"path": "y.gguf", "multiplier": 2},
+    ... ]
+    >>> imglora = {
+    ...     "x.gguf": {"fullpath": "/abs/x.gguf", "path": "x.gguf", "multiplier": 0.0},
+    ...     "y.gguf": {"fullpath": "/abs/y.gguf", "path": "y.gguf", "multiplier": 0.0},
+    ... }
+    >>> paths, mults = prepare_lora_multipliers(req, imglora)
+    >>> paths == [b'/abs/x.gguf', b'/abs/y.gguf']
+    True
+    >>> mults == [1.0, 2.0]
+    True
+    """
+    return koboldcpp.prepare_lora_multipliers_backend(req_list, imglora_bypath)
+
+def mk_sdapi_lora_list(imglora_bypath):
+    '''
+    >>> imglora_bypath = {
+    ...     'lora_a.safetensors': {'name': 'lora_a', 'path': 'lora_a.safetensors', 'multiplier': 0.0},
+    ...     'lora_b.gguf'       : {'name': 'lora_b', 'path': 'lora_b.gguf', 'multiplier': 0.0},
+    ...     'lora_c.safetensors': {'name': 'lora_c', 'path': 'lora_c.safetensors', 'multiplier': 1.0},
+    ...     'chars/waifu.gguf'  : {'name': 'chars/waifu', 'path': 'chars/waifu.gguf', 'multiplier': 0.0}
+    ... }
+    >>> mk_sdapi_lora_list(imglora_bypath)
+    [{'name': 'lora_a', 'path': 'lora_a.safetensors'}, {'name': 'lora_b', 'path': 'lora_b.gguf'}, {'name': 'chars/waifu', 'path': 'chars/waifu.gguf'}]
+
+    >>> empty_data = {}
+    >>> mk_sdapi_lora_list(empty_data)
+    []
+
+    >>> mixed_data = {
+    ...     'k1': {'name': 'X', 'path': 'p1', 'multiplier': 0.5},
+    ...     'k2': {'name': 'Y', 'path': 'p2', 'multiplier': 0.0}
+    ... }
+    >>> mk_sdapi_lora_list(mixed_data)
+    [{'name': 'Y', 'path': 'p2'}]
+    '''
+    return koboldcpp.mk_sdapi_lora_list(imglora_bypath)
+
+
 def gendefaults_parse_meta_field(*args, **kwargs):
    '''