diff --git a/expose.h b/expose.h index 84da17d32..70e68543c 100644 --- a/expose.h +++ b/expose.h @@ -230,6 +230,7 @@ struct sd_generation_inputs const char * cache_options = nullptr; const bool upscale = false; const int lora_len = 0; + const char ** lora_filenames = nullptr; const float * lora_multipliers = nullptr; }; struct sd_generation_outputs diff --git a/koboldcpp.py b/koboldcpp.py index b91510e0c..3177c6a6b 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -89,7 +89,11 @@ ttsmodelpath = "" #if empty, not initialized embeddingsmodelpath = "" #if empty, not initialized musicllmmodelpath = "" #if empty, not initialized musicdiffusionmodelpath = "" #if empty, not initialized -imglorainfo = [] +imglora_preload = [] +imglora_bypath = {} +imglora_name2path = {} +imglora_cached = True +imglora_initial_fixed = True maxctx = 8192 maxhordectx = 0 #set to whatever maxctx is if 0 maxhordelen = 1024 @@ -362,6 +366,7 @@ class sd_generation_inputs(ctypes.Structure): ("cache_options", ctypes.c_char_p), ("upscale", ctypes.c_bool), ("lora_len", ctypes.c_int), + ("lora_filenames", ctypes.POINTER(ctypes.c_char_p)), ("lora_multipliers", ctypes.POINTER(ctypes.c_float))] class sd_generation_outputs(ctypes.Structure): @@ -1175,25 +1180,33 @@ def get_capabilities(): admin_type = (2 if args.admin and args.admindir and args.adminpassword else (1 if args.admin and args.admindir else 0)) return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision_support,"audio":has_audio_support,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "embeddings":has_embeddings, "music":has_music, "savedata":(savedata_obj is not None), "admin": admin_type, "guidance": has_guidance, "jinja": has_jinja, "mcp":has_mcp} + +def scan_directory(dirpath, valid_exts, depth): + files = [] + for entry in sorted(os.listdir(dirpath)): # Scan top-level directory + full_path = os.path.join(dirpath, entry) + if os.path.isfile(full_path) and entry.lower().endswith(valid_exts): # If toplevel file + files.append(entry) + elif depth > 0 and os.path.isdir(full_path): #if dir, scan up to 1 level deep + for subentry in sorted(os.listdir(full_path)): + sub_full_path = os.path.join(full_path, subentry) + if os.path.isfile(sub_full_path) and subentry.endswith(valid_exts): + rel_path = os.path.join(entry, subentry) + files.append(rel_path) + return files + + def get_current_admindir_list(): opts = [] if args.admin and args.admindir: dirpath = os.path.abspath(args.admindir) valid_exts = (".kcpps", ".kcppt", ".gguf") - for entry in sorted(os.listdir(dirpath)): # Scan top-level directory - full_path = os.path.join(dirpath, entry) - if os.path.isfile(full_path) and entry.endswith(valid_exts): # If toplevel file - opts.append(entry) - elif os.path.isdir(full_path): #if dir, scan up to 1 level deep - for subentry in sorted(os.listdir(full_path)): - sub_full_path = os.path.join(full_path, subentry) - if os.path.isfile(sub_full_path) and subentry.endswith(valid_exts): - rel_path = os.path.join(entry, subentry) - opts.append(rel_path) + opts = scan_directory(dirpath, valid_exts, 1) opts.append("initial_model") opts.append("unload_model") return opts + def dump_gguf_metadata(file_path): #if you're gonna copy this into your own project at least credit concedo chunk_size = 1024*1024*12 # read first 12mb of file try: @@ -2018,7 +2031,7 @@ def sd_quant_option(value): except Exception: return 0 -def sd_load_model(model_filename,vae_filename,lora_filenames,t5xxl_filename,clip1_filename,clip2_filename,photomaker_filename,upscaler_filename): +def sd_load_model(model_filename,vae_filename,t5xxl_filename,clip1_filename,clip2_filename,photomaker_filename,upscaler_filename): global args inputs = sd_load_model_inputs() inputs.model_filename = model_filename.encode("UTF-8") @@ -2047,14 +2060,22 @@ def sd_load_model(model_filename,vae_filename,lora_filenames,t5xxl_filename,clip inputs.photomaker_filename = photomaker_filename.encode("UTF-8") inputs.upscaler_filename = upscaler_filename.encode("UTF-8") - lora_filenames = [lf.encode("UTF-8") for lf in lora_filenames[:lora_filenames_max] if lf] - lora_len = len(lora_filenames) - lora_multipliers = prepare_lora_multipliers([]) - inputs.lora_len = lora_len - inputs.lora_filenames = (ctypes.c_char_p * lora_len)(*lora_filenames) - inputs.lora_multipliers = (ctypes.c_float * lora_len)(*lora_multipliers) + lora_filenames, lora_multipliers = prepare_initial_lora_multipliers() + inputs.lora_len = len(lora_filenames) + inputs.lora_filenames = (ctypes.c_char_p * inputs.lora_len)(*lora_filenames) + inputs.lora_multipliers = (ctypes.c_float * inputs.lora_len)(*lora_multipliers) + if 0 and inputs.lora_len: + print("Preloading LoRAs:") + for i in range(inputs.lora_len): + print(f" {inputs.lora_filenames[i]} @ {inputs.lora_multipliers[i]}") # auto if no zero-weight lora, dynamic otherwise - inputs.lora_apply_mode = 3 if 0. in lora_multipliers else 0 + lora_apply_mode = 0 # auto + if imglora_bypath: + lora_dynamic = 1 << 3 # accept changes at runtime + lora_cache = 1 << 4 if imglora_cached else 0 # cache the preloaded LoRAs + lora_fixed = 1 << 5 if imglora_initial_fixed else 0 # do not allow changes to the non-zero preloaded LoRAs + lora_apply_mode = lora_dynamic | lora_cache | lora_fixed + inputs.lora_apply_mode = lora_apply_mode inputs.img_hard_limit = args.sdclamped inputs.img_soft_limit = args.sdclampedsoft @@ -2177,23 +2198,57 @@ def sanitize_lora_multipliers(sdloramult): sdloramult = [tryparsefloat(m, 0.) for m in sdloramult] return sdloramult -def prepare_lora_multipliers(request_list): - orig_multipliers = [lora[3] for lora in imglorainfo] - req_by_path = {} +def prepare_initial_lora_multipliers(): + res_paths = [] + res_multipliers = [] + num_loras = len(imglora_preload) + if num_loras > lora_filenames_max: + print(f'Warning: more than {lora_filenames_max} preloaded LoRAs, extra ones will be ignored') + num_loras = lora_filenames_max + for info in imglora_preload[:num_loras]: + res_paths.append(info['fullpath'].encode("UTF-8")) + res_multipliers.append(info['multiplier']) + return res_paths, res_multipliers + +def prepare_lora_multipliers_backend(request_list, imglora_bypath): + req_dedup = {} for r in request_list: if not isinstance(r, dict): continue - multiplier = tryparsefloat(r.get('multiplier'), 0.) path = r.get('path') - if path and isinstance(path, str): - req_by_path[path] = req_by_path.get(path, 0.) + multiplier - result = [] - for i, (fullpath, name, path, origmul) in enumerate(imglorainfo): - multiplier = orig_multipliers[i] - if multiplier == 0. and path in req_by_path: - multiplier = req_by_path[path] - result.append(multiplier) - return result + multiplier = tryparsefloat(r.get('multiplier'), 0.) + if not path or not isinstance(path, str) or not multiplier: + continue + info = imglora_bypath.get(path) + if info: + fullpath = info["fullpath"] + req_dedup[fullpath] = req_dedup.get(fullpath, 0.) + multiplier + res_paths = [] + res_multipliers = [] + for fullpath, multiplier in req_dedup.items(): + if multiplier != 0.0: + res_paths.append(fullpath.encode("UTF-8")) + res_multipliers.append(multiplier) + # enforce lora_filenames_max + max_requests = lora_filenames_max - len(imglora_preload) + if len(res_paths) > max_requests: + msg_preloaded = "" + if len(imglora_preload) > 0: + msg_preloaded = f" (including {len(imglora_preload)} preloaded)" + print(f'Warning: more than {lora_filenames_max} requested LoRAs{msg_preloaded}, extra ones will be ignored') + res_paths = res_paths[:max_requests] + res_multipliers = res_multipliers[:max_requests] + return res_paths, res_multipliers + +def prepare_lora_multipliers(request_list): + return prepare_lora_multipliers_backend(request_list, imglora_bypath) + +def mk_sdapi_lora_list(imglora_bypath): + return [ + {'name': info['name'], 'path': info['path']} + for info in imglora_bypath.values() + if info['multiplier'] == 0.0 # both preloaded and scanned + ] def extract_loras_from_prompt(prompt): pattern = r']+):([^>]+)>' @@ -2219,16 +2274,17 @@ def extract_loras_from_prompt(prompt): return prompt, lora_data def lora_map_name_to_path(request_list): - name2path = {} - for _, name, path, _ in imglorainfo: - name2path[name] = path result = [] for req in request_list: out = dict(req) name = out.pop('name') - path = name2path.get(name) - if path: - out['path'] = path + path = imglora_name2path.get(name) + if not path: + print(f'LoRA {name} not found') + continue + info = imglora_bypath.get(path) + if info: + out['path'] = info['path'] result.append(out) return result @@ -2283,6 +2339,7 @@ def sd_generate(genparams): extra_images_arr = ([] if not extra_images_arr else extra_images_arr) extra_images_arr = [img for img in extra_images_arr if img not in (None, "")] extra_images_arr = extra_images_arr[:extra_images_max] + lora_filenames, lora_multipliers = prepare_lora_multipliers(genparams.get("lora", [])) #clean vars cfg_scale = (1 if cfg_scale < 1 else (forced_maxcfg if cfg_scale > forced_maxcfg else cfg_scale)) @@ -2334,9 +2391,8 @@ def sd_generate(genparams): inputs.cache_mode = cache_mode.encode("UTF-8") inputs.cache_options = cache_options.encode("UTF-8") inputs.upscale = (True if tryparseint(genparams.get("enable_hr", 0),0) else False) - - lora_multipliers = prepare_lora_multipliers(genparams.get("lora", [])) - inputs.lora_len = len(lora_multipliers) + inputs.lora_len = len(lora_filenames) + inputs.lora_filenames = (ctypes.c_char_p * inputs.lora_len)(*lora_filenames) inputs.lora_multipliers = (ctypes.c_float * inputs.lora_len)(*lora_multipliers) ret = handle.sd_generate(inputs) @@ -4426,7 +4482,7 @@ Change Mode
response_body = (json.dumps({"object":"list","data":mlist}).encode()) elif clean_path.endswith('/sdapi/v1/loras'): - response_body = (json.dumps([{'name': name, 'path': path} for _, name, path, multiplier in imglorainfo if multiplier == 0.])).encode() + response_body = (json.dumps(mk_sdapi_lora_list(imglora_bypath))).encode() elif clean_path.endswith('/sdapi/v1/upscalers'): if args.sdupscaler: @@ -8739,26 +8795,88 @@ def main(launch_args, default_args): input() -def mk_lora_info(imgloras, multipliers): - # (full path, name, name+extension, can change multiplier) - # XXX for each LoRA, sdapi needs a name and a path; we could use - # the full filename as a path, but we don't know if we can expose it - used_lora_names = set() - result = [] +def mk_lora_info(imgloras, multipliers, mock_filesystem=False): first_multiplier = multipliers[0] if len(multipliers) > 0 else 1. + lora_files = [] + lora_dirs = [] + # identify files and dirs for i, lora_path in enumerate(imgloras): multiplier = multipliers[i] if i < len(multipliers) else first_multiplier - lora_file = os.path.basename(lora_path) + if mock_filesystem: + print('fake filesystem access') + if lora_path.endswith('/'): + lora_dirs.append(lora_path) + else: + lora_files.append(('', lora_path, multiplier)) + elif os.path.isfile(lora_path): + lora_files.append(('', lora_path, multiplier)) + elif os.path.isdir(lora_path): + lora_dirs.append(lora_path) + elif os.path.exists(lora_path): + print(f"Unexpected file type for SD LORA model file {lora_path}") + else: + print(f"Missing SD LORA model file {lora_path}...") + # scan all dirs + for lora_dir in lora_dirs: + print(f'Scanning {lora_dir} for LoRAs...') + if mock_filesystem: + print('fake directory scan') + files = ['lora1_makebelieve.gguf', 'lora2/makebelieve.gguf'] + else: + files = scan_directory(lora_dir, ('.safetensors', '.gguf'), 1) + print(f' found {len(files)} files under {lora_dir}') + for file in files: + lora_files.append((lora_dir, file, 0.0)) + # dedup and map all files + unique_lora_names = set() + lora_fullmap = {} + for i, (lora_dir, lora_path, multiplier) in enumerate(lora_files): + if lora_dir: + # lora_path is relative: we can show it on the interface and accept it + lora_fullpath = os.path.join(lora_dir, lora_path) + # NOTE: we are including the relative directory on the short name + lora_file = lora_path + preloaded = False + else: + lora_fullpath = lora_path + # we don't know which portion of the path we can show, so omit it + lora_file = os.path.basename(lora_path) + preloaded = True + if not mock_filesystem: + lora_fullpath = os.path.abspath(lora_fullpath) + # dedup paths (e.g. preloaded and on directory) + if lora_fullpath in lora_fullmap: + lora_fullmap[lora_fullpath]["multiplier"] += multiplier + continue lora_name, lora_ext = os.path.splitext(lora_file) # ensure unique names i = 1 - mapped_name = lora_name - while mapped_name in used_lora_names: + lora_uname = lora_name + while lora_uname in unique_lora_names: i += 1 - mapped_name = lora_name + '_' + str(i) - used_lora_names.add(mapped_name) - result.append((lora_path, mapped_name, mapped_name + lora_ext, multiplier)) - return result + lora_uname = lora_name + '_' + str(i) + unique_lora_names.add(lora_uname) + lora_upath = lora_uname + lora_ext + lora_entry = { + 'fullpath': lora_fullpath, + 'name': lora_uname, + 'path': lora_upath, + 'multiplier': multiplier, + 'preloaded': preloaded, + } + lora_fullmap[lora_fullpath] = lora_entry + # build the runtime tables + preloaded_table = [] + lora_path_map = {} + lora_name_map = {} + for lora_entry in lora_fullmap.values(): + # only map LoRAs that can be changed + if not imglora_initial_fixed or lora_entry["multiplier"] == 0.0: + lora_path_map[lora_entry["path"]] = lora_entry + lora_name_map[lora_entry["name"]] = lora_entry["path"] + if lora_entry["preloaded"]: + preloaded_table.append(lora_entry) + return preloaded_table, lora_path_map, lora_name_map def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False): @@ -9201,16 +9319,9 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False): imgclip2 = "" imgphotomaker = "" imgupscaler = "" - if args.sdlora and len(args.sdlora)>0: - for i in range (0,len(args.sdlora)): - curr = args.sdlora[i] - if os.path.exists(curr): - imgloras.append(os.path.abspath(curr)) - else: - print(f"Missing SD LORA model file {curr}...") - global imglorainfo + global imglora_preload, imglora_bypath, imglora_name2path args.sdloramult = sanitize_lora_multipliers(args.sdloramult) - imglorainfo = mk_lora_info(imgloras, args.sdloramult) + imglora_preload, imglora_bypath, imglora_name2path = mk_lora_info(args.sdlora, args.sdloramult) if args.sdvae: if os.path.exists(args.sdvae): imgvae = os.path.abspath(args.sdvae) @@ -9247,7 +9358,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False): friendlysdmodelname = os.path.basename(imgmodel) friendlysdmodelname = os.path.splitext(friendlysdmodelname)[0] friendlysdmodelname = sanitize_string(friendlysdmodelname) - loadok = sd_load_model(imgmodel,imgvae,imgloras,imgt5xxl,imgclip1,imgclip2,imgphotomaker,imgupscaler) + loadok = sd_load_model(imgmodel,imgvae,imgt5xxl,imgclip1,imgclip2,imgphotomaker,imgupscaler) print("Load Image Model OK: " + str(loadok)) if not loadok: exitcounter = 999 diff --git a/otherarch/sdcpp/sdtype_adapter.cpp b/otherarch/sdcpp/sdtype_adapter.cpp index 36b1d2d13..4ce06341b 100644 --- a/otherarch/sdcpp/sdtype_adapter.cpp +++ b/otherarch/sdcpp/sdtype_adapter.cpp @@ -45,6 +45,62 @@ static_assert((int)SD_TYPE_COUNT == (int)GGML_TYPE_COUNT, "inconsistency between SD_TYPE_COUNT and GGML_TYPE_COUNT"); +struct LoraMap { + std::vector> items; + std::unordered_map index; + + void add_lora(const std::string& k, float v) { + auto it = index.find(k); + if (it == index.end()) { + index[k] = items.size(); + items.emplace_back(k, v); + } else { + items[it->second].second += v; + } + } + + float check_small_mult(float mult) { + if (mult > 1e-6 || mult < -1e-6) + return mult; + return 0.f; + } + + float get_mult(const std::string& k) { + auto lora = index.find(k); + if (lora == index.end()) return 0.f; + return check_small_mult(items[lora->second].second); + } + + std::vector get_lora_specs(bool include_zeroes = false) { + std::vector lora_specs; + for (const auto & lora: items) { + float multiplier = check_small_mult(lora.second); + if (include_zeroes || multiplier != 0.f) { + sd_lora_t spec = {}; + spec.path = lora.first.c_str(); + spec.multiplier = multiplier; + lora_specs.push_back(spec); + } + } + return lora_specs; + } + + std::string get_lora_meta() { + std::stringstream lora_meta; + lora_meta << std::setprecision(6); + for (const auto & lora: items) { + float multiplier = check_small_mult(lora.second); + if (multiplier != 0.f) { + std::string lora_name = std::filesystem::path(lora.first).stem().string(); + lora_meta << ""; + } + } + return lora_meta.str(); + } + +}; + + struct SDParams { int n_threads = -1; std::string model_path; @@ -79,9 +135,9 @@ struct SDParams { bool chroma_use_dit_mask = true; - std::vector lora_paths; - std::vector lora_multipliers; + LoraMap lora_map; bool lora_dynamic = false; + bool lora_fixed = false; std::string cache_mode; std::string cache_options; @@ -211,12 +267,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) { set_sd_quiet(sd_is_quiet); executable_path = inputs.executable_path; std::string taesdpath = ""; - std::vector lora_paths; - std::vector lora_multipliers; + LoraMap lora_map; for(int i=0;i= 0 && inputs.lora_apply_mode <= 2) { lora_apply_mode = inputs.lora_apply_mode; } - else if(inputs.lora_apply_mode == 3) { - lora_dynamic = true; + else { + // bit 3: LoRAs can be changed dynamically + // bit 4: cache the initial LoRA list in VRAM + // bit 5: do not allow multiplier changes for the initial LoRAs + lora_dynamic = !!(inputs.lora_apply_mode & (1<<3)); + lora_cache = lora_dynamic && !!(inputs.lora_apply_mode & (1<<4)); + lora_fixed = lora_dynamic && !!(inputs.lora_apply_mode & (1<<5)); } - if(lora_paths.size() > 0) + if(lora_map.items.size() > 0) { const char* lora_apply_mode_name = lora_apply_mode == 1 ? "immediately" : lora_apply_mode == 2 ? "at runtime" : "auto"; - const char * lora_dynamic_name = lora_dynamic ? " (dynamic)" : ""; - printf("With LoRAs in apply mode %s%s:\n", lora_apply_mode_name, lora_dynamic_name); - for(int i=0;iclip_l_path = clip1_filename; sd_params->clip_g_path = clip2_filename; sd_params->stacked_id_embeddings_path = photomaker_filename; - sd_params->lora_paths = lora_paths; - sd_params->lora_multipliers = lora_multipliers; + sd_params->lora_map = lora_map; sd_params->lora_dynamic = lora_dynamic; + sd_params->lora_fixed = lora_fixed; //if t5 is set, and model is a gguf, load it as a diffusion model path bool endswithgguf = (sd_params->model_path.rfind(".gguf") == sd_params->model_path.size() - 5); if((sd_params->t5xxl_path!="" || sd_params->clip_l_path!="" || sd_params->clip_g_path!="") && endswithgguf) @@ -429,21 +492,13 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) { sdmodelfilename = mpath.filename().string(); // preload the LoRAs with the initial multipliers - std::vector lora_specs; - for(int i=0;ilora_paths.size();++i) - { - if (!lora_dynamic && sd_params->lora_multipliers[i] == 0.) - continue; - sd_lora_t spec = {}; - spec.path = sd_params->lora_paths[i].c_str(); - spec.multiplier = sd_params->lora_multipliers[i]; - lora_specs.push_back(spec); - } - + std::vector lora_specs = sd_params->lora_map.get_lora_specs(lora_dynamic&& lora_cache); if(lora_specs.size()>0) { printf(" applying %zu LoRAs...\n", lora_specs.size()); + sd_ctx->sd->kcpp_lora_cache_populate = lora_cache; sd_ctx->sd->apply_loras(lora_specs.data(), lora_specs.size()); + sd_ctx->sd->kcpp_lora_cache_populate = false; } input_extraimage_buffers.reserve(max_extra_images); @@ -1166,24 +1221,21 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) parse_cache_options(params.cache, sd_params->cache_mode, sd_params->cache_options); params.batch_count = 1; - std::vector lora_specs; - std::stringstream lora_meta; - lora_meta << std::setprecision(6); - for(size_t i=0;ilora_paths.size();++i) - { - float multiplier = sd_params->lora_multipliers[i]; - if (sd_params->lora_dynamic) { - multiplier = i < inputs.lora_len ? inputs.lora_multipliers[i] : 0.; - } - if (multiplier != 0.f) { - sd_lora_t spec = {}; - spec.path = sd_params->lora_paths[i].c_str(); - spec.multiplier = multiplier; - lora_specs.push_back(spec); - std::string lora_name = std::filesystem::path(sd_params->lora_paths[i]).stem().string(); - lora_meta << ""; + LoraMap lora_map = sd_params->lora_map; + if (sd_params->lora_dynamic) { + for (int i = 0; i < inputs.lora_len; i++) { + // check if it was initially fixed + std::string path = inputs.lora_filenames[i]; + float preloaded_mult = sd_params->lora_map.get_mult(path); + if (!sd_params->lora_fixed || preloaded_mult == 0.f) { + lora_map.add_lora(path, inputs.lora_multipliers[i]); + } } } + + std::vector lora_specs = lora_map.get_lora_specs(); + std::string lora_meta = lora_map.get_lora_meta(); + if(!sd_is_quiet && sddebugmode==1) { if (lora_specs.size() > 0) { printf("Applying LoRAs:\n"); @@ -1424,9 +1476,9 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) { printf("Upscaling output image...\n"); upscaled_image = upscale(upscaler_ctx, results[i], 2); - png = stbi_write_png_to_mem(upscaled_image.data, 0, upscaled_image.width, upscaled_image.height, upscaled_image.channel, &out_data_len, get_image_params(params, lora_meta.str()).c_str()); + png = stbi_write_png_to_mem(upscaled_image.data, 0, upscaled_image.width, upscaled_image.height, upscaled_image.channel, &out_data_len, get_image_params(params, lora_meta).c_str()); } else { - png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params, lora_meta.str()).c_str()); + png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params, lora_meta).c_str()); } if (png != NULL) diff --git a/otherarch/sdcpp/stable-diffusion.cpp b/otherarch/sdcpp/stable-diffusion.cpp index edf6f63da..3cf59acc6 100644 --- a/otherarch/sdcpp/stable-diffusion.cpp +++ b/otherarch/sdcpp/stable-diffusion.cpp @@ -139,6 +139,7 @@ public: std::vector> first_stage_lora_models; bool apply_lora_immediately = false; std::map> kcpp_lora_cache; + bool kcpp_lora_cache_populate = false; std::string taesd_path; bool use_tiny_autoencoder = false; @@ -1209,7 +1210,6 @@ public: return it->second; } } - // by construction, kcpp will always find the preloaded LoRAs on the cache std::string lora_path = lora_id; static std::string high_noise_tag = "|high_noise|"; @@ -1224,13 +1224,13 @@ public: LOG_WARN("load lora tensors from %s failed", lora_path.c_str()); // also cache negatives to avoid I/O at runtime lora = nullptr; - if (kcpp_at_runtime) + if (kcpp_at_runtime && kcpp_lora_cache_populate) kcpp_lora_cache[lora_key] = lora; return lora; } lora->multiplier = multiplier; - if (kcpp_at_runtime) + if (kcpp_at_runtime && kcpp_lora_cache_populate) kcpp_lora_cache[lora_key] = lora; return lora; } diff --git a/tests/test_koboldcpp.py b/tests/test_koboldcpp.py index d6b643848..b0eb4eb01 100644 --- a/tests/test_koboldcpp.py +++ b/tests/test_koboldcpp.py @@ -53,16 +53,63 @@ def extract_loras_from_prompt(*args, **kwargs): return koboldcpp.extract_loras_from_prompt(*args, **kwargs) -def mk_lora_info(*args, **kwargs): +def mk_lora_info(imgloras, multipliers): """ - >>> mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], []) - [('/x/lora1.safetensors', 'lora1', 'lora1.safetensors', 1.0), ('/y/lora2.gguf', 'lora2', 'lora2.gguf', 1.0)] - >>> mk_lora_info(['/x/lora1.safetensors', '/y/lora1.safetensors'], [0.3]) - [('/x/lora1.safetensors', 'lora1', 'lora1.safetensors', 0.3), ('/y/lora1.safetensors', 'lora1_2', 'lora1_2.safetensors', 0.3)] - >>> mk_lora_info(['./lora1.gguf', '/y/lora2.gguf', 'lora3.gguf'], [0, 0.3]) - [('./lora1.gguf', 'lora1', 'lora1.gguf', 0), ('/y/lora2.gguf', 'lora2', 'lora2.gguf', 0.3), ('lora3.gguf', 'lora3', 'lora3.gguf', 0)] + >>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], []) + fake filesystem access + fake filesystem access + >>> pre + [{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 1.0, 'preloaded': True}, {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 1.0, 'preloaded': True}] + >>> path + {} + >>> name + {} + + >>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [0.]) + fake filesystem access + fake filesystem access + >>> pre + [{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.0, 'preloaded': True}, {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 0.0, 'preloaded': True}] + >>> path + {'lora1.safetensors': {'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.0, 'preloaded': True}, 'lora2.gguf': {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 0.0, 'preloaded': True}} + >>> name + {'lora1': 'lora1.safetensors', 'lora2': 'lora2.gguf'} + + >>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora1.safetensors'], [0.3]) + fake filesystem access + fake filesystem access + >>> pre + [{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.3, 'preloaded': True}, {'fullpath': '/y/lora1.safetensors', 'name': 'lora1_2', 'path': 'lora1_2.safetensors', 'multiplier': 0.3, 'preloaded': True}] + >>> path + {} + + >>> pre, path, name = mk_lora_info(['/lora/dir/'], [0.3]) + fake filesystem access + Scanning /lora/dir/ for LoRAs... + fake directory scan + found 2 files under /lora/dir/ + >>> pre + [] + >>> expected = { + ... 'lora1_makebelieve.gguf': { + ... 'fullpath': '/lora/dir/lora1_makebelieve.gguf', + ... 'name': 'lora1_makebelieve', + ... 'path': 'lora1_makebelieve.gguf', + ... 'multiplier': 0.0, + ... 'preloaded': False}, + ... 'lora2/makebelieve.gguf': { + ... 'fullpath': '/lora/dir/lora2/makebelieve.gguf', + ... 'name': 'lora2/makebelieve', + ... 'path': 'lora2/makebelieve.gguf', + ... 'multiplier': 0.0, + ... 'preloaded': False}} + >>> path == expected + True + >>> name + {'lora1_makebelieve': 'lora1_makebelieve.gguf', 'lora2/makebelieve': 'lora2/makebelieve.gguf'} + """ - return koboldcpp.mk_lora_info(*args, **kwargs) + return koboldcpp.mk_lora_info(imgloras, multipliers, True) def sanitize_lora_multipliers(*args, **kwargs): """ @@ -86,6 +133,77 @@ def sanitize_lora_multipliers(*args, **kwargs): return koboldcpp.sanitize_lora_multipliers(*args, **kwargs) +def prepare_lora_multipliers(req_list, imglora_bypath): + """ + >>> req = [ + ... {"path": "a.gguf", "multiplier": "0.5"}, + ... {"path": "a.gguf", "multiplier": 1.0}, + ... ] + >>> imglora = {"a.gguf": {"fullpath": "/abs/a.gguf"}} + >>> paths, mults = prepare_lora_multipliers(req, imglora) + >>> paths == [b"/abs/a.gguf"], mults == [1.5] + (True, True) + + >>> req = [ + ... {"path": "b.gguf", "multiplier": "2"}, + ... {"path": "c.gguf"}, + ... "not a dict", + ... {"path": "", "multiplier": "3"}, + ... {"path": "b.gguf", "multiplier": 0}, + ... ] + >>> imglora = {"b.gguf": {"fullpath": "/abs/b.gguf"}, + ... "c.gguf": {"fullpath": "/abs/c.gguf"}} + >>> paths, mults = prepare_lora_multipliers(req, imglora) + >>> paths == [b"/abs/b.gguf"], mults == [2.0] + (True, True) + + >>> req = [{"path": "missing.gguf", "multiplier": "5"}] + >>> imglora = {} + >>> paths, mults = prepare_lora_multipliers(req, imglora) + >>> paths == [], mults == [] + (True, True) + + >>> req = [ + ... {"path": "x.gguf", "multiplier": 1}, + ... {"path": "y.gguf", "multiplier": 2}, + ... ] + >>> imglora = { + ... "x.gguf": {"fullpath": "/abs/x.gguf", "path": "x.gguf", "multiplier": 0.0}, + ... "y.gguf": {"fullpath": "/abs/y.gguf", "path": "y.gguf", "multiplier": 0.0}, + ... } + >>> paths, mults = prepare_lora_multipliers(req, imglora) + >>> paths == [b'/abs/x.gguf', b'/abs/y.gguf'] + True + >>> mults == [1.0, 2.0] + True + """ + return koboldcpp.prepare_lora_multipliers_backend(req_list, imglora_bypath) + +def mk_sdapi_lora_list(imglora_bypath): + ''' + >>> imglora_bypath = { + ... 'lora_a.safetensors': {'name': 'lora_a', 'path': 'lora_a.safetensors', 'multiplier': 0.0}, + ... 'lora_b.gguf' : {'name': 'lora_b', 'path': 'lora_b.gguf', 'multiplier': 0.0}, + ... 'lora_c.safetensors': {'name': 'lora_c', 'path': 'lora_c.safetensors', 'multiplier': 1.0}, + ... 'chars/waifu.gguf' : {'name': 'chars/waifu', 'path': 'chars/waifu.gguf', 'multiplier': 0.0} + ... } + >>> mk_sdapi_lora_list(imglora_bypath) + [{'name': 'lora_a', 'path': 'lora_a.safetensors'}, {'name': 'lora_b', 'path': 'lora_b.gguf'}, {'name': 'chars/waifu', 'path': 'chars/waifu.gguf'}] + + >>> empty_data = {} + >>> mk_sdapi_lora_list(empty_data) + [] + + >>> mixed_data = { + ... 'k1': {'name': 'X', 'path': 'p1', 'multiplier': 0.5}, + ... 'k2': {'name': 'Y', 'path': 'p2', 'multiplier': 0.0} + ... } + >>> mk_sdapi_lora_list(mixed_data) + [{'name': 'Y', 'path': 'p2'}] + ''' + return koboldcpp.mk_sdapi_lora_list(imglora_bypath) + + def gendefaults_parse_meta_field(*args, **kwargs): '''