mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-18 23:49:46 +00:00
sd: support for dynamic LoRA loading from a directory (#2036)
* backend support for controlling LoRA cache and fixed multipliers The generation LoRA multipliers are now added to the initial multipliers, so e.g. a merged LCM model will behave the same as a normal model with a preloaded LCM LoRA. * frontend support
This commit is contained in:
parent
b88fc44d0e
commit
feea014774
5 changed files with 402 additions and 120 deletions
1
expose.h
1
expose.h
|
|
@ -230,6 +230,7 @@ struct sd_generation_inputs
|
|||
const char * cache_options = nullptr;
|
||||
const bool upscale = false;
|
||||
const int lora_len = 0;
|
||||
const char ** lora_filenames = nullptr;
|
||||
const float * lora_multipliers = nullptr;
|
||||
};
|
||||
struct sd_generation_outputs
|
||||
|
|
|
|||
241
koboldcpp.py
241
koboldcpp.py
|
|
@ -89,7 +89,11 @@ ttsmodelpath = "" #if empty, not initialized
|
|||
embeddingsmodelpath = "" #if empty, not initialized
|
||||
musicllmmodelpath = "" #if empty, not initialized
|
||||
musicdiffusionmodelpath = "" #if empty, not initialized
|
||||
imglorainfo = []
|
||||
imglora_preload = []
|
||||
imglora_bypath = {}
|
||||
imglora_name2path = {}
|
||||
imglora_cached = True
|
||||
imglora_initial_fixed = True
|
||||
maxctx = 8192
|
||||
maxhordectx = 0 #set to whatever maxctx is if 0
|
||||
maxhordelen = 1024
|
||||
|
|
@ -362,6 +366,7 @@ class sd_generation_inputs(ctypes.Structure):
|
|||
("cache_options", ctypes.c_char_p),
|
||||
("upscale", ctypes.c_bool),
|
||||
("lora_len", ctypes.c_int),
|
||||
("lora_filenames", ctypes.POINTER(ctypes.c_char_p)),
|
||||
("lora_multipliers", ctypes.POINTER(ctypes.c_float))]
|
||||
|
||||
class sd_generation_outputs(ctypes.Structure):
|
||||
|
|
@ -1175,25 +1180,33 @@ def get_capabilities():
|
|||
admin_type = (2 if args.admin and args.admindir and args.adminpassword else (1 if args.admin and args.admindir else 0))
|
||||
return {"result":"KoboldCpp", "version":KcppVersion, "protected":has_password, "llm":has_llm, "txt2img":has_txt2img,"vision":has_vision_support,"audio":has_audio_support,"transcribe":has_whisper,"multiplayer":has_multiplayer,"websearch":has_search,"tts":has_tts, "embeddings":has_embeddings, "music":has_music, "savedata":(savedata_obj is not None), "admin": admin_type, "guidance": has_guidance, "jinja": has_jinja, "mcp":has_mcp}
|
||||
|
||||
|
||||
def scan_directory(dirpath, valid_exts, depth):
    """Return the files under ``dirpath`` whose name ends with ``valid_exts``.

    Entries are visited in sorted order. Files directly inside ``dirpath``
    are reported by bare name; files found in subdirectories are reported
    as paths relative to ``dirpath``.

    Args:
        dirpath: directory to scan.
        valid_exts: a suffix or tuple of suffixes, as accepted by
            ``str.endswith``. Names are lowercased before the comparison,
            so the suffixes should be given in lowercase.
        depth: how many levels of subdirectories to descend into
            (0 scans only ``dirpath`` itself).

    Returns:
        List of matching relative paths.
    """
    matches = []
    for entry in sorted(os.listdir(dirpath)):
        full_path = os.path.join(dirpath, entry)
        if os.path.isfile(full_path):
            # Same case-insensitive test at every level, so "sub/X.GGUF"
            # is accepted exactly like a top-level "X.GGUF".
            if entry.lower().endswith(valid_exts):
                matches.append(entry)
        elif depth > 0 and os.path.isdir(full_path):
            matches.extend(
                os.path.join(entry, sub)
                for sub in scan_directory(full_path, valid_exts, depth - 1)
            )
    return matches
|
||||
|
||||
|
||||
def get_current_admindir_list():
|
||||
opts = []
|
||||
if args.admin and args.admindir:
|
||||
dirpath = os.path.abspath(args.admindir)
|
||||
valid_exts = (".kcpps", ".kcppt", ".gguf")
|
||||
for entry in sorted(os.listdir(dirpath)): # Scan top-level directory
|
||||
full_path = os.path.join(dirpath, entry)
|
||||
if os.path.isfile(full_path) and entry.endswith(valid_exts): # If toplevel file
|
||||
opts.append(entry)
|
||||
elif os.path.isdir(full_path): #if dir, scan up to 1 level deep
|
||||
for subentry in sorted(os.listdir(full_path)):
|
||||
sub_full_path = os.path.join(full_path, subentry)
|
||||
if os.path.isfile(sub_full_path) and subentry.endswith(valid_exts):
|
||||
rel_path = os.path.join(entry, subentry)
|
||||
opts.append(rel_path)
|
||||
opts = scan_directory(dirpath, valid_exts, 1)
|
||||
opts.append("initial_model")
|
||||
opts.append("unload_model")
|
||||
return opts
|
||||
|
||||
|
||||
def dump_gguf_metadata(file_path): #if you're gonna copy this into your own project at least credit concedo
|
||||
chunk_size = 1024*1024*12 # read first 12mb of file
|
||||
try:
|
||||
|
|
@ -2018,7 +2031,7 @@ def sd_quant_option(value):
|
|||
except Exception:
|
||||
return 0
|
||||
|
||||
def sd_load_model(model_filename,vae_filename,lora_filenames,t5xxl_filename,clip1_filename,clip2_filename,photomaker_filename,upscaler_filename):
|
||||
def sd_load_model(model_filename,vae_filename,t5xxl_filename,clip1_filename,clip2_filename,photomaker_filename,upscaler_filename):
|
||||
global args
|
||||
inputs = sd_load_model_inputs()
|
||||
inputs.model_filename = model_filename.encode("UTF-8")
|
||||
|
|
@ -2047,14 +2060,22 @@ def sd_load_model(model_filename,vae_filename,lora_filenames,t5xxl_filename,clip
|
|||
inputs.photomaker_filename = photomaker_filename.encode("UTF-8")
|
||||
inputs.upscaler_filename = upscaler_filename.encode("UTF-8")
|
||||
|
||||
lora_filenames = [lf.encode("UTF-8") for lf in lora_filenames[:lora_filenames_max] if lf]
|
||||
lora_len = len(lora_filenames)
|
||||
lora_multipliers = prepare_lora_multipliers([])
|
||||
inputs.lora_len = lora_len
|
||||
inputs.lora_filenames = (ctypes.c_char_p * lora_len)(*lora_filenames)
|
||||
inputs.lora_multipliers = (ctypes.c_float * lora_len)(*lora_multipliers)
|
||||
lora_filenames, lora_multipliers = prepare_initial_lora_multipliers()
|
||||
inputs.lora_len = len(lora_filenames)
|
||||
inputs.lora_filenames = (ctypes.c_char_p * inputs.lora_len)(*lora_filenames)
|
||||
inputs.lora_multipliers = (ctypes.c_float * inputs.lora_len)(*lora_multipliers)
|
||||
if 0 and inputs.lora_len:
|
||||
print("Preloading LoRAs:")
|
||||
for i in range(inputs.lora_len):
|
||||
print(f" {inputs.lora_filenames[i]} @ {inputs.lora_multipliers[i]}")
|
||||
# auto if no zero-weight lora, dynamic otherwise
|
||||
inputs.lora_apply_mode = 3 if 0. in lora_multipliers else 0
|
||||
lora_apply_mode = 0 # auto
|
||||
if imglora_bypath:
|
||||
lora_dynamic = 1 << 3 # accept changes at runtime
|
||||
lora_cache = 1 << 4 if imglora_cached else 0 # cache the preloaded LoRAs
|
||||
lora_fixed = 1 << 5 if imglora_initial_fixed else 0 # do not allow changes to the non-zero preloaded LoRAs
|
||||
lora_apply_mode = lora_dynamic | lora_cache | lora_fixed
|
||||
inputs.lora_apply_mode = lora_apply_mode
|
||||
|
||||
inputs.img_hard_limit = args.sdclamped
|
||||
inputs.img_soft_limit = args.sdclampedsoft
|
||||
|
|
@ -2177,23 +2198,57 @@ def sanitize_lora_multipliers(sdloramult):
|
|||
sdloramult = [tryparsefloat(m, 0.) for m in sdloramult]
|
||||
return sdloramult
|
||||
|
||||
def prepare_lora_multipliers(request_list):
|
||||
orig_multipliers = [lora[3] for lora in imglorainfo]
|
||||
req_by_path = {}
|
||||
def prepare_initial_lora_multipliers():
    """Build the path/multiplier lists for the LoRAs preloaded at model load.

    Reads the module-level ``imglora_preload`` table and keeps at most
    ``lora_filenames_max`` entries, printing a warning when some are dropped.

    Returns:
        A ``(paths, multipliers)`` pair of parallel lists: the UTF-8 encoded
        ``fullpath`` and the ``multiplier`` of each kept entry.
    """
    limit = len(imglora_preload)
    if limit > lora_filenames_max:
        print(f'Warning: more than {lora_filenames_max} preloaded LoRAs, extra ones will be ignored')
        limit = lora_filenames_max
    encoded_paths = []
    multipliers = []
    for entry in imglora_preload[:limit]:
        encoded_paths.append(entry['fullpath'].encode("UTF-8"))
        multipliers.append(entry['multiplier'])
    return encoded_paths, multipliers
|
||||
|
||||
def prepare_lora_multipliers_backend(request_list, imglora_bypath):
    """Resolve per-request LoRA entries into parallel path/multiplier lists.

    Args:
        request_list: iterable of request items. Items that are not dicts,
            lack a non-empty string ``'path'``, or whose ``'multiplier'``
            parses (via ``tryparsefloat`` with a 0.0 fallback) to a falsy
            value are ignored.
        imglora_bypath: mapping from the request ``'path'`` to an info dict
            holding at least ``'fullpath'``; unknown paths are ignored.

    Returns:
        A ``(paths, multipliers)`` pair of parallel lists. Multipliers of
        requests that resolve to the same full path are summed, entries
        whose sum is 0.0 are dropped, and paths are UTF-8 encoded.
    """
    req_dedup = {}
    for r in request_list:
        if not isinstance(r, dict):
            continue
        path = r.get('path')
        multiplier = tryparsefloat(r.get('multiplier'), 0.)
        if not path or not isinstance(path, str) or not multiplier:
            continue
        info = imglora_bypath.get(path)
        if info:
            fullpath = info["fullpath"]
            req_dedup[fullpath] = req_dedup.get(fullpath, 0.) + multiplier
    res_paths = []
    res_multipliers = []
    for fullpath, multiplier in req_dedup.items():
        # opposite-signed requests for the same file can cancel out
        if multiplier != 0.0:
            res_paths.append(fullpath.encode("UTF-8"))
            res_multipliers.append(multiplier)
    # enforce lora_filenames_max
    # Clamp at zero: with more preloaded LoRAs than the limit the difference
    # is negative, and a negative slice bound would keep most of the list
    # instead of none of it.
    max_requests = max(0, lora_filenames_max - len(imglora_preload))
    if len(res_paths) > max_requests:
        msg_preloaded = ""
        if len(imglora_preload) > 0:
            msg_preloaded = f" (including {len(imglora_preload)} preloaded)"
        print(f'Warning: more than {lora_filenames_max} requested LoRAs{msg_preloaded}, extra ones will be ignored')
        res_paths = res_paths[:max_requests]
        res_multipliers = res_multipliers[:max_requests]
    return res_paths, res_multipliers
|
||||
|
||||
def prepare_lora_multipliers(request_list):
    """Resolve ``request_list`` against the module-level ``imglora_bypath`` table.

    Thin wrapper around ``prepare_lora_multipliers_backend``; returns its
    ``(paths, multipliers)`` pair.
    """
    paths, multipliers = prepare_lora_multipliers_backend(request_list, imglora_bypath)
    return paths, multipliers
|
||||
|
||||
def mk_sdapi_lora_list(imglora_bypath):
    """List the LoRAs to report through the sdapi ``loras`` endpoint.

    Args:
        imglora_bypath: mapping whose values are info dicts with ``'name'``,
            ``'path'`` and ``'multiplier'`` keys.

    Returns:
        A list of ``{'name': ..., 'path': ...}`` dicts, in mapping order,
        for every entry whose multiplier equals 0.0.
    """
    listing = []
    for entry in imglora_bypath.values():
        # zero-multiplier entries only: both preloaded and scanned
        if entry['multiplier'] == 0.0:
            listing.append({'name': entry['name'], 'path': entry['path']})
    return listing
|
||||
|
||||
def extract_loras_from_prompt(prompt):
|
||||
pattern = r'<lora:([^:>]+):([^>]+)>'
|
||||
|
|
@ -2219,16 +2274,17 @@ def extract_loras_from_prompt(prompt):
|
|||
return prompt, lora_data
|
||||
|
||||
def lora_map_name_to_path(request_list):
|
||||
name2path = {}
|
||||
for _, name, path, _ in imglorainfo:
|
||||
name2path[name] = path
|
||||
result = []
|
||||
for req in request_list:
|
||||
out = dict(req)
|
||||
name = out.pop('name')
|
||||
path = name2path.get(name)
|
||||
if path:
|
||||
out['path'] = path
|
||||
path = imglora_name2path.get(name)
|
||||
if not path:
|
||||
print(f'LoRA {name} not found')
|
||||
continue
|
||||
info = imglora_bypath.get(path)
|
||||
if info:
|
||||
out['path'] = info['path']
|
||||
result.append(out)
|
||||
return result
|
||||
|
||||
|
|
@ -2283,6 +2339,7 @@ def sd_generate(genparams):
|
|||
extra_images_arr = ([] if not extra_images_arr else extra_images_arr)
|
||||
extra_images_arr = [img for img in extra_images_arr if img not in (None, "")]
|
||||
extra_images_arr = extra_images_arr[:extra_images_max]
|
||||
lora_filenames, lora_multipliers = prepare_lora_multipliers(genparams.get("lora", []))
|
||||
|
||||
#clean vars
|
||||
cfg_scale = (1 if cfg_scale < 1 else (forced_maxcfg if cfg_scale > forced_maxcfg else cfg_scale))
|
||||
|
|
@ -2334,9 +2391,8 @@ def sd_generate(genparams):
|
|||
inputs.cache_mode = cache_mode.encode("UTF-8")
|
||||
inputs.cache_options = cache_options.encode("UTF-8")
|
||||
inputs.upscale = (True if tryparseint(genparams.get("enable_hr", 0),0) else False)
|
||||
|
||||
lora_multipliers = prepare_lora_multipliers(genparams.get("lora", []))
|
||||
inputs.lora_len = len(lora_multipliers)
|
||||
inputs.lora_len = len(lora_filenames)
|
||||
inputs.lora_filenames = (ctypes.c_char_p * inputs.lora_len)(*lora_filenames)
|
||||
inputs.lora_multipliers = (ctypes.c_float * inputs.lora_len)(*lora_multipliers)
|
||||
|
||||
ret = handle.sd_generate(inputs)
|
||||
|
|
@ -4426,7 +4482,7 @@ Change Mode<br>
|
|||
response_body = (json.dumps({"object":"list","data":mlist}).encode())
|
||||
|
||||
elif clean_path.endswith('/sdapi/v1/loras'):
|
||||
response_body = (json.dumps([{'name': name, 'path': path} for _, name, path, multiplier in imglorainfo if multiplier == 0.])).encode()
|
||||
response_body = (json.dumps(mk_sdapi_lora_list(imglora_bypath))).encode()
|
||||
|
||||
elif clean_path.endswith('/sdapi/v1/upscalers'):
|
||||
if args.sdupscaler:
|
||||
|
|
@ -8739,26 +8795,88 @@ def main(launch_args, default_args):
|
|||
input()
|
||||
|
||||
|
||||
def mk_lora_info(imgloras, multipliers):
|
||||
# (full path, name, name+extension, can change multiplier)
|
||||
# XXX for each LoRA, sdapi needs a name and a path; we could use
|
||||
# the full filename as a path, but we don't know if we can expose it
|
||||
used_lora_names = set()
|
||||
result = []
|
||||
def mk_lora_info(imgloras, multipliers, mock_filesystem=False):
|
||||
first_multiplier = multipliers[0] if len(multipliers) > 0 else 1.
|
||||
lora_files = []
|
||||
lora_dirs = []
|
||||
# identify files and dirs
|
||||
for i, lora_path in enumerate(imgloras):
|
||||
multiplier = multipliers[i] if i < len(multipliers) else first_multiplier
|
||||
lora_file = os.path.basename(lora_path)
|
||||
if mock_filesystem:
|
||||
print('fake filesystem access')
|
||||
if lora_path.endswith('/'):
|
||||
lora_dirs.append(lora_path)
|
||||
else:
|
||||
lora_files.append(('', lora_path, multiplier))
|
||||
elif os.path.isfile(lora_path):
|
||||
lora_files.append(('', lora_path, multiplier))
|
||||
elif os.path.isdir(lora_path):
|
||||
lora_dirs.append(lora_path)
|
||||
elif os.path.exists(lora_path):
|
||||
print(f"Unexpected file type for SD LORA model file {lora_path}")
|
||||
else:
|
||||
print(f"Missing SD LORA model file {lora_path}...")
|
||||
# scan all dirs
|
||||
for lora_dir in lora_dirs:
|
||||
print(f'Scanning {lora_dir} for LoRAs...')
|
||||
if mock_filesystem:
|
||||
print('fake directory scan')
|
||||
files = ['lora1_makebelieve.gguf', 'lora2/makebelieve.gguf']
|
||||
else:
|
||||
files = scan_directory(lora_dir, ('.safetensors', '.gguf'), 1)
|
||||
print(f' found {len(files)} files under {lora_dir}')
|
||||
for file in files:
|
||||
lora_files.append((lora_dir, file, 0.0))
|
||||
# dedup and map all files
|
||||
unique_lora_names = set()
|
||||
lora_fullmap = {}
|
||||
for i, (lora_dir, lora_path, multiplier) in enumerate(lora_files):
|
||||
if lora_dir:
|
||||
# lora_path is relative: we can show it on the interface and accept it
|
||||
lora_fullpath = os.path.join(lora_dir, lora_path)
|
||||
# NOTE: we are including the relative directory on the short name
|
||||
lora_file = lora_path
|
||||
preloaded = False
|
||||
else:
|
||||
lora_fullpath = lora_path
|
||||
# we don't know which portion of the path we can show, so omit it
|
||||
lora_file = os.path.basename(lora_path)
|
||||
preloaded = True
|
||||
if not mock_filesystem:
|
||||
lora_fullpath = os.path.abspath(lora_fullpath)
|
||||
# dedup paths (e.g. preloaded and on directory)
|
||||
if lora_fullpath in lora_fullmap:
|
||||
lora_fullmap[lora_fullpath]["multiplier"] += multiplier
|
||||
continue
|
||||
lora_name, lora_ext = os.path.splitext(lora_file)
|
||||
# ensure unique names
|
||||
i = 1
|
||||
mapped_name = lora_name
|
||||
while mapped_name in used_lora_names:
|
||||
lora_uname = lora_name
|
||||
while lora_uname in unique_lora_names:
|
||||
i += 1
|
||||
mapped_name = lora_name + '_' + str(i)
|
||||
used_lora_names.add(mapped_name)
|
||||
result.append((lora_path, mapped_name, mapped_name + lora_ext, multiplier))
|
||||
return result
|
||||
lora_uname = lora_name + '_' + str(i)
|
||||
unique_lora_names.add(lora_uname)
|
||||
lora_upath = lora_uname + lora_ext
|
||||
lora_entry = {
|
||||
'fullpath': lora_fullpath,
|
||||
'name': lora_uname,
|
||||
'path': lora_upath,
|
||||
'multiplier': multiplier,
|
||||
'preloaded': preloaded,
|
||||
}
|
||||
lora_fullmap[lora_fullpath] = lora_entry
|
||||
# build the runtime tables
|
||||
preloaded_table = []
|
||||
lora_path_map = {}
|
||||
lora_name_map = {}
|
||||
for lora_entry in lora_fullmap.values():
|
||||
# only map LoRAs that can be changed
|
||||
if not imglora_initial_fixed or lora_entry["multiplier"] == 0.0:
|
||||
lora_path_map[lora_entry["path"]] = lora_entry
|
||||
lora_name_map[lora_entry["name"]] = lora_entry["path"]
|
||||
if lora_entry["preloaded"]:
|
||||
preloaded_table.append(lora_entry)
|
||||
return preloaded_table, lora_path_map, lora_name_map
|
||||
|
||||
|
||||
def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
||||
|
|
@ -9201,16 +9319,9 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
|||
imgclip2 = ""
|
||||
imgphotomaker = ""
|
||||
imgupscaler = ""
|
||||
if args.sdlora and len(args.sdlora)>0:
|
||||
for i in range (0,len(args.sdlora)):
|
||||
curr = args.sdlora[i]
|
||||
if os.path.exists(curr):
|
||||
imgloras.append(os.path.abspath(curr))
|
||||
else:
|
||||
print(f"Missing SD LORA model file {curr}...")
|
||||
global imglorainfo
|
||||
global imglora_preload, imglora_bypath, imglora_name2path
|
||||
args.sdloramult = sanitize_lora_multipliers(args.sdloramult)
|
||||
imglorainfo = mk_lora_info(imgloras, args.sdloramult)
|
||||
imglora_preload, imglora_bypath, imglora_name2path = mk_lora_info(args.sdlora, args.sdloramult)
|
||||
if args.sdvae:
|
||||
if os.path.exists(args.sdvae):
|
||||
imgvae = os.path.abspath(args.sdvae)
|
||||
|
|
@ -9247,7 +9358,7 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
|||
friendlysdmodelname = os.path.basename(imgmodel)
|
||||
friendlysdmodelname = os.path.splitext(friendlysdmodelname)[0]
|
||||
friendlysdmodelname = sanitize_string(friendlysdmodelname)
|
||||
loadok = sd_load_model(imgmodel,imgvae,imgloras,imgt5xxl,imgclip1,imgclip2,imgphotomaker,imgupscaler)
|
||||
loadok = sd_load_model(imgmodel,imgvae,imgt5xxl,imgclip1,imgclip2,imgphotomaker,imgupscaler)
|
||||
print("Load Image Model OK: " + str(loadok))
|
||||
if not loadok:
|
||||
exitcounter = 999
|
||||
|
|
|
|||
|
|
@ -45,6 +45,62 @@
|
|||
static_assert((int)SD_TYPE_COUNT == (int)GGML_TYPE_COUNT,
|
||||
"inconsistency between SD_TYPE_COUNT and GGML_TYPE_COUNT");
|
||||
|
||||
struct LoraMap {
|
||||
std::vector<std::pair<std::string, float>> items;
|
||||
std::unordered_map<std::string, std::size_t> index;
|
||||
|
||||
void add_lora(const std::string& k, float v) {
|
||||
auto it = index.find(k);
|
||||
if (it == index.end()) {
|
||||
index[k] = items.size();
|
||||
items.emplace_back(k, v);
|
||||
} else {
|
||||
items[it->second].second += v;
|
||||
}
|
||||
}
|
||||
|
||||
float check_small_mult(float mult) {
|
||||
if (mult > 1e-6 || mult < -1e-6)
|
||||
return mult;
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
float get_mult(const std::string& k) {
|
||||
auto lora = index.find(k);
|
||||
if (lora == index.end()) return 0.f;
|
||||
return check_small_mult(items[lora->second].second);
|
||||
}
|
||||
|
||||
std::vector<sd_lora_t> get_lora_specs(bool include_zeroes = false) {
|
||||
std::vector<sd_lora_t> lora_specs;
|
||||
for (const auto & lora: items) {
|
||||
float multiplier = check_small_mult(lora.second);
|
||||
if (include_zeroes || multiplier != 0.f) {
|
||||
sd_lora_t spec = {};
|
||||
spec.path = lora.first.c_str();
|
||||
spec.multiplier = multiplier;
|
||||
lora_specs.push_back(spec);
|
||||
}
|
||||
}
|
||||
return lora_specs;
|
||||
}
|
||||
|
||||
std::string get_lora_meta() {
|
||||
std::stringstream lora_meta;
|
||||
lora_meta << std::setprecision(6);
|
||||
for (const auto & lora: items) {
|
||||
float multiplier = check_small_mult(lora.second);
|
||||
if (multiplier != 0.f) {
|
||||
std::string lora_name = std::filesystem::path(lora.first).stem().string();
|
||||
lora_meta << "<lora:" << lora_name << ":" << multiplier << ">";
|
||||
}
|
||||
}
|
||||
return lora_meta.str();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
struct SDParams {
|
||||
int n_threads = -1;
|
||||
std::string model_path;
|
||||
|
|
@ -79,9 +135,9 @@ struct SDParams {
|
|||
|
||||
bool chroma_use_dit_mask = true;
|
||||
|
||||
std::vector<std::string> lora_paths;
|
||||
std::vector<float> lora_multipliers;
|
||||
LoraMap lora_map;
|
||||
bool lora_dynamic = false;
|
||||
bool lora_fixed = false;
|
||||
|
||||
std::string cache_mode;
|
||||
std::string cache_options;
|
||||
|
|
@ -211,12 +267,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
set_sd_quiet(sd_is_quiet);
|
||||
executable_path = inputs.executable_path;
|
||||
std::string taesdpath = "";
|
||||
std::vector<std::string> lora_paths;
|
||||
std::vector<float> lora_multipliers;
|
||||
LoraMap lora_map;
|
||||
for(int i=0;i<inputs.lora_len;++i)
|
||||
{
|
||||
lora_paths.push_back(inputs.lora_filenames[i]);
|
||||
lora_multipliers.push_back(inputs.lora_multipliers[i]);
|
||||
lora_map.add_lora(inputs.lora_filenames[i], inputs.lora_multipliers[i]);
|
||||
}
|
||||
std::string vaefilename = inputs.vae_filename;
|
||||
std::string t5xxl_filename = inputs.t5xxl_filename;
|
||||
|
|
@ -233,23 +287,32 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
|
||||
int lora_apply_mode = LORA_APPLY_AT_RUNTIME;
|
||||
bool lora_dynamic = false;
|
||||
bool lora_cache = false;
|
||||
bool lora_fixed = false;
|
||||
if(inputs.lora_apply_mode >= 0 && inputs.lora_apply_mode <= 2) {
|
||||
lora_apply_mode = inputs.lora_apply_mode;
|
||||
}
|
||||
else if(inputs.lora_apply_mode == 3) {
|
||||
lora_dynamic = true;
|
||||
else {
|
||||
// bit 3: LoRAs can be changed dynamically
|
||||
// bit 4: cache the initial LoRA list in VRAM
|
||||
// bit 5: do not allow multiplier changes for the initial LoRAs
|
||||
lora_dynamic = !!(inputs.lora_apply_mode & (1<<3));
|
||||
lora_cache = lora_dynamic && !!(inputs.lora_apply_mode & (1<<4));
|
||||
lora_fixed = lora_dynamic && !!(inputs.lora_apply_mode & (1<<5));
|
||||
}
|
||||
|
||||
if(lora_paths.size() > 0)
|
||||
if(lora_map.items.size() > 0)
|
||||
{
|
||||
const char* lora_apply_mode_name = lora_apply_mode == 1 ? "immediately"
|
||||
: lora_apply_mode == 2 ? "at runtime"
|
||||
: "auto";
|
||||
const char * lora_dynamic_name = lora_dynamic ? " (dynamic)" : "";
|
||||
printf("With LoRAs in apply mode %s%s:\n", lora_apply_mode_name, lora_dynamic_name);
|
||||
for(int i=0;i<lora_paths.size();++i)
|
||||
const char * lora_dynamic_name = lora_dynamic ? ", dynamic" : "";
|
||||
const char * lora_cache_name = lora_cache ? ", with caching" : "";
|
||||
printf("With LoRAs in apply mode %s%s%s:\n", lora_apply_mode_name, lora_dynamic_name, lora_cache_name);
|
||||
for(auto lora: lora_map.items)
|
||||
{
|
||||
printf(" %s at %f power\n", lora_paths[i].c_str(),lora_multipliers[i]);
|
||||
const char * lora_fixed_name = lora_fixed && lora.second != 0.f ? " (fixed)" : "";
|
||||
printf(" %s at %f power%s\n", lora.first.c_str(), lora.second, lora_fixed_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -337,9 +400,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
sd_params->clip_l_path = clip1_filename;
|
||||
sd_params->clip_g_path = clip2_filename;
|
||||
sd_params->stacked_id_embeddings_path = photomaker_filename;
|
||||
sd_params->lora_paths = lora_paths;
|
||||
sd_params->lora_multipliers = lora_multipliers;
|
||||
sd_params->lora_map = lora_map;
|
||||
sd_params->lora_dynamic = lora_dynamic;
|
||||
sd_params->lora_fixed = lora_fixed;
|
||||
//if t5 is set, and model is a gguf, load it as a diffusion model path
|
||||
bool endswithgguf = (sd_params->model_path.rfind(".gguf") == sd_params->model_path.size() - 5);
|
||||
if((sd_params->t5xxl_path!="" || sd_params->clip_l_path!="" || sd_params->clip_g_path!="") && endswithgguf)
|
||||
|
|
@ -429,21 +492,13 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
sdmodelfilename = mpath.filename().string();
|
||||
|
||||
// preload the LoRAs with the initial multipliers
|
||||
std::vector<sd_lora_t> lora_specs;
|
||||
for(int i=0;i<sd_params->lora_paths.size();++i)
|
||||
{
|
||||
if (!lora_dynamic && sd_params->lora_multipliers[i] == 0.)
|
||||
continue;
|
||||
sd_lora_t spec = {};
|
||||
spec.path = sd_params->lora_paths[i].c_str();
|
||||
spec.multiplier = sd_params->lora_multipliers[i];
|
||||
lora_specs.push_back(spec);
|
||||
}
|
||||
|
||||
std::vector<sd_lora_t> lora_specs = sd_params->lora_map.get_lora_specs(lora_dynamic&& lora_cache);
|
||||
if(lora_specs.size()>0)
|
||||
{
|
||||
printf(" applying %zu LoRAs...\n", lora_specs.size());
|
||||
sd_ctx->sd->kcpp_lora_cache_populate = lora_cache;
|
||||
sd_ctx->sd->apply_loras(lora_specs.data(), lora_specs.size());
|
||||
sd_ctx->sd->kcpp_lora_cache_populate = false;
|
||||
}
|
||||
|
||||
input_extraimage_buffers.reserve(max_extra_images);
|
||||
|
|
@ -1166,24 +1221,21 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
parse_cache_options(params.cache, sd_params->cache_mode, sd_params->cache_options);
|
||||
params.batch_count = 1;
|
||||
|
||||
std::vector<sd_lora_t> lora_specs;
|
||||
std::stringstream lora_meta;
|
||||
lora_meta << std::setprecision(6);
|
||||
for(size_t i=0;i<sd_params->lora_paths.size();++i)
|
||||
{
|
||||
float multiplier = sd_params->lora_multipliers[i];
|
||||
if (sd_params->lora_dynamic) {
|
||||
multiplier = i < inputs.lora_len ? inputs.lora_multipliers[i] : 0.;
|
||||
}
|
||||
if (multiplier != 0.f) {
|
||||
sd_lora_t spec = {};
|
||||
spec.path = sd_params->lora_paths[i].c_str();
|
||||
spec.multiplier = multiplier;
|
||||
lora_specs.push_back(spec);
|
||||
std::string lora_name = std::filesystem::path(sd_params->lora_paths[i]).stem().string();
|
||||
lora_meta << "<lora:" << lora_name << ":" << multiplier << ">";
|
||||
LoraMap lora_map = sd_params->lora_map;
|
||||
if (sd_params->lora_dynamic) {
|
||||
for (int i = 0; i < inputs.lora_len; i++) {
|
||||
// check if it was initially fixed
|
||||
std::string path = inputs.lora_filenames[i];
|
||||
float preloaded_mult = sd_params->lora_map.get_mult(path);
|
||||
if (!sd_params->lora_fixed || preloaded_mult == 0.f) {
|
||||
lora_map.add_lora(path, inputs.lora_multipliers[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<sd_lora_t> lora_specs = lora_map.get_lora_specs();
|
||||
std::string lora_meta = lora_map.get_lora_meta();
|
||||
|
||||
if(!sd_is_quiet && sddebugmode==1) {
|
||||
if (lora_specs.size() > 0) {
|
||||
printf("Applying LoRAs:\n");
|
||||
|
|
@ -1424,9 +1476,9 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
|||
{
|
||||
printf("Upscaling output image...\n");
|
||||
upscaled_image = upscale(upscaler_ctx, results[i], 2);
|
||||
png = stbi_write_png_to_mem(upscaled_image.data, 0, upscaled_image.width, upscaled_image.height, upscaled_image.channel, &out_data_len, get_image_params(params, lora_meta.str()).c_str());
|
||||
png = stbi_write_png_to_mem(upscaled_image.data, 0, upscaled_image.width, upscaled_image.height, upscaled_image.channel, &out_data_len, get_image_params(params, lora_meta).c_str());
|
||||
} else {
|
||||
png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params, lora_meta.str()).c_str());
|
||||
png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(params, lora_meta).c_str());
|
||||
}
|
||||
|
||||
if (png != NULL)
|
||||
|
|
|
|||
|
|
@ -139,6 +139,7 @@ public:
|
|||
std::vector<std::shared_ptr<LoraModel>> first_stage_lora_models;
|
||||
bool apply_lora_immediately = false;
|
||||
std::map<std::string, std::shared_ptr<LoraModel>> kcpp_lora_cache;
|
||||
bool kcpp_lora_cache_populate = false;
|
||||
|
||||
std::string taesd_path;
|
||||
bool use_tiny_autoencoder = false;
|
||||
|
|
@ -1209,7 +1210,6 @@ public:
|
|||
return it->second;
|
||||
}
|
||||
}
|
||||
// by construction, kcpp will always find the preloaded LoRAs on the cache
|
||||
|
||||
std::string lora_path = lora_id;
|
||||
static std::string high_noise_tag = "|high_noise|";
|
||||
|
|
@ -1224,13 +1224,13 @@ public:
|
|||
LOG_WARN("load lora tensors from %s failed", lora_path.c_str());
|
||||
// also cache negatives to avoid I/O at runtime
|
||||
lora = nullptr;
|
||||
if (kcpp_at_runtime)
|
||||
if (kcpp_at_runtime && kcpp_lora_cache_populate)
|
||||
kcpp_lora_cache[lora_key] = lora;
|
||||
return lora;
|
||||
}
|
||||
|
||||
lora->multiplier = multiplier;
|
||||
if (kcpp_at_runtime)
|
||||
if (kcpp_at_runtime && kcpp_lora_cache_populate)
|
||||
kcpp_lora_cache[lora_key] = lora;
|
||||
return lora;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,16 +53,63 @@ def extract_loras_from_prompt(*args, **kwargs):
|
|||
|
||||
return koboldcpp.extract_loras_from_prompt(*args, **kwargs)
|
||||
|
||||
def mk_lora_info(*args, **kwargs):
|
||||
def mk_lora_info(imgloras, multipliers):
|
||||
"""
|
||||
>>> mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [])
|
||||
[('/x/lora1.safetensors', 'lora1', 'lora1.safetensors', 1.0), ('/y/lora2.gguf', 'lora2', 'lora2.gguf', 1.0)]
|
||||
>>> mk_lora_info(['/x/lora1.safetensors', '/y/lora1.safetensors'], [0.3])
|
||||
[('/x/lora1.safetensors', 'lora1', 'lora1.safetensors', 0.3), ('/y/lora1.safetensors', 'lora1_2', 'lora1_2.safetensors', 0.3)]
|
||||
>>> mk_lora_info(['./lora1.gguf', '/y/lora2.gguf', 'lora3.gguf'], [0, 0.3])
|
||||
[('./lora1.gguf', 'lora1', 'lora1.gguf', 0), ('/y/lora2.gguf', 'lora2', 'lora2.gguf', 0.3), ('lora3.gguf', 'lora3', 'lora3.gguf', 0)]
|
||||
>>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [])
|
||||
fake filesystem access
|
||||
fake filesystem access
|
||||
>>> pre
|
||||
[{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 1.0, 'preloaded': True}, {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 1.0, 'preloaded': True}]
|
||||
>>> path
|
||||
{}
|
||||
>>> name
|
||||
{}
|
||||
|
||||
>>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora2.gguf'], [0.])
|
||||
fake filesystem access
|
||||
fake filesystem access
|
||||
>>> pre
|
||||
[{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.0, 'preloaded': True}, {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 0.0, 'preloaded': True}]
|
||||
>>> path
|
||||
{'lora1.safetensors': {'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.0, 'preloaded': True}, 'lora2.gguf': {'fullpath': '/y/lora2.gguf', 'name': 'lora2', 'path': 'lora2.gguf', 'multiplier': 0.0, 'preloaded': True}}
|
||||
>>> name
|
||||
{'lora1': 'lora1.safetensors', 'lora2': 'lora2.gguf'}
|
||||
|
||||
>>> pre, path, name = mk_lora_info(['/x/lora1.safetensors', '/y/lora1.safetensors'], [0.3])
|
||||
fake filesystem access
|
||||
fake filesystem access
|
||||
>>> pre
|
||||
[{'fullpath': '/x/lora1.safetensors', 'name': 'lora1', 'path': 'lora1.safetensors', 'multiplier': 0.3, 'preloaded': True}, {'fullpath': '/y/lora1.safetensors', 'name': 'lora1_2', 'path': 'lora1_2.safetensors', 'multiplier': 0.3, 'preloaded': True}]
|
||||
>>> path
|
||||
{}
|
||||
|
||||
>>> pre, path, name = mk_lora_info(['/lora/dir/'], [0.3])
|
||||
fake filesystem access
|
||||
Scanning /lora/dir/ for LoRAs...
|
||||
fake directory scan
|
||||
found 2 files under /lora/dir/
|
||||
>>> pre
|
||||
[]
|
||||
>>> expected = {
|
||||
... 'lora1_makebelieve.gguf': {
|
||||
... 'fullpath': '/lora/dir/lora1_makebelieve.gguf',
|
||||
... 'name': 'lora1_makebelieve',
|
||||
... 'path': 'lora1_makebelieve.gguf',
|
||||
... 'multiplier': 0.0,
|
||||
... 'preloaded': False},
|
||||
... 'lora2/makebelieve.gguf': {
|
||||
... 'fullpath': '/lora/dir/lora2/makebelieve.gguf',
|
||||
... 'name': 'lora2/makebelieve',
|
||||
... 'path': 'lora2/makebelieve.gguf',
|
||||
... 'multiplier': 0.0,
|
||||
... 'preloaded': False}}
|
||||
>>> path == expected
|
||||
True
|
||||
>>> name
|
||||
{'lora1_makebelieve': 'lora1_makebelieve.gguf', 'lora2/makebelieve': 'lora2/makebelieve.gguf'}
|
||||
|
||||
"""
|
||||
return koboldcpp.mk_lora_info(*args, **kwargs)
|
||||
return koboldcpp.mk_lora_info(imgloras, multipliers, True)
|
||||
|
||||
def sanitize_lora_multipliers(*args, **kwargs):
|
||||
"""
|
||||
|
|
@ -86,6 +133,77 @@ def sanitize_lora_multipliers(*args, **kwargs):
|
|||
return koboldcpp.sanitize_lora_multipliers(*args, **kwargs)
|
||||
|
||||
|
||||
def prepare_lora_multipliers(req_list, imglora_bypath):
|
||||
"""
|
||||
>>> req = [
|
||||
... {"path": "a.gguf", "multiplier": "0.5"},
|
||||
... {"path": "a.gguf", "multiplier": 1.0},
|
||||
... ]
|
||||
>>> imglora = {"a.gguf": {"fullpath": "/abs/a.gguf"}}
|
||||
>>> paths, mults = prepare_lora_multipliers(req, imglora)
|
||||
>>> paths == [b"/abs/a.gguf"], mults == [1.5]
|
||||
(True, True)
|
||||
|
||||
>>> req = [
|
||||
... {"path": "b.gguf", "multiplier": "2"},
|
||||
... {"path": "c.gguf"},
|
||||
... "not a dict",
|
||||
... {"path": "", "multiplier": "3"},
|
||||
... {"path": "b.gguf", "multiplier": 0},
|
||||
... ]
|
||||
>>> imglora = {"b.gguf": {"fullpath": "/abs/b.gguf"},
|
||||
... "c.gguf": {"fullpath": "/abs/c.gguf"}}
|
||||
>>> paths, mults = prepare_lora_multipliers(req, imglora)
|
||||
>>> paths == [b"/abs/b.gguf"], mults == [2.0]
|
||||
(True, True)
|
||||
|
||||
>>> req = [{"path": "missing.gguf", "multiplier": "5"}]
|
||||
>>> imglora = {}
|
||||
>>> paths, mults = prepare_lora_multipliers(req, imglora)
|
||||
>>> paths == [], mults == []
|
||||
(True, True)
|
||||
|
||||
>>> req = [
|
||||
... {"path": "x.gguf", "multiplier": 1},
|
||||
... {"path": "y.gguf", "multiplier": 2},
|
||||
... ]
|
||||
>>> imglora = {
|
||||
... "x.gguf": {"fullpath": "/abs/x.gguf", "path": "x.gguf", "multiplier": 0.0},
|
||||
... "y.gguf": {"fullpath": "/abs/y.gguf", "path": "y.gguf", "multiplier": 0.0},
|
||||
... }
|
||||
>>> paths, mults = prepare_lora_multipliers(req, imglora)
|
||||
>>> paths == [b'/abs/x.gguf', b'/abs/y.gguf']
|
||||
True
|
||||
>>> mults == [1.0, 2.0]
|
||||
True
|
||||
"""
|
||||
return koboldcpp.prepare_lora_multipliers_backend(req_list, imglora_bypath)
|
||||
|
||||
def mk_sdapi_lora_list(imglora_bypath):
|
||||
'''
|
||||
>>> imglora_bypath = {
|
||||
... 'lora_a.safetensors': {'name': 'lora_a', 'path': 'lora_a.safetensors', 'multiplier': 0.0},
|
||||
... 'lora_b.gguf' : {'name': 'lora_b', 'path': 'lora_b.gguf', 'multiplier': 0.0},
|
||||
... 'lora_c.safetensors': {'name': 'lora_c', 'path': 'lora_c.safetensors', 'multiplier': 1.0},
|
||||
... 'chars/waifu.gguf' : {'name': 'chars/waifu', 'path': 'chars/waifu.gguf', 'multiplier': 0.0}
|
||||
... }
|
||||
>>> mk_sdapi_lora_list(imglora_bypath)
|
||||
[{'name': 'lora_a', 'path': 'lora_a.safetensors'}, {'name': 'lora_b', 'path': 'lora_b.gguf'}, {'name': 'chars/waifu', 'path': 'chars/waifu.gguf'}]
|
||||
|
||||
>>> empty_data = {}
|
||||
>>> mk_sdapi_lora_list(empty_data)
|
||||
[]
|
||||
|
||||
>>> mixed_data = {
|
||||
... 'k1': {'name': 'X', 'path': 'p1', 'multiplier': 0.5},
|
||||
... 'k2': {'name': 'Y', 'path': 'p2', 'multiplier': 0.0}
|
||||
... }
|
||||
>>> mk_sdapi_lora_list(mixed_data)
|
||||
[{'name': 'Y', 'path': 'p2'}]
|
||||
'''
|
||||
return koboldcpp.mk_sdapi_lora_list(imglora_bypath)
|
||||
|
||||
|
||||
def gendefaults_parse_meta_field(*args, **kwargs):
|
||||
'''
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue