mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-07 09:02:04 +00:00
added --sdmaingpu allowing image models to be independently placed on any gpu
This commit is contained in:
parent
a3d3800f3e
commit
fdfb713d91
5 changed files with 61 additions and 11 deletions
12
expose.h
12
expose.h
|
|
@ -54,7 +54,7 @@ struct load_model_inputs
|
|||
const bool use_smartcontext = false;
|
||||
const bool use_contextshift = false;
|
||||
const bool use_fastforward = false;
|
||||
const int kcpp_main_gpu = 0;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const int batchsize = 512;
|
||||
const bool autofit = false;
|
||||
|
|
@ -172,7 +172,7 @@ struct sd_load_model_inputs
|
|||
{
|
||||
const char * model_filename = nullptr;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = 0;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const int threads = 0;
|
||||
const int quant = 0;
|
||||
|
|
@ -255,7 +255,7 @@ struct whisper_load_model_inputs
|
|||
{
|
||||
const char * model_filename = nullptr;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = 0;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const char * devices_override = nullptr;
|
||||
const bool quiet = false;
|
||||
|
|
@ -280,7 +280,7 @@ struct tts_load_model_inputs
|
|||
const char * ttc_model_filename = nullptr;
|
||||
const char * cts_model_filename = nullptr;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = 0;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const int gpulayers = 0;
|
||||
const bool flash_attention = false;
|
||||
|
|
@ -310,7 +310,7 @@ struct embeddings_load_model_inputs
|
|||
const int threads = 4;
|
||||
const char * model_filename = nullptr;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = 0;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const int gpulayers = 0;
|
||||
const bool flash_attention = false;
|
||||
|
|
@ -340,7 +340,7 @@ struct music_load_model_inputs
|
|||
const char * musicvae_filename = nullptr;
|
||||
const bool lowvram = false;
|
||||
const char * executable_path = nullptr;
|
||||
const int kcpp_main_gpu = 0;
|
||||
const int kcpp_main_gpu = -1;
|
||||
const char * vulkan_info = nullptr;
|
||||
const char * devices_override = nullptr;
|
||||
const bool quiet = false;
|
||||
|
|
|
|||
|
|
@ -4814,7 +4814,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
vk::PhysicalDeviceProperties tmpprops = device->physical_device.getProperties();
|
||||
if(tmpprops.vendorID == VK_VENDOR_ID_NVIDIA)
|
||||
{
|
||||
printf("Apply NVIDIA Anti-BSOD Fix for KCPP\n");
|
||||
printf("\nApply NVIDIA Anti-BSOD Fix for KCPP\n");
|
||||
device->disable_host_visible_vidmem = true; //kcpp requested fix for vulkan BSOD on Nvidia
|
||||
}
|
||||
|
||||
|
|
|
|||
21
koboldcpp.py
21
koboldcpp.py
|
|
@ -909,7 +909,7 @@ def init_library():
|
|||
def set_backend_props(inputs):
|
||||
# we must force an explicit tensor split
|
||||
# otherwise the default will divide equally and multigpu crap will slow it down badly
|
||||
inputs.kcpp_main_gpu = 0
|
||||
inputs.kcpp_main_gpu = -1
|
||||
if(args.maingpu is not None and args.maingpu>=0):
|
||||
inputs.kcpp_main_gpu = args.maingpu
|
||||
|
||||
|
|
@ -2135,6 +2135,7 @@ def sd_load_model(model_filename,vae_filename,t5xxl_filename,clip1_filename,clip
|
|||
inputs.img_hard_limit = args.sdclamped
|
||||
inputs.img_soft_limit = args.sdclampedsoft
|
||||
inputs = set_backend_props(inputs)
|
||||
inputs.kcpp_main_gpu = args.sdmaingpu
|
||||
ret = handle.sd_load_model(inputs)
|
||||
return ret
|
||||
|
||||
|
|
@ -6361,6 +6362,8 @@ def show_gui():
|
|||
sd_clamped_soft_var = ctk.StringVar(value="0")
|
||||
sd_threads_var = ctk.StringVar(value=str(default_threads))
|
||||
sd_quant_var = ctk.StringVar(value=sd_quant_choices[0])
|
||||
sd_main_gpu_var = ctk.StringVar(value="-1")
|
||||
|
||||
gen_defaults_var = ctk.StringVar()
|
||||
gen_defaults_overwrite_var = ctk.IntVar(value=0)
|
||||
|
||||
|
|
@ -6953,7 +6956,7 @@ def show_gui():
|
|||
layercounter_label.grid(row=6, column=0, padx=230, sticky="W")
|
||||
layercounter_label.configure(text_color="#ffff00")
|
||||
tensor_split_entry,tensor_split_label = makelabelentry(hardware_tab, "Tensor Split:", tensor_split_str_vars, 8, 80, padx=160, singleline=True, tooltip='When using multiple GPUs this option controls how large tensors should be split across all GPUs.\nUses a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order.\nFor example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1.')
|
||||
maingpu_entry,maingpu_label = makelabelentry(hardware_tab, "Main GPU:" , maingpu_var, 8, 50,padx=340,singleline=True,tooltip="Only for multi-gpu, which GPU to set as main?\nIf left blank or -1, uses default value.",labelpadx=270)
|
||||
maingpu_entry,maingpu_label = makelabelentry(hardware_tab, "Main GPU:" , maingpu_var, 8, 50,padx=340,singleline=True,tooltip="Only for multi-gpu, which GPU ID to set as main?\nIf left blank or -1, uses default value.",labelpadx=270)
|
||||
|
||||
# threads
|
||||
makelabelentry(hardware_tab, "Threads:" , threads_var, 11, 50, padx=160, singleline=True,tooltip="How many threads to use.\nRecommended value is your CPU core count, defaults are usually OK.")
|
||||
|
|
@ -7140,7 +7143,9 @@ def show_gui():
|
|||
makefileentry(images_tab, "Image Gen. Model (safetensors/gguf):", "Select Image Gen Model File", sd_model_var, 1, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")], tooltiptxt="Select a .safetensors or .gguf Image Generation model file on disk to be loaded.")
|
||||
makelabelentry(images_tab, "Clamp Resolution Limit (Hard):", sd_clamped_var, 4, 50, padx=(190),singleline=True,tooltip="Limit generation steps and output image size for shared use.\nSet to 0 to disable, otherwise value is clamped to the max size limit (min 512px).")
|
||||
makelabelentry(images_tab, "(Soft):", sd_clamped_soft_var, 4, 50, padx=(290),singleline=True,tooltip="Square image size restriction, to protect the server against memory crashes.\nAllows width-height tradeoffs, eg. 640 allows 640x640 and 512x768\nLeave at 0 for the default value: 832 for SD1.5/SD2, 1024 otherwise.",labelpadx=(250))
|
||||
makelabelentry(images_tab, "ImgThreads:" , sd_threads_var, 8, 50,padx=(290),singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.",labelpadx=(210))
|
||||
makelabelentry(images_tab, "ImgThreads:" , sd_threads_var, 8, 40,padx=(280),singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.",labelpadx=(200))
|
||||
makelabelentry(images_tab, "ImgGPU:" , sd_main_gpu_var, 8, 40,padx=394,singleline=True,tooltip="Which GPU ID to use for Image Gen?\nIf left blank or -1, uses default value.",labelpadx=340)
|
||||
|
||||
sd_model_var.trace_add("write", gui_changed_modelfile)
|
||||
makelabelcombobox(images_tab, "Compress Weights: ", sd_quant_var, 8, width=(60), padx=(126), labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices)
|
||||
sd_quant_var.trace_add("write", changed_gpulayers_estimate)
|
||||
|
|
@ -7493,6 +7498,7 @@ def show_gui():
|
|||
args.sdlora = [item.strip() for item in sd_lora_var.get().split("|") if item]
|
||||
# XXX the user may have used '|' since it's used for the LoRAs
|
||||
args.sdloramult = sanitize_lora_multipliers(re.split(r"[ |]+", sd_loramult_var.get()))
|
||||
args.sdmaingpu = (-1 if sd_main_gpu_var.get()=="" else int(sd_main_gpu_var.get()))
|
||||
|
||||
if gen_defaults_var.get() != "":
|
||||
args.gendefaults = gen_defaults_var.get()
|
||||
|
|
@ -7748,6 +7754,11 @@ def show_gui():
|
|||
sd_tiled_vae_var.set(str(dict["sdtiledvae"]) if ("sdtiledvae" in dict and dict["sdtiledvae"]) else str(default_vae_tile_threshold))
|
||||
sd_lora_var.set("|".join(sanitize_lora_list(dict.get('sdlora'))))
|
||||
sd_loramult_var.set(" ".join(f"{n:.3f}".rstrip('0').rstrip('.') for n in dict.get("sdloramult", [])))
|
||||
if "sdmaingpu" in dict:
|
||||
sd_main_gpu_var.set(dict["sdmaingpu"])
|
||||
else:
|
||||
sd_main_gpu_var.set("-1")
|
||||
|
||||
gendefaults = (dict["gendefaults"] if ("gendefaults" in dict and dict["gendefaults"]) else "")
|
||||
if isinstance(gendefaults, type({})):
|
||||
gendefaults = json.dumps(gendefaults)
|
||||
|
|
@ -8370,6 +8381,8 @@ def convert_args_to_template(savdict):
|
|||
savdict["hordekey"] = ""
|
||||
savdict["hordeworkername"] = ""
|
||||
savdict["sdthreads"] = 0
|
||||
savdict["maingpu"] = -1
|
||||
savdict["sdmaingpu"] = -1
|
||||
savdict["password"] = None
|
||||
savdict["adminpassword"] = None
|
||||
savdict["usemmap"] = False
|
||||
|
|
@ -10093,6 +10106,8 @@ if __name__ == '__main__':
|
|||
sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify image generation LoRAs safetensors models to be applied. Multiple LoRAs are accepted.", nargs='+')
|
||||
sdparsergroup.add_argument("--sdloramult", metavar=('[amounts]'), help="Multipliers for the image LoRA model to be applied.", type=float, nargs='+', default=[1.0])
|
||||
sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 0 disables vae tiling.", type=int, default=default_vae_tile_threshold)
|
||||
sdparsergroup.add_argument("--sdmaingpu", metavar=('[Device ID]'), help="If specified, Image Generation weights will be placed on the selected GPU index", type=int, default=-1)
|
||||
|
||||
whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')
|
||||
whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="")
|
||||
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ static std::vector<uint8_t *> input_extraimage_buffers;
|
|||
const int max_extra_images = 4;
|
||||
|
||||
static std::string sdvulkandeviceenv;
|
||||
static std::string sdmaingpuenv;
|
||||
static int cfg_tiled_vae_threshold = 0;
|
||||
static int cfg_square_limit = 0;
|
||||
static int cfg_side_limit = 0;
|
||||
|
|
@ -284,6 +285,18 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
cfg_square_limit = inputs.img_soft_limit;
|
||||
printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename);
|
||||
|
||||
{
|
||||
//kcpp allow gpu id override
|
||||
std::string sdmaingpu = std::to_string(inputs.kcpp_main_gpu);
|
||||
const char* existingenv = getenv("SD_VK_DEVICE");
|
||||
int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu<=0?0:inputs.kcpp_main_gpu;
|
||||
if(kcpp_parseinfo_maindevice>0 && !existingenv && sdmaingpu!="")
|
||||
{
|
||||
sdmaingpuenv = "SD_VK_DEVICE="+sdmaingpu;
|
||||
putenv((char*)sdmaingpuenv.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
int lora_apply_mode = LORA_APPLY_AT_RUNTIME;
|
||||
bool lora_dynamic = false;
|
||||
bool lora_cache = false;
|
||||
|
|
|
|||
|
|
@ -199,7 +199,29 @@ public:
|
|||
void init_backend() {
|
||||
#ifdef SD_USE_CUDA
|
||||
LOG_DEBUG("Using CUDA backend");
|
||||
backend = ggml_backend_cuda_init(0);
|
||||
size_t device = 0; //kcpp: ported device selection from vulkan
|
||||
const int device_count = ggml_backend_cuda_get_device_count();
|
||||
if (device_count) {
|
||||
const char* SD_VK_DEVICE = getenv("SD_VK_DEVICE");
|
||||
if (SD_VK_DEVICE != nullptr) {
|
||||
std::string sd_vk_device_str = SD_VK_DEVICE;
|
||||
try {
|
||||
device = std::stoull(sd_vk_device_str);
|
||||
} catch (const std::invalid_argument&) {
|
||||
LOG_WARN("SD_VK_DEVICE environment variable is not a valid integer (%s). Falling back to device 0.", SD_VK_DEVICE);
|
||||
device = 0;
|
||||
} catch (const std::out_of_range&) {
|
||||
LOG_WARN("SD_VK_DEVICE environment variable value is out of range for `unsigned long long` type (%s). Falling back to device 0.", SD_VK_DEVICE);
|
||||
device = 0;
|
||||
}
|
||||
if (device >= device_count) {
|
||||
LOG_WARN("Cannot find targeted cuda device (%llu). Falling back to device 0.", device);
|
||||
device = 0;
|
||||
}
|
||||
}
|
||||
LOG_INFO("CUDA: Using device %llu", device);
|
||||
}
|
||||
backend = ggml_backend_cuda_init(device);
|
||||
#endif
|
||||
#ifdef SD_USE_METAL
|
||||
LOG_DEBUG("Using Metal backend");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue