Added --sdmaingpu, allowing image generation models to be independently placed on any GPU

This commit is contained in:
Concedo 2026-03-21 17:34:12 +08:00
parent a3d3800f3e
commit fdfb713d91
5 changed files with 61 additions and 11 deletions

View file

@@ -54,7 +54,7 @@ struct load_model_inputs
const bool use_smartcontext = false;
const bool use_contextshift = false;
const bool use_fastforward = false;
const int kcpp_main_gpu = 0;
const int kcpp_main_gpu = -1;
const char * vulkan_info = nullptr;
const int batchsize = 512;
const bool autofit = false;
@@ -172,7 +172,7 @@ struct sd_load_model_inputs
{
const char * model_filename = nullptr;
const char * executable_path = nullptr;
const int kcpp_main_gpu = 0;
const int kcpp_main_gpu = -1;
const char * vulkan_info = nullptr;
const int threads = 0;
const int quant = 0;
@@ -255,7 +255,7 @@ struct whisper_load_model_inputs
{
const char * model_filename = nullptr;
const char * executable_path = nullptr;
const int kcpp_main_gpu = 0;
const int kcpp_main_gpu = -1;
const char * vulkan_info = nullptr;
const char * devices_override = nullptr;
const bool quiet = false;
@@ -280,7 +280,7 @@ struct tts_load_model_inputs
const char * ttc_model_filename = nullptr;
const char * cts_model_filename = nullptr;
const char * executable_path = nullptr;
const int kcpp_main_gpu = 0;
const int kcpp_main_gpu = -1;
const char * vulkan_info = nullptr;
const int gpulayers = 0;
const bool flash_attention = false;
@@ -310,7 +310,7 @@ struct embeddings_load_model_inputs
const int threads = 4;
const char * model_filename = nullptr;
const char * executable_path = nullptr;
const int kcpp_main_gpu = 0;
const int kcpp_main_gpu = -1;
const char * vulkan_info = nullptr;
const int gpulayers = 0;
const bool flash_attention = false;
@@ -340,7 +340,7 @@ struct music_load_model_inputs
const char * musicvae_filename = nullptr;
const bool lowvram = false;
const char * executable_path = nullptr;
const int kcpp_main_gpu = 0;
const int kcpp_main_gpu = -1;
const char * vulkan_info = nullptr;
const char * devices_override = nullptr;
const bool quiet = false;

View file

@@ -4814,7 +4814,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
vk::PhysicalDeviceProperties tmpprops = device->physical_device.getProperties();
if(tmpprops.vendorID == VK_VENDOR_ID_NVIDIA)
{
printf("Apply NVIDIA Anti-BSOD Fix for KCPP\n");
printf("\nApply NVIDIA Anti-BSOD Fix for KCPP\n");
device->disable_host_visible_vidmem = true; //kcpp requested fix for vulkan BSOD on Nvidia
}

View file

@@ -909,7 +909,7 @@ def init_library():
def set_backend_props(inputs):
# we must force an explicit tensor split
# otherwise the default will divide equally and multigpu crap will slow it down badly
inputs.kcpp_main_gpu = 0
inputs.kcpp_main_gpu = -1
if(args.maingpu is not None and args.maingpu>=0):
inputs.kcpp_main_gpu = args.maingpu
@@ -2135,6 +2135,7 @@ def sd_load_model(model_filename,vae_filename,t5xxl_filename,clip1_filename,clip
inputs.img_hard_limit = args.sdclamped
inputs.img_soft_limit = args.sdclampedsoft
inputs = set_backend_props(inputs)
inputs.kcpp_main_gpu = args.sdmaingpu
ret = handle.sd_load_model(inputs)
return ret
@@ -6361,6 +6362,8 @@ def show_gui():
sd_clamped_soft_var = ctk.StringVar(value="0")
sd_threads_var = ctk.StringVar(value=str(default_threads))
sd_quant_var = ctk.StringVar(value=sd_quant_choices[0])
sd_main_gpu_var = ctk.StringVar(value="-1")
gen_defaults_var = ctk.StringVar()
gen_defaults_overwrite_var = ctk.IntVar(value=0)
@@ -6953,7 +6956,7 @@ def show_gui():
layercounter_label.grid(row=6, column=0, padx=230, sticky="W")
layercounter_label.configure(text_color="#ffff00")
tensor_split_entry,tensor_split_label = makelabelentry(hardware_tab, "Tensor Split:", tensor_split_str_vars, 8, 80, padx=160, singleline=True, tooltip='When using multiple GPUs this option controls how large tensors should be split across all GPUs.\nUses a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order.\nFor example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1.')
maingpu_entry,maingpu_label = makelabelentry(hardware_tab, "Main GPU:" , maingpu_var, 8, 50,padx=340,singleline=True,tooltip="Only for multi-gpu, which GPU to set as main?\nIf left blank or -1, uses default value.",labelpadx=270)
maingpu_entry,maingpu_label = makelabelentry(hardware_tab, "Main GPU:" , maingpu_var, 8, 50,padx=340,singleline=True,tooltip="Only for multi-gpu, which GPU ID to set as main?\nIf left blank or -1, uses default value.",labelpadx=270)
# threads
makelabelentry(hardware_tab, "Threads:" , threads_var, 11, 50, padx=160, singleline=True,tooltip="How many threads to use.\nRecommended value is your CPU core count, defaults are usually OK.")
@@ -7140,7 +7143,9 @@ def show_gui():
makefileentry(images_tab, "Image Gen. Model (safetensors/gguf):", "Select Image Gen Model File", sd_model_var, 1, width=280, singlecol=True, filetypes=[("*.safetensors *.gguf","*.safetensors *.gguf")], tooltiptxt="Select a .safetensors or .gguf Image Generation model file on disk to be loaded.")
makelabelentry(images_tab, "Clamp Resolution Limit (Hard):", sd_clamped_var, 4, 50, padx=(190),singleline=True,tooltip="Limit generation steps and output image size for shared use.\nSet to 0 to disable, otherwise value is clamped to the max size limit (min 512px).")
makelabelentry(images_tab, "(Soft):", sd_clamped_soft_var, 4, 50, padx=(290),singleline=True,tooltip="Square image size restriction, to protect the server against memory crashes.\nAllows width-height tradeoffs, eg. 640 allows 640x640 and 512x768\nLeave at 0 for the default value: 832 for SD1.5/SD2, 1024 otherwise.",labelpadx=(250))
makelabelentry(images_tab, "ImgThreads:" , sd_threads_var, 8, 50,padx=(290),singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.",labelpadx=(210))
makelabelentry(images_tab, "ImgThreads:" , sd_threads_var, 8, 40,padx=(280),singleline=True,tooltip="How many threads to use during image generation.\nIf left blank, uses same value as threads.",labelpadx=(200))
makelabelentry(images_tab, "ImgGPU:" , sd_main_gpu_var, 8, 40,padx=394,singleline=True,tooltip="Which GPU ID to use for Image Gen?\nIf left blank or -1, uses default value.",labelpadx=340)
sd_model_var.trace_add("write", gui_changed_modelfile)
makelabelcombobox(images_tab, "Compress Weights: ", sd_quant_var, 8, width=(60), padx=(126), labelpadx=8, tooltiptxt="Quantizes the SD model weights to save memory.\nHigher levels save more memory, and cause more quality degradation.", values=sd_quant_choices)
sd_quant_var.trace_add("write", changed_gpulayers_estimate)
@@ -7493,6 +7498,7 @@ def show_gui():
args.sdlora = [item.strip() for item in sd_lora_var.get().split("|") if item]
# XXX the user may have used '|' since it's used for the LoRAs
args.sdloramult = sanitize_lora_multipliers(re.split(r"[ |]+", sd_loramult_var.get()))
args.sdmaingpu = (-1 if sd_main_gpu_var.get()=="" else int(sd_main_gpu_var.get()))
if gen_defaults_var.get() != "":
args.gendefaults = gen_defaults_var.get()
@@ -7748,6 +7754,11 @@ def show_gui():
sd_tiled_vae_var.set(str(dict["sdtiledvae"]) if ("sdtiledvae" in dict and dict["sdtiledvae"]) else str(default_vae_tile_threshold))
sd_lora_var.set("|".join(sanitize_lora_list(dict.get('sdlora'))))
sd_loramult_var.set(" ".join(f"{n:.3f}".rstrip('0').rstrip('.') for n in dict.get("sdloramult", [])))
if "sdmaingpu" in dict:
sd_main_gpu_var.set(dict["sdmaingpu"])
else:
sd_main_gpu_var.set("-1")
gendefaults = (dict["gendefaults"] if ("gendefaults" in dict and dict["gendefaults"]) else "")
if isinstance(gendefaults, type({})):
gendefaults = json.dumps(gendefaults)
@@ -8370,6 +8381,8 @@ def convert_args_to_template(savdict):
savdict["hordekey"] = ""
savdict["hordeworkername"] = ""
savdict["sdthreads"] = 0
savdict["maingpu"] = -1
savdict["sdmaingpu"] = -1
savdict["password"] = None
savdict["adminpassword"] = None
savdict["usemmap"] = False
@@ -10093,6 +10106,8 @@ if __name__ == '__main__':
sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify image generation LoRAs safetensors models to be applied. Multiple LoRAs are accepted.", nargs='+')
sdparsergroup.add_argument("--sdloramult", metavar=('[amounts]'), help="Multipliers for the image LoRA model to be applied.", type=float, nargs='+', default=[1.0])
sdparsergroup.add_argument("--sdtiledvae", metavar=('[maxres]'), help="Adjust the automatic VAE tiling trigger for images above this size. 0 disables vae tiling.", type=int, default=default_vae_tile_threshold)
sdparsergroup.add_argument("--sdmaingpu", metavar=('[Device ID]'), help="If specified, Image Generation weights will be placed on the selected GPU index", type=int, default=-1)
whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')
whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper .bin model to enable Speech-To-Text transcription.", default="")

View file

@@ -159,6 +159,7 @@ static std::vector<uint8_t *> input_extraimage_buffers;
const int max_extra_images = 4;
static std::string sdvulkandeviceenv;
static std::string sdmaingpuenv;
static int cfg_tiled_vae_threshold = 0;
static int cfg_square_limit = 0;
static int cfg_side_limit = 0;
@@ -284,6 +285,18 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
cfg_square_limit = inputs.img_soft_limit;
printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename);
{
//kcpp allow gpu id override
std::string sdmaingpu = std::to_string(inputs.kcpp_main_gpu);
const char* existingenv = getenv("SD_VK_DEVICE");
int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu<=0?0:inputs.kcpp_main_gpu;
if(kcpp_parseinfo_maindevice>0 && !existingenv && sdmaingpu!="")
{
sdmaingpuenv = "SD_VK_DEVICE="+sdmaingpu;
putenv((char*)sdmaingpuenv.c_str());
}
}
int lora_apply_mode = LORA_APPLY_AT_RUNTIME;
bool lora_dynamic = false;
bool lora_cache = false;

View file

@@ -199,7 +199,29 @@ public:
void init_backend() {
#ifdef SD_USE_CUDA
LOG_DEBUG("Using CUDA backend");
backend = ggml_backend_cuda_init(0);
size_t device = 0; //kcpp: ported device selection from vulkan
const int device_count = ggml_backend_cuda_get_device_count();
if (device_count) {
const char* SD_VK_DEVICE = getenv("SD_VK_DEVICE");
if (SD_VK_DEVICE != nullptr) {
std::string sd_vk_device_str = SD_VK_DEVICE;
try {
device = std::stoull(sd_vk_device_str);
} catch (const std::invalid_argument&) {
LOG_WARN("SD_VK_DEVICE environment variable is not a valid integer (%s). Falling back to device 0.", SD_VK_DEVICE);
device = 0;
} catch (const std::out_of_range&) {
LOG_WARN("SD_VK_DEVICE environment variable value is out of range for `unsigned long long` type (%s). Falling back to device 0.", SD_VK_DEVICE);
device = 0;
}
if (device >= device_count) {
LOG_WARN("Cannot find targeted cuda device (%llu). Falling back to device 0.", device);
device = 0;
}
}
LOG_INFO("CUDA: Using device %llu", device);
}
backend = ggml_backend_cuda_init(device);
#endif
#ifdef SD_USE_METAL
LOG_DEBUG("Using Metal backend");