mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-22 11:16:08 +00:00
sd: add backend support for max_vram (#2221)
This commit is contained in:
parent
095bf63b58
commit
f85a747dc0
3 changed files with 6 additions and 0 deletions
1
expose.h
1
expose.h
|
|
@@ -207,6 +207,7 @@ struct sd_load_model_inputs
|
|||
const char * upscaler_filename = nullptr;
|
||||
const int img_hard_limit = 0;
|
||||
const int img_soft_limit = 0;
|
||||
float max_vram = 0.f;
|
||||
const char * devices_override = nullptr;
|
||||
const bool quiet = false;
|
||||
const int debugmode = 0;
|
||||
|
|
|
|||
|
|
@@ -389,6 +389,7 @@ class sd_load_model_inputs(ctypes.Structure):
|
|||
("upscaler_filename", ctypes.c_char_p),
|
||||
("img_hard_limit", ctypes.c_int),
|
||||
("img_soft_limit", ctypes.c_int),
|
||||
("max_vram", ctypes.c_float),
|
||||
("devices_override", ctypes.c_char_p),
|
||||
("quiet", ctypes.c_bool),
|
||||
("debugmode", ctypes.c_int)]
|
||||
|
|
|
|||
|
|
@@ -397,6 +397,9 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
} else if (inputs.use_mmap) {
|
||||
printf("Using mmap for I/O\n");
|
||||
}
|
||||
if(inputs.max_vram != 0.f) {
|
||||
printf("Using max VRAM = %0.2f\n", inputs.max_vram);
|
||||
}
|
||||
if(inputs.quant > 0)
|
||||
{
|
||||
printf("Note: Loading a pre-quantized model is always faster than using compress weights!\n");
|
||||
|
|
@@ -460,6 +463,7 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
params.diffusion_conv_direct = sd_params->diffusion_conv_direct;
|
||||
params.vae_conv_direct = sd_params->vae_conv_direct;
|
||||
params.chroma_use_dit_mask = sd_params->chroma_use_dit_mask;
|
||||
params.max_vram = inputs.max_vram;
|
||||
params.enable_mmap = inputs.use_mmap;
|
||||
// the _cpu flags are only used if the backend string is empty, but
|
||||
// we always set both for consistency
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue