mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 17:44:38 +00:00
added toggle for vae tiling, use custom memory buffer
This commit is contained in:
parent
d752846116
commit
568e476997
5 changed files with 44 additions and 2 deletions
1
expose.h
1
expose.h
|
@ -149,6 +149,7 @@ struct sd_load_model_inputs
|
||||||
const int threads = 0;
|
const int threads = 0;
|
||||||
const int quant = 0;
|
const int quant = 0;
|
||||||
const bool taesd = false;
|
const bool taesd = false;
|
||||||
|
const bool notile = false;
|
||||||
const char * t5xxl_filename = nullptr;
|
const char * t5xxl_filename = nullptr;
|
||||||
const char * clipl_filename = nullptr;
|
const char * clipl_filename = nullptr;
|
||||||
const char * clipg_filename = nullptr;
|
const char * clipg_filename = nullptr;
|
||||||
|
|
|
@ -234,6 +234,7 @@ class sd_load_model_inputs(ctypes.Structure):
|
||||||
("threads", ctypes.c_int),
|
("threads", ctypes.c_int),
|
||||||
("quant", ctypes.c_int),
|
("quant", ctypes.c_int),
|
||||||
("taesd", ctypes.c_bool),
|
("taesd", ctypes.c_bool),
|
||||||
|
("notile", ctypes.c_bool),
|
||||||
("t5xxl_filename", ctypes.c_char_p),
|
("t5xxl_filename", ctypes.c_char_p),
|
||||||
("clipl_filename", ctypes.c_char_p),
|
("clipl_filename", ctypes.c_char_p),
|
||||||
("clipg_filename", ctypes.c_char_p),
|
("clipg_filename", ctypes.c_char_p),
|
||||||
|
@ -1121,6 +1122,7 @@ def sd_load_model(model_filename,vae_filename,lora_filename,t5xxl_filename,clipl
|
||||||
inputs.threads = thds
|
inputs.threads = thds
|
||||||
inputs.quant = quant
|
inputs.quant = quant
|
||||||
inputs.taesd = True if args.sdvaeauto else False
|
inputs.taesd = True if args.sdvaeauto else False
|
||||||
|
inputs.notile = True if args.sdnotile else False
|
||||||
inputs.vae_filename = vae_filename.encode("UTF-8")
|
inputs.vae_filename = vae_filename.encode("UTF-8")
|
||||||
inputs.lora_filename = lora_filename.encode("UTF-8")
|
inputs.lora_filename = lora_filename.encode("UTF-8")
|
||||||
inputs.lora_multiplier = args.sdloramult
|
inputs.lora_multiplier = args.sdloramult
|
||||||
|
@ -2980,6 +2982,7 @@ def show_gui():
|
||||||
sd_clipl_var = ctk.StringVar()
|
sd_clipl_var = ctk.StringVar()
|
||||||
sd_clipg_var = ctk.StringVar()
|
sd_clipg_var = ctk.StringVar()
|
||||||
sd_vaeauto_var = ctk.IntVar(value=0)
|
sd_vaeauto_var = ctk.IntVar(value=0)
|
||||||
|
sd_notile_var = ctk.IntVar(value=0)
|
||||||
sd_clamped_var = ctk.StringVar(value="0")
|
sd_clamped_var = ctk.StringVar(value="0")
|
||||||
sd_threads_var = ctk.StringVar(value=str(default_threads))
|
sd_threads_var = ctk.StringVar(value=str(default_threads))
|
||||||
sd_quant_var = ctk.IntVar(value=0)
|
sd_quant_var = ctk.IntVar(value=0)
|
||||||
|
@ -3548,6 +3551,7 @@ def show_gui():
|
||||||
sdvaeitem2.grid()
|
sdvaeitem2.grid()
|
||||||
sdvaeitem3.grid()
|
sdvaeitem3.grid()
|
||||||
makecheckbox(images_tab, "Use TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 22,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.")
|
makecheckbox(images_tab, "Use TAE SD (AutoFix Broken VAE)", sd_vaeauto_var, 22,command=toggletaesd,tooltiptxt="Replace VAE with TAESD. May fix bad VAE.")
|
||||||
|
makecheckbox(images_tab, "No VAE Tiling", sd_notile_var, 24,tooltiptxt="Disables VAE tiling, may not work for large images.")
|
||||||
|
|
||||||
# audio tab
|
# audio tab
|
||||||
audio_tab = tabcontent["Audio"]
|
audio_tab = tabcontent["Audio"]
|
||||||
|
@ -3738,6 +3742,7 @@ def show_gui():
|
||||||
|
|
||||||
args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get()))
|
args.sdthreads = (0 if sd_threads_var.get()=="" else int(sd_threads_var.get()))
|
||||||
args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get()))
|
args.sdclamped = (0 if int(sd_clamped_var.get())<=0 else int(sd_clamped_var.get()))
|
||||||
|
args.sdnotile = (True if sd_notile_var.get()==1 else False)
|
||||||
if sd_vaeauto_var.get()==1:
|
if sd_vaeauto_var.get()==1:
|
||||||
args.sdvaeauto = True
|
args.sdvaeauto = True
|
||||||
args.sdvae = ""
|
args.sdvae = ""
|
||||||
|
@ -3919,6 +3924,7 @@ def show_gui():
|
||||||
sd_clipl_var.set(dict["sdclipl"] if ("sdclipl" in dict and dict["sdclipl"]) else "")
|
sd_clipl_var.set(dict["sdclipl"] if ("sdclipl" in dict and dict["sdclipl"]) else "")
|
||||||
sd_clipg_var.set(dict["sdclipg"] if ("sdclipg" in dict and dict["sdclipg"]) else "")
|
sd_clipg_var.set(dict["sdclipg"] if ("sdclipg" in dict and dict["sdclipg"]) else "")
|
||||||
sd_vaeauto_var.set(1 if ("sdvaeauto" in dict and dict["sdvaeauto"]) else 0)
|
sd_vaeauto_var.set(1 if ("sdvaeauto" in dict and dict["sdvaeauto"]) else 0)
|
||||||
|
sd_notile_var.set(1 if ("sdnotile" in dict and dict["sdnotile"]) else 0)
|
||||||
sd_lora_var.set(dict["sdlora"] if ("sdlora" in dict and dict["sdlora"]) else "")
|
sd_lora_var.set(dict["sdlora"] if ("sdlora" in dict and dict["sdlora"]) else "")
|
||||||
sd_loramult_var.set(str(dict["sdloramult"]) if ("sdloramult" in dict and dict["sdloramult"]) else "1.0")
|
sd_loramult_var.set(str(dict["sdloramult"]) if ("sdloramult" in dict and dict["sdloramult"]) else "1.0")
|
||||||
|
|
||||||
|
@ -5237,6 +5243,7 @@ if __name__ == '__main__':
|
||||||
sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true')
|
sdparsergrouplora.add_argument("--sdquant", help="If specified, loads the model quantized to save memory.", action='store_true')
|
||||||
sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify a stable diffusion LORA safetensors model to be applied. Cannot be used with quant models.", default="")
|
sdparsergrouplora.add_argument("--sdlora", metavar=('[filename]'), help="Specify a stable diffusion LORA safetensors model to be applied. Cannot be used with quant models.", default="")
|
||||||
sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the LORA model to be applied.", type=float, default=1.0)
|
sdparsergroup.add_argument("--sdloramult", metavar=('[amount]'), help="Multiplier for the LORA model to be applied.", type=float, default=1.0)
|
||||||
|
sdparsergroup.add_argument("--sdnotile", help="Disables VAE tiling, may not work for large images.", action='store_true')
|
||||||
|
|
||||||
whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')
|
whisperparsergroup = parser.add_argument_group('Whisper Transcription Commands')
|
||||||
whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper bin model to enable Speech-To-Text transcription.", default="")
|
whisperparsergroup.add_argument("--whispermodel", metavar=('[filename]'), help="Specify a Whisper bin model to enable Speech-To-Text transcription.", default="")
|
||||||
|
|
|
@ -52,6 +52,25 @@
|
||||||
#define __STATIC_INLINE__ static inline
|
#define __STATIC_INLINE__ static inline
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
 * Allocates a zero-initialised block of at least `required_bytes` bytes whose
 * start address is a multiple of `alignment`.
 *
 * The raw pointer returned by calloc() is stored in the pointer-sized slot that
 * sits immediately before the returned address, so the block must be released
 * with sd_aligned_free() and never with plain free().
 *
 * @param required_bytes usable size requested by the caller
 * @param alignment      required alignment in bytes; must be a non-zero power of two
 * @return the aligned pointer, or NULL when `alignment` is invalid, when the
 *         padded size would overflow size_t, or when calloc() fails
 */
__STATIC_INLINE__ void* sd_aligned_malloc(size_t required_bytes, size_t alignment)
{
    // The rounding mask below is only correct for power-of-two alignments;
    // alignment == 0 would turn the mask into 0 and yield a null "aligned" address.
    if (alignment == 0 || (alignment & (alignment - 1)) != 0)
    {
        return NULL;
    }

    const size_t max_size = (size_t)-1;

    // Worst-case padding: up to (alignment - 1) bytes of slack plus one slot to
    // remember the original pointer. Kept in size_t to avoid narrowing.
    if (alignment - 1 > max_size - sizeof(void*))
    {
        return NULL;
    }
    const size_t offset = alignment - 1 + sizeof(void*);

    // Refuse requests whose padded size would wrap around and under-allocate.
    if (required_bytes > max_size - offset)
    {
        return NULL;
    }

    void* p1 = calloc(1, required_bytes + offset); // original block
    if (p1 == NULL)
    {
        return NULL;
    }

    // Skip past the bookkeeping slot, then round down to the alignment boundary.
    void** p2 = (void**)(((size_t)(p1) + offset) & ~(alignment - 1)); // aligned block
    p2[-1] = p1;
    return p2;
}
|
||||||
|
|
||||||
|
/**
 * Releases a block previously returned by sd_aligned_malloc().
 *
 * The original allocation pointer is read from the slot immediately before `p`
 * (where sd_aligned_malloc() stored it) and handed to free().
 *
 * @param p aligned pointer from sd_aligned_malloc(), or NULL (ignored, mirroring free())
 */
__STATIC_INLINE__ void sd_aligned_free(void *p)
{
    // Without this guard a NULL argument would read ((void**)NULL)[-1].
    if (p == NULL)
    {
        return;
    }
    free(((void**)p)[-1]);
}
|
||||||
|
|
||||||
__STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void* user_data) {
|
__STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void* user_data) {
|
||||||
(void)level;
|
(void)level;
|
||||||
(void)user_data;
|
(void)user_data;
|
||||||
|
@ -507,15 +526,23 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
|
||||||
params.mem_size += tile_size * tile_size * input->ne[2] * sizeof(float); // input chunk
|
params.mem_size += tile_size * tile_size * input->ne[2] * sizeof(float); // input chunk
|
||||||
params.mem_size += (tile_size * scale) * (tile_size * scale) * output->ne[2] * sizeof(float); // output chunk
|
params.mem_size += (tile_size * scale) * (tile_size * scale) * output->ne[2] * sizeof(float); // output chunk
|
||||||
params.mem_size += 3 * ggml_tensor_overhead();
|
params.mem_size += 3 * ggml_tensor_overhead();
|
||||||
|
params.mem_size += 512; //extra 512 bytes why not, we will use and handle our own memory
|
||||||
|
params.mem_size = GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
|
||||||
params.mem_buffer = NULL;
|
params.mem_buffer = NULL;
|
||||||
params.no_alloc = false;
|
params.no_alloc = false;
|
||||||
|
|
||||||
LOG_DEBUG("tile work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
|
LOG_DEBUG("tile work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
|
||||||
|
|
||||||
|
params.mem_buffer = sd_aligned_malloc(params.mem_size,64);
|
||||||
|
|
||||||
// draft context
|
// draft context
|
||||||
struct ggml_context* tiles_ctx = ggml_init(params);
|
struct ggml_context* tiles_ctx = ggml_init(params);
|
||||||
if (!tiles_ctx) {
|
if (!tiles_ctx) {
|
||||||
LOG_ERROR("ggml_init() failed");
|
LOG_ERROR("ggml_init() failed");
|
||||||
|
if(params.mem_buffer!=NULL)
|
||||||
|
{
|
||||||
|
sd_aligned_free(params.mem_buffer);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -554,6 +581,10 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
|
||||||
pretty_progress(num_tiles, num_tiles, last_time);
|
pretty_progress(num_tiles, num_tiles, last_time);
|
||||||
}
|
}
|
||||||
ggml_free(tiles_ctx);
|
ggml_free(tiles_ctx);
|
||||||
|
if(params.mem_buffer!=NULL)
|
||||||
|
{
|
||||||
|
sd_aligned_free(params.mem_buffer);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ctx,
|
__STATIC_INLINE__ struct ggml_tensor* ggml_group_norm_32(struct ggml_context* ctx,
|
||||||
|
|
|
@ -135,6 +135,7 @@ std::string base64_encode(const unsigned char* data, unsigned int data_length) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv;
|
static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv;
|
||||||
|
static bool notiling = false;
|
||||||
bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
||||||
|
|
||||||
executable_path = inputs.executable_path;
|
executable_path = inputs.executable_path;
|
||||||
|
@ -144,6 +145,7 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
||||||
std::string t5xxl_filename = inputs.t5xxl_filename;
|
std::string t5xxl_filename = inputs.t5xxl_filename;
|
||||||
std::string clipl_filename = inputs.clipl_filename;
|
std::string clipl_filename = inputs.clipl_filename;
|
||||||
std::string clipg_filename = inputs.clipg_filename;
|
std::string clipg_filename = inputs.clipg_filename;
|
||||||
|
notiling = inputs.notile;
|
||||||
printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename);
|
printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename);
|
||||||
if(lorafilename!="")
|
if(lorafilename!="")
|
||||||
{
|
{
|
||||||
|
@ -352,7 +354,7 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
|
||||||
sd_params->width = newwidth;
|
sd_params->width = newwidth;
|
||||||
sd_params->height = newheight;
|
sd_params->height = newheight;
|
||||||
}
|
}
|
||||||
bool dotile = (sd_params->width>768 || sd_params->height>768);
|
bool dotile = (sd_params->width>768 || sd_params->height>768) && !notiling;
|
||||||
set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom
|
set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom
|
||||||
|
|
||||||
//for img2img
|
//for img2img
|
||||||
|
|
|
@ -1084,7 +1084,8 @@ public:
|
||||||
ggml_tensor_scale_output(result);
|
ggml_tensor_scale_output(result);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (vae_tiling && decode) { // TODO: support tiling vae encode
|
//koboldcpp never use tiling with taesd
|
||||||
|
if (false && vae_tiling && decode) { // TODO: support tiling vae encode
|
||||||
// split latent in 64x64 tiles and compute in several steps
|
// split latent in 64x64 tiles and compute in several steps
|
||||||
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
|
||||||
tae_first_stage->compute(n_threads, in, decode, &out);
|
tae_first_stage->compute(n_threads, in, decode, &out);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue