updated sdcpp, also set euler as default sampler

Concedo 2024-12-01 17:00:20 +08:00
parent e93c2427b4
commit 2ba5949054
27 changed files with 1514 additions and 521 deletions
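Note: the sampler-default change named in the commit title does not appear in the hunks below (only one file's hunks are shown here). A minimal sketch of what that default looks like, assuming the sample_method_t enum from stable-diffusion.h (EULER_A, EULER, HEUN, ...); the variable name is hypothetical:

    sample_method_t default_sampler = EULER;  // per the title, Euler is now the default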


@@ -28,11 +28,8 @@ const char* model_version_to_str[] = {
"SD 2.x",
"SDXL",
"SVD",
"SD3 2B",
"Flux Dev",
"Flux Schnell",
"SD3.5 8B",
"SD3.5 2B"};
"SD3.x",
"Flux"};
const char* sampling_methods_str[] = {
"Euler A",
@@ -93,6 +90,7 @@ public:
std::shared_ptr<ControlNet> control_net;
std::shared_ptr<PhotoMakerIDEncoder> pmid_model;
std::shared_ptr<LoraModel> pmid_lora;
std::shared_ptr<PhotoMakerIDEmbed> pmid_id_embeds;
std::string taesd_path;
bool use_tiny_autoencoder = false;
@@ -153,7 +151,8 @@ public:
schedule_t schedule,
bool clip_on_cpu,
bool control_net_cpu,
bool vae_on_cpu) {
bool vae_on_cpu,
bool diffusion_flash_attn) {
use_tiny_autoencoder = taesd_path.size() > 0;
std::string taesd_path_fixed = taesd_path;
#ifdef SD_USE_CUBLAS
@@ -166,7 +165,12 @@ public:
#endif
#ifdef SD_USE_VULKAN
LOG_DEBUG("Using Vulkan backend");
backend = ggml_backend_vk_init(0);
for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) {
backend = ggml_backend_vk_init(device);
}
if (!backend) {
LOG_WARN("Failed to initialize Vulkan backend");
}
#endif
#ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend");
@@ -177,13 +181,7 @@ public:
LOG_DEBUG("Using CPU backend");
backend = ggml_backend_cpu_init();
}
#ifdef SD_USE_FLASH_ATTENTION
#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_SYCL) || defined(SD_USE_VULKAN)
LOG_WARN("Flash Attention not supported with GPU Backend");
#else
LOG_INFO("Flash Attention enabled");
#endif
#endif
ModelLoader model_loader;
vae_tiling = vae_tiling_;
@@ -285,16 +283,18 @@ public:
conditioner_wtype = wtype;
diffusion_model_wtype = wtype;
vae_wtype = wtype;
model_loader.set_wtype_override(wtype);
}
if (version == VERSION_SDXL) {
vae_wtype = GGML_TYPE_F32;
model_loader.set_wtype_override(GGML_TYPE_F32, "vae.");
}
LOG_INFO("Weight type: %s", ggml_type_name(model_wtype));
LOG_INFO("Conditioner weight type: %s", ggml_type_name(conditioner_wtype));
LOG_INFO("Diffusion model weight type: %s", ggml_type_name(diffusion_model_wtype));
LOG_INFO("VAE weight type: %s", ggml_type_name(vae_wtype));
LOG_INFO("Weight type: %s", model_wtype != SD_TYPE_COUNT ? ggml_type_name(model_wtype) : "??");
LOG_INFO("Conditioner weight type: %s", conditioner_wtype != SD_TYPE_COUNT ? ggml_type_name(conditioner_wtype) : "??");
LOG_INFO("Diffusion model weight type: %s", diffusion_model_wtype != SD_TYPE_COUNT ? ggml_type_name(diffusion_model_wtype) : "??");
LOG_INFO("VAE weight type: %s", vae_wtype != SD_TYPE_COUNT ? ggml_type_name(vae_wtype) : "??");
LOG_DEBUG("ggml tensor size = %d bytes", (int)sizeof(ggml_tensor));
@@ -307,30 +307,30 @@ public:
"try specifying SDXL VAE FP16 Fix with the --vae parameter. "
"You can find it here: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors");
}
} else if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B) {
} else if (sd_version_is_sd3(version)) {
scale_factor = 1.5305f;
} else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
} else if (sd_version_is_flux(version)) {
scale_factor = 0.3611;
// TODO: shift_factor
}
if (version == VERSION_SVD) {
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend, conditioner_wtype);
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend, model_loader.tensor_storages_types);
clip_vision->alloc_params_buffer();
clip_vision->get_param_tensors(tensors);
diffusion_model = std::make_shared<UNetModel>(backend, diffusion_model_wtype, version);
diffusion_model = std::make_shared<UNetModel>(backend, model_loader.tensor_storages_types, version);
diffusion_model->alloc_params_buffer();
diffusion_model->get_param_tensors(tensors);
first_stage_model = std::make_shared<AutoEncoderKL>(backend, vae_wtype, vae_decode_only, true, version);
first_stage_model = std::make_shared<AutoEncoderKL>(backend, model_loader.tensor_storages_types, "first_stage_model", vae_decode_only, true, version);
LOG_DEBUG("vae_decode_only %d", vae_decode_only);
first_stage_model->alloc_params_buffer();
first_stage_model->get_param_tensors(tensors, "first_stage_model");
} else {
clip_backend = backend;
bool use_t5xxl = false;
if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B || version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
if (sd_version_is_dit(version)) {
use_t5xxl = true;
}
if (!ggml_backend_is_cpu(backend) && use_t5xxl && conditioner_wtype != GGML_TYPE_F32) {
@@ -341,16 +341,27 @@ public:
LOG_INFO("CLIP: Using CPU backend");
clip_backend = ggml_backend_cpu_init();
}
if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B) {
cond_stage_model = std::make_shared<SD3CLIPEmbedder>(clip_backend, conditioner_wtype);
diffusion_model = std::make_shared<MMDiTModel>(backend, diffusion_model_wtype, version);
} else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
cond_stage_model = std::make_shared<FluxCLIPEmbedder>(clip_backend, conditioner_wtype);
diffusion_model = std::make_shared<FluxModel>(backend, diffusion_model_wtype, version);
} else {
cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(clip_backend, conditioner_wtype, embeddings_path, version);
diffusion_model = std::make_shared<UNetModel>(backend, diffusion_model_wtype, version);
if (diffusion_flash_attn) {
LOG_INFO("Using flash attention in the diffusion model");
}
if (sd_version_is_sd3(version)) {
if (diffusion_flash_attn) {
LOG_WARN("flash attention in this diffusion model is currently unsupported!");
}
cond_stage_model = std::make_shared<SD3CLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
diffusion_model = std::make_shared<MMDiTModel>(backend, model_loader.tensor_storages_types);
} else if (sd_version_is_flux(version)) {
cond_stage_model = std::make_shared<FluxCLIPEmbedder>(clip_backend, model_loader.tensor_storages_types);
diffusion_model = std::make_shared<FluxModel>(backend, model_loader.tensor_storages_types, diffusion_flash_attn);
} else {
if (id_embeddings_path.find("v2") != std::string::npos) {
cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(clip_backend, model_loader.tensor_storages_types, embeddings_path, version, PM_VERSION_2);
} else {
cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(clip_backend, model_loader.tensor_storages_types, embeddings_path, version);
}
diffusion_model = std::make_shared<UNetModel>(backend, model_loader.tensor_storages_types, version, diffusion_flash_attn);
}
cond_stage_model->alloc_params_buffer();
cond_stage_model->get_param_tensors(tensors);
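This commit collapses the per-checkpoint version checks into sd_version_is_sd3() / sd_version_is_flux() / sd_version_is_dit() helpers, matching the string-table change at the top ("SD3.x", "Flux"). Their definitions are not in this excerpt; a plausible sketch, with the consolidated enum values and the SDVersion type name assumed:

    bool sd_version_is_sd3(SDVersion version)  { return version == VERSION_SD3;  }  // assumed enum value
    bool sd_version_is_flux(SDVersion version) { return version == VERSION_FLUX; }  // assumed enum value
    bool sd_version_is_dit(SDVersion version) {
        return sd_version_is_sd3(version) || sd_version_is_flux(version);  // DiT-style models
    }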
@@ -364,11 +375,11 @@ public:
} else {
vae_backend = backend;
}
first_stage_model = std::make_shared<AutoEncoderKL>(vae_backend, vae_wtype, vae_decode_only, false, version);
first_stage_model = std::make_shared<AutoEncoderKL>(vae_backend, model_loader.tensor_storages_types, "first_stage_model", vae_decode_only, false, version);
first_stage_model->alloc_params_buffer();
first_stage_model->get_param_tensors(tensors, "first_stage_model");
} else {
tae_first_stage = std::make_shared<TinyAutoEncoder>(backend, vae_wtype, vae_decode_only);
tae_first_stage = std::make_shared<TinyAutoEncoder>(backend, model_loader.tensor_storages_types, "decoder.layers", vae_decode_only);
}
// first_stage_model->get_param_tensors(tensors, "first_stage_model.");
@@ -380,12 +391,17 @@ public:
} else {
controlnet_backend = backend;
}
control_net = std::make_shared<ControlNet>(controlnet_backend, diffusion_model_wtype, version);
control_net = std::make_shared<ControlNet>(controlnet_backend, model_loader.tensor_storages_types, version);
}
pmid_model = std::make_shared<PhotoMakerIDEncoder>(clip_backend, model_wtype, version);
if (id_embeddings_path.find("v2") != std::string::npos) {
pmid_model = std::make_shared<PhotoMakerIDEncoder>(backend, model_loader.tensor_storages_types, "pmid", version, PM_VERSION_2);
LOG_INFO("using PhotoMaker Version 2");
} else {
pmid_model = std::make_shared<PhotoMakerIDEncoder>(backend, model_loader.tensor_storages_types, "pmid", version);
}
if (id_embeddings_path.size() > 0) {
pmid_lora = std::make_shared<LoraModel>(backend, model_wtype, id_embeddings_path, "");
pmid_lora = std::make_shared<LoraModel>(backend, id_embeddings_path, "");
if (!pmid_lora->load_from_file(true)) {
LOG_WARN("load photomaker lora tensors from %s failed", id_embeddings_path.c_str());
return false;
@@ -402,14 +418,8 @@ public:
LOG_ERROR(" pmid model params buffer allocation failed");
return false;
}
// LOG_INFO("pmid param memory buffer size = %.2fMB ",
// pmid_model->params_buffer_size / 1024.0 / 1024.0);
pmid_model->get_param_tensors(tensors, "pmid");
}
// if(stacked_id){
// pmid_model.init_params(GGML_TYPE_F32);
// pmid_model.map_by_name(tensors, "pmid.");
// }
}
struct ggml_init_params params;
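Both the conditioner and the pmid model above select PhotoMaker V2 by substring-matching "v2" in the id-embeddings path. A condensed sketch of that selection (the type name and PM_VERSION_1 spelling are assumptions; only PM_VERSION_2 appears in this diff):

    pm_version_t pm_version = (id_embeddings_path.find("v2") != std::string::npos)
                                  ? PM_VERSION_2
                                  : PM_VERSION_1;  // e.g. "photomaker-v2.safetensors" -> V2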
@@ -539,14 +549,17 @@ public:
is_using_v_parameterization = true;
}
if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B) {
if (sd_version_is_sd3(version)) {
LOG_INFO("running in FLOW mode");
denoiser = std::make_shared<DiscreteFlowDenoiser>();
} else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
} else if (sd_version_is_flux(version)) {
LOG_INFO("running in Flux FLOW mode");
float shift = 1.15f;
if (version == VERSION_FLUX_SCHNELL) {
shift = 1.0f; // TODO: validate
float shift = 1.0f; // TODO: validate
for (auto pair : model_loader.tensor_storages_types) {
if (pair.first.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) {
shift = 1.15f;
break;
}
}
denoiser = std::make_shared<FluxFlowDenoiser>(shift);
} else if (is_using_v_parameterization) {
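The FluxFlowDenoiser hunk above replaces the Dev/Schnell enum check with a weight probe: guidance-distilled (Dev-style) checkpoints carry model.diffusion_model.guidance_in.* tensors while Schnell-style ones do not, so finding that tensor raises shift from 1.0 to 1.15. The probe, isolated:

    float shift = 1.0f;  // Schnell-style default (TODO in the diff: validate)
    for (auto& pair : model_loader.tensor_storages_types) {
        if (pair.first.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) {
            shift = 1.15f;  // guidance_in present => Dev-style checkpoint
            break;
        }
    }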
@@ -647,7 +660,7 @@ public:
LOG_WARN("can not find %s for lora %s", st_file_path.c_str(), lora_path.c_str());
return;
}
LoraModel lora(backend, model_wtype, file_path);
LoraModel lora(backend, file_path);
if (!lora.load_from_file()) {
LOG_WARN("load lora tensors from %s failed", file_path.c_str());
return;
@@ -677,7 +690,7 @@ public:
LOG_WARN("can not find %s or %s for lora %s", st_file_path.c_str(), ckpt_file_path.c_str(), lora_name.c_str());
return;
}
LoraModel lora(backend, model_wtype, file_path);
LoraModel lora(backend, file_path);
if (!lora.load_from_file()) {
LOG_WARN("load lora tensors from %s failed", file_path.c_str());
return;
@@ -724,10 +737,10 @@ public:
ggml_tensor* id_encoder(ggml_context* work_ctx,
ggml_tensor* init_img,
ggml_tensor* prompts_embeds,
ggml_tensor* id_embeds,
std::vector<bool>& class_tokens_mask) {
ggml_tensor* res = NULL;
pmid_model->compute(n_threads, init_img, prompts_embeds, class_tokens_mask, &res, work_ctx);
pmid_model->compute(n_threads, init_img, prompts_embeds, id_embeds, class_tokens_mask, &res, work_ctx);
return res;
}
@@ -822,7 +835,11 @@ public:
sample_method_t method,
const std::vector<float>& sigmas,
int start_merge_step,
SDCondition id_cond) {
SDCondition id_cond,
std::vector<int> skip_layers = {},
float slg_scale = 0,
float skip_layer_start = 0.01,
float skip_layer_end = 0.2) {
size_t steps = sigmas.size() - 1;
// noise = load_tensor_from_file(work_ctx, "./rand0.bin");
// print_ggml_tensor(noise);
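The four new sample() parameters implement skip-layer guidance (SLG). The defaults gate it to a fraction of the schedule; with skip_layer_start = 0.01 and skip_layer_end = 0.2, SLG runs over roughly the first fifth of the steps. A worked check of the gating expression used further down:

    int step_count = 50;   // example schedule length
    int step = 5;
    bool is_skiplayer_step = step > (int)(0.01f * step_count)   // > 0
                          && step < (int)(0.2f  * step_count);  // < 10, so step 5 qualifies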
@@ -833,13 +850,24 @@ public:
struct ggml_tensor* noised_input = ggml_dup_tensor(work_ctx, noise);
bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL;
bool has_skiplayer = slg_scale != 0.0 && skip_layers.size() > 0;
// denoise wrapper
struct ggml_tensor* out_cond = ggml_dup_tensor(work_ctx, x);
struct ggml_tensor* out_uncond = NULL;
struct ggml_tensor* out_skip = NULL;
if (has_unconditioned) {
out_uncond = ggml_dup_tensor(work_ctx, x);
}
if (has_skiplayer) {
if (sd_version_is_dit(version)) {
out_skip = ggml_dup_tensor(work_ctx, x);
} else {
has_skiplayer = false;
LOG_WARN("SLG is incompatible with %s models", model_version_to_str[version]);
}
}
struct ggml_tensor* denoised = ggml_dup_tensor(work_ctx, x);
auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* {
@@ -920,6 +948,28 @@ public:
&out_uncond);
negative_data = (float*)out_uncond->data;
}
int step_count = sigmas.size();
bool is_skiplayer_step = has_skiplayer && step > (int)(skip_layer_start * step_count) && step < (int)(skip_layer_end * step_count);
float* skip_layer_data = NULL;
if (is_skiplayer_step) {
LOG_DEBUG("Skipping layers at step %d\n", step);
// skip layer (same as conditioned)
diffusion_model->compute(n_threads,
noised_input,
timesteps,
cond.c_crossattn,
cond.c_concat,
cond.c_vector,
guidance_tensor,
-1,
controls,
control_strength,
&out_skip,
NULL,
skip_layers);
skip_layer_data = (float*)out_skip->data;
}
float* vec_denoised = (float*)denoised->data;
float* vec_input = (float*)input->data;
float* positive_data = (float*)out_cond->data;
@@ -936,6 +986,9 @@ public:
latent_result = negative_data[i] + cfg_scale * (positive_data[i] - negative_data[i]);
}
}
if (is_skiplayer_step) {
latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_scale;
}
// v = latent_result, eps = latent_result
// denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip;
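The per-element update above composes CFG and SLG: after the usual classifier-free-guidance mix, the result is pushed away from the layer-skipped prediction. As a standalone formula (names mirror the diff):

    // cfg:  latent = uncond + cfg_scale * (cond - uncond)
    // slg:  latent += (cond - skipped) * slg_scale
    float guided(float cond, float uncond, float skipped,
                 float cfg_scale, float slg_scale) {
        float latent = uncond + cfg_scale * (cond - uncond);
        return latent + (cond - skipped) * slg_scale;
    }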
@@ -999,9 +1052,9 @@ public:
if (use_tiny_autoencoder) {
C = 4;
} else {
if (version == VERSION_SD3_2B || version == VERSION_SD3_5_8B || version == VERSION_SD3_5_2B) {
if (sd_version_is_sd3(version)) {
C = 32;
} else if (version == VERSION_FLUX_DEV || version == VERSION_FLUX_SCHNELL) {
} else if (sd_version_is_flux(version)) {
C = 32;
}
}
@@ -1096,7 +1149,8 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
enum schedule_t s,
bool keep_clip_on_cpu,
bool keep_control_net_cpu,
bool keep_vae_on_cpu) {
bool keep_vae_on_cpu,
bool diffusion_flash_attn) {
sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
if (sd_ctx == NULL) {
return NULL;
@@ -1137,7 +1191,8 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
s,
keep_clip_on_cpu,
keep_control_net_cpu,
keep_vae_on_cpu)) {
keep_vae_on_cpu,
diffusion_flash_attn)) {
delete sd_ctx->sd;
sd_ctx->sd = NULL;
free(sd_ctx);
@@ -1172,7 +1227,11 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
float control_strength,
float style_ratio,
bool normalize_input,
std::string input_id_images_path) {
std::string input_id_images_path,
std::vector<int> skip_layers = {},
float slg_scale = 0,
float skip_layer_start = 0.01,
float skip_layer_end = 0.2) {
if (seed < 0) {
// Generally, when using the provided command line, the seed is always >0.
// However, to prevent potential issues if 'stable-diffusion.cpp' is invoked as a library
@@ -1228,11 +1287,15 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
}
// preprocess input id images
std::vector<sd_image_t*> input_id_images;
bool pmv2 = sd_ctx->sd->pmid_model->get_version() == PM_VERSION_2;
if (sd_ctx->sd->pmid_model && input_id_images_path.size() > 0) {
std::vector<std::string> img_files = get_files_from_dir(input_id_images_path);
for (std::string img_file : img_files) {
int c = 0;
int width, height;
if (ends_with(img_file, "safetensors")) {
continue;
}
uint8_t* input_image_buffer = stbi_load(img_file.c_str(), &width, &height, &c, 3);
if (input_image_buffer == NULL) {
LOG_ERROR("PhotoMaker load image from '%s' failed", img_file.c_str());
@@ -1270,20 +1333,25 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
else
sd_mul_images_to_tensor(init_image->data, init_img, i, NULL, NULL);
}
t0 = ggml_time_ms();
auto cond_tup = sd_ctx->sd->cond_stage_model->get_learned_condition_with_trigger(work_ctx,
sd_ctx->sd->n_threads, prompt,
clip_skip,
width,
height,
num_input_images,
sd_ctx->sd->diffusion_model->get_adm_in_channels());
id_cond = std::get<0>(cond_tup);
class_tokens_mask = std::get<1>(cond_tup); //
id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, class_tokens_mask);
t0 = ggml_time_ms();
auto cond_tup = sd_ctx->sd->cond_stage_model->get_learned_condition_with_trigger(work_ctx,
sd_ctx->sd->n_threads, prompt,
clip_skip,
width,
height,
num_input_images,
sd_ctx->sd->diffusion_model->get_adm_in_channels());
id_cond = std::get<0>(cond_tup);
class_tokens_mask = std::get<1>(cond_tup); //
struct ggml_tensor* id_embeds = NULL;
if (pmv2) {
// id_embeds = sd_ctx->sd->pmid_id_embeds->get();
id_embeds = load_tensor_from_file(work_ctx, path_join(input_id_images_path, "id_embeds.bin"));
// print_ggml_tensor(id_embeds, true, "id_embeds:");
}
id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask);
t1 = ggml_time_ms();
LOG_INFO("Photomaker ID Stacking, taking %d ms", t1 - t0);
LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0);
if (sd_ctx->sd->free_params_immediately) {
sd_ctx->sd->pmid_model->free_params_buffer();
}
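The timing log in the hunk above is also fixed to print int64_t portably: ggml_time_ms() returns int64_t, which %d mis-prints on most 64-bit platforms. A minimal sketch of the corrected pattern:

    #include <cinttypes>  // PRId64

    int64_t t0 = ggml_time_ms();
    // ... work ...
    LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", ggml_time_ms() - t0);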
@@ -1348,9 +1416,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
// Sample
std::vector<struct ggml_tensor*> final_latents; // collect latents to decode
int C = 4;
if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
if (sd_version_is_sd3(sd_ctx->sd->version)) {
C = 16;
} else if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
C = 16;
}
int W = width / 8;
@@ -1387,7 +1455,11 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
sample_method,
sigmas,
start_merge_step,
id_cond);
id_cond,
skip_layers,
slg_scale,
skip_layer_start,
skip_layer_end);
// struct ggml_tensor* x_0 = load_tensor_from_file(ctx, "samples_ddim.bin");
// print_ggml_tensor(x_0);
int64_t sampling_end = ggml_time_ms();
@@ -1453,7 +1525,13 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
float control_strength,
float style_ratio,
bool normalize_input,
const char* input_id_images_path_c_str) {
const char* input_id_images_path_c_str,
int* skip_layers = NULL,
size_t skip_layers_count = 0,
float slg_scale = 0,
float skip_layer_start = 0.01,
float skip_layer_end = 0.2) {
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
LOG_DEBUG("txt2img %dx%d", width, height);
if (sd_ctx == NULL) {
return NULL;
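txt2img() and img2img() gain C-friendly SLG arguments: a raw int pointer plus a count, converted to std::vector on entry. A caller-side sketch (the layer indices 7, 8, 9 are illustrative, not taken from this diff):

    int skip_layers[] = {7, 8, 9};            // illustrative layer indices
    size_t skip_layers_count = sizeof(skip_layers) / sizeof(skip_layers[0]);
    // inside txt2img, exactly as in the diff:
    std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);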
@@ -1461,10 +1539,10 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
struct ggml_init_params params;
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
if (sd_version_is_sd3(sd_ctx->sd->version)) {
params.mem_size *= 3;
}
if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
if (sd_version_is_flux(sd_ctx->sd->version)) {
params.mem_size *= 4;
}
if (sd_ctx->sd->stacked_id) {
@@ -1487,17 +1565,17 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);
int C = 4;
if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
if (sd_version_is_sd3(sd_ctx->sd->version)) {
C = 16;
} else if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
C = 16;
}
int W = width / 8;
int H = height / 8;
ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
if (sd_version_is_sd3(sd_ctx->sd->version)) {
ggml_set_f32(init_latent, 0.0609f);
} else if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
} else if (sd_version_is_flux(sd_ctx->sd->version)) {
ggml_set_f32(init_latent, 0.1159f);
} else {
ggml_set_f32(init_latent, 0.f);
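For empty txt2img latents, the constants above appear to match the published VAE shift factors (SD3: 0.0609, Flux: 0.1159) rather than zero, complementing the scale_factor hunk earlier (1.5305 for SD3, 0.3611 for Flux); treat that reading as an inference, not something this diff states. Condensed:

    float latent_fill = 0.0f;                                 // SD1/2/XL: zero-filled empty latent
    if (sd_version_is_sd3(version))  latent_fill = 0.0609f;   // presumed SD3 VAE shift_factor
    if (sd_version_is_flux(version)) latent_fill = 0.1159f;   // presumed Flux VAE shift_factor
    ggml_set_f32(init_latent, latent_fill);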
@@ -1521,7 +1599,11 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
control_strength,
style_ratio,
normalize_input,
input_id_images_path_c_str);
input_id_images_path_c_str,
skip_layers_vec,
slg_scale,
skip_layer_start,
skip_layer_end);
size_t t1 = ggml_time_ms();
@@ -1548,7 +1630,13 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
float control_strength,
float style_ratio,
bool normalize_input,
const char* input_id_images_path_c_str) {
const char* input_id_images_path_c_str,
int* skip_layers = NULL,
size_t skip_layers_count = 0,
float slg_scale = 0,
float skip_layer_start = 0.01,
float skip_layer_end = 0.2) {
std::vector<int> skip_layers_vec(skip_layers, skip_layers + skip_layers_count);
LOG_DEBUG("img2img %dx%d", width, height);
if (sd_ctx == NULL) {
return NULL;
@@ -1556,10 +1644,10 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
struct ggml_init_params params;
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
if (sd_ctx->sd->version == VERSION_SD3_2B || sd_ctx->sd->version == VERSION_SD3_5_8B || sd_ctx->sd->version == VERSION_SD3_5_2B) {
if (sd_version_is_sd3(sd_ctx->sd->version)) {
params.mem_size *= 2;
}
if (sd_ctx->sd->version == VERSION_FLUX_DEV || sd_ctx->sd->version == VERSION_FLUX_SCHNELL) {
if (sd_version_is_flux(sd_ctx->sd->version)) {
params.mem_size *= 3;
}
if (sd_ctx->sd->stacked_id) {
@@ -1622,7 +1710,11 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
control_strength,
style_ratio,
normalize_input,
input_id_images_path_c_str);
input_id_images_path_c_str,
skip_layers_vec,
slg_scale,
skip_layer_start,
skip_layer_end);
size_t t2 = ggml_time_ms();