WIP: update stable-diffusion.cpp to 5900ef6605c6 (new API) (#1669)

* Update stable-diffusion.cpp to 5900ef6605c6 (new API)

* Clean up pending LoRA code and simplify LoRA changes to upstream

* Move VAE tiling disabling for TAESD to sdtype_adapter.cpp

* Move auxiliary ctx functions to sdtype_adapter.cpp

* Use ref_images parameter for Kontext images

* Drop clip skip workaround (fixed upstream)

* Workaround for flash attention with img2img

leejet/stable-diffusion.cpp#756

* Workaround for Chroma with flash attention, debug prints

* Disable forcing CLIP weights to F32 for reduced memory usage
This commit is contained in:
Wagner Bruna 2025-08-12 12:25:02 -03:00 committed by GitHub
parent 7b5cf7143f
commit 5de7ed3d56
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 2255 additions and 1870 deletions

View file

@ -545,9 +545,15 @@ protected:
int64_t vocab_size;
int64_t num_positions;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
enum ggml_type token_wtype = (tensor_types.find(prefix + "token_embedding.weight") != tensor_types.end()) ? tensor_types[prefix + "token_embedding.weight"] : GGML_TYPE_F32;
enum ggml_type position_wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "position_embedding.weight") != tensor_types.end()) ? tensor_types[prefix + "position_embedding.weight"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
enum ggml_type token_wtype = GGML_TYPE_F32;
#if 1
// kcpp reduce memory usage (reverts https://github.com/leejet/stable-diffusion.cpp/pull/601)
auto tensor_type = tensor_types.find(prefix + "token_embedding.weight");
if (tensor_type != tensor_types.end())
token_wtype = tensor_type->second;
#endif
enum ggml_type position_wtype = GGML_TYPE_F32;
params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size);
params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions);
@ -594,10 +600,10 @@ protected:
int64_t image_size;
int64_t num_patches;
int64_t num_positions;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
enum ggml_type patch_wtype = GGML_TYPE_F16; // tensor_types.find(prefix + "patch_embedding.weight") != tensor_types.end() ? tensor_types[prefix + "patch_embedding.weight"] : GGML_TYPE_F16;
enum ggml_type class_wtype = GGML_TYPE_F32; // tensor_types.find(prefix + "class_embedding") != tensor_types.end() ? tensor_types[prefix + "class_embedding"] : GGML_TYPE_F32;
enum ggml_type position_wtype = GGML_TYPE_F32; // tensor_types.find(prefix + "position_embedding.weight") != tensor_types.end() ? tensor_types[prefix + "position_embedding.weight"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
enum ggml_type patch_wtype = GGML_TYPE_F16;
enum ggml_type class_wtype = GGML_TYPE_F32;
enum ggml_type position_wtype = GGML_TYPE_F32;
params["patch_embedding.weight"] = ggml_new_tensor_4d(ctx, patch_wtype, patch_size, patch_size, num_channels, embed_dim);
params["class_embedding"] = ggml_new_tensor_1d(ctx, class_wtype, embed_dim);
@ -657,9 +663,9 @@ enum CLIPVersion {
class CLIPTextModel : public GGMLBlock {
protected:
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
if (version == OPEN_CLIP_VIT_BIGG_14) {
enum ggml_type wtype = GGML_TYPE_F32; // tensor_types.find(prefix + "text_projection") != tensor_types.end() ? tensor_types[prefix + "text_projection"] : GGML_TYPE_F32;
enum ggml_type wtype = GGML_TYPE_F32;
params["text_projection"] = ggml_new_tensor_2d(ctx, wtype, projection_dim, hidden_size);
}
}
@ -678,8 +684,8 @@ public:
bool with_final_ln = true;
CLIPTextModel(CLIPVersion version = OPENAI_CLIP_VIT_L_14,
int clip_skip_value = -1,
bool with_final_ln = true)
bool with_final_ln = true,
int clip_skip_value = -1)
: version(version), with_final_ln(with_final_ln) {
if (version == OPEN_CLIP_VIT_H_14) {
hidden_size = 1024;
@ -701,7 +707,7 @@ public:
void set_clip_skip(int skip) {
if (skip <= 0) {
return;
skip = -1;
}
clip_skip = skip;
}
@ -805,8 +811,8 @@ protected:
int64_t out_features;
bool transpose_weight;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = tensor_types.find(prefix + "weight") != tensor_types.end() ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
if (transpose_weight) {
params["weight"] = ggml_new_tensor_2d(ctx, wtype, out_features, in_features);
} else {
@ -868,12 +874,12 @@ struct CLIPTextModelRunner : public GGMLRunner {
CLIPTextModel model;
CLIPTextModelRunner(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
const String2GGMLType& tensor_types,
const std::string prefix,
CLIPVersion version = OPENAI_CLIP_VIT_L_14,
int clip_skip_value = 1,
bool with_final_ln = true)
: GGMLRunner(backend), model(version, clip_skip_value, with_final_ln) {
bool with_final_ln = true,
int clip_skip_value = -1)
: GGMLRunner(backend), model(version, with_final_ln, clip_skip_value) {
model.init(params_ctx, tensor_types, prefix);
}
@ -949,4 +955,4 @@ struct CLIPTextModelRunner : public GGMLRunner {
}
};
#endif // __CLIP_HPP__
#endif // __CLIP_HPP__

View file

@ -56,8 +56,8 @@ public:
// x: [N, channels, h, w]
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
x = ggml_upscale(ctx, x, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST); // [N, channels, h*2, w*2]
x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]
x = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST); // [N, channels, h*2, w*2]
x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]
return x;
}
};
@ -182,9 +182,9 @@ protected:
int64_t dim_in;
int64_t dim_out;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
enum ggml_type wtype = (tensor_types.find(prefix + "proj.weight") != tensor_types.end()) ? tensor_types[prefix + "proj.weight"] : GGML_TYPE_F32;
enum ggml_type bias_wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "proj.bias") != tensor_types.end()) ? tensor_types[prefix + "proj.bias"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
enum ggml_type wtype = get_type(prefix + "proj.weight", tensor_types, GGML_TYPE_F32);
enum ggml_type bias_wtype = GGML_TYPE_F32;
params["proj.weight"] = ggml_new_tensor_2d(ctx, wtype, dim_in, dim_out * 2);
params["proj.bias"] = ggml_new_tensor_1d(ctx, bias_wtype, dim_out * 2);
}
@ -440,9 +440,9 @@ public:
class AlphaBlender : public GGMLBlock {
protected:
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
// Get the type of the "mix_factor" tensor from the input tensors map with the specified prefix
enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.ypes.find(prefix + "mix_factor") != tensor_types.end()) ? tensor_types[prefix + "mix_factor"] : GGML_TYPE_F32;
enum ggml_type wtype = GGML_TYPE_F32;
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
}

View file

@ -57,29 +57,30 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
std::vector<std::string> readed_embeddings;
FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
const String2GGMLType& tensor_types,
const std::string& embd_dir,
SDVersion version = VERSION_SD1,
PMVersion pv = PM_VERSION_1,
int clip_skip = -1)
: version(version), pm_version(pv), tokenizer(sd_version_is_sd2(version) ? 0 : 49407), embd_dir(embd_dir) {
if (sd_version_is_sd1(version)) {
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14);
} else if (sd_version_is_sd2(version)) {
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14);
} else if (sd_version_is_sdxl(version)) {
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
}
set_clip_skip(clip_skip);
}
void set_clip_skip(int clip_skip) {
if (clip_skip <= 0) {
clip_skip = 1;
if (sd_version_is_sd2(version) || sd_version_is_sdxl(version)) {
clip_skip = 2;
}
}
if (sd_version_is_sd1(version)) {
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip);
} else if (sd_version_is_sd2(version)) {
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, clip_skip);
} else if (sd_version_is_sdxl(version)) {
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, false);
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
}
}
void set_clip_skip(int clip_skip) {
text_model->set_clip_skip(clip_skip);
if (sd_version_is_sdxl(version)) {
text_model2->set_clip_skip(clip_skip);
@ -458,8 +459,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
if (sd_version_is_sdxl(version)) {
text_model2->compute(n_threads,
input_ids2,
0,
NULL,
num_custom_embeddings,
token_embed_custom.data(),
max_token_idx,
false,
&chunk_hidden_states2, work_ctx);
@ -469,8 +470,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
if (chunk_idx == 0) {
text_model2->compute(n_threads,
input_ids2,
0,
NULL,
num_custom_embeddings,
token_embed_custom.data(),
max_token_idx,
true,
&pooled,
@ -617,7 +618,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
struct FrozenCLIPVisionEmbedder : public GGMLRunner {
CLIPVisionModelProjection vision_model;
FrozenCLIPVisionEmbedder(ggml_backend_t backend, std::map<std::string, enum ggml_type>& tensor_types)
FrozenCLIPVisionEmbedder(ggml_backend_t backend, const String2GGMLType& tensor_types = {})
: vision_model(OPEN_CLIP_VIT_H_14, true), GGMLRunner(backend) {
vision_model.init(params_ctx, tensor_types, "cond_stage_model.transformer");
}
@ -662,18 +663,19 @@ struct SD3CLIPEmbedder : public Conditioner {
std::shared_ptr<T5Runner> t5;
SD3CLIPEmbedder(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
int clip_skip = -1)
const String2GGMLType& tensor_types = {},
int clip_skip = -1)
: clip_g_tokenizer(0) {
if (clip_skip <= 0) {
clip_skip = 2;
}
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, false);
clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
set_clip_skip(clip_skip);
}
void set_clip_skip(int clip_skip) {
if (clip_skip <= 0) {
clip_skip = 2;
}
clip_l->set_clip_skip(clip_skip);
clip_g->set_clip_skip(clip_skip);
}
@ -1008,16 +1010,17 @@ struct FluxCLIPEmbedder : public Conditioner {
size_t chunk_len = 256;
FluxCLIPEmbedder(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
int clip_skip = -1) {
if (clip_skip <= 0) {
clip_skip = 2;
}
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, true);
const String2GGMLType& tensor_types = {},
int clip_skip = -1) {
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true);
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
set_clip_skip(clip_skip);
}
void set_clip_skip(int clip_skip) {
if (clip_skip <= 0) {
clip_skip = 2;
}
clip_l->set_clip_skip(clip_skip);
}
@ -1228,10 +1231,10 @@ struct PixArtCLIPEmbedder : public Conditioner {
int mask_pad = 1;
PixArtCLIPEmbedder(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
int clip_skip = -1,
bool use_mask = false,
int mask_pad = 1)
const String2GGMLType& tensor_types = {},
int clip_skip = -1,
bool use_mask = false,
int mask_pad = 1)
: use_mask(use_mask), mask_pad(mask_pad) {
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
}
@ -1422,4 +1425,4 @@ struct PixArtCLIPEmbedder : public Conditioner {
}
};
#endif
#endif

View file

@ -317,12 +317,23 @@ struct ControlNet : public GGMLRunner {
bool guided_hint_cached = false;
ControlNet(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
SDVersion version = VERSION_SD1)
const String2GGMLType& tensor_types = {},
SDVersion version = VERSION_SD1)
: GGMLRunner(backend), control_net(version) {
control_net.init(params_ctx, tensor_types, "");
}
void enable_conv2d_direct() {
std::vector<GGMLBlock*> blocks;
control_net.get_all_blocks(blocks);
for (auto block : blocks) {
if (block->get_desc() == "Conv2d") {
auto conv_block = (Conv2d*)block;
conv_block->enable_direct();
}
}
}
~ControlNet() {
free_control_ctx();
}

View file

@ -168,24 +168,21 @@ struct AYSSchedule : SigmaSchedule {
std::vector<float> inputs;
std::vector<float> results(n + 1);
switch (version) {
case VERSION_SD2: /* fallthrough */
LOG_WARN("AYS not designed for SD2.X models");
case VERSION_SD1:
LOG_INFO("AYS using SD1.5 noise levels");
inputs = noise_levels[0];
break;
case VERSION_SDXL:
LOG_INFO("AYS using SDXL noise levels");
inputs = noise_levels[1];
break;
case VERSION_SVD:
LOG_INFO("AYS using SVD noise levels");
inputs = noise_levels[2];
break;
default:
LOG_ERROR("Version not compatable with AYS scheduler");
return results;
if (sd_version_is_sd2((SDVersion)version)) {
LOG_WARN("AYS not designed for SD2.X models");
} /* fallthrough */
else if (sd_version_is_sd1((SDVersion)version)) {
LOG_INFO("AYS using SD1.5 noise levels");
inputs = noise_levels[0];
} else if (sd_version_is_sdxl((SDVersion)version)) {
LOG_INFO("AYS using SDXL noise levels");
inputs = noise_levels[1];
} else if (version == VERSION_SVD) {
LOG_INFO("AYS using SVD noise levels");
inputs = noise_levels[2];
} else {
LOG_ERROR("Version not compatible with AYS scheduler");
return results;
}
/* Stretches those pre-calculated reference levels out to the desired
@ -346,6 +343,32 @@ struct CompVisVDenoiser : public CompVisDenoiser {
}
};
struct EDMVDenoiser : public CompVisVDenoiser {
float min_sigma = 0.002;
float max_sigma = 120.0;
EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0)
: min_sigma(min_sigma), max_sigma(max_sigma) {
schedule = std::make_shared<ExponentialSchedule>();
}
float t_to_sigma(float t) {
return std::exp(t * 4 / (float)TIMESTEPS);
}
float sigma_to_t(float s) {
return 0.25 * std::log(s);
}
float sigma_min() {
return min_sigma;
}
float sigma_max() {
return max_sigma;
}
};
float time_snr_shift(float alpha, float t) {
if (alpha == 1.0f) {
return t;
@ -1019,7 +1042,7 @@ static void sample_k_diffusion(sample_method_t method,
// also needed to invert the behavior of CompVisDenoiser
// (k-diffusion's LMSDiscreteScheduler)
float beta_start = 0.00085f;
float beta_end = 0.0120f;
float beta_end = 0.0120f;
std::vector<double> alphas_cumprod;
std::vector<double> compvis_sigmas;
@ -1030,8 +1053,9 @@ static void sample_k_diffusion(sample_method_t method,
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
(1.0f -
std::pow(sqrtf(beta_start) +
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)), 2));
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)),
2));
compvis_sigmas[i] =
std::sqrt((1 - alphas_cumprod[i]) /
alphas_cumprod[i]);
@ -1061,7 +1085,8 @@ static void sample_k_diffusion(sample_method_t method,
// - pred_prev_sample -> "x_t-1"
int timestep =
roundf(TIMESTEPS -
i * ((float)TIMESTEPS / steps)) - 1;
i * ((float)TIMESTEPS / steps)) -
1;
// 1. get previous step value (=t-1)
int prev_timestep = timestep - TIMESTEPS / steps;
// The sigma here is chosen to cause the
@ -1086,10 +1111,9 @@ static void sample_k_diffusion(sample_method_t method,
float* vec_x = (float*)x->data;
for (int j = 0; j < ggml_nelements(x); j++) {
vec_x[j] *= std::sqrt(sigma * sigma + 1) /
sigma;
sigma;
}
}
else {
} else {
// For the subsequent steps after the first one,
// at this point x = latents or x = sample, and
// needs to be prescaled with x <- sample / c_in
@ -1127,9 +1151,8 @@ static void sample_k_diffusion(sample_method_t method,
float alpha_prod_t = alphas_cumprod[timestep];
// Note final_alpha_cumprod = alphas_cumprod[0] due to
// trailing timestep spacing
float alpha_prod_t_prev = prev_timestep >= 0 ?
alphas_cumprod[prev_timestep] : alphas_cumprod[0];
float beta_prod_t = 1 - alpha_prod_t;
float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
float beta_prod_t = 1 - alpha_prod_t;
// 3. compute predicted original sample from predicted
// noise also called "predicted x_0" of formula (12)
// from https://arxiv.org/pdf/2010.02502.pdf
@ -1145,7 +1168,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_pred_original_sample[j] =
(vec_x[j] / std::sqrt(sigma * sigma + 1) -
std::sqrt(beta_prod_t) *
vec_model_output[j]) *
vec_model_output[j]) *
(1 / std::sqrt(alpha_prod_t));
}
}
@ -1159,8 +1182,8 @@ static void sample_k_diffusion(sample_method_t method,
// sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
// sqrt(1 - alpha_t/alpha_t-1)
float beta_prod_t_prev = 1 - alpha_prod_t_prev;
float variance = (beta_prod_t_prev / beta_prod_t) *
(1 - alpha_prod_t / alpha_prod_t_prev);
float variance = (beta_prod_t_prev / beta_prod_t) *
(1 - alpha_prod_t / alpha_prod_t_prev);
float std_dev_t = eta * std::sqrt(variance);
// 6. compute "direction pointing to x_t" of formula
// (12) from https://arxiv.org/pdf/2010.02502.pdf
@ -1179,8 +1202,8 @@ static void sample_k_diffusion(sample_method_t method,
std::pow(std_dev_t, 2)) *
vec_model_output[j];
vec_x[j] = std::sqrt(alpha_prod_t_prev) *
vec_pred_original_sample[j] +
pred_sample_direction;
vec_pred_original_sample[j] +
pred_sample_direction;
}
}
if (eta > 0) {
@ -1208,7 +1231,7 @@ static void sample_k_diffusion(sample_method_t method,
// by Semi-Linear Consistency Function with Trajectory
// Mapping", arXiv:2402.19159 [cs.CV]
float beta_start = 0.00085f;
float beta_end = 0.0120f;
float beta_end = 0.0120f;
std::vector<double> alphas_cumprod;
std::vector<double> compvis_sigmas;
@ -1219,8 +1242,9 @@ static void sample_k_diffusion(sample_method_t method,
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
(1.0f -
std::pow(sqrtf(beta_start) +
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)), 2));
(sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)),
2));
compvis_sigmas[i] =
std::sqrt((1 - alphas_cumprod[i]) /
alphas_cumprod[i]);
@ -1235,13 +1259,10 @@ static void sample_k_diffusion(sample_method_t method,
for (int i = 0; i < steps; i++) {
// Analytic form for TCD timesteps
int timestep = TIMESTEPS - 1 -
(TIMESTEPS / original_steps) *
(int)floor(i * ((float)original_steps / steps));
(TIMESTEPS / original_steps) *
(int)floor(i * ((float)original_steps / steps));
// 1. get previous step value
int prev_timestep = i >= steps - 1 ? 0 :
TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
(int)floor((i + 1) *
((float)original_steps / steps));
int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
// Here timestep_s is tau_n' in Algorithm 4. The _s
// notation appears to be that from C. Lu,
// "DPM-Solver: A Fast ODE Solver for Diffusion
@ -1258,10 +1279,9 @@ static void sample_k_diffusion(sample_method_t method,
float* vec_x = (float*)x->data;
for (int j = 0; j < ggml_nelements(x); j++) {
vec_x[j] *= std::sqrt(sigma * sigma + 1) /
sigma;
sigma;
}
}
else {
} else {
float* vec_x = (float*)x->data;
for (int j = 0; j < ggml_nelements(x); j++) {
vec_x[j] *= std::sqrt(sigma * sigma + 1);
@ -1294,15 +1314,14 @@ static void sample_k_diffusion(sample_method_t method,
// DPM-Solver. In fact, we have alpha_{t_n} =
// \sqrt{\hat{alpha_n}}, [...]"
float alpha_prod_t = alphas_cumprod[timestep];
float beta_prod_t = 1 - alpha_prod_t;
float beta_prod_t = 1 - alpha_prod_t;
// Note final_alpha_cumprod = alphas_cumprod[0] since
// TCD is always "trailing"
float alpha_prod_t_prev = prev_timestep >= 0 ?
alphas_cumprod[prev_timestep] : alphas_cumprod[0];
float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
// The subscript _s are the only portion in this
// section (2) unique to TCD
float alpha_prod_s = alphas_cumprod[timestep_s];
float beta_prod_s = 1 - alpha_prod_s;
float beta_prod_s = 1 - alpha_prod_s;
// 3. Compute the predicted noised sample x_s based on
// the model parameterization
//
@ -1317,7 +1336,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_pred_original_sample[j] =
(vec_x[j] / std::sqrt(sigma * sigma + 1) -
std::sqrt(beta_prod_t) *
vec_model_output[j]) *
vec_model_output[j]) *
(1 / std::sqrt(alpha_prod_t));
}
}
@ -1339,9 +1358,9 @@ static void sample_k_diffusion(sample_method_t method,
// pred_epsilon = model_output
vec_x[j] =
std::sqrt(alpha_prod_s) *
vec_pred_original_sample[j] +
vec_pred_original_sample[j] +
std::sqrt(beta_prod_s) *
vec_model_output[j];
vec_model_output[j];
}
}
// 4. Sample and inject noise z ~ N(0, I) for
@ -1357,7 +1376,7 @@ static void sample_k_diffusion(sample_method_t method,
// In this case, x is still pred_noised_sample,
// continue in-place
ggml_tensor_set_f32_randn(noise, rng);
float* vec_x = (float*)x->data;
float* vec_x = (float*)x->data;
float* vec_noise = (float*)noise->data;
for (int j = 0; j < ggml_nelements(x); j++) {
// Corresponding to (35) in Zheng et
@ -1366,10 +1385,10 @@ static void sample_k_diffusion(sample_method_t method,
vec_x[j] =
std::sqrt(alpha_prod_t_prev /
alpha_prod_s) *
vec_x[j] +
vec_x[j] +
std::sqrt(1 - alpha_prod_t_prev /
alpha_prod_s) *
vec_noise[j];
alpha_prod_s) *
vec_noise[j];
}
}
}
@ -1381,4 +1400,4 @@ static void sample_k_diffusion(sample_method_t method,
}
}
#endif // __DENOISER_HPP__
#endif // __DENOISER_HPP__

View file

@ -13,7 +13,7 @@ struct DiffusionModel {
struct ggml_tensor* c_concat,
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f,
@ -32,9 +32,9 @@ struct UNetModel : public DiffusionModel {
UNetModelRunner unet;
UNetModel(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
SDVersion version = VERSION_SD1,
bool flash_attn = false)
const String2GGMLType& tensor_types = {},
SDVersion version = VERSION_SD1,
bool flash_attn = false)
: unet(backend, tensor_types, "model.diffusion_model", version, flash_attn) {
}
@ -69,7 +69,7 @@ struct UNetModel : public DiffusionModel {
struct ggml_tensor* c_concat,
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f,
@ -85,7 +85,7 @@ struct MMDiTModel : public DiffusionModel {
MMDiTRunner mmdit;
MMDiTModel(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types)
const String2GGMLType& tensor_types = {})
: mmdit(backend, tensor_types, "model.diffusion_model") {
}
@ -120,7 +120,7 @@ struct MMDiTModel : public DiffusionModel {
struct ggml_tensor* c_concat,
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f,
@ -135,10 +135,10 @@ struct FluxModel : public DiffusionModel {
Flux::FluxRunner flux;
FluxModel(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
SDVersion version = VERSION_FLUX,
bool flash_attn = false,
bool use_mask = false)
const String2GGMLType& tensor_types = {},
SDVersion version = VERSION_FLUX,
bool flash_attn = false,
bool use_mask = false)
: flux(backend, tensor_types, "model.diffusion_model", version, flash_attn, use_mask) {
}
@ -173,7 +173,7 @@ struct FluxModel : public DiffusionModel {
struct ggml_tensor* c_concat,
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f,
@ -184,4 +184,4 @@ struct FluxModel : public DiffusionModel {
}
};
#endif
#endif

View file

@ -130,8 +130,8 @@ public:
body_feat = conv_body->forward(ctx, body_feat);
feat = ggml_add(ctx, feat, body_feat);
// upsample
feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST)));
feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST)));
feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat)));
return out;
}
@ -142,11 +142,22 @@ struct ESRGAN : public GGMLRunner {
int scale = 4;
int tile_size = 128; // avoid cuda OOM for 4gb VRAM
ESRGAN(ggml_backend_t backend, std::map<std::string, enum ggml_type>& tensor_types)
ESRGAN(ggml_backend_t backend, const String2GGMLType& tensor_types = {})
: GGMLRunner(backend) {
rrdb_net.init(params_ctx, tensor_types, "");
}
void enable_conv2d_direct() {
std::vector<GGMLBlock*> blocks;
rrdb_net.get_all_blocks(blocks);
for (auto block : blocks) {
if (block->get_desc() == "Conv2d") {
auto conv_block = (Conv2d*)block;
conv_block->enable_direct();
}
}
}
std::string get_desc() {
return "esrgan";
}

View file

@ -35,8 +35,8 @@ namespace Flux {
int64_t hidden_size;
float eps;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "scale") != tensor_types.end()) ? tensor_types[prefix + "scale"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
ggml_type wtype = GGML_TYPE_F32;
params["scale"] = ggml_new_tensor_1d(ctx, wtype, hidden_size);
}
@ -512,7 +512,8 @@ namespace Flux {
LastLayer(int64_t hidden_size,
int64_t patch_size,
int64_t out_channels,
bool prune_mod = false) : prune_mod(prune_mod) {
bool prune_mod = false)
: prune_mod(prune_mod) {
blocks["norm_final"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false));
blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels));
if (!prune_mod) {
@ -723,7 +724,7 @@ namespace Flux {
auto txt_ids = gen_txt_ids(bs, context_len);
auto img_ids = gen_img_ids(h, w, patch_size, bs);
auto ids = concat_ids(txt_ids, img_ids, bs);
auto ids = concat_ids(txt_ids, img_ids, bs);
uint64_t curr_h_offset = 0;
uint64_t curr_w_offset = 0;
for (ggml_tensor* ref : ref_latents) {
@ -736,7 +737,7 @@ namespace Flux {
}
auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, 1, h_offset, w_offset);
ids = concat_ids(ids, ref_ids, bs);
ids = concat_ids(ids, ref_ids, bs);
curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
@ -744,7 +745,6 @@ namespace Flux {
return ids;
}
// Generate positional embeddings
std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) {
std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents);
@ -872,8 +872,8 @@ namespace Flux {
struct ggml_tensor* y,
struct ggml_tensor* guidance,
struct ggml_tensor* pe,
struct ggml_tensor* mod_index_arange = NULL,
std::vector<int> skip_layers = {}) {
struct ggml_tensor* mod_index_arange = NULL,
std::vector<int> skip_layers = {}) {
auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
auto final_layer = std::dynamic_pointer_cast<LastLayer>(blocks["final_layer"]);
@ -962,7 +962,6 @@ namespace Flux {
struct ggml_tensor* process_img(struct ggml_context* ctx,
struct ggml_tensor* x) {
int64_t W = x->ne[0];
int64_t H = x->ne[1];
int64_t patch_size = 2;
@ -983,9 +982,9 @@ namespace Flux {
struct ggml_tensor* y,
struct ggml_tensor* guidance,
struct ggml_tensor* pe,
struct ggml_tensor* mod_index_arange = NULL,
struct ggml_tensor* mod_index_arange = NULL,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<int> skip_layers = {}) {
std::vector<int> skip_layers = {}) {
// Forward pass of DiT.
// x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
// timestep: (N,) tensor of diffusion timesteps
@ -1005,7 +1004,7 @@ namespace Flux {
int pad_h = (patch_size - H % patch_size) % patch_size;
int pad_w = (patch_size - W % patch_size) % patch_size;
auto img = process_img(ctx, x);
auto img = process_img(ctx, x);
uint64_t img_tokens = img->ne[1];
if (c_concat != NULL) {
@ -1013,7 +1012,7 @@ namespace Flux {
ggml_tensor* mask = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 8 * 8, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);
masked = process_img(ctx, masked);
mask = process_img(ctx, mask);
mask = process_img(ctx, mask);
img = ggml_concat(ctx, img, ggml_concat(ctx, masked, mask, 0), 0);
}
@ -1027,9 +1026,9 @@ namespace Flux {
auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
if (out->ne[1] > img_tokens) {
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0);
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size]
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size]
}
// rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)
@ -1040,8 +1039,6 @@ namespace Flux {
};
struct FluxRunner : public GGMLRunner {
static std::map<std::string, enum ggml_type> empty_tensor_types;
public:
FluxParams flux_params;
Flux flux;
@ -1051,11 +1048,11 @@ namespace Flux {
bool use_mask = false;
FluxRunner(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
const std::string prefix = "",
SDVersion version = VERSION_FLUX,
bool flash_attn = false,
bool use_mask = false)
const String2GGMLType& tensor_types = {},
const std::string prefix = "",
SDVersion version = VERSION_FLUX,
bool flash_attn = false,
bool use_mask = false)
: GGMLRunner(backend), use_mask(use_mask) {
flux_params.flash_attn = flash_attn;
flux_params.guidance_embed = false;
@ -1120,7 +1117,7 @@ namespace Flux {
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
std::vector<int> skip_layers = {}) {
std::vector<int> skip_layers = {}) {
GGML_ASSERT(x->ne[3] == 1);
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
@ -1139,8 +1136,8 @@ namespace Flux {
}
// ggml_arange is not working on some backends, precompute it
mod_index_arange_vec = arange(0, 344);
mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
mod_index_arange_vec = arange(0, 344);
mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data());
}
y = to_backend(y);
@ -1187,9 +1184,9 @@ namespace Flux {
struct ggml_tensor* y,
struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {},
struct ggml_tensor** output = NULL,
struct ggml_context* output_ctx = NULL,
std::vector<int> skip_layers = std::vector<int>()) {
struct ggml_tensor** output = NULL,
struct ggml_context* output_ctx = NULL,
std::vector<int> skip_layers = std::vector<int>()) {
// x: [N, in_channels, h, w]
// timesteps: [N, ]
// context: [N, max_position, hidden_size]
@ -1277,4 +1274,4 @@ namespace Flux {
} // namespace Flux
#endif // __FLUX_HPP__
#endif // __FLUX_HPP__

View file

@ -40,6 +40,10 @@
#include "ggml-vulkan.h"
#endif
#ifdef SD_USE_OPENCL
#include "ggml-opencl.h"
#endif
#ifdef SD_USE_SYCL
#include "ggml-sycl.h"
#endif
@ -53,6 +57,8 @@
#define __STATIC_INLINE__ static inline
#endif
static_assert(GGML_MAX_NAME >= 128, "GGML_MAX_NAME must be at least 128");
// n-mode trensor-matrix product
// example: 2-mode product
// A: [ne03, k, ne01, ne00]
@ -109,13 +115,13 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_merge_lora(ggml_context* ctx, struct
// [ne03,ne02,ne01,ne00] x [ne13,ne12,ne11,ne10] => [ne03*ne13,ne02*ne12,ne01*ne11,ne00*ne10]
__STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct ggml_tensor* a, struct ggml_tensor* b) {
return ggml_mul(ctx,
ggml_upscale_ext(ctx,
ggml_interpolate(ctx,
a,
a->ne[0] * b->ne[0],
a->ne[1] * b->ne[1],
a->ne[2] * b->ne[2],
a->ne[3] * b->ne[3],
ggml_scale_mode::GGML_SCALE_MODE_NEAREST),
GGML_SCALE_MODE_NEAREST),
b);
}
@ -811,6 +817,25 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d(struct ggml_context* ctx,
return x;
}
__STATIC_INLINE__ struct ggml_tensor* ggml_nn_conv_2d_direct(struct ggml_context* ctx,
struct ggml_tensor* x,
struct ggml_tensor* w,
struct ggml_tensor* b,
int s0 = 1,
int s1 = 1,
int p0 = 0,
int p1 = 0,
int d0 = 1,
int d1 = 1) {
x = ggml_conv_2d_direct(ctx, w, x, s0, s1, p0, p1, d0, d1);
if (b != NULL) {
b = ggml_reshape_4d(ctx, b, 1, 1, b->ne[0], 1);
// b = ggml_repeat(ctx, b, x);
x = ggml_add(ctx, x, b);
}
return x;
}
// w: [OCIC, KD, 1 * 1]
// x: [N, IC, IH, IW]
// b: [OC,]
@ -945,18 +970,33 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
float scale = (1.0f / sqrt((float)d_head));
int kv_pad = 0;
// if (flash_attn) {
// LOG_DEBUG("attention_ext L_q:%d L_k:%d n_head:%d C:%d d_head:%d N:%d", L_q, L_k, n_head, C, d_head, N);
// }
// is there anything oddly shaped?? ping Green-Sky if you can trip this assert
// is there anything oddly shaped?? ping Green-Sky if you can trip this assert
GGML_ASSERT(((L_k % 256 == 0) && L_q == L_k) || !(L_k % 256 == 0));
bool can_use_flash_attn = true;
can_use_flash_attn = can_use_flash_attn && (d_head == 64 ||
d_head == 80 ||
d_head == 96 ||
d_head == 112 ||
d_head == 128 ||
d_head == 256);
// kcpp disable kv_pad (leejet/stable-diffusion.cpp#756)
#if 1
can_use_flash_attn = can_use_flash_attn && L_k % 256 == 0;
can_use_flash_attn = can_use_flash_attn && d_head % 64 == 0; // double check
// cuda max d_head seems to be 256, cpu does seem to work with 512
can_use_flash_attn = can_use_flash_attn && d_head <= 256; // double check
#else
if (can_use_flash_attn && L_k % 256 != 0) {
// TODO(Green-Sky): might be worth just padding by default
if (L_k == 77 || L_k == 4208 || L_k == 3952) {
kv_pad = GGML_PAD(L_k, 256) - L_k;
} else {
can_use_flash_attn = false;
}
}
#endif
if (mask != nullptr) {
// TODO(Green-Sky): figure out if we can bend t5 to work too
@ -969,11 +1009,18 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
ggml_tensor* kqv = nullptr;
// GGML_ASSERT((flash_attn && can_use_flash_attn) || !flash_attn);
if (can_use_flash_attn && flash_attn) {
// LOG_DEBUG("using flash attention");
// LOG_DEBUG(" uses flash attention");
if (kv_pad != 0) {
// LOG_DEBUG(" padding k and v dim1 by %d", kv_pad);
k = ggml_pad(ctx, k, 0, kv_pad, 0, 0);
}
k = ggml_cast(ctx, k, GGML_TYPE_F16);
v = ggml_cont(ctx, ggml_permute(ctx, v, 0, 2, 1, 3)); // [N, n_head, L_k, d_head]
v = ggml_reshape_3d(ctx, v, d_head, L_k, n_head * N); // [N * n_head, L_k, d_head]
if (kv_pad != 0) {
v = ggml_pad(ctx, v, 0, kv_pad, 0, 0);
}
v = ggml_cast(ctx, v, GGML_TYPE_F16);
if (mask != nullptr) {
@ -1181,6 +1228,8 @@ __STATIC_INLINE__ size_t ggml_tensor_num(ggml_context* ctx) {
#define MAX_PARAMS_TENSOR_NUM 32768
#define MAX_GRAPH_SIZE 32768
typedef std::map<std::string, enum ggml_type> String2GGMLType;
struct GGMLRunner {
protected:
typedef std::function<struct ggml_cgraph*()> get_graph_cb_t;
@ -1365,13 +1414,7 @@ public:
ggml_backend_cpu_set_n_threads(backend, n_threads);
}
// #ifdef SD_USE_METAL
// if (ggml_backend_is_metal(backend)) {
// ggml_backend_metal_set_n_cb(backend, n_threads);
// }
// #endif
ggml_backend_graph_compute(backend, gf);
#ifdef GGML_PERF
ggml_graph_print(gf);
#endif
@ -1398,17 +1441,25 @@ protected:
GGMLBlockMap blocks;
ParameterMap params;
void init_blocks(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
ggml_type get_type(const std::string& name, const String2GGMLType& tensor_types, ggml_type default_type) {
auto iter = tensor_types.find(name);
if (iter != tensor_types.end()) {
return iter->second;
}
return default_type;
}
void init_blocks(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
for (auto& pair : blocks) {
auto& block = pair.second;
block->init(ctx, tensor_types, prefix + pair.first);
}
}
virtual void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {}
virtual void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {}
public:
void init(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
void init(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
if (prefix.size() > 0) {
prefix = prefix + ".";
}
@ -1455,6 +1506,19 @@ public:
tensors[prefix + pair.first] = pair.second;
}
}
virtual std::string get_desc() {
return "GGMLBlock";
}
void get_all_blocks(std::vector<GGMLBlock*>& result) {
result.push_back(this);
for (auto& block_iter : blocks) {
if (block_iter.second) {
block_iter.second->get_all_blocks(result);
}
}
}
};
class UnaryBlock : public GGMLBlock {
@ -1469,8 +1533,8 @@ protected:
bool bias;
bool force_f32;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = (tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
if (in_features % ggml_blck_size(wtype) != 0 || force_f32) {
wtype = GGML_TYPE_F32;
}
@ -1505,8 +1569,8 @@ class Embedding : public UnaryBlock {
protected:
int64_t embedding_dim;
int64_t num_embeddings;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = (tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
params["weight"] = ggml_new_tensor_2d(ctx, wtype, embedding_dim, num_embeddings);
}
@ -1544,12 +1608,13 @@ protected:
std::pair<int, int> padding;
std::pair<int, int> dilation;
bool bias;
bool direct = false;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F16; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F16;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F16;
params["weight"] = ggml_new_tensor_4d(ctx, wtype, kernel_size.second, kernel_size.first, in_channels, out_channels);
if (bias) {
enum ggml_type wtype = GGML_TYPE_F32; // (tensor_types.find(prefix + "bias") != tensor_types.end()) ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
enum ggml_type wtype = GGML_TYPE_F32;
params["bias"] = ggml_new_tensor_1d(ctx, wtype, out_channels);
}
}
@ -1570,13 +1635,25 @@ public:
dilation(dilation),
bias(bias) {}
void enable_direct() {
direct = true;
}
std::string get_desc() {
return "Conv2d";
}
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
struct ggml_tensor* w = params["weight"];
struct ggml_tensor* b = NULL;
if (bias) {
b = params["bias"];
}
return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
if (direct) {
return ggml_nn_conv_2d_direct(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
} else {
return ggml_nn_conv_2d(ctx, x, w, b, stride.second, stride.first, padding.second, padding.first, dilation.second, dilation.first);
}
}
};
@ -1590,11 +1667,11 @@ protected:
int64_t dilation;
bool bias;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F16; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F16;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F16;
params["weight"] = ggml_new_tensor_4d(ctx, wtype, 1, kernel_size, in_channels, out_channels); // 5d => 4d
if (bias) {
enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "bias") != tensor_types.end()) ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
enum ggml_type wtype = GGML_TYPE_F32;
params["bias"] = ggml_new_tensor_1d(ctx, wtype, out_channels);
}
}
@ -1634,12 +1711,12 @@ protected:
bool elementwise_affine;
bool bias;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
if (elementwise_affine) {
enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.ypes.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
enum ggml_type wtype = GGML_TYPE_F32;
params["weight"] = ggml_new_tensor_1d(ctx, wtype, normalized_shape);
if (bias) {
enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.ypes.find(prefix + "bias") != tensor_types.end()) ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
enum ggml_type wtype = GGML_TYPE_F32;
params["bias"] = ggml_new_tensor_1d(ctx, wtype, normalized_shape);
}
}
@ -1676,10 +1753,10 @@ protected:
float eps;
bool affine;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
if (affine) {
enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
enum ggml_type bias_wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "bias") != tensor_types.end()) ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
enum ggml_type wtype = GGML_TYPE_F32;
enum ggml_type bias_wtype = GGML_TYPE_F32;
params["weight"] = ggml_new_tensor_1d(ctx, wtype, num_channels);
params["bias"] = ggml_new_tensor_1d(ctx, bias_wtype, num_channels);
}
@ -1760,4 +1837,4 @@ public:
}
};
#endif // __GGML_EXTEND__HPP__
#endif // __GGML_EXTEND__HPP__

View file

@ -346,4 +346,4 @@ const std::vector<const std::vector<std::vector<float>>*> GITS_NOISE = {
&GITS_NOISE_1_50
};
#endif // GITS_NOISE_INL
#endif // GITS_NOISE_INL

View file

@ -3,7 +3,7 @@
#include "ggml_extend.hpp"
#define LORA_GRAPH_SIZE 20480
#define LORA_GRAPH_BASE_SIZE 10240
struct LoraModel : public GGMLRunner {
enum lora_t {
@ -238,7 +238,8 @@ struct LoraModel : public GGMLRunner {
}
struct ggml_cgraph* build_lora_graph(std::map<std::string, struct ggml_tensor*> model_tensors, SDVersion version) {
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, LORA_GRAPH_SIZE, false);
size_t lora_graph_size = LORA_GRAPH_BASE_SIZE + lora_tensors.size() * 10;
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, lora_graph_size, false);
zero_index = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_I32, 1);
set_backend_tensor_data(zero_index, zero_index_vec.data());
@ -291,7 +292,6 @@ struct LoraModel : public GGMLRunner {
std::string hada_2_down_name = "";
std::string hada_2_up_name = "";
hada_1_down_name = fk + ".hada_w1_b";
hada_1_up_name = fk + ".hada_w1_a";
hada_1_mid_name = fk + ".hada_t1";
@ -843,4 +843,4 @@ struct LoraModel : public GGMLRunner {
}
};
#endif // __LORA_HPP__
#endif // __LORA_HPP__

File diff suppressed because it is too large Load diff

View file

@ -147,8 +147,8 @@ protected:
int64_t hidden_size;
float eps;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F32;
params["weight"] = ggml_new_tensor_1d(ctx, wtype, hidden_size);
}
@ -652,13 +652,13 @@ protected:
int64_t hidden_size;
std::string qk_norm;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "pos_embed") != tensor_types.end()) ? tensor_types[prefix + "pos_embed"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F32;
params["pos_embed"] = ggml_new_tensor_3d(ctx, wtype, hidden_size, num_patchs, 1);
}
public:
MMDiT(std::map<std::string, enum ggml_type>& tensor_types) {
MMDiT(const String2GGMLType& tensor_types = {}) {
// input_size is always None
// learn_sigma is always False
// register_length is alwalys 0
@ -869,11 +869,9 @@ public:
struct MMDiTRunner : public GGMLRunner {
MMDiT mmdit;
static std::map<std::string, enum ggml_type> empty_tensor_types;
MMDiTRunner(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
const std::string prefix = "")
const String2GGMLType& tensor_types = {},
const std::string prefix = "")
: GGMLRunner(backend), mmdit(tensor_types) {
mmdit.init(params_ctx, tensor_types, prefix);
}

View file

@ -16,7 +16,6 @@
#include "ggml-backend.h"
#include "ggml-cpu.h"
#include "ggml.h"
#include "gguf.h"
#include "stable-diffusion.h"
@ -28,6 +27,10 @@
#include "ggml-vulkan.h"
#endif
#ifdef SD_USE_OPENCL
#include "ggml-opencl.h"
#endif
#define ST_HEADER_SIZE_LEN 8
static std::string format(const char* fmt, ...) {
@ -111,6 +114,7 @@ const char* unused_tensors[] = {
"model_ema.diffusion_model",
"embedding_manager",
"denoiser.sigmas",
"text_encoders.t5xxl.transformer.encoder.embed_tokens.weight", // only used during training
};
bool is_unused_tensor(std::string name) {
@ -192,7 +196,7 @@ std::unordered_map<std::string, std::string> pmid_v2_name_map = {
std::string convert_open_clip_to_hf_clip(const std::string& name) {
std::string new_name = name;
std::string prefix;
if (contains(new_name, ".enc.")) {
if (contains(new_name, ".enc.")) {
// llama.cpp naming convention for T5
size_t pos = new_name.find(".enc.");
if (pos != std::string::npos) {
@ -348,6 +352,10 @@ std::unordered_map<std::string, std::unordered_map<std::string, std::string>> su
{"to_v", "v"},
{"to_out_0", "proj_out"},
{"group_norm", "norm"},
{"key", "k"},
{"query", "q"},
{"value", "v"},
{"proj_attn", "proj_out"},
},
},
{
@ -372,6 +380,10 @@ std::unordered_map<std::string, std::unordered_map<std::string, std::string>> su
{"to_v", "v"},
{"to_out.0", "proj_out"},
{"group_norm", "norm"},
{"key", "k"},
{"query", "q"},
{"value", "v"},
{"proj_attn", "proj_out"},
},
},
{
@ -443,6 +455,10 @@ std::string convert_diffusers_name_to_compvis(std::string key, char seq) {
return format("model%cdiffusion_model%ctime_embed%c", seq, seq, seq) + std::to_string(std::stoi(m[0]) * 2 - 2) + m[1];
}
if (match(m, std::regex(format("unet%cadd_embedding%clinear_(\\d+)(.*)", seq, seq)), key)) {
return format("model%cdiffusion_model%clabel_emb%c0%c", seq, seq, seq, seq) + std::to_string(std::stoi(m[0]) * 2 - 2) + m[1];
}
if (match(m, std::regex(format("unet%cdown_blocks%c(\\d+)%c(attentions|resnets)%c(\\d+)%c(.+)", seq, seq, seq, seq, seq)), key)) {
std::string suffix = get_converted_suffix(m[1], m[3]);
// LOG_DEBUG("%s %s %s %s", m[0].c_str(), m[1].c_str(), m[2].c_str(), m[3].c_str());
@ -480,6 +496,19 @@ std::string convert_diffusers_name_to_compvis(std::string key, char seq) {
return format("cond_stage_model%ctransformer%ctext_model", seq, seq) + m[0];
}
// clip-g
if (match(m, std::regex(format("te%c1%ctext_model%cencoder%clayers%c(\\d+)%c(.+)", seq, seq, seq, seq, seq, seq)), key)) {
return format("cond_stage_model%c1%ctransformer%ctext_model%cencoder%clayers%c", seq, seq, seq, seq, seq, seq) + m[0] + seq + m[1];
}
if (match(m, std::regex(format("te%c1%ctext_model(.*)", seq, seq)), key)) {
return format("cond_stage_model%c1%ctransformer%ctext_model", seq, seq, seq) + m[0];
}
if (match(m, std::regex(format("te%c1%ctext_projection", seq, seq)), key)) {
return format("cond_stage_model%c1%ctransformer%ctext_model%ctext_projection", seq, seq, seq, seq);
}
// vae
if (match(m, std::regex(format("vae%c(.*)%cconv_norm_out(.*)", seq, seq)), key)) {
return format("first_stage_model%c%s%cnorm_out%s", seq, m[0].c_str(), seq, m[1].c_str());
@ -616,6 +645,8 @@ std::string convert_tensor_name(std::string name) {
std::string new_key = convert_diffusers_name_to_compvis(name_without_network_parts, '.');
if (new_key.empty()) {
new_name = name;
} else if (new_key == "cond_stage_model.1.transformer.text_model.text_projection") {
new_name = new_key;
} else {
new_name = new_key + "." + network_part;
}
@ -631,7 +662,7 @@ std::string convert_tensor_name(std::string name) {
return new_name;
}
void add_preprocess_tensor_storage_types(std::map<std::string, enum ggml_type>& tensor_storages_types, std::string name, enum ggml_type type) {
void add_preprocess_tensor_storage_types(String2GGMLType& tensor_storages_types, std::string name, enum ggml_type type) {
std::string new_name = convert_tensor_name(name);
if (new_name.find("cond_stage_model") != std::string::npos && ends_with(new_name, "attn.in_proj_weight")) {
@ -798,6 +829,7 @@ void f8_e4m3_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
dst[i] = f8_e4m3_to_f16(src[i]);
}
}
void f8_e5m2_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
// support inplace op
for (int64_t i = n - 1; i >= 0; i--) {
@ -805,6 +837,20 @@ void f8_e5m2_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
}
}
void f64_to_f32_vec(double* src, float* dst, int64_t n) {
// support inplace op
for (int64_t i = 0; i < n; i++) {
dst[i] = (float)src[i];
}
}
void i64_to_i32_vec(int64_t* src, int32_t* dst, int64_t n) {
// support inplace op
for (int64_t i = 0; i < n; i++) {
dst[i] = (int32_t)src[i];
}
}
void convert_tensor(void* src,
ggml_type src_type,
void* dst,
@ -1050,10 +1096,14 @@ ggml_type str_to_ggml_type(const std::string& dtype) {
ttype = GGML_TYPE_F32;
} else if (dtype == "F32") {
ttype = GGML_TYPE_F32;
} else if (dtype == "F64") {
ttype = GGML_TYPE_F32;
} else if (dtype == "F8_E4M3") {
ttype = GGML_TYPE_F16;
} else if (dtype == "F8_E5M2") {
ttype = GGML_TYPE_F16;
} else if (dtype == "I64") {
ttype = GGML_TYPE_I32;
}
return ttype;
}
@ -1071,6 +1121,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
std::ifstream file(fpath, std::ios::binary);
if (!file.is_open()) {
LOG_ERROR("failed to open '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
@ -1082,6 +1133,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
// read header size
if (file_size_ <= ST_HEADER_SIZE_LEN) {
LOG_ERROR("invalid safetensor file '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
@ -1095,6 +1147,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
size_t header_size_ = read_u64(header_size_buf);
if (header_size_ >= file_size_) {
LOG_ERROR("invalid safetensor file '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
@ -1105,6 +1158,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
file.read(header_buf.data(), header_size_);
if (!file) {
LOG_ERROR("read safetensors header failed: '%s'", file_path.c_str());
file_paths_.pop_back();
return false;
}
@ -1176,6 +1230,14 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
tensor_storage.is_f8_e5m2 = true;
// f8 -> f16
GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2);
} else if (dtype == "F64") {
tensor_storage.is_f64 = true;
// f64 -> f32
GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
} else if (dtype == "I64") {
tensor_storage.is_i64 = true;
// i64 -> i32
GGML_ASSERT(tensor_storage.nbytes() * 2 == tensor_data_size);
} else {
GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size);
}
@ -1192,18 +1254,45 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
/*================================================= DiffusersModelLoader ==================================================*/
bool ModelLoader::init_from_diffusers_file(const std::string& file_path, const std::string& prefix) {
std::string unet_path = path_join(file_path, "unet/diffusion_pytorch_model.safetensors");
std::string vae_path = path_join(file_path, "vae/diffusion_pytorch_model.safetensors");
std::string clip_path = path_join(file_path, "text_encoder/model.safetensors");
std::string unet_path = path_join(file_path, "unet/diffusion_pytorch_model.safetensors");
std::string vae_path = path_join(file_path, "vae/diffusion_pytorch_model.safetensors");
std::string clip_path = path_join(file_path, "text_encoder/model.safetensors");
std::string clip_g_path = path_join(file_path, "text_encoder_2/model.safetensors");
if (!init_from_safetensors_file(unet_path, "unet.")) {
return false;
}
for (auto ts : tensor_storages) {
if (ts.name.find("add_embedding") != std::string::npos || ts.name.find("label_emb") != std::string::npos) {
// probably SDXL
LOG_DEBUG("Fixing name for SDXL output blocks.2.2");
for (auto& tensor_storage : tensor_storages) {
int len = 34;
auto pos = tensor_storage.name.find("unet.up_blocks.0.upsamplers.0.conv");
if (pos == std::string::npos) {
len = 44;
pos = tensor_storage.name.find("model.diffusion_model.output_blocks.2.1.conv");
}
if (pos != std::string::npos) {
tensor_storage.name = "model.diffusion_model.output_blocks.2.2.conv" + tensor_storage.name.substr(len);
LOG_DEBUG("NEW NAME: %s", tensor_storage.name.c_str());
add_preprocess_tensor_storage_types(tensor_storages_types, tensor_storage.name, tensor_storage.type);
}
}
break;
}
}
if (!init_from_safetensors_file(vae_path, "vae.")) {
return false;
LOG_WARN("Couldn't find working VAE in %s", file_path.c_str());
// return false;
}
if (!init_from_safetensors_file(clip_path, "te.")) {
return false;
LOG_WARN("Couldn't find working text encoder in %s", file_path.c_str());
// return false;
}
if (!init_from_safetensors_file(clip_g_path, "te.1.")) {
LOG_DEBUG("Couldn't find working second text encoder in %s", file_path.c_str());
}
return true;
}
@ -1566,6 +1655,15 @@ bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::s
return true;
}
bool ModelLoader::model_is_unet() {
for (auto& tensor_storage : tensor_storages) {
if (tensor_storage.name.find("model.diffusion_model.input_blocks.") != std::string::npos) {
return true;
}
}
return false;
}
bool ModelLoader::has_diffusion_model_tensors()
{
for (auto& tensor_storage : tensor_storages) {
@ -1598,7 +1696,7 @@ SDVersion ModelLoader::get_sd_version() {
if (tensor_storage.name.find("model.diffusion_model.joint_blocks.") != std::string::npos) {
return VERSION_SD3;
}
if (tensor_storage.name.find("model.diffusion_model.input_blocks.") != std::string::npos) {
if (tensor_storage.name.find("model.diffusion_model.input_blocks.") != std::string::npos || tensor_storage.name.find("unet.down_blocks.") != std::string::npos) {
is_unet = true;
if (has_multiple_encoders) {
is_xl = true;
@ -1607,7 +1705,7 @@ SDVersion ModelLoader::get_sd_version() {
}
}
}
if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos || tensor_storage.name.find("cond_stage_model.1") != std::string::npos) {
if (tensor_storage.name.find("conditioner.embedders.1") != std::string::npos || tensor_storage.name.find("cond_stage_model.1") != std::string::npos || tensor_storage.name.find("te.1") != std::string::npos) {
has_multiple_encoders = true;
if (is_unet) {
is_xl = true;
@ -1629,7 +1727,7 @@ SDVersion ModelLoader::get_sd_version() {
token_embedding_weight = tensor_storage;
// break;
}
if (tensor_storage.name == "model.diffusion_model.input_blocks.0.0.weight" || tensor_storage.name == "model.diffusion_model.img_in.weight") {
if (tensor_storage.name == "model.diffusion_model.input_blocks.0.0.weight" || tensor_storage.name == "model.diffusion_model.img_in.weight" || tensor_storage.name == "unet.conv_in.weight") {
input_block_weight = tensor_storage;
input_block_checked = true;
if (found_family) {
@ -1638,10 +1736,14 @@ SDVersion ModelLoader::get_sd_version() {
}
}
bool is_inpaint = input_block_weight.ne[2] == 9;
bool is_ip2p = input_block_weight.ne[2] == 8;
if (is_xl) {
if (is_inpaint) {
return VERSION_SDXL_INPAINT;
}
if (is_ip2p) {
return VERSION_SDXL_PIX2PIX;
}
return VERSION_SDXL;
}
@ -1657,6 +1759,9 @@ SDVersion ModelLoader::get_sd_version() {
if (is_inpaint) {
return VERSION_SD1_INPAINT;
}
if (is_ip2p) {
return VERSION_SD1_PIX2PIX;
}
return VERSION_SD1;
} else if (token_embedding_weight.ne[0] == 1024) {
if (is_inpaint) {
@ -1714,7 +1819,7 @@ ggml_type ModelLoader::get_diffusion_model_wtype() {
continue;
}
if (tensor_storage.name.find("model.diffusion_model.") == std::string::npos) {
if (tensor_storage.name.find("model.diffusion_model.") == std::string::npos && tensor_storage.name.find("unet.") == std::string::npos) {
continue;
}
@ -1883,6 +1988,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
};
int tensor_count = 0;
int64_t t1 = ggml_time_ms();
bool partial = false;
for (auto& tensor_storage : processed_tensor_storages) {
if (tensor_storage.file_index != file_index) {
++tensor_count;
@ -1907,7 +2013,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
// for the CPU and Metal backend, we can copy directly into the tensor
if (tensor_storage.type == dst_tensor->type) {
GGML_ASSERT(ggml_nbytes(dst_tensor) == tensor_storage.nbytes());
read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
if (tensor_storage.is_f64 || tensor_storage.is_i64) {
read_buffer.resize(tensor_storage.nbytes_to_read());
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
} else {
read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
}
if (tensor_storage.is_bf16) {
// inplace op
@ -1918,9 +2029,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
} else if (tensor_storage.is_f8_e5m2) {
// inplace op
f8_e5m2_to_f16_vec((uint8_t*)dst_tensor->data, (uint16_t*)dst_tensor->data, tensor_storage.nelements());
} else if (tensor_storage.is_f64) {
f64_to_f32_vec((double*)read_buffer.data(), (float*)dst_tensor->data, tensor_storage.nelements());
} else if (tensor_storage.is_i64) {
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
}
} else {
read_buffer.resize(tensor_storage.nbytes());
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
if (tensor_storage.is_bf16) {
@ -1932,13 +2047,19 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
} else if (tensor_storage.is_f8_e5m2) {
// inplace op
f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
} else if (tensor_storage.is_f64) {
// inplace op
f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
} else if (tensor_storage.is_i64) {
// inplace op
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
}
convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
}
} else {
read_buffer.resize(tensor_storage.nbytes());
read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
if (tensor_storage.is_bf16) {
@ -1950,6 +2071,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
} else if (tensor_storage.is_f8_e5m2) {
// inplace op
f8_e5m2_to_f16_vec((uint8_t*)read_buffer.data(), (uint16_t*)read_buffer.data(), tensor_storage.nelements());
} else if (tensor_storage.is_f64) {
// inplace op
f64_to_f32_vec((double*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
} else if (tensor_storage.is_i64) {
// inplace op
i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)read_buffer.data(), tensor_storage.nelements());
}
if (tensor_storage.type == dst_tensor->type) {
@ -1964,20 +2091,26 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
}
}
int64_t t2 = ggml_time_ms();
size_t tensor_max = processed_tensor_storages.size();
int64_t t2 = ggml_time_ms();
// kcpp throttle progress printing
++tensor_count;
if(tensor_count<2 || tensor_count%5==0 || (tensor_count+10) > processed_tensor_storages.size())
if(tensor_count<2 || tensor_count%5==0 || (tensor_count+10) > tensor_max)
{
//throttle progress printing
pretty_progress(tensor_count, processed_tensor_storages.size(), (t2 - t1) / 1000.0f);
pretty_progress(tensor_count, tensor_max, (t2 - t1) / 1000.0f);
}
t1 = t2;
t1 = t2;
partial = tensor_count != tensor_max;
}
if (zip != NULL) {
zip_close(zip);
}
if (partial) {
printf("\n");
}
if (!success) {
break;
}
@ -2055,6 +2188,41 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
return true;
}
std::vector<std::pair<std::string, ggml_type>> parse_tensor_type_rules(const std::string& tensor_type_rules) {
std::vector<std::pair<std::string, ggml_type>> result;
for (const auto& item : splitString(tensor_type_rules, ',')) {
if (item.size() == 0)
continue;
std::string::size_type pos = item.find('=');
if (pos == std::string::npos) {
LOG_WARN("ignoring invalid quant override \"%s\"", item.c_str());
continue;
}
std::string tensor_pattern = item.substr(0, pos);
std::string type_name = item.substr(pos + 1);
ggml_type tensor_type = GGML_TYPE_COUNT;
if (type_name == "f32") {
tensor_type = GGML_TYPE_F32;
} else {
for (size_t i = 0; i < SD_TYPE_COUNT; i++) {
auto trait = ggml_get_type_traits((ggml_type)i);
if (trait->to_float && trait->type_size && type_name == trait->type_name) {
tensor_type = (ggml_type)i;
}
}
}
if (tensor_type != GGML_TYPE_COUNT) {
result.emplace_back(tensor_pattern, tensor_type);
} else {
LOG_WARN("ignoring invalid quant override \"%s\"", item.c_str());
}
}
return result;
}
bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type) {
const std::string& name = tensor_storage.name;
if (type != GGML_TYPE_COUNT) {
@ -2086,7 +2254,7 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
return false;
}
bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules_str) {
auto backend = ggml_backend_cpu_init();
size_t mem_size = 1 * 1024 * 1024; // for padding
mem_size += tensor_storages.size() * ggml_tensor_overhead();
@ -2096,12 +2264,23 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
gguf_context* gguf_ctx = gguf_init_empty();
auto tensor_type_rules = parse_tensor_type_rules(tensor_type_rules_str);
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
const std::string& name = tensor_storage.name;
ggml_type tensor_type = tensor_storage.type;
ggml_type dst_type = type;
ggml_type tensor_type = tensor_storage.type;
if (tensor_should_be_converted(tensor_storage, type)) {
tensor_type = type;
for (const auto& tensor_type_rule : tensor_type_rules) {
std::regex pattern(tensor_type_rule.first);
if (std::regex_search(name, pattern)) {
dst_type = tensor_type_rule.second;
break;
}
}
if (tensor_should_be_converted(tensor_storage, dst_type)) {
tensor_type = dst_type;
}
ggml_tensor* tensor = ggml_new_tensor(ggml_ctx, tensor_type, tensor_storage.n_dims, tensor_storage.ne);
@ -2160,7 +2339,7 @@ int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type)
return mem_size;
}
bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type) {
bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type, const char* tensor_type_rules) {
ModelLoader model_loader;
if (!model_loader.init_from_file(input_path)) {
@ -2174,6 +2353,6 @@ bool convert(const char* input_path, const char* vae_path, const char* output_pa
return false;
}
}
bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type);
bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type, tensor_type_rules);
return success;
}
}

View file

@ -12,19 +12,21 @@
#include "ggml-backend.h"
#include "ggml.h"
#include "gguf.h"
#include <nlohmann/json.hpp>
#include "zip.h"
#include "gguf.h"
#define SD_MAX_DIMS 5
enum SDVersion {
VERSION_SD1,
VERSION_SD1_INPAINT,
VERSION_SD1_PIX2PIX,
VERSION_SD2,
VERSION_SD2_INPAINT,
VERSION_SDXL,
VERSION_SDXL_INPAINT,
VERSION_SDXL_PIX2PIX,
VERSION_SVD,
VERSION_SD3,
VERSION_FLUX,
@ -47,7 +49,7 @@ static inline bool sd_version_is_sd3(SDVersion version) {
}
static inline bool sd_version_is_sd1(SDVersion version) {
if (version == VERSION_SD1 || version == VERSION_SD1_INPAINT) {
if (version == VERSION_SD1 || version == VERSION_SD1_INPAINT || version == VERSION_SD1_PIX2PIX) {
return true;
}
return false;
@ -61,7 +63,7 @@ static inline bool sd_version_is_sd2(SDVersion version) {
}
static inline bool sd_version_is_sdxl(SDVersion version) {
if (version == VERSION_SDXL || version == VERSION_SDXL_INPAINT) {
if (version == VERSION_SDXL || version == VERSION_SDXL_INPAINT || version == VERSION_SDXL_PIX2PIX) {
return true;
}
return false;
@ -81,6 +83,14 @@ static inline bool sd_version_is_dit(SDVersion version) {
return false;
}
static inline bool sd_version_is_unet_edit(SDVersion version) {
return version == VERSION_SD1_PIX2PIX || version == VERSION_SDXL_PIX2PIX;
}
static bool sd_version_is_inpaint_or_unet_edit(SDVersion version) {
return sd_version_is_unet_edit(version) || sd_version_is_inpaint(version);
}
enum PMVersion {
PM_VERSION_1,
PM_VERSION_2,
@ -92,6 +102,8 @@ struct TensorStorage {
bool is_bf16 = false;
bool is_f8_e4m3 = false;
bool is_f8_e5m2 = false;
bool is_f64 = false;
bool is_i64 = false;
int64_t ne[SD_MAX_DIMS] = {1, 1, 1, 1, 1};
int n_dims = 0;
@ -123,6 +135,8 @@ struct TensorStorage {
int64_t nbytes_to_read() const {
if (is_bf16 || is_f8_e4m3 || is_f8_e5m2) {
return nbytes() / 2;
} else if (is_f64 || is_i64) {
return nbytes() * 2;
} else {
return nbytes();
}
@ -173,6 +187,10 @@ struct TensorStorage {
type_name = "f8_e4m3";
} else if (is_f8_e5m2) {
type_name = "f8_e5m2";
} else if (is_f64) {
type_name = "f64";
} else if (is_i64) {
type_name = "i64";
}
ss << name << " | " << type_name << " | ";
ss << n_dims << " [";
@ -189,6 +207,8 @@ struct TensorStorage {
typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
typedef std::map<std::string, enum ggml_type> String2GGMLType;
class ModelLoader {
protected:
std::vector<std::string> file_paths_;
@ -207,10 +227,11 @@ protected:
bool init_from_diffusers_file(const std::string& file_path, const std::string& prefix = "");
public:
std::map<std::string, enum ggml_type> tensor_storages_types;
String2GGMLType tensor_storages_types;
bool init_from_file(const std::string& file_path, const std::string& prefix = "");
bool has_diffusion_model_tensors();
bool model_is_unet();
SDVersion get_sd_version();
ggml_type get_sd_wtype();
ggml_type get_conditioner_wtype();
@ -222,7 +243,7 @@ public:
ggml_backend_t backend,
std::set<std::string> ignore_tensors = {});
bool save_to_gguf_file(const std::string& file_path, ggml_type type);
bool save_to_gguf_file(const std::string& file_path, ggml_type type, const std::string& tensor_type_rules);
bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
~ModelLoader() = default;
@ -231,4 +252,4 @@ public:
static std::string load_t5_tokenizer_json();
};
#endif // __MODEL_H__
#endif // __MODEL_H__

View file

@ -623,7 +623,12 @@ public:
std::vector<float> zeros_right;
public:
PhotoMakerIDEncoder(ggml_backend_t backend, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix, SDVersion version = VERSION_SDXL, PMVersion pm_v = PM_VERSION_1, float sty = 20.f)
PhotoMakerIDEncoder(ggml_backend_t backend,
const String2GGMLType& tensor_types,
const std::string prefix,
SDVersion version = VERSION_SDXL,
PMVersion pm_v = PM_VERSION_1,
float sty = 20.f)
: GGMLRunner(backend),
version(version),
pm_version(pm_v),

View file

@ -134,6 +134,27 @@ static bool sd_is_quiet = false;
static std::string sdmodelfilename = "";
static bool photomaker_enabled = false;
static void set_sd_vae_tiling(sd_ctx_t* ctx, bool tiling)
{
ctx->sd->vae_tiling = tiling;
}
static int get_loaded_sd_version(sd_ctx_t* ctx)
{
return ctx->sd->version;
}
static bool loaded_model_is_chroma(sd_ctx_t* ctx)
{
if (ctx != nullptr && ctx->sd != nullptr) {
auto maybe_flux = std::dynamic_pointer_cast<FluxModel>(ctx->sd->diffusion_model);
if (maybe_flux != nullptr) {
return maybe_flux->flux.flux_params.is_chroma;
}
}
return false;
}
bool sdtype_load_model(const sd_load_model_inputs inputs) {
sd_is_quiet = inputs.quiet;
set_sd_quiet(sd_is_quiet);
@ -160,6 +181,10 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
{
taesdpath = executable_path + "taesd.embd";
printf("With TAE SD VAE: %s\n",taesdpath.c_str());
if (cfg_tiled_vae_threshold < 8192) {
printf(" disabling VAE tiling for TAESD\n");
cfg_tiled_vae_threshold = 8192;
}
}
else if(vaefilename!="")
{
@ -267,31 +292,35 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
sd_params->control_net_cpu);
}
sd_ctx = new_sd_ctx(sd_params->model_path.c_str(),
sd_params->clip_l_path.c_str(),
sd_params->clip_g_path.c_str(),
sd_params->t5xxl_path.c_str(),
sd_params->diffusion_model_path.c_str(),
sd_params->vae_path.c_str(),
sd_params->taesd_path.c_str(),
sd_params->controlnet_path.c_str(),
sd_params->lora_model_dir.c_str(),
sd_params->embeddings_path.c_str(),
sd_params->stacked_id_embeddings_path.c_str(),
vae_decode_only,
sd_params->vae_tiling,
free_param,
sd_params->n_threads,
sd_params->wtype,
sd_params->rng_type,
sd_params->schedule,
sd_params->clip_on_cpu,
sd_params->control_net_cpu,
sd_params->vae_on_cpu,
sd_params->diffusion_flash_attn,
sd_params->chroma_use_dit_mask,
sd_params->chroma_use_t5_mask,
sd_params->chroma_t5_mask_pad);
sd_ctx_params_t params;
sd_ctx_params_init(&params);
params.model_path = sd_params->model_path.c_str();
params.clip_l_path = sd_params->clip_l_path.c_str();
params.clip_g_path = sd_params->clip_g_path.c_str();
params.t5xxl_path = sd_params->t5xxl_path.c_str();
params.diffusion_model_path = sd_params->diffusion_model_path.c_str();
params.vae_path = sd_params->vae_path.c_str();
params.taesd_path = sd_params->taesd_path.c_str();
params.control_net_path = sd_params->controlnet_path.c_str();
params.lora_model_dir = sd_params->lora_model_dir.c_str();
params.embedding_dir = sd_params->embeddings_path.c_str();
params.stacked_id_embed_dir = sd_params->stacked_id_embeddings_path.c_str();
params.vae_decode_only = vae_decode_only;
params.vae_tiling = sd_params->vae_tiling;
params.free_params_immediately = free_param;
params.n_threads = sd_params->n_threads;
params.wtype = sd_params->wtype;
params.rng_type = sd_params->rng_type;
params.schedule = sd_params->schedule;
params.keep_clip_on_cpu = sd_params->clip_on_cpu;
params.keep_control_net_on_cpu = sd_params->control_net_cpu;
params.keep_vae_on_cpu = sd_params->vae_on_cpu;
params.diffusion_flash_attn = sd_params->diffusion_flash_attn;
params.chroma_use_dit_mask = sd_params->chroma_use_dit_mask;
params.chroma_use_t5_mask = sd_params->chroma_use_t5_mask;
params.chroma_t5_mask_pad = sd_params->chroma_t5_mask_pad;
sd_ctx = new_sd_ctx(&params);
if (sd_ctx == NULL) {
printf("\nError: KCPP SD Failed to create context!\nIf using Flux/SD3.5, make sure you have ALL files required (e.g. VAE, T5, Clip...) or baked in!\n");
@ -305,7 +334,6 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
if(lorafilename!="" && inputs.lora_multiplier>0)
{
printf("\nApply LoRA...\n");
// sd_ctx->sd->set_pending_lora(lorafilename,inputs.lora_multiplier);
sd_ctx->sd->apply_lora_from_file(lorafilename,inputs.lora_multiplier);
}
@ -482,11 +510,29 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
auto loadedsdver = get_loaded_sd_version(sd_ctx);
if (loadedsdver == SDVersion::VERSION_FLUX)
{
if (!sd_loaded_chroma()) {
sd_params->cfg_scale = 1; //non chroma clamp cfg scale
if (loaded_model_is_chroma(sd_ctx)) {
if (sd_params->diffusion_flash_attn && sd_params->chroma_use_dit_mask) {
if (!sd_is_quiet && sddebugmode) {
printf("Chroma: flash attention is on, disabling DiT mask\n");
}
sd_params->chroma_use_dit_mask = false;
}
}
else {
if (sd_params->cfg_scale != 1.0f) {
//non chroma clamp cfg scale
if (!sd_is_quiet && sddebugmode) {
printf("Flux: clamping CFG Scale to 1\n");
}
sd_params->cfg_scale = 1.0f;
}
}
if (sampler == "euler a" || sampler == "k_euler_a" || sampler == "euler_a") {
sampler = "euler"; //euler a broken on flux
//euler a broken on flux
if (!sd_is_quiet && sddebugmode) {
printf("Flux: switching Euler A to Euler\n");
}
sampler = "euler";
}
}
@ -521,17 +567,6 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
bool dotile = (sd_params->width*sd_params->height > cfg_tiled_vae_threshold*cfg_tiled_vae_threshold);
set_sd_vae_tiling(sd_ctx,dotile); //changes vae tiling, prevents memory related crash/oom
if (sd_params->clip_skip <= 0) {
// workaround for clip_skip being "stuck" at the previous requested value
// 2 is the default for all recent base models (SD2, SDXL, Flux, SD3)
if (sd_version_is_sd1((SDVersion)loadedsdver)) {
sd_params->clip_skip = 1;
}
else {
sd_params->clip_skip = 2;
}
}
//for img2img
sd_image_t input_image = {0,0,0,nullptr};
std::vector<sd_image_t> extraimage_references;
@ -663,25 +698,25 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
}
}
std::vector<sd_image_t> kontext_imgs;
if(extra_image_data.size()>0 && loadedsdver==SDVersion::VERSION_FLUX && !sd_loaded_chroma())
std::vector<sd_image_t> reference_imgs;
if(extra_image_data.size()>0 && loadedsdver==SDVersion::VERSION_FLUX && !loaded_model_is_chroma(sd_ctx))
{
for(int i=0;i<extra_image_data.size();++i)
{
kontext_imgs.push_back(extraimage_references[i]);
reference_imgs.push_back(extraimage_references[i]);
}
if(!sd_is_quiet && sddebugmode==1)
{
printf("\nFlux Kontext: Using %d reference images\n",kontext_imgs.size());
printf("\nFlux Kontext: Using %d reference images\n",reference_imgs.size());
}
}
std::vector<sd_image_t*> photomaker_imgs;
std::vector<sd_image_t> photomaker_imgs;
if(photomaker_enabled && extra_image_data.size()>0)
{
for(int i=0;i<extra_image_data.size();++i)
{
photomaker_imgs.push_back(&extraimage_references[i]);
photomaker_imgs.push_back(extraimage_references[i]);
}
if(!sd_is_quiet && sddebugmode==1)
{
@ -689,6 +724,41 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
}
}
sd_img_gen_params_t params;
sd_img_gen_params_init (&params);
params.prompt = sd_params->prompt.c_str();
params.negative_prompt = sd_params->negative_prompt.c_str();
params.clip_skip = sd_params->clip_skip;
params.guidance.txt_cfg = sd_params->cfg_scale;
params.guidance.img_cfg = sd_params->cfg_scale;
params.guidance.distilled_guidance = sd_params->guidance;
params.eta = sd_params->eta;
params.width = sd_params->width;
params.height = sd_params->height;
params.sample_method = sd_params->sample_method;
params.sample_steps = sd_params->sample_steps;
params.seed = sd_params->seed;
params.batch_count = sd_params->batch_count;
params.control_cond = control_image;
params.control_strength = sd_params->control_strength;
params.style_strength = sd_params->style_ratio;
params.normalize_input = sd_params->normalize_input;
params.input_id_images_path = sd_params->input_id_images_path.c_str();
params.guidance.slg.layers = sd_params->skip_layers.data();
params.guidance.slg.layer_count = sd_params->skip_layers.size();
params.guidance.slg.layer_start = sd_params->skip_layer_start;
params.guidance.slg.layer_end = sd_params->skip_layer_end;
params.guidance.slg.scale = sd_params->slg_scale;
params.ref_images = reference_imgs.data();
params.ref_images_count = reference_imgs.size();
kcpp_img_gen_params_t extra_params = {};
extra_params.photomaker_references = photomaker_imgs.data();
extra_params.photomaker_reference_count = photomaker_imgs.size();
if (sd_params->mode == TXT2IMG) {
if(!sd_is_quiet && sddebugmode==1)
@ -708,32 +778,8 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
sd_params->control_strength);
}
results = generate_image(sd_ctx, &params, &extra_params);
results = txt2img(sd_ctx,
sd_params->prompt.c_str(),
sd_params->negative_prompt.c_str(),
sd_params->clip_skip,
sd_params->cfg_scale,
sd_params->guidance,
sd_params->eta,
sd_params->width,
sd_params->height,
sd_params->sample_method,
sd_params->sample_steps,
sd_params->seed,
sd_params->batch_count,
control_image,
sd_params->control_strength,
sd_params->style_ratio,
sd_params->normalize_input,
sd_params->input_id_images_path.c_str(),
kontext_imgs.data(), kontext_imgs.size(),
sd_params->skip_layers.data(),
sd_params->skip_layers.size(),
sd_params->slg_scale,
sd_params->skip_layer_start,
sd_params->skip_layer_end,
photomaker_imgs);
} else {
if (sd_params->width <= 0 || sd_params->width % 64 != 0 || sd_params->height <= 0 || sd_params->height % 64 != 0) {
@ -839,34 +885,12 @@ sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs)
sd_params->strength);
}
results = img2img(sd_ctx,
input_image,
mask_image,
sd_params->prompt.c_str(),
sd_params->negative_prompt.c_str(),
sd_params->clip_skip,
sd_params->cfg_scale,
sd_params->guidance,
sd_params->eta,
sd_params->width,
sd_params->height,
sd_params->sample_method,
sd_params->sample_steps,
sd_params->strength,
sd_params->seed,
sd_params->batch_count,
control_image,
sd_params->control_strength,
sd_params->style_ratio,
sd_params->normalize_input,
sd_params->input_id_images_path.c_str(),
kontext_imgs.data(), kontext_imgs.size(),
sd_params->skip_layers.data(),
sd_params->skip_layers.size(),
sd_params->slg_scale,
sd_params->skip_layer_start,
sd_params->skip_layer_end,
photomaker_imgs);
params.strength = sd_params->strength;
params.init_image = input_image;
params.mask_image = mask_image;
results = generate_image(sd_ctx, &params, &extra_params);
}
if (results == NULL) {

File diff suppressed because it is too large Load diff

View file

@ -30,7 +30,8 @@ extern "C" {
enum rng_type_t {
STD_DEFAULT_RNG,
CUDA_RNG
CUDA_RNG,
RNG_TYPE_COUNT
};
enum sample_method_t {
@ -46,7 +47,7 @@ enum sample_method_t {
LCM,
DDIM_TRAILING,
TCD,
N_SAMPLE_METHODS
SAMPLE_METHOD_COUNT
};
enum schedule_t {
@ -56,15 +57,15 @@ enum schedule_t {
EXPONENTIAL,
AYS,
GITS,
N_SCHEDULES
SCHEDULE_COUNT
};
// same as enum ggml_type
enum sd_type_t {
SD_TYPE_F32 = 0,
SD_TYPE_F16 = 1,
SD_TYPE_Q4_0 = 2,
SD_TYPE_Q4_1 = 3,
SD_TYPE_F32 = 0,
SD_TYPE_F16 = 1,
SD_TYPE_Q4_0 = 2,
SD_TYPE_Q4_1 = 3,
// SD_TYPE_Q4_2 = 4, support has been removed
// SD_TYPE_Q4_3 = 5, support has been removed
SD_TYPE_Q5_0 = 6,
@ -92,19 +93,17 @@ enum sd_type_t {
SD_TYPE_F64 = 28,
SD_TYPE_IQ1_M = 29,
SD_TYPE_BF16 = 30,
SD_TYPE_Q4_0_4_4 = 31,
SD_TYPE_Q4_0_4_8 = 32,
SD_TYPE_Q4_0_8_8 = 33,
SD_TYPE_TQ1_0 = 34,
SD_TYPE_TQ2_0 = 35,
SD_TYPE_IQ4_NL_4_4 = 36,
// SD_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
// SD_TYPE_Q4_0_4_8 = 32,
// SD_TYPE_Q4_0_8_8 = 33,
SD_TYPE_TQ1_0 = 34,
SD_TYPE_TQ2_0 = 35,
// SD_TYPE_IQ4_NL_4_4 = 36,
// SD_TYPE_IQ4_NL_4_8 = 37,
// SD_TYPE_IQ4_NL_8_8 = 38,
SD_TYPE_COUNT = 40,
};
SD_API const char* sd_type_name(enum sd_type_t type);
enum sd_log_level_t {
SD_LOG_DEBUG,
SD_LOG_INFO,
@ -112,6 +111,105 @@ enum sd_log_level_t {
SD_LOG_ERROR
};
typedef struct {
const char* model_path;
const char* clip_l_path;
const char* clip_g_path;
const char* t5xxl_path;
const char* diffusion_model_path;
const char* vae_path;
const char* taesd_path;
const char* control_net_path;
const char* lora_model_dir;
const char* embedding_dir;
const char* stacked_id_embed_dir;
bool vae_decode_only;
bool vae_tiling;
bool free_params_immediately;
int n_threads;
enum sd_type_t wtype;
enum rng_type_t rng_type;
enum schedule_t schedule;
bool keep_clip_on_cpu;
bool keep_control_net_on_cpu;
bool keep_vae_on_cpu;
bool diffusion_flash_attn;
bool diffusion_conv_direct;
bool vae_conv_direct;
bool chroma_use_dit_mask;
bool chroma_use_t5_mask;
int chroma_t5_mask_pad;
} sd_ctx_params_t;
typedef struct {
uint32_t width;
uint32_t height;
uint32_t channel;
uint8_t* data;
} sd_image_t;
typedef struct {
int* layers;
size_t layer_count;
float layer_start;
float layer_end;
float scale;
} sd_slg_params_t;
typedef struct {
float txt_cfg;
float img_cfg;
float min_cfg;
float distilled_guidance;
sd_slg_params_t slg;
} sd_guidance_params_t;
typedef struct {
const char* prompt;
const char* negative_prompt;
int clip_skip;
sd_guidance_params_t guidance;
sd_image_t init_image;
sd_image_t* ref_images;
int ref_images_count;
sd_image_t mask_image;
int width;
int height;
enum sample_method_t sample_method;
int sample_steps;
float eta;
float strength;
int64_t seed;
int batch_count;
const sd_image_t* control_cond;
float control_strength;
float style_strength;
bool normalize_input;
const char* input_id_images_path;
} sd_img_gen_params_t;
typedef struct {
sd_image_t* photomaker_references;
int photomaker_reference_count;
} kcpp_img_gen_params_t;
typedef struct {
sd_image_t init_image;
int width;
int height;
sd_guidance_params_t guidance;
enum sample_method_t sample_method;
int sample_steps;
float strength;
int64_t seed;
int video_frames;
int motion_bucket_id;
int fps;
float augmentation_level;
} sd_vid_gen_params_t;
typedef struct sd_ctx_t sd_ctx_t;
typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
@ -120,154 +218,42 @@ SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
SD_API int32_t sd_get_num_physical_cores();
SD_API const char* sd_get_system_info();
typedef struct {
uint32_t width;
uint32_t height;
uint32_t channel;
uint8_t* data;
} sd_image_t;
SD_API const char* sd_type_name(enum sd_type_t type);
SD_API enum sd_type_t str_to_sd_type(const char* str);
SD_API const char* sd_rng_type_name(enum rng_type_t rng_type);
SD_API enum rng_type_t str_to_rng_type(const char* str);
SD_API const char* sd_sample_method_name(enum sample_method_t sample_method);
SD_API enum sample_method_t str_to_sample_method(const char* str);
SD_API const char* sd_schedule_name(enum schedule_t schedule);
SD_API enum schedule_t str_to_schedule(const char* str);
typedef struct sd_ctx_t sd_ctx_t;
SD_API void set_sd_vae_tiling(sd_ctx_t* ctx, bool tiling);
SD_API int get_loaded_sd_version(sd_ctx_t* ctx);
SD_API bool sd_loaded_chroma();
SD_API sd_ctx_t* new_sd_ctx(const char* model_path,
const char* clip_l_path,
const char* clip_g_path,
const char* t5xxl_path,
const char* diffusion_model_path,
const char* vae_path,
const char* taesd_path,
const char* control_net_path_c_str,
const char* lora_model_dir,
const char* embed_dir_c_str,
const char* stacked_id_embed_dir_c_str,
bool vae_decode_only,
bool vae_tiling,
bool free_params_immediately,
int n_threads,
enum sd_type_t wtype,
enum rng_type_t rng_type,
enum schedule_t s,
bool keep_clip_on_cpu,
bool keep_control_net_cpu,
bool keep_vae_on_cpu,
bool diffusion_flash_attn,
bool chroma_use_dit_mask,
bool chroma_use_t5_mask,
int chroma_t5_mask_pad);
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
SD_API sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params);
SD_API void free_sd_ctx(sd_ctx_t* sd_ctx);
SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
const char* prompt,
const char* negative_prompt,
int clip_skip,
float cfg_scale,
float guidance,
float eta,
int width,
int height,
enum sample_method_t sample_method,
int sample_steps,
int64_t seed,
int batch_count,
const sd_image_t* control_cond,
float control_strength,
float style_strength,
bool normalize_input,
const char* input_id_images_path,
sd_image_t* kontext_imgs,
int kontext_img_count,
int* skip_layers,
size_t skip_layers_count,
float slg_scale,
float skip_layer_start,
float skip_layer_end,
const std::vector<sd_image_t*> photomaker_references);
SD_API void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params);
SD_API char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params);
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params, const kcpp_img_gen_params_t* kcpp_img_gen_params);
SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
sd_image_t init_image,
sd_image_t mask_image,
const char* prompt,
const char* negative_prompt,
int clip_skip,
float cfg_scale,
float guidance,
float eta,
int width,
int height,
enum sample_method_t sample_method,
int sample_steps,
float strength,
int64_t seed,
int batch_count,
const sd_image_t* control_cond,
float control_strength,
float style_strength,
bool normalize_input,
const char* input_id_images_path,
sd_image_t* kontext_imgs,
int kontext_img_count,
int* skip_layers,
size_t skip_layers_count,
float slg_scale,
float skip_layer_start,
float skip_layer_end,
const std::vector<sd_image_t*> photomaker_references);
SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
sd_image_t init_image,
int width,
int height,
int video_frames,
int motion_bucket_id,
int fps,
float augmentation_level,
float min_cfg,
float cfg_scale,
enum sample_method_t sample_method,
int sample_steps,
float strength,
int64_t seed);
SD_API sd_image_t* edit(sd_ctx_t* sd_ctx,
sd_image_t* ref_images,
int ref_images_count,
const char* prompt,
const char* negative_prompt,
int clip_skip,
float cfg_scale,
float guidance,
float eta,
int width,
int height,
enum sample_method_t sample_method,
int sample_steps,
float strength,
int64_t seed,
int batch_count,
const sd_image_t* control_cond,
float control_strength,
float style_strength,
bool normalize_input,
int* skip_layers,
size_t skip_layers_count,
float slg_scale,
float skip_layer_start,
float skip_layer_end);
SD_API void sd_vid_gen_params_init(sd_vid_gen_params_t* sd_vid_gen_params);
SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* sd_vid_gen_params); // broken
typedef struct upscaler_ctx_t upscaler_ctx_t;
SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
int n_threads);
int n_threads,
bool direct);
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);
SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, enum sd_type_t output_type);
SD_API bool convert(const char* input_path,
const char* vae_path,
const char* output_path,
enum sd_type_t output_type,
const char* tensor_type_rules);
SD_API uint8_t* preprocess_canny(uint8_t* img,
int width,
@ -282,4 +268,4 @@ SD_API uint8_t* preprocess_canny(uint8_t* img,
}
#endif
#endif // __STABLE_DIFFUSION_H__
#endif // __STABLE_DIFFUSION_H__

View file

@ -457,8 +457,8 @@ protected:
int64_t hidden_size;
float eps;
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
enum ggml_type wtype = GGML_TYPE_F32;
params["weight"] = ggml_new_tensor_1d(ctx, wtype, hidden_size);
}
@ -735,7 +735,7 @@ struct T5Runner : public GGMLRunner {
std::vector<int> relative_position_bucket_vec;
T5Runner(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
const String2GGMLType& tensor_types,
const std::string prefix,
int64_t num_layers = 24,
int64_t model_dim = 4096,
@ -876,16 +876,14 @@ struct T5Embedder {
T5UniGramTokenizer tokenizer;
T5Runner model;
static std::map<std::string, enum ggml_type> empty_tensor_types;
T5Embedder(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
const std::string prefix = "",
int64_t num_layers = 24,
int64_t model_dim = 4096,
int64_t ff_dim = 10240,
int64_t num_heads = 64,
int64_t vocab_size = 32128)
const String2GGMLType& tensor_types = {},
const std::string prefix = "",
int64_t num_layers = 24,
int64_t model_dim = 4096,
int64_t ff_dim = 10240,
int64_t num_heads = 64,
int64_t vocab_size = 32128)
: model(backend, tensor_types, prefix, num_layers, model_dim, ff_dim, num_heads, vocab_size) {
}

View file

@ -149,7 +149,7 @@ public:
if (i == 1) {
h = ggml_relu_inplace(ctx, h);
} else {
h = ggml_upscale(ctx, h, 2, ggml_scale_mode::GGML_SCALE_MODE_NEAREST);
h = ggml_upscale(ctx, h, 2, GGML_SCALE_MODE_NEAREST);
}
continue;
}
@ -196,7 +196,7 @@ struct TinyAutoEncoder : public GGMLRunner {
bool decode_only = false;
TinyAutoEncoder(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
const String2GGMLType& tensor_types,
const std::string prefix,
bool decoder_only = true,
SDVersion version = VERSION_SD1)
@ -206,6 +206,17 @@ struct TinyAutoEncoder : public GGMLRunner {
taesd.init(params_ctx, tensor_types, prefix);
}
void enable_conv2d_direct() {
std::vector<GGMLBlock*> blocks;
taesd.get_all_blocks(blocks);
for (auto block : blocks) {
if (block->get_desc() == "Conv2d") {
auto conv_block = (Conv2d*)block;
conv_block->enable_direct();
}
}
}
std::string get_desc() {
return "taesd";
}

View file

@ -166,7 +166,6 @@ public:
// ldm.modules.diffusionmodules.openaimodel.UNetModel
class UnetModelBlock : public GGMLBlock {
protected:
static std::map<std::string, enum ggml_type> empty_tensor_types;
SDVersion version = VERSION_SD1;
// network hparams
int in_channels = 4;
@ -184,7 +183,7 @@ public:
int model_channels = 320;
int adm_in_channels = 2816; // only for VERSION_SDXL/SVD
UnetModelBlock(SDVersion version = VERSION_SD1, std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types, bool flash_attn = false)
UnetModelBlock(SDVersion version = VERSION_SD1, const String2GGMLType& tensor_types = {}, bool flash_attn = false)
: version(version) {
if (sd_version_is_sd2(version)) {
context_dim = 1024;
@ -207,6 +206,8 @@ public:
}
if (sd_version_is_inpaint(version)) {
in_channels = 9;
} else if (sd_version_is_unet_edit(version)) {
in_channels = 8;
}
// dims is always 2
@ -537,7 +538,7 @@ struct UNetModelRunner : public GGMLRunner {
UnetModelBlock unet;
UNetModelRunner(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
const String2GGMLType& tensor_types,
const std::string prefix,
SDVersion version = VERSION_SD1,
bool flash_attn = false)
@ -545,6 +546,18 @@ struct UNetModelRunner : public GGMLRunner {
unet.init(params_ctx, tensor_types, prefix);
}
void enable_conv2d_direct() {
std::vector<GGMLBlock*> blocks;
unet.get_all_blocks(blocks);
for (auto block : blocks) {
if (block->get_desc() == "Conv2d") {
LOG_DEBUG("block %s", block->get_desc().c_str());
auto conv_block = (Conv2d*)block;
conv_block->enable_direct();
}
}
}
std::string get_desc() {
return "unet";
}
@ -657,4 +670,4 @@ struct UNetModelRunner : public GGMLRunner {
}
};
#endif // __UNET_HPP__
#endif // __UNET_HPP__

View file

@ -9,9 +9,12 @@ struct UpscalerGGML {
std::shared_ptr<ESRGAN> esrgan_upscaler;
std::string esrgan_path;
int n_threads;
bool direct = false;
UpscalerGGML(int n_threads)
: n_threads(n_threads) {
UpscalerGGML(int n_threads,
bool direct = false)
: n_threads(n_threads),
direct(direct) {
}
bool load_from_file(const std::string& esrgan_path) {
@ -21,12 +24,17 @@ struct UpscalerGGML {
#endif
#ifdef SD_USE_METAL
LOG_DEBUG("Using Metal backend");
ggml_log_set(ggml_log_callback_default, nullptr);
backend = ggml_backend_metal_init();
#endif
#ifdef SD_USE_VULKAN
LOG_DEBUG("Using Vulkan backend");
backend = ggml_backend_vk_init(0);
#endif
#ifdef SD_USE_OPENCL
LOG_DEBUG("Using OpenCL backend");
backend = ggml_backend_opencl_init();
#endif
#ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend");
backend = ggml_backend_sycl_init(0);
@ -42,6 +50,9 @@ struct UpscalerGGML {
}
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
esrgan_upscaler = std::make_shared<ESRGAN>(backend, model_loader.tensor_storages_types);
if (direct) {
esrgan_upscaler->enable_conv2d_direct();
}
if (!esrgan_upscaler->load_from_file(esrgan_path)) {
return false;
}
@ -99,14 +110,15 @@ struct upscaler_ctx_t {
};
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
int n_threads) {
int n_threads,
bool direct = false) {
upscaler_ctx_t* upscaler_ctx = (upscaler_ctx_t*)malloc(sizeof(upscaler_ctx_t));
if (upscaler_ctx == NULL) {
return NULL;
}
std::string esrgan_path(esrgan_path_c_str);
upscaler_ctx->upscaler = new UpscalerGGML(n_threads);
upscaler_ctx->upscaler = new UpscalerGGML(n_threads, direct);
if (upscaler_ctx->upscaler == NULL) {
return NULL;
}

View file

@ -102,19 +102,32 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str());
// Find the first file in the directory
hFind = FindFirstFile(directoryPath, &findFileData);
hFind = FindFirstFile(directoryPath, &findFileData);
bool isAbsolutePath = false;
// Check if the directory was found
if (hFind == INVALID_HANDLE_VALUE) {
printf("Unable to find directory.\n");
return files;
printf("Unable to find directory. Try with original path \n");
char directoryPathAbsolute[MAX_PATH];
sprintf(directoryPathAbsolute, "%s*", dir.c_str());
hFind = FindFirstFile(directoryPathAbsolute, &findFileData);
isAbsolutePath = true;
if (hFind == INVALID_HANDLE_VALUE) {
printf("Absolute path was also wrong.\n");
return files;
}
}
// Loop through all files in the directory
do {
// Check if the found file is a regular file (not a directory)
if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
files.push_back(std::string(currentDirectory) + "\\" + dir + "\\" + std::string(findFileData.cFileName));
if (isAbsolutePath) {
files.push_back(dir + "\\" + std::string(findFileData.cFileName));
} else {
files.push_back(std::string(currentDirectory) + "\\" + dir + "\\" + std::string(findFileData.cFileName));
}
}
} while (FindNextFile(hFind, &findFileData) != 0);
@ -447,10 +460,6 @@ const char* sd_get_system_info() {
return buffer;
}
const char* sd_type_name(enum sd_type_t type) {
return ggml_type_name((ggml_type)type);
}
sd_image_f32_t sd_image_t_to_sd_image_f32_t(sd_image_t image) {
sd_image_f32_t converted_image;
converted_image.width = image.width;

View file

@ -7,6 +7,9 @@
#include "stable-diffusion.h"
#define SAFE_STR(s) ((s) ? (s) : "")
#define BOOL_STR(b) ((b) ? "true" : "false")
bool ends_with(const std::string& str, const std::string& ending);
bool starts_with(const std::string& str, const std::string& start);
bool contains(const std::string& str, const std::string& substr);

View file

@ -163,8 +163,8 @@ public:
class VideoResnetBlock : public ResnetBlock {
protected:
void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
enum ggml_type wtype = (tensor_types.find(prefix + "mix_factor") != tensor_types.end()) ? tensor_types[prefix + "mix_factor"] : GGML_TYPE_F32;
void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
enum ggml_type wtype = get_type(prefix + "mix_factor", tensor_types, GGML_TYPE_F32);
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
}
@ -525,7 +525,7 @@ struct AutoEncoderKL : public GGMLRunner {
AutoencodingEngine ae;
AutoEncoderKL(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types,
const String2GGMLType& tensor_types,
const std::string prefix,
bool decode_only = false,
bool use_video_decoder = false,
@ -534,6 +534,17 @@ struct AutoEncoderKL : public GGMLRunner {
ae.init(params_ctx, tensor_types, prefix);
}
void enable_conv2d_direct() {
std::vector<GGMLBlock*> blocks;
ae.get_all_blocks(blocks);
for (auto block : blocks) {
if (block->get_desc() == "Conv2d") {
auto conv_block = (Conv2d*)block;
conv_block->enable_direct();
}
}
}
std::string get_desc() {
return "vae";
}