Mirror of https://github.com/LostRuins/koboldcpp.git, synced 2025-09-13 10:29:43 +00:00
sync with sd.cpp
This commit is contained in:
parent e5af9b5ea9
commit 186227fc26
8 changed files with 234 additions and 82 deletions
@@ -744,10 +744,10 @@ namespace Flux {
            return ids;
        }

        // Generate positional embeddings
        std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) {
            std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents);
            std::vector<std::vector<float>> trans_ids = transpose(ids);
            size_t pos_len = ids.size();
            int num_axes = axes_dim.size();
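
Note: a minimal sketch of how the gen_pe signature above might be called; the latent size, patch size, theta and axes_dim values are illustrative assumptions, not values taken from this commit.

    // Hypothetical call site: 64x64 latent, patch size 2, no reference latents.
    std::vector<ggml_tensor*> ref_latents;      // assumption: no reference images
    std::vector<int> axes_dim = {16, 56, 56};   // assumption: typical Flux rotary axes
    std::vector<float> pe = gen_pe(/*h=*/64, /*w=*/64, /*patch_size=*/2, /*bs=*/1,
                                   /*context_len=*/256, ref_latents,
                                   /*theta=*/10000, axes_dim);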
@@ -872,7 +872,7 @@ namespace Flux {
                                         struct ggml_tensor* y,
                                         struct ggml_tensor* guidance,
                                         struct ggml_tensor* pe,
-                                        struct ggml_tensor* arange = NULL,
+                                        struct ggml_tensor* mod_index_arange = NULL,
                                         std::vector<int> skip_layers = {}) {
            auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
            auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
@@ -887,9 +887,10 @@ namespace Flux {
                auto distill_timestep = ggml_nn_timestep_embedding(ctx, timesteps, 16, 10000, 1000.f);
                auto distill_guidance = ggml_nn_timestep_embedding(ctx, guidance, 16, 10000, 1000.f);

-               // auto arange = ggml_arange(ctx, 0, (float)mod_index_length, 1); // Not working on a lot of backends, precomputing it on CPU instead
+               // auto mod_index_arange = ggml_arange(ctx, 0, (float)mod_index_length, 1);
+               // ggml_arange not working on a lot of backends, precomputing it on CPU instead
                GGML_ASSERT(arange != NULL);
-               auto modulation_index = ggml_nn_timestep_embedding(ctx, arange, 32, 10000, 1000.f); // [1, 344, 32]
+               auto modulation_index = ggml_nn_timestep_embedding(ctx, mod_index_arange, 32, 10000, 1000.f); // [1, 344, 32]

                // Batch broadcast (will it ever be useful)
                modulation_index = ggml_repeat(ctx, modulation_index, ggml_new_tensor_3d(ctx, GGML_TYPE_F32, modulation_index->ne[0], modulation_index->ne[1], img->ne[2])); // [N, 344, 32]
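
Note: per the comment above, ggml_arange is unreliable on several backends, so the modulation index is precomputed on the CPU and handed in as a plain tensor. A minimal sketch of that pattern using core ggml calls, assuming the destination tensor is already allocated in a backend buffer; the file itself uses its own helpers (arange, set_backend_tensor_data) instead.

    // Build the indices 0..343 on the host, then copy them into the backend tensor.
    std::vector<float> host_arange(344);
    for (size_t i = 0; i < host_arange.size(); ++i) {
        host_arange[i] = (float)i;
    }
    ggml_tensor* idx = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, (int64_t)host_arange.size());
    ggml_backend_tensor_set(idx, host_arange.data(), 0, ggml_nbytes(idx));  // host -> backend copy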
@@ -982,7 +983,7 @@ namespace Flux {
                                    struct ggml_tensor* y,
                                    struct ggml_tensor* guidance,
                                    struct ggml_tensor* pe,
-                                   struct ggml_tensor* arange = NULL,
+                                   struct ggml_tensor* mod_index_arange = NULL,
                                    std::vector<ggml_tensor*> ref_latents = {},
                                    std::vector<int> skip_layers = {}) {
            // Forward pass of DiT.
@@ -1024,7 +1025,7 @@ namespace Flux {
                    }
                }

-               auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
+               auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
                if (out->ne[1] > img_tokens) {
                    out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
                    out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0);
@@ -1044,15 +1045,18 @@ namespace Flux {
    public:
        FluxParams flux_params;
        Flux flux;
-       std::vector<float> pe_vec, range; // for cache
+       std::vector<float> pe_vec;
+       std::vector<float> mod_index_arange_vec; // for cache
        SDVersion version;
+       bool use_mask = false;

        FluxRunner(ggml_backend_t backend,
                   std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
                   const std::string prefix = "",
                   SDVersion version = VERSION_FLUX,
-                  bool flash_attn = false)
-           : GGMLRunner(backend) {
+                  bool flash_attn = false,
+                  bool use_mask = false)
+           : GGMLRunner(backend), use_mask(use_mask) {
            flux_params.flash_attn = flash_attn;
            flux_params.guidance_embed = false;
            flux_params.depth = 0;
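
Note: a hypothetical construction of the runner with the new flag; backend, tensor_types and the prefix string are placeholder values from the caller's setup, not part of this commit.

    // Assumption: 'backend' and 'tensor_types' come from the surrounding loader code.
    FluxRunner runner(backend,
                      tensor_types,
                      "model.diffusion_model",   // hypothetical tensor-name prefix
                      VERSION_FLUX,
                      /*flash_attn=*/false,
                      /*use_mask=*/true);        // when false, y is dropped for Chroma models (see the next hunk)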
@@ -1116,51 +1120,28 @@ namespace Flux {
                            struct ggml_tensor* y,
                            struct ggml_tensor* guidance,
                            std::vector<ggml_tensor*> ref_latents = {},
-                           std::vector<int> skip_layers = std::vector<int>()) {
+                           std::vector<int> skip_layers = {}) {
            GGML_ASSERT(x->ne[3] == 1);
            struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);

-           struct ggml_tensor* precompute_arange = NULL;
+           struct ggml_tensor* mod_index_arange = NULL;

            x = to_backend(x);
            context = to_backend(context);
            if (c_concat != NULL) {
                c_concat = to_backend(c_concat);
            }

            if (flux_params.is_chroma) {
-               const char* SD_CHROMA_ENABLE_GUIDANCE = getenv("SD_CHROMA_ENABLE_GUIDANCE");
-               bool disable_guidance = true;
-               if (SD_CHROMA_ENABLE_GUIDANCE != NULL) {
-                   std::string enable_guidance_str = SD_CHROMA_ENABLE_GUIDANCE;
-                   if (enable_guidance_str == "ON" || enable_guidance_str == "TRUE") {
-                       LOG_WARN("Chroma guidance has been enabled. Image might be broken. (SD_CHROMA_ENABLE_GUIDANCE env variable to \"OFF\" to disable)", SD_CHROMA_ENABLE_GUIDANCE);
-                       disable_guidance = false;
-                   } else if (enable_guidance_str != "OFF" && enable_guidance_str != "FALSE") {
-                       LOG_WARN("SD_CHROMA_ENABLE_GUIDANCE environment variable has unexpected value. Assuming default (\"OFF\"). (Expected \"ON\"/\"TRUE\" or\"OFF\"/\"FALSE\", got \"%s\")", SD_CHROMA_ENABLE_GUIDANCE);
-                   }
-               }
-               if (disable_guidance) {
-                   // LOG_DEBUG("Forcing guidance to 0 for chroma model (SD_CHROMA_ENABLE_GUIDANCE env variable to \"ON\" to enable)");
-                   guidance = ggml_set_f32(guidance, 0);
-               }
+               guidance = ggml_set_f32(guidance, 0);

+               if (!use_mask) {
+                   y = NULL;
+               }

-               const char* SD_CHROMA_USE_DIT_MASK = getenv("SD_CHROMA_USE_DIT_MASK");
-               if (SD_CHROMA_USE_DIT_MASK != nullptr) {
-                   std::string sd_chroma_use_DiT_mask_str = SD_CHROMA_USE_DIT_MASK;
-                   if (sd_chroma_use_DiT_mask_str == "OFF" || sd_chroma_use_DiT_mask_str == "FALSE") {
-                       y = NULL;
-                   } else if (sd_chroma_use_DiT_mask_str != "ON" && sd_chroma_use_DiT_mask_str != "TRUE") {
-                       LOG_WARN("SD_CHROMA_USE_DIT_MASK environment variable has unexpected value. Assuming default (\"ON\"). (Expected \"ON\"/\"TRUE\" or\"OFF\"/\"FALSE\", got \"%s\")", SD_CHROMA_USE_DIT_MASK);
-                   }
-               }

-               // ggml_arrange is not working on some backends, and y isn't used, so let's reuse y to precompute it
-               range = arange(0, 344);
-               precompute_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, range.size());
-               set_backend_tensor_data(precompute_arange, range.data());
-               // y = NULL;
+               // ggml_arange is not working on some backends, precompute it
+               mod_index_arange_vec = arange(0, 344);
+               mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
+               set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data());
            }
            y = to_backend(y);
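
Note: arange(0, 344) above is presumably a small host-side helper that returns the float sequence 0..343; the real implementation lives elsewhere in the tree. A sketch of what such a helper could look like:

    #include <vector>

    // Hypothetical helper: returns {start, start+1, ..., stop-1} as floats.
    std::vector<float> arange(float start, float stop, float step = 1.0f) {
        std::vector<float> out;
        for (float v = start; v < stop; v += step) {
            out.push_back(v);
        }
        return out;
    }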
@@ -1189,7 +1170,7 @@ namespace Flux {
                                            y,
                                            guidance,
                                            pe,
-                                           precompute_arange,
+                                           mod_index_arange,
                                            ref_latents,
                                            skip_layers);